Difference between revisions of "Recycling Corner/DNA Generator"
(POLYMERAZE - a DNA generator script) |
|||
| Line 1: | Line 1: | ||
| − | POLYMERAZE takes a string message encoding a nucleotide (nt) sequence and generates a corresponding double helix one nt at a time from the 5' terminus to the 3' terminus rotating the emerging helix as it goes. | + | '''POLYMERAZE''' takes a string message encoding a nucleotide (nt) sequence and generates a corresponding double helix one nt at a time from the 5' terminus to the 3' terminus rotating the emerging helix as it goes. |
The message is a string entered by the user at a prompt. | The message is a string entered by the user at a prompt. | ||
| Line 14: | Line 14: | ||
The IUPAC/IUBMB 1 letter code is used: A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil | The IUPAC/IUBMB 1 letter code is used: A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil | ||
| + | |||
| + | The top level function '''plicoGenNt''' prompts the user for input. | ||
| + | |||
| + | The top level function '''plicoGenHelix''' accepts a string as a parameter. | ||
<pre># POLYMERAZE - Jmol script by Ron Mignery | <pre># POLYMERAZE - Jmol script by Ron Mignery | ||
| − | # v1. | + | # v1.2 beta 1/22/2014 for Jmol 13.4 - fix user cancel bug |
| + | # | ||
| + | # POLYMERAZE takes a string message encoding a nucleotide (nt) sequence | ||
| + | # and generates a corresponding double helix one nt at a time from the | ||
| + | # 5' terminus to the 3' terminus rotating the emerging helix as it goes. | ||
| + | # | ||
| + | # The message is a string entered by the user at a prompt. | ||
| + | # It may be typed in or pasted in and be of any length | ||
| + | # If prepended with '3' then the string is considered as 3' to 5' | ||
| + | # If prepended with 'R' then RNA is generated instead of DNA | ||
| + | # If prepended with 'S' then a single strand helix is produced | ||
| + | # If prepended with 'M' then a mixed helix is produced where the first | ||
| + | # strand is DNA and the second RNA - multiple prepends are allowed | ||
| + | # though 'M' is inconsistent with 'R' or 'S' | ||
| + | # | ||
| + | # If the 3d character is ':' then the two chains are labeled by the | ||
| + | # two preceding characters instead of the default 'A' and 'B' | ||
| + | # Likewise if the 2d character is ':' then the presumably single chain is | ||
| + | # labeled by the single preceding character | ||
# | # | ||
| + | # The IUPAC/IUBMB 1 letter code is used: | ||
| + | # A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil | ||
| + | |||
# The following constant values determine the pitch of the helices | # The following constant values determine the pitch of the helices | ||
gC5O5PO3 = -27.0 | gC5O5PO3 = -27.0 | ||
| Line 340: | Line 365: | ||
# Generate a helix or two | # Generate a helix or two | ||
| − | function | + | function plicoGenHelix(gSeq) { |
var single = FALSE | var single = FALSE | ||
var reverse = FALSE | var reverse = FALSE | ||
| Line 388: | Line 413: | ||
} | } | ||
| − | |||
| − | |||
| − | # Get the sequence from the user | + | function plicoGenNT { |
| − | gSeq = prompt("Enter NT sequence (<3RSM>ACGTU)", "")%9999%0 | + | echo Generating Nuleotide Helix |
| − | if (gSeq.count > 0) { | + | |
| − | + | # Get the sequence from the user | |
| + | gSeq = prompt("Enter NT sequence (<3RSM>ACGTU)", "")%9999%0 | ||
| + | if ((gSeq != "NULL") and (gSeq.count > 0)) { | ||
| + | plicoGenHelix(gSeq) | ||
| + | } | ||
} | } | ||
</pre> | </pre> | ||
Revision as of 17:50, 22 January 2014
POLYMERAZE takes a string message encoding a nucleotide (nt) sequence and generates a corresponding double helix one nt at a time from the 5' terminus to the 3' terminus rotating the emerging helix as it goes.
The message is a string entered by the user at a prompt. It may be typed in or pasted in and be of any length. If prepended with '3' then the string is considered as 3' to 5'. If prepended with 'R' then RNA is generated instead of DNA. If prepended with 'S' then a single strand helix is produced. If prepended with 'M' then a mixed helix is produced where the first strand is DNA and the second RNA. Multiple prepends are allowed (though 'M' would be inconsistent with 'R' or 'S').
If the 3d character is ':' then the two chains are labeled by the two preceding characters instead of the default 'A' and 'B'. Likewise if the 2d character is ':' then the presumably single chain is labeled by the preceding single character.
A polynucleotide may be added onto by subsequent runs if the previous helices are not moved away. Note that a single chain helix could then be added to a double chain or RNA to DNA or whatever. Have fun...
The IUPAC/IUBMB 1 letter code is used: A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil
The top level function plicoGenNt prompts the user for input.
The top level function plicoGenHelix accepts a string as a parameter.
# POLYMERAZE - Jmol script by Ron Mignery
# v1.2 beta 1/22/2014 for Jmol 13.4 - fix user cancel bug
#
# POLYMERAZE takes a string message encoding a nucleotide (nt) sequence
# and generates a corresponding double helix one nt at a time from the
# 5' terminus to the 3' terminus rotating the emerging helix as it goes.
#
# The message is a string entered by the user at a prompt.
# It may be typed in or pasted in and be of any length
# If prepended with '3' then the string is considered as 3' to 5'
# If prepended with 'R' then RNA is generated instead of DNA
# If prepended with 'S' then a single strand helix is produced
# If prepended with 'M' then a mixed helix is produced where the first
# strand is DNA and the second RNA - multiple prepends are allowed
# though 'M' is inconsistent with 'R' or 'S'
#
# If the 3d character is ':' then the two chains are labeled by the
# two preceding characters instead of the default 'A' and 'B'
# Likewise if the 2d character is ':' then the presumably single chain is
# labeled by the single preceding character
#
# The IUPAC/IUBMB 1 letter code is used:
# A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil
# The following constant values determine the pitch of the helices
gC5O5PO3 = -27.0
gO5PO3C3 = -117.8
gPO3C3C4 = -171.9
gCHAIN1 = 'A' # The chain id
gCHAIN2 = 'B' # The complementary chain id
# Lookup 3 letter code from 1 letter code
gNt3from1 = {"A":" DA", "C":" DC", "G":" DG", "T":" DT", "U":" DU"}
gNtComp = {"A":"T", "C":"G", "G":"C", "T":"A", "U":"A"}
# Generate PDB atom record
# Writes gNa or gNb
function genAtom(atomname, group, resno, xyz, comp) {
# Fixed column format:
#ATOM 500 O4' DA B 29 -3.745 7.211 45.474
while (atomname.size < 3) {
atomname += " ";
}
var a = format("ATOM %5d %4s %3s ", (comp ? gNb : gNa), atomname, group )
a += format("%s%4d %8.3f", (comp ? gCHAIN2 : gCHAIN1), resno, xyz[1] )
a += format("%8.3f%8.3f\n", xyz[2], xyz[3] )
if (comp) gNb++; else gNa++
return a
};
# Generate a PDB nucleotide record set
# Calls genAtom that writes gNa or gNb
function genNT(i, nt, rna, comp) {
# From constructed nucleotides
var P0 = [0.000, 0.000, 0.000]
var OP1= [-0.973,0.363,-1.067]
var OP2= [0.297,-1.428, 0.272]
var O5p= [1.351, 0.795,-0.286]
var C5p= [1.345, 2.211,-0.125]
var C4p= [2.732, 2.786,-0.255]
var O4p= [3.413, 2.900, 1.019]
var C3p= [3.670, 2.020,-1.178]
var O3p= [4.269, 2.960,-2.051]
var C2p= [4.717, 1.445,-0.238]
var O2p= [6.046, 1.365,-0.884]
var C1p= [4.758, 2.505, 0.846]
var N1ct= [5.277, 2.056, 2.143]
var C2ct= [6.236, 2.836, 2.740]
var O2ct= [6.670, 3.853, 2.230]
var N3ct= [6.674, 2.381, 3.958]
var C4ct= [6.256, 1.245, 4.622]
var NO4ct [6.726, 0.972, 5.728]
var C5ct= [5.255, 0.455, 3.924]
var C6ct= [4.820, 0.900, 2.737]
var nC7ct [4.762,-0.811, 4.551]
var N9ag= [5.256, 2.091, 2.152]
var C8ag= [4.867, 1.016, 2.913]
var N7ag= [5.532, 0.894, 4.035]
var C5ag= [6.425, 1.959, 4.013]
var C6ag= [7.401, 2.391, 4.922]
var NO6ag=[7.656, 1.780, 6.081]
var N1ag= [8.118, 3.493, 4.599]
var C2ag= [7.865, 4.104, 3.438]
var nN2ag [8.616, 5.197, 3.181]
var N3ag= [6.968, 3.796, 2.503]
var C4ag= [6.271, 2.701, 2.856]
# Build PDB atom records common to all NTs
n3 = gNt3from1[nt]
if (n3 = "") {
n3 = " D?"
}
var a = genAtom(" P ", n3, i, P0, comp)
a += genAtom(" OP1", n3, i, OP1, comp)
a += genAtom(" OP2", n3, i, OP2, comp)
a += genAtom(" O5'", n3, i, O5p, comp)
a += genAtom(" C5'", n3, i, C5p, comp)
a += genAtom(" C4'", n3, i, C4p, comp)
a += genAtom(" O4'", n3, i, O4p, comp)
a += genAtom(" C3'", n3, i, C3p, comp)
a += genAtom(" O3'", n3, i, O3p, comp)
a += genAtom(" C2'", n3, i, C2p, comp)
a += genAtom(" C1'", n3, i, C1p, comp)
if (rna) {
a += genAtom(" O2'", n3, i, O2p, comp)
}
# Now add NT specific atom records
switch (nt) {
case 'A' :
a += genAtom(" N9 ", n3, i, N9ag, comp)
a += genAtom(" C8 ", n3, i, C8ag, comp)
a += genAtom(" N7 ", n3, i, N7ag, comp)
a += genAtom(" C5 ", n3, i, C5ag, comp)
a += genAtom(" C6 ", n3, i, C6ag, comp)
a += genAtom(" N6 ", n3, i, NO6ag, comp)
a += genAtom(" N1 ", n3, i, N1ag, comp)
a += genAtom(" C2 ", n3, i, C2ag, comp)
a += genAtom(" N3 ", n3, i, N3ag, comp)
a += genAtom(" C4 ", n3, i, C4ag, comp)
break;
case 'C' :
a += genAtom(" N1 ", n3, i, N1ct, comp)
a += genAtom(" C2 ", n3, i, C2ct, comp)
a += genAtom(" O2 ", n3, i, O2ct, comp)
a += genAtom(" N3 ", n3, i, N3ct, comp)
a += genAtom(" C4 ", n3, i, C4ct, comp)
a += genAtom(" N4 ", n3, i, NO4ct, comp)
a += genAtom(" C5 ", n3, i, C5ct, comp)
a += genAtom(" C6 ", n3, i, C6ct, comp)
break;
case 'G' :
a += genAtom(" N9 ", n3, i, N9ag, comp)
a += genAtom(" C8 ", n3, i, C8ag, comp)
a += genAtom(" N7 ", n3, i, N7ag, comp)
a += genAtom(" C5 ", n3, i, C5ag, comp)
a += genAtom(" C6 ", n3, i, C6ag, comp)
a += genAtom(" O6 ", n3, i, NO6ag, comp)
a += genAtom(" N1 ", n3, i, N1ag, comp)
a += genAtom(" C2 ", n3, i, C2ag, comp)
a += genAtom(" N2 ", n3, i, nN2ag, comp)
a += genAtom(" N3 ", n3, i, N3ag, comp)
a += genAtom(" C4 ", n3, i, C4ag, comp)
break;
case 'T' :
a += genAtom(" N1 ", n3, i, N1ct, comp)
a += genAtom(" C2 ", n3, i, C2ct, comp)
a += genAtom(" O2 ", n3, i, O2ct, comp)
a += genAtom(" N3 ", n3, i, N3ct, comp)
a += genAtom(" C4 ", n3, i, C4ct, comp)
a += genAtom(" O4 ", n3, i, NO4ct, comp)
a += genAtom(" C5 ", n3, i, C5ct, comp)
a += genAtom(" C6 ", n3, i, C6ct, comp)
a += genAtom(" C7 ", n3, i, nC7ct, comp)
break;
case 'U' :
a += genAtom(" N1 ", n3, i, N1ct, comp)
a += genAtom(" C2 ", n3, i, C2ct, comp)
a += genAtom(" O2 ", n3, i, O2ct, comp)
a += genAtom(" N3 ", n3, i, N3ct, comp)
a += genAtom(" C4 ", n3, i, C4ct, comp)
a += genAtom(" O4 ", n3, i, NO4ct, comp)
a += genAtom(" C5 ", n3, i, C5ct, comp)
a += genAtom(" C6 ", n3, i, C6ct, comp)
break;
default :
break;
}
return a
};
# Rotate a1 on a2 in the plane of a1, a2 and a3 to the given angle
# a1 and all connected except by a2 must be selected
function setAngle (a1, a2, a3, toangle) {
var v1={atomno = a1}.xyz - {atomno = a2}.xyz
var v2={atomno = a3}.xyz - {atomno = a2}.xyz
var axis = cross(v1, v2) + {atomno = a2}.xyz
var curangle = angle({atomno=a1}, {atomno=a2}, {atomno=a3})
rotateselected @axis {atomno = a2} @{curangle-toangle}
}
# Set the dihedral to the given angle
# a1 (or a4) and all connected except by a2 (or a3) must be selected
# If selected < unselected ==> a2 < a3 and vice versa
function setDihedral (a1, a2, a3, a4, toangle) {
var curangle = angle({atomno=a1}, {atomno=a2}, {atomno=a3}, {atomno=a4})
rotateselected {atomno=a2} {atomno=a3} @{toangle-curangle}
}
function countAtoms(seq, rna, start, finish) {
var ntc = {"A":21, "C":20, "G":22, "T":20, "U":19}
var cnt = 0
for (var i = start; i <= finish; i++) {
cnt += (ntc[seq[i]] + (rna ? 1 : 0))
}
return cnt
}
# Generate a helix
function genHelixStrand(gSeq, reverse, drm, double) {
var cha = ":" + gCHAIN1
var chb = ":" + gCHAIN2
var seq = ""
if (reverse) {
for (var i = gSeq.count; i > 0; i--) {
seq += gSeq[i]%9999%0
}
}
else {
seq = gSeq%9999%0
}
cSeq = ""
if (double) {
for (var i = seq.count; i > 0; i--) {
cSeq += ((seq[i] == 'A') and (dr > 0)) ? "U" : gNtComp[seq[i]]
}
}
var aAtomCount = countAtoms(seq, (drm == 1), 1, seq.count)
var bAtomCount = countAtoms(cSeq, (drm > 0), 1, cSeq.count)
gNa = 1 # global new P atom index for chain A
gNb = 0
if (double) {
gNb = (aAtomCount + bAtomCount
- countAtoms(cSeq, (drm>0), cSeq.count, cSeq.count)) # last P in cSeq
}
#var bBase = (all.count + countAtoms(seq, (drm > 0), seq.count) + 1)
# Find last linkable P if any
var aResno = 1
var pNa = 1 # previous gNa
for (var i = all.count; i > 0; i--) {
# If A strand found at {0,0,0}
if (distance({atomno=i}, {0,0,0}) < 0.1) {
if ({atomno=i}.chain == gCHAIN1) {
# Add to existing strand
echo "Adding to existing strand..."
pNa = i
aResno = {chain=gCHAIN1}.resno.max + 1
gNa = {chain=gCHAIN1}.atomno.max + 1
gNb += gNa
# Bump up all B chain atomno and resno
# KLUDGE to work-around of Jmol's lack of resno rewrite
savNb = gNb
gNb = aAtomCount + bAtomCount + gNa
gA = "data \"append nt\"\n" # global PDB atom record
for (j = 1; j <= all.atomno.max; j++) {
if ({atomno=j}.chain == gCHAIN2) {
gA += genAtom({atomno=j}.atomName, {atomno=j}.group,
({atomno=j}.resno+seq.count+cSeq.count),
array({atomno=j}.x, {atomno=j}.y, {atomno=j}.z), true)
}
}
gA += "end \"append nt\""
delete @chb
script inline @{gA} # <== new atoms added here
gNb = savNb
break;
}
}
}
var bResno = aResno + seq.count + cSeq.count - 1
var nNa = gNa # new P
var nNb = 0#bBase # new comp P
# For each NT
set appendnew false
for (var i = 1; i <= seq.count; i++) {
if (seq[i] == "") {
continue
}
# Move polynucleotide O3p to bond distance from new nt P
var pO3 = {-0.521, 0.638, 1.234}
select all
if ((i + aResno) > 2) {
var nO3 = {atomno=@{pNa+8}}.xyz
var xyz = @{pO3 - nO3}
translateselected @xyz
}
# Gen NT ==================================================
gA = "data \"append nt\"\n" # global PDB atom record
gA += genNT(aResno, seq[i], (drm == 1), FALSE); # gNa updated
if (double) {
nNb = gNb
var nti = cSeq.count-i+1
gA += genNT(bResno, cSeq[nti], (drm > 0), TRUE); # gNb++
if (i > 0) {
gNb -= countAtoms(cSeq, (drm>0), nti-1, nti)
}
}
gA += "end \"append nt\""
script inline @{gA} # <== new atoms added here
# Flip comp to comp strand
if (double) {
select @{"" + bResno + chb}
var v1={8.238, 2.809, 6.004}
var v2={8.461, 4.646, 4.125}
rotateSelected @v2 @v1 180.0
}
# If any older NTs
if ((i + aResno) > 2) {
# Set the angles between the new NT and the old NTs
select (@cha and (atomno < nNa) or (@chb and (resno != bResno)))
setAngle(nNa, pNa+8, pNa+7, 120.0)
select (@cha and (atomno < @{nNa+3}) or (@chb and (resno != bResno)))
setDihedral(nNa+4, nNa+3, nNa, pNa+8, gC5O5PO3)
select (@cha and (atomno < nNa) or (@chb and (resno != bResno)))
setDihedral(nNa+3, nNa, pNa+8, pNa+7, gO5PO3C3)
setDihedral(nNa, pNa+8, pNa+7, pNa+5, gPO3C3C4)
}
# Step new and previous N
aResno++; bResno--
pNa = nNa; pnb = nNb
nNa = gNa; nNb = gNb
}
# Make the nucleotide bonds
connect
# Clean up
select all
print format("%d atoms generated for chain %s", gNa+gNb,
(comp ? gCHAIN2 : gCHAIN1))
}
# Generate a helix or two
function plicoGenHelix(gSeq) {
var single = FALSE
var reverse = FALSE
var drm = 0
var done = FALSE
gSeq = gSeq%9999%0
print format ("Seq=%s", gSeq)
if (gSeq[2] == ':') {
gCHAIN1 = gSeq[1]
gSeq[1] = ' '; gSeq[2] = ' '
gSeq = gSeq%0
}
else if (gSeq[3] == ':') {
gCHAIN1 = gSeq[1]
gCHAIN2 = gSeq[2]
gSeq[1] = ' '; gSeq[2] = ' '; gSeq[3] = ' '
gSeq = gSeq%0
}
while (done == FALSE) {
done = TRUE;
if (gSeq[1] == 'S') {
single = TRUE;
done = FALSE;
}
else if (gSeq[1] == '3') {
reverse = TRUE;
done = FALSE;
}
else if (gSeq[1] == 'R') {
drm = 1;
done = FALSE;
}
else if (gSeq[1] == 'M') {
drm = 2;
done = FALSE;
}
if (done == FALSE) {
gSeq[1] = ' '
gSeq = gSeq%0
}
}
# Generate
genHelixStrand(gSeq, reverse, drm, single ? FALSE : TRUE)
}
function plicoGenNT {
echo Generating Nuleotide Helix
# Get the sequence from the user
gSeq = prompt("Enter NT sequence (<3RSM>ACGTU)", "")%9999%0
if ((gSeq != "NULL") and (gSeq.count > 0)) {
plicoGenHelix(gSeq)
}
}