Difference between revisions of "User talk:Remig"
m |
(Polymeraze - a DNA Generator Script) |
||
Line 1: | Line 1: | ||
− | I use Jmol to study protein folding. | + | I use Jmol to study protein folding. The script here before that generates polypeptide helices is now available in the Recycling Corner: [[Recycling_Corner/Alpha_Helix_Generator]] so I have removed it from here. In its place is a similar script I wrote that accepts a nucleotide sequence (1 letter encoding: "TACGAAC...GCT" for example) and generates a DNA or RNA single or double helix using the Model Kit: |
− | <pre># | + | <pre># POLYMERAZE - Jmol script by Ron Mignery |
− | # v1. | + | # v1.0 beta 10/31/2013 |
# | # | ||
− | # | + | # POLYMERAZE takes a string message encoding a nucleotide (nt) sequence |
− | + | # and generates a corresponding double helix one nt at a time from the | |
− | + | # 5' terminus to the 3' terminus rotating the emerging helix as it goes. | |
− | |||
− | |||
− | |||
− | |||
− | # and generates a corresponding | ||
− | # | ||
# | # | ||
# The message is a string entered by the user at a prompt. | # The message is a string entered by the user at a prompt. | ||
# It may be typed in or pasted in and be of any length | # It may be typed in or pasted in and be of any length | ||
− | # If | + | # If prepended with '3' then the string is considered as 3' to 5' |
− | # then | + | # If prepended with 'R' then RNA is generated instead of DNA |
− | # | + | # If prepended with 'S' then a single strand helix is produced |
# | # | ||
# The IUPAC/IUBMB 1 letter code is used: | # The IUPAC/IUBMB 1 letter code is used: | ||
− | # A= | + | # A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | # The following constant values determine the pitch of the | + | # The following constant values determine the pitch of the helices |
− | + | gO4C4C5O5 = -82.3 - 10 # Values from 30-31:B of 3BSE | |
− | + | gC4C5O5P = 172.5 + 0 # with the indicated tweaks to make | |
− | + | gC5O5PO3 = -44.0 + 9 # it work (sort of - yes, I know | |
− | + | gO5PO3C3 = -109.8 + 0 # the B chain P is a bit screwy) | |
− | + | gPO3C3C4 = -172.9 - 1 # | |
− | + | gCHAIN1 = 'A' # The chain id | |
+ | gCHAIN2 = 'B' # The complementary chain id | ||
# Lookup 3 letter code from 1 letter code | # Lookup 3 letter code from 1 letter code | ||
− | g3from1 = {"A":" | + | g3from1 = {"A":" DA", "C":" DC","G":" DG", "T":" DT", "U":" DU"} |
− | + | gComp = {"A":"T", "C":"G","G":"C", "T":"A", "U":"A"} | |
− | |||
− | |||
# Generate PDB atom record | # Generate PDB atom record | ||
− | function genAtom(e, aa, i, xyz) { | + | # Writes gNa or gNb |
− | + | function genAtom(e, aa, i, xyz, comp) { | |
− | + | # Fixed column format: | |
− | + | #ATOM 500 O4' DA B 29 -3.745 7.211 45.474 | |
− | + | var a = format("ATOM %5d %4s %3s ", (comp ? gNb : gNa), e, aa ) | |
− | return | + | a += format("%s%4d %8.3f", (comp ? gCHAIN2 : gCHAIN1), i, xyz[1] ) |
+ | a += format("%8.3f%8.3f\n", xyz[2], xyz[3] ) | ||
+ | if (comp) gNb++; else gNa++ | ||
+ | |||
+ | return a | ||
}; | }; | ||
− | # Generate a PDB | + | # Generate a PDB nucleotide record set |
− | function | + | # Calls genAtom that writes gNa or gNb |
+ | function genNT(i, nt, rna, comp) { | ||
− | # From constructed | + | # From constructed nucleotides |
− | var | + | var P0 = (comp ? [16.753, 4.551, 8.935] : [0.000, 0.000, 0.000]) |
− | var | + | var OP1= (comp ? [17.916, 5.432, 9.209] : [-0.973, 0.363, -1.067]) |
− | var | + | var OP2= (comp ? [16.030, 4.050, 10.108] : [0.297, -1.428, 0.272]) |
− | var | + | var O5p= (comp ? [16.110, 5.278, 7.954] : [1.351, 0.795, -0.286]) |
− | var | + | var C5p= (comp ? [16.536, 5.450, 6.610] : [1.345, 2.211, -0.125]) |
+ | var C4p= (comp ? [15.487, 6.211, 5.838] : [2.739, 2.779, -0.195]) | ||
+ | var O4p= (comp ? [14.427, 5.302, 5.454] : [3.469, 2.629, 1.048]) | ||
+ | var C3p= (comp ? [14.830, 7.309, 6.676] : [3.624, 2.197, -1.288]) | ||
+ | var O3p= (comp ? [14.657, 8.490, 5.878] : [4.212, 3.282, -1.983]) | ||
+ | var C2p= (comp ? [13.509, 6.688, 7.119] : [4.692, 1.432, -0.523]) | ||
+ | var O2p= (comp ? [12.530, 7.743, 7.153] : [5.994, 1.461, -1.226]) | ||
+ | var C1p= (comp ? [13.175, 5.739, 5.976] : [4.797, 2.255, 0.746]) | ||
− | var | + | var N1ct=(comp ? [12.404, 4.556, 6.401] : [5.353, 1.551, 1.907]) |
− | var | + | var C2ct=(comp ? [11.423, 4.111, 5.551] : [6.351, 2.183, 2.606]) |
− | var | + | var O2ct=(comp ? [11.137, 4.685, 4.516] : [6.789, 3.274, 2.289]) |
− | var | + | var N3ct=(comp ? [10.784, 2.966, 5.956] : [6.823, 1.489, 3.692]) |
− | var | + | var C4ct=(comp ? [11.025, 2.241, 7.106] : [6.404, 0.251, 4.135]) |
− | var | + | var NO4ct=(comp ?[10.382, 1.213, 7.329] : [6.908, -0.242, 5.146]) |
− | var | + | var C5ct=(comp ? [12.061, 2.780, 7.971] : [5.360, -0.370, 3.337]) |
− | + | var C6ct=(comp ? [12.692, 3.894, 7.575] : [4.892, 0.307, 2.279]) | |
− | var | + | var nC7ct=(comp ?[12.421, 2.090, 9.243] : [4.862, -1.727, 3.721]) |
− | var | + | var N9ag=(comp ? [12.426, 4.545, 6.367] : [5.333, 1.584, 1.923]) |
− | var | + | var C8ag=(comp ? [12.706, 3.662, 7.382] : [4.870, 0.450, 2.545]) |
+ | var N7ag=(comp ? [11.856, 2.668, 7.467] : [5.595, 0.076, 3.570]) | ||
+ | var C5ag=(comp ? [10.952, 2.911, 6.441] : [6.608, 1.025, 3.629]) | ||
+ | var C6ag=(comp ? [9.818, 2.215, 5.997] : [7.694, 1.194, 4.500]) | ||
+ | var NO6ag=(comp ?[9.379, 1.083, 6.552] : [7.955, 0.379, 5.525]) | ||
+ | var N1ag=(comp ? [9.137, 2.727, 4.945] : [8.517, 2.246, 4.283]) | ||
+ | var C2ag=(comp ? [9.573, 3.862, 4.390] : [8.259, 3.061, 3.256]) | ||
+ | var nN2ag=(comp ?[8.847, 4.313, 3.345] : [9.119, 4.090, 3.100]) | ||
+ | var N3ag=(comp ? [10.630, 4.605, 4.715] : [7.265, 3.009, 2.370]) | ||
+ | var C4ag=(comp ? [11.285, 4.068, 5.759] : [6.465, 1.956, 2.616]) | ||
− | + | # Build PDB atom records common to all NTs | |
− | + | n3 = g3from1[nt] | |
− | + | if (n3 = "") { | |
− | + | n3 = " D?" | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | # Build PDB atom records common to all | ||
− | |||
− | if ( | ||
− | |||
} | } | ||
− | print format("+ %s%d/%d", | + | print format("+ %s%d/%d", n3, i, gSeq.count + gResno) |
− | + | var a = genAtom(" P ", n3, i, P0, comp) | |
− | + | a += genAtom(" OP1", n3, i, OP1, comp) | |
− | + | a += genAtom(" OP2", n3, i, OP2, comp) | |
− | + | a += genAtom(" O5'", n3, i, O5p, comp) | |
− | + | a += genAtom(" C5'", n3, i, C5p, comp) | |
− | + | a += genAtom(" C4'", n3, i, C4p, comp) | |
+ | a += genAtom(" O4'", n3, i, O4p, comp) | ||
+ | a += genAtom(" C3'", n3, i, C3p, comp) | ||
+ | a += genAtom(" O3'", n3, i, O3p, comp) | ||
+ | a += genAtom(" C2'", n3, i, C2p, comp) | ||
+ | a += genAtom(" C1'", n3, i, C1p, comp) | ||
+ | if (rna) { | ||
+ | a += genAtom(" O2'", n3, i, O2p, comp) | ||
} | } | ||
− | # Now add | + | # Now add NT specific atom records |
− | switch ( | + | switch (nt) { |
case 'A' : | case 'A' : | ||
− | + | a += genAtom(" N9 ", n3, i, N9ag, comp) | |
− | + | a += genAtom(" C8 ", n3, i, C8ag, comp) | |
− | + | a += genAtom(" N7 ", n3, i, N7ag, comp) | |
− | + | a += genAtom(" C5 ", n3, i, C5ag, comp) | |
− | + | a += genAtom(" C6 ", n3, i, C6ag, comp) | |
− | + | a += genAtom(" N6 ", n3, i, NO6ag, comp) | |
+ | a += genAtom(" N1 ", n3, i, N1ag, comp) | ||
+ | a += genAtom(" C2 ", n3, i, C2ag, comp) | ||
+ | a += genAtom(" N3 ", n3, i, N3ag, comp) | ||
+ | a += genAtom(" C4 ", n3, i, C4ag, comp) | ||
break; | break; | ||
case 'C' : | case 'C' : | ||
− | + | a += genAtom(" N1 ", n3, i, N1ct, comp) | |
− | + | a += genAtom(" C2 ", n3, i, C2ct, comp) | |
− | + | a += genAtom(" O2 ", n3, i, O2ct, comp) | |
− | + | a += genAtom(" N3 ", n3, i, N3ct, comp) | |
− | + | a += genAtom(" C4 ", n3, i, C4ct, comp) | |
− | + | a += genAtom(" N4 ", n3, i, NO4ct, comp) | |
− | + | a += genAtom(" C5 ", n3, i, C5ct, comp) | |
− | + | a += genAtom(" C6 ", n3, i, C6ct, comp) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
break; | break; | ||
case 'G' : | case 'G' : | ||
− | + | a += genAtom(" N9 ", n3, i, N9ag, comp) | |
− | + | a += genAtom(" C8 ", n3, i, C8ag, comp) | |
− | + | a += genAtom(" N7 ", n3, i, N7ag, comp) | |
− | + | a += genAtom(" C5 ", n3, i, C5ag, comp) | |
− | + | a += genAtom(" C6 ", n3, i, C6ag, comp) | |
− | + | a += genAtom(" O6 ", n3, i, NO6ag, comp) | |
− | + | a += genAtom(" N1 ", n3, i, N1ag, comp) | |
− | + | a += genAtom(" C2 ", n3, i, C2ag, comp) | |
− | + | a += genAtom(" N2 ", n3, i, nN2ag, comp) | |
− | + | a += genAtom(" N3 ", n3, i, N3ag, comp) | |
− | + | a += genAtom(" C4 ", n3, i, C4ag, comp) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
break; | break; | ||
case 'T' : | case 'T' : | ||
− | + | a += genAtom(" N1 ", n3, i, N1ct, comp) | |
− | + | a += genAtom(" C2 ", n3, i, C2ct, comp) | |
+ | a += genAtom(" O2 ", n3, i, O2ct, comp) | ||
+ | a += genAtom(" N3 ", n3, i, N3ct, comp) | ||
+ | a += genAtom(" C4 ", n3, i, C4ct, comp) | ||
+ | a += genAtom(" O4 ", n3, i, NO4ct, comp) | ||
+ | a += genAtom(" C5 ", n3, i, C5ct, comp) | ||
+ | a += genAtom(" C6 ", n3, i, C6ct, comp) | ||
+ | a += genAtom(" C7 ", n3, i, nC7ct, comp) | ||
break; | break; | ||
case 'U' : | case 'U' : | ||
− | + | a += genAtom(" N1 ", n3, i, N1ct, comp) | |
− | + | a += genAtom(" C2 ", n3, i, C2ct, comp) | |
− | + | a += genAtom(" O2 ", n3, i, O2ct, comp) | |
− | + | a += genAtom(" N3 ", n3, i, N3ct, comp) | |
− | + | a += genAtom(" C4 ", n3, i, C4ct, comp) | |
− | + | a += genAtom(" O4 ", n3, i, NO4ct, comp) | |
− | + | a += genAtom(" C5 ", n3, i, C5ct, comp) | |
− | + | a += genAtom(" C6 ", n3, i, C6ct, comp) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
break; | break; | ||
default : | default : | ||
Line 240: | Line 165: | ||
} | } | ||
− | return | + | return a |
}; | }; | ||
− | # | + | # Rotate a1 on a2 in the plane of a1, a2 and a3 to the given angle |
− | function | + | # a1 and all connected except by a2 must be selected |
+ | function setAngle (a1, a2, a3, toangle) { | ||
+ | var v1={atomno = a1}.xyz - {atomno = a2}.xyz | ||
+ | var v2={atomno = a3}.xyz - {atomno = a2}.xyz | ||
+ | var axis = cross(v1, v2) + {atomno = a2}.xyz | ||
+ | var curangle = angle({atomno=a1}, {atomno=a2}, {atomno=a3}) | ||
+ | rotateselected @axis {atomno = a2} @{curangle-toangle} | ||
+ | } | ||
+ | |||
+ | # Set the dihedral to the given angle | ||
+ | # a1 (or a4) and all connected except by a2 (or a3) must be selected | ||
+ | # If selected < unselected ==> a2 < a3 and vice versa | ||
+ | function setDihedral (a1, a2, a3, a4, toangle) { | ||
+ | var curangle = angle({atomno=a1}, {atomno=a2}, {atomno=a3}, {atomno=a4}) | ||
+ | rotateselected {atomno=a2} {atomno=a3} @{toangle-curangle} | ||
+ | } | ||
+ | |||
+ | function countAtoms(seq, rna) { | ||
+ | var ntc = {"A":21, "C":20, "G":22, "T":20, "U":19} | ||
+ | var cnt = 0 | ||
+ | for (var i = 1; i <= seq.count; i++) { | ||
+ | cnt += (ntc[seq[i]] + (rna ? 1 : 0)) | ||
+ | } | ||
+ | return cnt | ||
+ | } | ||
− | + | # Generate a helix | |
+ | function genHelixStrand(gSeq, reverse, rna, double) { | ||
+ | var cha = ":A" | ||
+ | var chb = ":B" | ||
+ | var seq = "" | ||
+ | if (reverse) { | ||
+ | for (var i = gSeq.count; i > 0; i--) { | ||
+ | seq += gSeq[i] | ||
+ | } | ||
+ | } | ||
+ | else { | ||
+ | seq = gSeq | ||
+ | } | ||
− | # Find last linkable | + | gNa = all.count + 1 # global new N atom index |
− | gResno = 0 # global pre-existing | + | gNb = (double ? (gNa + countAtoms(seq) + ((gResno > 0) ? 0 : 1)) : 0) |
− | var | + | print format("gNa=%d gNb = %d", gNa, gNb) |
+ | # Find last linkable P if any | ||
+ | gResno = 0 # global pre-existing NT count | ||
+ | var pna = 1 # previous gN | ||
for (var i = all.count-1; i > 0; i--) { | for (var i = all.count-1; i > 0; i--) { | ||
− | + | ||
# If found | # If found | ||
if (distance({atomno=i}, {0,0,0}) < 0.1) { | if (distance({atomno=i}, {0,0,0}) < 0.1) { | ||
− | + | if ({atomno=i}.chain == chain[2]) { | |
− | + | pna = i | |
− | |||
− | if ({atomno=i}.chain | ||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } | ||
+ | gResno = {atomno=i}.resno | ||
break; | break; | ||
} | } | ||
} | } | ||
− | # For each | + | # For each nt |
set appendnew false | set appendnew false | ||
− | var | + | var nna = gNa # new P |
− | for (var i = 1; i <= | + | var nnb = gNb # new P |
− | + | for (var i = 1; i <= seq.count; i++) { | |
− | # Move | + | if (seq[i] == "") { |
+ | continue | ||
+ | } | ||
+ | |||
+ | gA = "data \"append nt\"\n" # global PDB atom record | ||
+ | |||
+ | # Move polynucleotide O3p to bond distance from new nt P | ||
+ | var pO3 = {-0.521, 0.638, 1.234} | ||
select all | select all | ||
− | + | if ((i + gResno) > 1) { | |
− | + | var nO3 = {atomno=@{pna+8}}.xyz | |
+ | var xyz = @{pO3 - nO3} | ||
+ | translateselected @xyz | ||
+ | } | ||
− | # | + | # Else 1st 5' nt so add OP3 |
− | gA = " | + | else { |
− | gA += | + | var O3n = {pO3n}.xyz |
− | gA += "end \"append | + | gA += genAtom(" OP3", g3from1[seq[i]], i + gResno, O3n, FALSE) |
− | script inline @{gA} | + | nna++ |
+ | } | ||
+ | |||
+ | # Gen NT ================================================== | ||
+ | gA += genNT(i + gResno, seq[i], rna, FALSE); # gNa updated | ||
+ | if (double) { | ||
+ | gA += genNT(i + gResno + seq.count, gComp[seq[i]], rna, TRUE); # gNb updated | ||
+ | } | ||
+ | gA += "end \"append nt\"" | ||
+ | script inline @{gA} # <== new atoms added here | ||
− | # | + | # First shape up the comp side |
− | + | if (double) { | |
− | + | select (@chb and (atomno < @{nnb + 6}) && (atomno >= nnb)) | |
− | + | setDihedral(nnb+6, nnb+5, nnb+4, nnb+3, gO4C4C5O5) | |
− | + | select selected and (atomno != @{nnb + 5}) | |
− | } | + | setDihedral(nnb+5, nnb+4, nnb+3, nnb, gC4C5O5P) |
− | } | + | } |
− | + | # Adjust link dihedrals of the new | |
− | # If | + | select (@cha and (atomno > @{nna+4}) or (@chb and (atomno >= nnb))) |
− | if ( | + | setDihedral(nna+3, nna+4, nna+5, nna+6, gO4C4C5O5) |
− | + | setDihedral(nna, nna+3, nna+4, nna+5, gC4C5O5P) | |
− | # | + | |
− | + | # If any older | |
− | + | if (i > 1) { | |
− | + | # Now move the old | |
− | + | select (@cha and (atomno < nna) or (@chb and (atomno < nnb))) | |
+ | setAngle(nna, pna+8, pna+7, 120.0) | ||
+ | |||
+ | select (@cha and (atomno < @{nna+3}) or (@chb and (atomno < nnb))) | ||
+ | setDihedral(nna+4, nna+3, nna, pna+8, gC5O5PO3) | ||
− | + | select (@cha and (atomno < nna) or (@chb and (atomno < nnb))) | |
− | + | setDihedral(nna+3, nna, pna+8, pna+7, gO5PO3C3) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | setDihedral(nna, pna+8, pna+7, pna+5, gPO3C3C4) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } | ||
# Step new and previous N | # Step new and previous N | ||
− | + | pna = nna; pnb = nnb | |
− | + | nna = gNa; nnb = gNb | |
− | |||
− | |||
− | |||
} | } | ||
+ | |||
+ | # Make the nucleotide bonds | ||
+ | connect | ||
# Clean up | # Clean up | ||
− | |||
select all | select all | ||
− | + | print format("%d atoms generated for chain %s", gNa+gNb, | |
− | print format("%d atoms generated", | + | (comp ? gCHAIN2 : gCHAIN1)) |
} | } | ||
+ | # Generate a helix or two | ||
+ | function genHelix(gSeq) { | ||
+ | var single = FALSE | ||
+ | var reverse = FALSE | ||
+ | var rna = FALSE | ||
+ | var done = FALSE | ||
+ | if (gSeq[2] == ':') { | ||
+ | gCHAIN1 = gSeq[1] | ||
+ | gSeq[1] = ' '; gSeq[2] = ' ' | ||
+ | gSeq = gSeq%0 | ||
+ | } | ||
+ | else if (gSeq[3] == ':') { | ||
+ | gCHAIN1 = gSeq[1] | ||
+ | gCHAIN2 = gSeq[2] | ||
+ | gSeq[1] = ' '; gSeq[2] = ' '; gSeq[3] = ' ' | ||
+ | gSeq = gSeq%0 | ||
+ | } | ||
+ | while (done == FALSE) { | ||
+ | done = TRUE; | ||
+ | if (gSeq[1] == 'S') { | ||
+ | single = TRUE; | ||
+ | done = FALSE; | ||
+ | } | ||
+ | else if (gSeq[1] == '3') { | ||
+ | reverse = TRUE; | ||
+ | done = FALSE; | ||
+ | } | ||
+ | else if (gSeq[1] == 'R') { | ||
+ | rna = TRUE; | ||
+ | done = FALSE; | ||
+ | } | ||
+ | if (done == FALSE) { | ||
+ | gSeq[1] = ' ' | ||
+ | gSeq = gSeq%0 | ||
+ | } | ||
+ | } | ||
+ | print format ("Sequence=%s single=%s reverse=%s", gSeq, single, reverse) | ||
+ | print format ("rna=%s", rna) | ||
+ | |||
+ | # Gen first strand | ||
+ | genHelixStrand(gSeq, reverse, rna, single ? FALSE : TRUE) | ||
+ | |||
+ | } | ||
+ | |||
+ | # ============================================== | ||
echo Generating Alpha Helix | echo Generating Alpha Helix | ||
# Get the sequence from the user | # Get the sequence from the user | ||
− | gSeq = prompt("Enter | + | gSeq = prompt("Enter NT sequence (ACGTU)", "")%9999%0 |
if (gSeq.count > 0) { | if (gSeq.count > 0) { | ||
− | + | genHelix(gSeq) | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } | ||
</pre> | </pre> | ||
+ | |||
+ | I thought adapting the Ribozome script to polynucleotides would be easy since there are only four types rather than 20. I was not easy and this script has its problems. With five rotors in a row for each nucleotide, finding correct values for the bond angles was beyond me. If anyone can tweak them better, please do and let me know. Regardless I plan to develop some more utility scripts to make that process easier and hope to update this script in the hopefully not too distant future. |
Revision as of 19:25, 31 October 2013
I use Jmol to study protein folding. The script here before that generates polypeptide helices is now available in the Recycling Corner: Recycling_Corner/Alpha_Helix_Generator so I have removed it from here. In its place is a similar script I wrote that accepts a nucleotide sequence (1 letter encoding: "TACGAAC...GCT" for example) and generates a DNA or RNA single or double helix using the Model Kit:
# POLYMERAZE - Jmol script by Ron Mignery # v1.0 beta 10/31/2013 # # POLYMERAZE takes a string message encoding a nucleotide (nt) sequence # and generates a corresponding double helix one nt at a time from the # 5' terminus to the 3' terminus rotating the emerging helix as it goes. # # The message is a string entered by the user at a prompt. # It may be typed in or pasted in and be of any length # If prepended with '3' then the string is considered as 3' to 5' # If prepended with 'R' then RNA is generated instead of DNA # If prepended with 'S' then a single strand helix is produced # # The IUPAC/IUBMB 1 letter code is used: # A=Adenine C=Cytosine G=Guanine T=Thymine U=Uracil # The following constant values determine the pitch of the helices gO4C4C5O5 = -82.3 - 10 # Values from 30-31:B of 3BSE gC4C5O5P = 172.5 + 0 # with the indicated tweaks to make gC5O5PO3 = -44.0 + 9 # it work (sort of - yes, I know gO5PO3C3 = -109.8 + 0 # the B chain P is a bit screwy) gPO3C3C4 = -172.9 - 1 # gCHAIN1 = 'A' # The chain id gCHAIN2 = 'B' # The complementary chain id # Lookup 3 letter code from 1 letter code g3from1 = {"A":" DA", "C":" DC","G":" DG", "T":" DT", "U":" DU"} gComp = {"A":"T", "C":"G","G":"C", "T":"A", "U":"A"} # Generate PDB atom record # Writes gNa or gNb function genAtom(e, aa, i, xyz, comp) { # Fixed column format: #ATOM 500 O4' DA B 29 -3.745 7.211 45.474 var a = format("ATOM %5d %4s %3s ", (comp ? gNb : gNa), e, aa ) a += format("%s%4d %8.3f", (comp ? gCHAIN2 : gCHAIN1), i, xyz[1] ) a += format("%8.3f%8.3f\n", xyz[2], xyz[3] ) if (comp) gNb++; else gNa++ return a }; # Generate a PDB nucleotide record set # Calls genAtom that writes gNa or gNb function genNT(i, nt, rna, comp) { # From constructed nucleotides var P0 = (comp ? [16.753, 4.551, 8.935] : [0.000, 0.000, 0.000]) var OP1= (comp ? [17.916, 5.432, 9.209] : [-0.973, 0.363, -1.067]) var OP2= (comp ? [16.030, 4.050, 10.108] : [0.297, -1.428, 0.272]) var O5p= (comp ? [16.110, 5.278, 7.954] : [1.351, 0.795, -0.286]) var C5p= (comp ? [16.536, 5.450, 6.610] : [1.345, 2.211, -0.125]) var C4p= (comp ? [15.487, 6.211, 5.838] : [2.739, 2.779, -0.195]) var O4p= (comp ? [14.427, 5.302, 5.454] : [3.469, 2.629, 1.048]) var C3p= (comp ? [14.830, 7.309, 6.676] : [3.624, 2.197, -1.288]) var O3p= (comp ? [14.657, 8.490, 5.878] : [4.212, 3.282, -1.983]) var C2p= (comp ? [13.509, 6.688, 7.119] : [4.692, 1.432, -0.523]) var O2p= (comp ? [12.530, 7.743, 7.153] : [5.994, 1.461, -1.226]) var C1p= (comp ? [13.175, 5.739, 5.976] : [4.797, 2.255, 0.746]) var N1ct=(comp ? [12.404, 4.556, 6.401] : [5.353, 1.551, 1.907]) var C2ct=(comp ? [11.423, 4.111, 5.551] : [6.351, 2.183, 2.606]) var O2ct=(comp ? [11.137, 4.685, 4.516] : [6.789, 3.274, 2.289]) var N3ct=(comp ? [10.784, 2.966, 5.956] : [6.823, 1.489, 3.692]) var C4ct=(comp ? [11.025, 2.241, 7.106] : [6.404, 0.251, 4.135]) var NO4ct=(comp ?[10.382, 1.213, 7.329] : [6.908, -0.242, 5.146]) var C5ct=(comp ? [12.061, 2.780, 7.971] : [5.360, -0.370, 3.337]) var C6ct=(comp ? [12.692, 3.894, 7.575] : [4.892, 0.307, 2.279]) var nC7ct=(comp ?[12.421, 2.090, 9.243] : [4.862, -1.727, 3.721]) var N9ag=(comp ? [12.426, 4.545, 6.367] : [5.333, 1.584, 1.923]) var C8ag=(comp ? [12.706, 3.662, 7.382] : [4.870, 0.450, 2.545]) var N7ag=(comp ? [11.856, 2.668, 7.467] : [5.595, 0.076, 3.570]) var C5ag=(comp ? [10.952, 2.911, 6.441] : [6.608, 1.025, 3.629]) var C6ag=(comp ? [9.818, 2.215, 5.997] : [7.694, 1.194, 4.500]) var NO6ag=(comp ?[9.379, 1.083, 6.552] : [7.955, 0.379, 5.525]) var N1ag=(comp ? [9.137, 2.727, 4.945] : [8.517, 2.246, 4.283]) var C2ag=(comp ? [9.573, 3.862, 4.390] : [8.259, 3.061, 3.256]) var nN2ag=(comp ?[8.847, 4.313, 3.345] : [9.119, 4.090, 3.100]) var N3ag=(comp ? [10.630, 4.605, 4.715] : [7.265, 3.009, 2.370]) var C4ag=(comp ? [11.285, 4.068, 5.759] : [6.465, 1.956, 2.616]) # Build PDB atom records common to all NTs n3 = g3from1[nt] if (n3 = "") { n3 = " D?" } print format("+ %s%d/%d", n3, i, gSeq.count + gResno) var a = genAtom(" P ", n3, i, P0, comp) a += genAtom(" OP1", n3, i, OP1, comp) a += genAtom(" OP2", n3, i, OP2, comp) a += genAtom(" O5'", n3, i, O5p, comp) a += genAtom(" C5'", n3, i, C5p, comp) a += genAtom(" C4'", n3, i, C4p, comp) a += genAtom(" O4'", n3, i, O4p, comp) a += genAtom(" C3'", n3, i, C3p, comp) a += genAtom(" O3'", n3, i, O3p, comp) a += genAtom(" C2'", n3, i, C2p, comp) a += genAtom(" C1'", n3, i, C1p, comp) if (rna) { a += genAtom(" O2'", n3, i, O2p, comp) } # Now add NT specific atom records switch (nt) { case 'A' : a += genAtom(" N9 ", n3, i, N9ag, comp) a += genAtom(" C8 ", n3, i, C8ag, comp) a += genAtom(" N7 ", n3, i, N7ag, comp) a += genAtom(" C5 ", n3, i, C5ag, comp) a += genAtom(" C6 ", n3, i, C6ag, comp) a += genAtom(" N6 ", n3, i, NO6ag, comp) a += genAtom(" N1 ", n3, i, N1ag, comp) a += genAtom(" C2 ", n3, i, C2ag, comp) a += genAtom(" N3 ", n3, i, N3ag, comp) a += genAtom(" C4 ", n3, i, C4ag, comp) break; case 'C' : a += genAtom(" N1 ", n3, i, N1ct, comp) a += genAtom(" C2 ", n3, i, C2ct, comp) a += genAtom(" O2 ", n3, i, O2ct, comp) a += genAtom(" N3 ", n3, i, N3ct, comp) a += genAtom(" C4 ", n3, i, C4ct, comp) a += genAtom(" N4 ", n3, i, NO4ct, comp) a += genAtom(" C5 ", n3, i, C5ct, comp) a += genAtom(" C6 ", n3, i, C6ct, comp) break; case 'G' : a += genAtom(" N9 ", n3, i, N9ag, comp) a += genAtom(" C8 ", n3, i, C8ag, comp) a += genAtom(" N7 ", n3, i, N7ag, comp) a += genAtom(" C5 ", n3, i, C5ag, comp) a += genAtom(" C6 ", n3, i, C6ag, comp) a += genAtom(" O6 ", n3, i, NO6ag, comp) a += genAtom(" N1 ", n3, i, N1ag, comp) a += genAtom(" C2 ", n3, i, C2ag, comp) a += genAtom(" N2 ", n3, i, nN2ag, comp) a += genAtom(" N3 ", n3, i, N3ag, comp) a += genAtom(" C4 ", n3, i, C4ag, comp) break; case 'T' : a += genAtom(" N1 ", n3, i, N1ct, comp) a += genAtom(" C2 ", n3, i, C2ct, comp) a += genAtom(" O2 ", n3, i, O2ct, comp) a += genAtom(" N3 ", n3, i, N3ct, comp) a += genAtom(" C4 ", n3, i, C4ct, comp) a += genAtom(" O4 ", n3, i, NO4ct, comp) a += genAtom(" C5 ", n3, i, C5ct, comp) a += genAtom(" C6 ", n3, i, C6ct, comp) a += genAtom(" C7 ", n3, i, nC7ct, comp) break; case 'U' : a += genAtom(" N1 ", n3, i, N1ct, comp) a += genAtom(" C2 ", n3, i, C2ct, comp) a += genAtom(" O2 ", n3, i, O2ct, comp) a += genAtom(" N3 ", n3, i, N3ct, comp) a += genAtom(" C4 ", n3, i, C4ct, comp) a += genAtom(" O4 ", n3, i, NO4ct, comp) a += genAtom(" C5 ", n3, i, C5ct, comp) a += genAtom(" C6 ", n3, i, C6ct, comp) break; default : break; } return a }; # Rotate a1 on a2 in the plane of a1, a2 and a3 to the given angle # a1 and all connected except by a2 must be selected function setAngle (a1, a2, a3, toangle) { var v1={atomno = a1}.xyz - {atomno = a2}.xyz var v2={atomno = a3}.xyz - {atomno = a2}.xyz var axis = cross(v1, v2) + {atomno = a2}.xyz var curangle = angle({atomno=a1}, {atomno=a2}, {atomno=a3}) rotateselected @axis {atomno = a2} @{curangle-toangle} } # Set the dihedral to the given angle # a1 (or a4) and all connected except by a2 (or a3) must be selected # If selected < unselected ==> a2 < a3 and vice versa function setDihedral (a1, a2, a3, a4, toangle) { var curangle = angle({atomno=a1}, {atomno=a2}, {atomno=a3}, {atomno=a4}) rotateselected {atomno=a2} {atomno=a3} @{toangle-curangle} } function countAtoms(seq, rna) { var ntc = {"A":21, "C":20, "G":22, "T":20, "U":19} var cnt = 0 for (var i = 1; i <= seq.count; i++) { cnt += (ntc[seq[i]] + (rna ? 1 : 0)) } return cnt } # Generate a helix function genHelixStrand(gSeq, reverse, rna, double) { var cha = ":A" var chb = ":B" var seq = "" if (reverse) { for (var i = gSeq.count; i > 0; i--) { seq += gSeq[i] } } else { seq = gSeq } gNa = all.count + 1 # global new N atom index gNb = (double ? (gNa + countAtoms(seq) + ((gResno > 0) ? 0 : 1)) : 0) print format("gNa=%d gNb = %d", gNa, gNb) # Find last linkable P if any gResno = 0 # global pre-existing NT count var pna = 1 # previous gN for (var i = all.count-1; i > 0; i--) { # If found if (distance({atomno=i}, {0,0,0}) < 0.1) { if ({atomno=i}.chain == chain[2]) { pna = i } gResno = {atomno=i}.resno break; } } # For each nt set appendnew false var nna = gNa # new P var nnb = gNb # new P for (var i = 1; i <= seq.count; i++) { if (seq[i] == "") { continue } gA = "data \"append nt\"\n" # global PDB atom record # Move polynucleotide O3p to bond distance from new nt P var pO3 = {-0.521, 0.638, 1.234} select all if ((i + gResno) > 1) { var nO3 = {atomno=@{pna+8}}.xyz var xyz = @{pO3 - nO3} translateselected @xyz } # Else 1st 5' nt so add OP3 else { var O3n = {pO3n}.xyz gA += genAtom(" OP3", g3from1[seq[i]], i + gResno, O3n, FALSE) nna++ } # Gen NT ================================================== gA += genNT(i + gResno, seq[i], rna, FALSE); # gNa updated if (double) { gA += genNT(i + gResno + seq.count, gComp[seq[i]], rna, TRUE); # gNb updated } gA += "end \"append nt\"" script inline @{gA} # <== new atoms added here # First shape up the comp side if (double) { select (@chb and (atomno < @{nnb + 6}) && (atomno >= nnb)) setDihedral(nnb+6, nnb+5, nnb+4, nnb+3, gO4C4C5O5) select selected and (atomno != @{nnb + 5}) setDihedral(nnb+5, nnb+4, nnb+3, nnb, gC4C5O5P) } # Adjust link dihedrals of the new select (@cha and (atomno > @{nna+4}) or (@chb and (atomno >= nnb))) setDihedral(nna+3, nna+4, nna+5, nna+6, gO4C4C5O5) setDihedral(nna, nna+3, nna+4, nna+5, gC4C5O5P) # If any older if (i > 1) { # Now move the old select (@cha and (atomno < nna) or (@chb and (atomno < nnb))) setAngle(nna, pna+8, pna+7, 120.0) select (@cha and (atomno < @{nna+3}) or (@chb and (atomno < nnb))) setDihedral(nna+4, nna+3, nna, pna+8, gC5O5PO3) select (@cha and (atomno < nna) or (@chb and (atomno < nnb))) setDihedral(nna+3, nna, pna+8, pna+7, gO5PO3C3) setDihedral(nna, pna+8, pna+7, pna+5, gPO3C3C4) } # Step new and previous N pna = nna; pnb = nnb nna = gNa; nnb = gNb } # Make the nucleotide bonds connect # Clean up select all print format("%d atoms generated for chain %s", gNa+gNb, (comp ? gCHAIN2 : gCHAIN1)) } # Generate a helix or two function genHelix(gSeq) { var single = FALSE var reverse = FALSE var rna = FALSE var done = FALSE if (gSeq[2] == ':') { gCHAIN1 = gSeq[1] gSeq[1] = ' '; gSeq[2] = ' ' gSeq = gSeq%0 } else if (gSeq[3] == ':') { gCHAIN1 = gSeq[1] gCHAIN2 = gSeq[2] gSeq[1] = ' '; gSeq[2] = ' '; gSeq[3] = ' ' gSeq = gSeq%0 } while (done == FALSE) { done = TRUE; if (gSeq[1] == 'S') { single = TRUE; done = FALSE; } else if (gSeq[1] == '3') { reverse = TRUE; done = FALSE; } else if (gSeq[1] == 'R') { rna = TRUE; done = FALSE; } if (done == FALSE) { gSeq[1] = ' ' gSeq = gSeq%0 } } print format ("Sequence=%s single=%s reverse=%s", gSeq, single, reverse) print format ("rna=%s", rna) # Gen first strand genHelixStrand(gSeq, reverse, rna, single ? FALSE : TRUE) } # ============================================== echo Generating Alpha Helix # Get the sequence from the user gSeq = prompt("Enter NT sequence (ACGTU)", "")%9999%0 if (gSeq.count > 0) { genHelix(gSeq) }
I thought adapting the Ribozome script to polynucleotides would be easy since there are only four types rather than 20. I was not easy and this script has its problems. With five rotors in a row for each nucleotide, finding correct values for the bond angles was beyond me. If anyone can tweak them better, please do and let me know. Regardless I plan to develop some more utility scripts to make that process easier and hope to update this script in the hopefully not too distant future.