Difference between revisions of "User talk:Remig"

From Jmol
Jump to navigation Jump to search
(Ribozome - an Alpha Helix Generator)
(Alpha Helix generator)
Line 1: Line 1:
 
I use Jmol to study protein folding. Here is a script I wrote that accepts an amino acid sequence (1 letter encoding: "AAC...FYW" for example) and generates an alpha helix using the Model Kit:
 
I use Jmol to study protein folding. Here is a script I wrote that accepts an amino acid sequence (1 letter encoding: "AAC...FYW" for example) and generates an alpha helix using the Model Kit:
<pre># RIBOZOME - Jmol script by Ron Mignery  
+
<pre>#   RIBOZOME - Jmol script by Ron Mignery with help from Dr. Angel Herráez
# v1.0 beta 10/19/2013
+
#   v1.2 beta   10/23/2013
 
#
 
#
# RIBOZOME takes a string message encoding an amino acid (aa) sequence
+
#   New in v1.2 beta:
# and generates a corresponding alpha helix one aa at a time from the
+
#       No longer zaps existing atoms
# N terminus to the C terminus rotating the emerging helix as it goes.
+
#       Now adds on to existing helix on subsequent runs
 +
#      Fixes proline cross-link bug with XXXXPXP
 +
#      Localizes variables and globalizes constants
 
#
 
#
# The message is a string entered by the user at a prompt.
+
#   RIBOZOME takes a string message encoding an amino acid (aa) sequence
# It may be typed in or pasted in and be of any length
+
#  and generates a corresponding alpha helix one aa at a time from the
 +
#   N terminus to the C terminus rotating the emerging helix as it goes.
 
#
 
#
# The IUPAC/IUBMB 1 letter code is used:
+
#   The message is a string entered by the user at a prompt.
# A=ALAnine B=GLUtamine?* C=CYSteine D=ASPartate E=GLUtamate
+
#   It may be typed in or pasted in and be of any length
# F=PHEnylalanine G=GLYcine H=HIStidine I=IsoLEucine K=LYSine
+
If the message is prepended with <C>: (where C is any single letter)
# L=LEUcine M=METhionine N=ASparagiNe O=PYrroLysine*** P=PROline
+
#   then the chain is so labeled and separated from existing chains
# Q=GLutamiNe R=ARGinine S=SERine T=THReonine U=SElenoCysteine
+
#   if different from the first chain.
# V=VALine W=TRyPtophan X=UNKnown**** Y=TYRosine Z=ASparagiNe?**
 
#   *Either GLU or GLN: drawn as GLN with chi3 flipped
 
#   **Either ASP or ASN: drawn as ASN with chi3 flipped
 
# ***Not supported
 
# ****Unknown aa will be drawn as ALA
 
 
#
 
#
 +
#  The IUPAC/IUBMB 1 letter code is used:
 +
#  A=ALAnine B=GLutam?X* C=CYSteine D=ASPartate E=GLUtamate
 +
#  F=PHEnylalanine G=GLYcine H=HIStidine I=IsoLEucine K=LYSine
 +
#  L=LEUcine M=METhionine N=ASparagiNe O=PYrroLysine*** P=PROline
 +
#  Q=GLutamiNe R=ARGinine S=SERine T=THReonine U=SElenoCysteine
 +
#  V=VALine W=TRyPtophan X=UNKnown Y=TYRosine Z=ASpar?X**
 +
#    *Either GLU or GLN: drawn as GLN with chi3 flipped
 +
#    **Either ASP or ASN: drawn as ASN with chi3 flipped
 +
#  ***Not supported: drawn as ALA
  
 
# The following constant values determine the pitch of the alpha helix
 
# The following constant values determine the pitch of the alpha helix
var PHI = -50 # Dihedral angle of N-CA bond (nominally -50)
+
gPHI = -57    # Dihedral angle of N-CA bond (nominally -57)
var PSI = -60 # Dihedral angle of CA-C bond (nominally -60)
+
gPSI = -47    # Dihedral angle of CA-C bond (nominally -47)
var OMEGA = 180 # Dihedral angle of the peptide bond (nominally 180)
+
gOMEGA = 180   # Dihedral angle of the peptide bond (nominally 180)
var PEPTIDE_ANGLE = 110 # C-N-CA angle (nominally 110)
+
gPEPTIDE_ANGLE = 110   # C-N-CA angle (nominally 110)
var PRO_BUMP = -15 # Psi angle change to N-3psi when N is Pro
+
gPRO_BUMP = -10 # Psi angle change increment to N-3psi when N is Pro
 +
gCHAIN = 'A'    # The chain id
  
 
# Lookup 3 letter code from 1 letter code
 
# Lookup 3 letter code from 1 letter code
function get3from1(c) {
+
g3from1 = {"A":"ALA", "B":"GLX","C":"CYS", "D":"ASP","E":"GLU", "F":"PHE",
ret = ""
+
    "G":"GLY", "H":"HIS","I":"ILE", "K":"LYS","L":"LEU", "M":"MET",
switch (c) {
+
    "N":"GLN", "O":"PYL","P":"PRO", "Q":"GLN","R":"ARG", "S":"SER",
case 'A':
+
    "T":"THR", "U":"SEC","V":"VAL", "W":"TRP","X":"UNK", "Y":"TYR", "Z":"ASX"}
case 'X':
 
ret = "ALA";
 
break;
 
case 'C':
 
ret = "CYS";
 
break;
 
case 'D':
 
ret = "ASP";
 
break;
 
case 'E':
 
ret = "GLU";
 
break;
 
case 'F':
 
ret = "PHE";
 
break;
 
case 'G':
 
ret = "GLY";
 
break;
 
case 'H':
 
ret = "HIS";
 
break;
 
case 'I':
 
ret = "ILE";
 
break;
 
case 'K':
 
ret = "LYS";
 
break;
 
case 'L':
 
ret = "LEU";
 
break;
 
case 'M':
 
ret = "MET";
 
break;
 
case 'N':
 
case 'Z':
 
ret = "ASN";
 
break;
 
case 'P':
 
ret = "PRO";
 
break;
 
case 'B':
 
case 'Q':
 
ret = "GLN";
 
break;
 
case 'R':
 
ret = "ARG";
 
break;
 
case 'S':
 
ret = "SER";
 
break;
 
case 'T':
 
ret = "THR";
 
break;
 
case 'U':
 
ret = "SEC";
 
break;
 
case 'V':
 
ret = "VAL";
 
break;
 
case 'W':
 
ret = "TRP";
 
break;
 
case 'Y':
 
ret = "TYR";
 
break;
 
}
 
return ret
 
};
 
  
# Generate PDM atom record
+
# Generate PDB atom record
function genAtom(n, e, aa, i, xyz) {
+
function genAtom(e, aa, i, xyz) {
a =  format("ATOM  %5d %3s %3s A", n, e, aa )           
+
    gA =  format("ATOM  %5d %4s %3s ", gN, e, aa )           
a +=  format("%4d    %8.3f", i, xyz[1] )           
+
    gA +=  format("%s%4d    %8.3f", gCHAIN, i, xyz[1] )           
a +=  format("%8.3f%8.3f\n", xyz[2], xyz[3] )
+
    gA +=  format("%8.3f%8.3f\n", xyz[2], xyz[3] )
return a
+
    gn++
 +
    return gA
 
};
 
};
  
# Generate an amino acid record set
+
# Generate a PDB amino acid record set
function genAA(i, aa, x) {
+
function genAA(i, aa) {   # Writes globals gA and gN
n = x
 
 
 
# From constructed AAs
 
N0 = [0.0, 0.0, 0.0]
 
CA = [ 0.200, 1.174, 0.911 ]
 
C  = [ -1.110, 1.668, 1.425 ] #[ -1.129, 1.783, 1.241 ]
 
O  = [ -1.320, 1.693, 2.62 ] #[ -1.241, 1.967, 2.726 ]
 
CB = [ 1.062, 2.1950, 0.230 ]
 
 
G1 = [ 2.359, 1.607, -0.344] #2.396, 1.588, -0.091 ]
 
G2 = [ 1.363, 3.336, 1.157 ] #0.680, 3.652, 0.423]
 
D1 = [ 3.222, 2.656, -1.048 ] #[ 3.225, 2.340, -1.096]
 
D2 = [ 3.143, 0.904, 0.725 ] #[ 3.189, 1.093, 1.087]
 
E1 = [ 3.645, 3.749, -0.167 ] #[ 3.652, 3.503, -0.111 ]
 
E2 = [ 2.491, 3.234, -2.249 ] #[ 4.342, 1.591, -1.456 ]
 
Z  = [ 4.470, 4.717, -0.885 ] #[ 4.115, 3.339, 1.403 ]
 
H1 = [ 4.450, 6.006, -0.220 ] #[4.087, 4.572, 2.139]
 
H2 = [5.833, 4.228, -0.984 ] #[5.469, 2.866, 1.296]
 
 
Gp = [ 2.008, 1.24, -0.46 ]
 
Dp = [1.022, 0.213, -1.031 ]
 
 
Gfy  = [ 2.368, 1.471, -0.0152 ]
 
D1fy = [ 3.346, 1.524, 0.921 ]
 
D2fy = [ 2.493, 0.516, -1.151 ]
 
E1fy = [ 4.513, 0.615, 0.8244 ]
 
E2fy = [ 3.528, -0.336, -1.206 ]
 
Zfy  = [ 4.588, -0.285, -0.168 ]
 
Hfy = [ 5.738, -1.245, -0.233 ]
 
 
Ghw  = [ 2.406, 1.626, -0.134 ]
 
D1hw = [3.498, 1.936, 0.675]
 
D2hw = [ 2.713, 0.901, -1.211 ]
 
E1hw = [ 4.160, 0.518, -1.178 ]
 
E2hw = [ 4.622, 1.160, 0.0816 ]
 
E3hw = [ 3.789, 2.523, 1.944 ]
 
Z2hw = [ 5.973, 1.177, 0.689 ]
 
Z3hw = [ 5.014, 2.550, 2.503 ]
 
H2hw = [ 6.153, 1.846, 1.844 ]
 
 
N1 = [ 2.069, -2.122, -0.554] #[ -1.965, 2.307, 0.206 ]
 
 
# Build PDB atom records common to all AAs
 
a3 = get3from1(seq[i])
 
if (a3 == "") {
 
a3 = "UNK"
 
}
 
print format("+ %s%d", a3, i)
 
a = genAtom(n++, "N  ", a3, i, N0)
 
a += genAtom(n++, "CA ", a3, i, CA)
 
a += genAtom(n++, "C  ", a3, i, C)
 
a += genAtom(n++, "O  ", a3, i, O)
 
if (seq[i] != 'G') {
 
a += genAtom(n++, "CB ", a3, i, CB)
 
}
 
 
 
# Now add AA specific atom records
 
switch (aa) {
 
case 'A' :
 
case 'X' :
 
break;
 
case 'B' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "CD ", a3, i, D1)
 
a += genAtom(n++, "OE1", a3, i, E2) # GLN with Es switched
 
a += genAtom(n++, "NE2", a3, i, E1)
 
break;
 
case 'C' :
 
a += genAtom(n++, "SG ", a3, i, G2)
 
break;
 
case 'D' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "OD1", a3, i, D1)
 
a += genAtom(n++, "OD2", a3, i, D2)
 
break;
 
case 'E' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "CD ", a3, i, D1)
 
a += genAtom(n++, "OE1", a3, i, E1)
 
a += genAtom(n++, "OE2", a3, i, E2)
 
break;
 
case 'F' :
 
a += genAtom(n++, "CG ", a3, i, Gfy)
 
a += genAtom(n++, "CD1", a3, i, D1fy)
 
a += genAtom(n++, "CD2", a3, i, D2fy)
 
a += genAtom(n++, "CE1", a3, i, E1fy)
 
a += genAtom(n++, "CE2", a3, i, E2fy)
 
a += genAtom(n++, "CZ ", a3, i, Zfy)
 
break;
 
case 'G' :
 
break;
 
case 'H' :
 
a += genAtom(n++, "CG ", a3, i, Ghw)
 
a += genAtom(n++, "ND1", a3, i, D1hw)
 
a += genAtom(n++, "CD2", a3, i, D2hw)
 
a += genAtom(n++, "CE1", a3, i, E2hw)
 
a += genAtom(n++, "NE2", a3, i, E1hw)
 
break;
 
case 'I' :
 
a += genAtom(n++, "CG1", a3, i, G1)
 
a += genAtom(n++, "CG2", a3, i, G2)
 
a += genAtom(n++, "CD1", a3, i, D1)
 
break;
 
case 'K' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "CD ", a3, i, D1)
 
a += genAtom(n++, "CE ", a3, i, E1)
 
a += genAtom(n++, "NZ ", a3, i, Z)
 
break;
 
case 'L' :
 
a += genAtom(n++, "CG1", a3, i, G1)
 
a += genAtom(n++, "CD1", a3, i, D1)
 
a += genAtom(n++, "CD2", a3, i, D2)
 
break;
 
case 'M' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "SD ", a3, i, D1)
 
a += genAtom(n++, "CE ", a3, i, E1)
 
break;
 
case 'N' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "OD1", a3, i, D1)
 
a += genAtom(n++, "ND2", a3, i, D2)
 
break;
 
case 'P' :
 
a += genAtom(n++, "CG ", a3, i, GP)
 
a += genAtom(n++, "CD ", a3, i, DP)
 
break;
 
case 'Q' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "CD ", a3, i, D1)
 
a += genAtom(n++, "OE1", a3, i, E1)
 
a += genAtom(n++, "NE2", a3, i, E2)
 
break;
 
case 'R' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "CD ", a3, i, D1)
 
a += genAtom(n++, "NE ", a3, i, E1)
 
a += genAtom(n++, "CZ ", a3, i, Z)
 
a += genAtom(n++, "NH1", a3, i, H1)
 
a += genAtom(n++, "NH2", a3, i, H2)
 
break;
 
case 'S' :
 
a += genAtom(n++, "OG ", a3, i, G1)
 
break;
 
case 'T' :
 
a += genAtom(n++, "OG1", a3, i, G1)
 
a += genAtom(n++, "CG2", a3, i, G2)
 
break;
 
case 'U' :
 
a += genAtom(n++, "SeG", a3, i, G1)
 
break;
 
case 'V' :
 
a += genAtom(n++, "CG1", a3, i, G1)
 
a += genAtom(n++, "CG2", a3, i, G2)
 
break;
 
case 'W' :
 
a += genAtom(n++, "CG ", a3, i, Ghw)
 
a += genAtom(n++, "CD1", a3, i, D1hw)
 
a += genAtom(n++, "CD2", a3, i, D2hw)
 
a += genAtom(n++, "CE2", a3, i, E2hw)
 
a += genAtom(n++, "NE1", a3, i, E1hw)
 
a += genAtom(n++, "CE3", a3, i, E3hw)
 
a += genAtom(n++, "CZ2", a3, i, Z2hw)
 
a += genAtom(n++, "CZ3", a3, i, Z3hw)
 
a += genAtom(n++, "CH2", a3, i, H2hw)
 
break;
 
case 'Y' :
 
a += genAtom(n++, "CG ", a3, i, Gfy)
 
a += genAtom(n++, "CD1", a3, i, D1fy)
 
a += genAtom(n++, "CD2", a3, i, D2fy)
 
a += genAtom(n++, "CE1", a3, i, E1fy)
 
a += genAtom(n++, "CE2", a3, i, E2fy)
 
a += genAtom(n++, "CZ ", a3, i, Zfy)
 
a += genAtom(n++, "OH ", a3, i, Hfy)
 
break;
 
case 'Z' :
 
a += genAtom(n++, "CG ", a3, i, G1)
 
a += genAtom(n++, "OD1", a3, i, D2) # ASN with Ds switched
 
a += genAtom(n++, "ND2", a3, i, D1)
 
break;
 
default :
 
break;
 
}
 
  
return a
+
    # From constructed AAs
};
+
    var N0 = [0.0, 0.0, 0.0]
 +
    var CA = [ 0.200, 1.174, 0.911 ]
 +
    var C  = [ -1.110, 1.668, 1.425 ]
 +
    var O  = [ -1.320, 1.693, 2.62 ]
 +
    var CB = [ 1.062, 2.1950, 0.230 ]
 +
   
 +
    var G1 = [ 2.359, 1.607, -0.344]
 +
    var G2 = [ 1.363, 3.336, 1.157 ]
 +
    var D1 = [ 3.222, 2.656, -1.048 ]
 +
    var D2 = [ 3.143, 0.904, 0.725 ]
 +
    var E1 = [ 3.645, 3.749, -0.167 ]
 +
    var E2 = [ 2.491, 3.234, -2.249 ]
 +
    var Z  = [ 4.470, 4.717, -0.885 ]
 +
    var H1 = [ 4.450, 6.006, -0.220 ]
 +
    var H2 = [5.833, 4.228, -0.984 ]
 +
   
 +
    var Gp = [ 2.008, 1.24, -0.46 ]
 +
    var Dp = [1.022, 0.213, -1.031 ]
 +
   
 +
    var Gfy  = [ 2.368, 1.471, -0.0152 ]
 +
    var D1fy = [ 3.346, 1.524, 0.921 ]
 +
    var D2fy = [ 2.493, 0.516, -1.151 ]
 +
    var E1fy = [ 4.513, 0.615, 0.8244 ]
 +
    var E2fy = [ 3.528, -0.336, -1.206 ]
 +
    var Zfy  = [ 4.588, -0.285, -0.168 ]
 +
    var Hfy = [ 5.738, -1.245, -0.233 ]
 +
   
 +
    var Ghw  = [ 2.406, 1.626, -0.134 ]
 +
    var D1hw = [3.498, 1.936, 0.675]
 +
    var D2hw = [ 2.713, 0.901, -1.211 ]
 +
    var E1hw = [ 4.160, 0.518, -1.178 ]
 +
    var E2hw = [ 4.622, 1.160, 0.0816 ]
 +
    var E3hw = [ 3.789, 2.523, 1.944 ]
 +
    var Z2hw = [ 5.973, 1.177, 0.689 ]
 +
    var Z3hw = [ 5.014, 2.550, 2.503 ]
 +
    var H2hw = [ 6.153, 1.846, 1.844 ]
 +
   
 +
    #N1 = [ 2.069, -2.122, -0.554]
 +
   
 +
    # Build PDB atom records common to all AAs
 +
    a3 = g3from1[aa]
 +
    if (a3 = "") {
 +
        a3 = "UNK"
 +
    }
 +
    print format("+ %s%d/%d", a3, i, gSeq.count + gResno)
 +
    gA = genAtom(" N  ", a3, i, N0)
 +
    gA += genAtom(" CA ", a3, i, CA)
 +
    gA += genAtom(" C  ", a3, i, C)
 +
    gA += genAtom(" O  ", a3, i, O)
 +
    if ((aa != 'G') && (aa != 'X')) {
 +
        gA += genAtom(" CB ", a3, i, CB)
 +
    }
  
function rotateNward (a1, a2, angle) {
+
    # Now add AA specific atom records
select atomno<a1
+
    switch (aa) {
fix atomno>=a2
+
    case 'A' :
rotateselected {atomno=a1} {atomno=a2} @angle
+
        break;
#print format("a1=%d a2=%d angle=%d", a1, a2, angle) #DEBUG
+
    case 'B' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" CD ", a3, i, D1)
 +
        gA += genAtom(" OE1", a3, i, E2)    # GLN with Es switched
 +
        gA += genAtom(" NE2", a3, i, E1)
 +
        break;
 +
    case 'C' :
 +
        gA += genAtom(" SG ", a3, i, G2)
 +
        break;
 +
    case 'D' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" OD1", a3, i, D1)
 +
        gA += genAtom(" OD2", a3, i, D2)
 +
        break;
 +
    case 'E' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" CD ", a3, i, D1)
 +
        gA += genAtom(" OE1", a3, i, E1)
 +
        gA += genAtom(" OE2", a3, i, E2)
 +
        break;
 +
    case 'F' :
 +
        gA += genAtom(" CG ", a3, i, Gfy)
 +
        gA += genAtom(" CD1", a3, i, D1fy)
 +
        gA += genAtom(" CD2", a3, i, D2fy)
 +
        gA += genAtom(" CE1", a3, i, E1fy)
 +
        gA += genAtom(" CE2", a3, i, E2fy)
 +
        gA += genAtom(" CZ ", a3, i, Zfy)
 +
        break;
 +
    case 'G' :
 +
        break;
 +
    case 'H' :
 +
        gA += genAtom(" CG ", a3, i, Ghw)
 +
        gA += genAtom(" ND1", a3, i, D1hw)
 +
        gA += genAtom(" CD2", a3, i, D2hw)
 +
        gA += genAtom(" CE1", a3, i, E2hw)
 +
        gA += genAtom(" NE2", a3, i, E1hw)
 +
        break;
 +
    case 'I' :
 +
        gA += genAtom(" CG1", a3, i, G1)
 +
        gA += genAtom(" CG2", a3, i, G2)
 +
        gA += genAtom(" CD1", a3, i, D1)
 +
        break;
 +
    case 'K' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" CD ", a3, i, D1)
 +
        gA += genAtom(" CE ", a3, i, E1)
 +
        gA += genAtom(" NZ ", a3, i, Z)
 +
        break;
 +
    case 'L' :
 +
        gA += genAtom(" CG1", a3, i, G1)
 +
        gA += genAtom(" CD1", a3, i, D1)
 +
        gA += genAtom(" CD2", a3, i, D2)
 +
        break;
 +
    case 'M' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" SD ", a3, i, D1)
 +
        gA += genAtom(" CE ", a3, i, E1)
 +
        break;
 +
    case 'N' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" OD1", a3, i, D1)
 +
        gA += genAtom(" ND2", a3, i, D2)
 +
        break;
 +
    case 'P' :
 +
        gA += genAtom(" CG ", a3, i, GP)
 +
        gA += genAtom(" CD ", a3, i, DP)
 +
        break;
 +
    case 'Q' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" CD ", a3, i, D1)
 +
        gA += genAtom(" OE1", a3, i, E1)
 +
        gA += genAtom(" NE2", a3, i, E2)
 +
        break;
 +
    case 'R' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" CD ", a3, i, D1)
 +
        gA += genAtom(" NE ", a3, i, E1)
 +
        gA += genAtom(" CZ ", a3, i, Z)
 +
        gA += genAtom(" NH1", a3, i, H1)
 +
        gA += genAtom(" NH2", a3, i, H2)
 +
        break;
 +
    case 'S' :
 +
        gA += genAtom(" OG ", a3, i, G1)
 +
        break;
 +
    case 'T' :
 +
        gA += genAtom(" OG1", a3, i, G1)
 +
        gA += genAtom(" CG2", a3, i, G2)
 +
        break;
 +
    case 'U' :
 +
        gA += genAtom("SeG ", a3, i, G1)
 +
        break;
 +
    case 'V' :
 +
        gA += genAtom(" CG1", a3, i, G1)
 +
        gA += genAtom(" CG2", a3, i, G2)
 +
        break;
 +
    case 'W' :
 +
        gA += genAtom(" CG ", a3, i, Ghw)
 +
        gA += genAtom(" CD1", a3, i, D1hw)
 +
        gA += genAtom(" CD2", a3, i, D2hw)
 +
        gA += genAtom(" CE2", a3, i, E2hw)
 +
        gA += genAtom(" NE1", a3, i, E1hw)
 +
        gA += genAtom(" CE3", a3, i, E3hw)
 +
        gA += genAtom(" CZ2", a3, i, Z2hw)
 +
        gA += genAtom(" CZ3", a3, i, Z3hw)
 +
        gA += genAtom(" CH2", a3, i, H2hw)
 +
        break;
 +
    case 'X' :
 +
        gA += genAtom(" Xx ", a3, i, CB)
 +
        break;
 +
    case 'Y' :
 +
        gA += genAtom(" CG ", a3, i, Gfy)
 +
        gA += genAtom(" CD1", a3, i, D1fy)
 +
        gA += genAtom(" CD2", a3, i, D2fy)
 +
        gA += genAtom(" CE1", a3, i, E1fy)
 +
        gA += genAtom(" CE2", a3, i, E2fy)
 +
        gA += genAtom(" CZ ", a3, i, Zfy)
 +
        gA += genAtom(" OH ", a3, i, Hfy)
 +
        break;
 +
    case 'Z' :
 +
        gA += genAtom(" CG ", a3, i, G1)
 +
        gA += genAtom(" OD1", a3, i, D2)   # ASN with Ds switched
 +
        gA += genAtom(" ND2", a3, i, D1)
 +
        break;
 +
    default :
 +
        break;
 +
    }
  
 +
    return gA
 
};
 
};
  
# GenAlph
+
# Generate an alpha helix
function genAlpha(seq, PHI, PSI, OMEGA, PEPTIDE_ANGLE, PRO_BUMP) {
+
function genAlpha(gSeq) {
 
 
# For each aa
 
set appendnew false
 
n = 1
 
pn = 0
 
pc= 0 # previous C
 
ca1 = 0; ca2 = 0; ca3 = 0
 
for (var i = 1; i <= seq.count; i++) {
 
 
 
# Step previous N
 
pn = n
 
  
# Move polypeptide C to bond distance from new AA N
+
    gN = all.count    + 1 # global new N atom index
select all
 
fix none
 
translateselected {2.069, -2.122, -0.554 } #N1
 
  
# Gen AA
+
    # Find last linkable N if any
a = "data \"append aa\"\n"
+
    gResno = 0    # global pre-existing AA count
a += genAA(i, seq[i], n);
+
    var pn = 1    # previous gN
a += "end \"append aa\""
+
    for (var i = all.count-1; i  > 0; i--) {
script inline @{a}
+
   
 +
        # If found
 +
        if (distance({atomno=i}, {0,0,0})  < 0.1) {
 +
            pn = i
 +
           
 +
            # If new chain, separate from existing chain
 +
            if ({atomno=i}.chain != gCHAIN) {
 +
                select all
 +
                translateselected {2.069, -2.122, -0.554 } #N1
 +
            }
 +
            else {
 +
                gResno = {atomno=i}.resno
 +
            }
 +
            break;
 +
        }
 +
    }
  
# If PRO
+
    # For each aa
# Adjust i-3psi as rigid PRO D4 bumps Oi-4 as i-3psi
+
    set appendnew false
# is the most torqued psi and psis are the easiest to twist
+
    var nn = gN    # new N
if (seq[i] == 'P') {
+
    for (var i = 1; i <= gSeq.count; i++) {
rotateNward (ca3, ca3+1, PRO_BUMP)
 
}
 
  
# If not first AA
+
        # Move polypeptide C to bond distance from new AA N
if (pc > 0) {
+
        select all
 +
        fix none
 +
        translateselected {2.069, -2.122, -0.554 } #N1
  
# Gen axis on previous n perpendicular to the plane
+
        # Gen AA
# containing atoms pc, pn and pn+1
+
        gA = "data \"append aa\"\n"    # global PDB atom record
v1={atomno=pc}.xyz - {atomno=pn}.xyz
+
        gA += genAA(i + gResno, gSeq[i]);    # gN is updated in subroutine
v2={atomno=@{pn+1}}.xyz - {atomno=pn}.xyz
+
        gA += "end \"append aa\""
axis = cross(v1, v2)
+
        script inline @{gA}
 
# Center on atom previous n
 
axis += {atomno=pn}.xyz
 
  
# Rotate the polypeptide N on the new AA C to the
+
        # If PRO
# desired angle (nominally 110)
+
        var pb = 0
select atomno<pn
+
        if ((gSeq.count - i) >= 2) {
fix atomno>=pn
+
            if (gSeq[i + 2] == 'P') {
rotateselected @axis {atomno=pn} @{PEPTIDE_ANGLE - 77}
+
                pb = gPRO_BUMP
 +
            }
 +
        }
  
# Make omega dihedral OMEGA (nominally 180)
+
        # If not first AA
rotateselected {atomno=pc} {atomno=pn} @{OMEGA - 123}
+
        if (nn > 1) {
  
# Make the phi PHI (nominally -50)
+
            # Gen axis on new N perpendicular to the plane
rotateselected {atomno=pn} {atomno=@{pn+1}} @{PHI - 30}
+
            # containing atoms nn, pn+2 and nn+1
 +
            var v1={atomno = @{pn+2}}.xyz - {atomno = nn}.xyz
 +
            var v2={atomno = @{nn+1}}.xyz - {atomno = nn}.xyz
 +
            var axis = cross(v1, v2)
 +
           
 +
            # Center on atom previous C
 +
            axis += {atomno = @{pn+2}}.xyz
  
# Make the psi PSI (nominally -60)
+
            # Rotate the polypeptide N on the new AA C to tetrahedral (nominally 110)
fix atomno>pca
+
            select atomno < nn
select atomno<pn and (atomno != @{pc+1})
+
            fix atomno >= nn
rotateselected {atomno=pc} {atomno=@{pc-1}} @{PSI + 60}
+
            rotateselected @axis {atomno = nn} @{gPEPTIDE_ANGLE - 69.3}
 +
           
 +
            # Make omega dihedral = gOMEGA (nominally 180)
 +
            rotateselected {atomno=@{pn+2}} {atomno=nn} @{gOMEGA - 147.4}
  
# If aromatic go trans on chi 1
+
            # Make the new phi dihedral = gPHI (nominally -57)
select atomno>@{pn+4} and atomno<n
+
            rotateselected {atomno = nn} {atomno = @{nn+1}} @{gPHI - 8.7}
if ((seq[i] == 'H') || (seq[i] == 'W') || (seq[i] == 'F') || (seq[i] == 'Y')) {
 
#rotateselected {atomno=@{pn+1}} {atomno=@{pn+4}} -60
 
}
 
  
# Save last three CAs for proline bumps
+
            # Make the old psi dihedral = gPSI (nominally -47)
ca3 = ca2; ca2 = ca1; ca1 = pn + 1
+
            select atomno < @{pn+2} && atomno != @{pn+3}
}
+
            rotateselected {atomno=@{pn+1}} {atomno=@{pn+2}} @{gPSI + 33.4 + pb}
+
        }
# Step previous C
+
       
pc =pn + 2
+
        # Step new and previous N
 +
        pn = nn
 +
        nn = gN
  
# Make the peptide bond
+
        # Make the peptide bond
connect
+
        connect
}
+
    }
+
   
# Clean up
+
    # Clean up
select all
+
    connect ([UNK].CA) ([UNK].Xx and within(group, _1))
fix none
+
    select all
 +
    fix none
 +
    print format("%d atoms generated", gN)
 
}
 
}
  
Line 395: Line 339:
  
 
# Get the sequence from the user
 
# Get the sequence from the user
seq = prompt("*** Any existing will be cleared ***\nEnter AA sequence (1 char coded)", "")%9999
+
gSeq = prompt("Enter AA sequence (1 char coded)", "")%9999%0
if (seq.count > 0) {
+
if (gSeq.count > 0) {
zap # disable to keep existing structures
+
    if (gSeq[2] == ':') {
print format ("seq=%s  phi=%d  psi=%d", seq, PHI, PSI)
+
        gCHAIN = gSeq[1]
print format ("phi=%d  psi=%d  peptide angle=%d  pro bump=%d", PEPTIDE_ANGLE, PRO_BUMP)
+
        gSeq[1] = ' '
genAlpha(seq, PHI,PSI, OMEGA, PEPTIDE_ANGLE, PRO_BUMP) # defined at top of scripts
+
        gSeq[2] = ' '
 +
        gSeq = gSeq%0
 +
    }
 +
    print format ("Sequence=%s  phi=%d  psi=%d", gSeq, gPHI, gPSI)
 +
    print format ("chain=%s peptide angle=%d  pro bump=%d", gCHAIN, gPEPTIDE_ANGLE, gPRO_BUMP)
 +
    genAlpha(gSeq)
 
}
 
}
 
</pre>
 
</pre>

Revision as of 12:39, 23 October 2013

I use Jmol to study protein folding. Here is a script I wrote that accepts an amino acid sequence (1 letter encoding: "AAC...FYW" for example) and generates an alpha helix using the Model Kit:

#   RIBOZOME - Jmol script by Ron Mignery with help from Dr. Angel Herráez
#   v1.2 beta    10/23/2013
#
#   New in v1.2 beta:
#       No longer zaps existing atoms
#       Now adds on to existing helix on subsequent runs
#       Fixes proline cross-link bug with XXXXPXP
#       Localizes variables and globalizes constants
#
#   RIBOZOME takes a string message encoding an amino acid (aa) sequence
#   and generates a corresponding alpha helix one aa at a time from the
#   N terminus to the C terminus rotating the emerging helix as it goes.
#
#   The message is a string entered by the user at a prompt.
#   It may be typed in or pasted in and be of any length
#   If the message is prepended with <C>: (where C is any single letter)
#   then the chain is so labeled and separated from existing chains 
#   if different from the first chain.
#
#   The IUPAC/IUBMB 1 letter code is used:
#   A=ALAnine B=GLutam?X* C=CYSteine D=ASPartate E=GLUtamate
#   F=PHEnylalanine G=GLYcine H=HIStidine I=IsoLEucine K=LYSine
#   L=LEUcine M=METhionine N=ASparagiNe O=PYrroLysine*** P=PROline
#   Q=GLutamiNe R=ARGinine S=SERine T=THReonine U=SElenoCysteine
#   V=VALine W=TRyPtophan X=UNKnown Y=TYRosine Z=ASpar?X**
#     *Either GLU or GLN: drawn as GLN with chi3 flipped 
#    **Either ASP or ASN: drawn as ASN with chi3 flipped
#   ***Not supported: drawn as ALA

# The following constant values determine the pitch of the alpha helix
gPHI = -57    # Dihedral angle of N-CA bond (nominally -57)
gPSI = -47    # Dihedral angle of CA-C bond (nominally -47)
gOMEGA = 180    # Dihedral angle of the peptide bond (nominally 180)
gPEPTIDE_ANGLE = 110    # C-N-CA angle (nominally 110)
gPRO_BUMP = -10 # Psi angle change increment to N-3psi when N is Pro
gCHAIN = 'A'    # The chain id

# Lookup 3 letter code from 1 letter code
g3from1 = {"A":"ALA", "B":"GLX","C":"CYS", "D":"ASP","E":"GLU", "F":"PHE",
    "G":"GLY", "H":"HIS","I":"ILE", "K":"LYS","L":"LEU", "M":"MET",
    "N":"GLN", "O":"PYL","P":"PRO", "Q":"GLN","R":"ARG", "S":"SER",
    "T":"THR", "U":"SEC","V":"VAL", "W":"TRP","X":"UNK", "Y":"TYR", "Z":"ASX"}

# Generate PDB atom record
function genAtom(e, aa, i, xyz) {
    gA =  format("ATOM  %5d %4s %3s ", gN, e, aa )          
    gA +=  format("%s%4d    %8.3f", gCHAIN, i, xyz[1] )          
    gA +=  format("%8.3f%8.3f\n", xyz[2], xyz[3] )
    gn++
    return gA
};

# Generate a PDB amino acid record set
function genAA(i, aa) {    # Writes globals gA and gN

    # From constructed AAs
    var N0 = [0.0, 0.0, 0.0]
    var CA = [ 0.200, 1.174, 0.911 ]
    var C  = [ -1.110, 1.668, 1.425 ]
    var O  = [ -1.320, 1.693, 2.62 ]
    var CB = [ 1.062, 2.1950, 0.230 ]
    
    var G1 = [ 2.359, 1.607, -0.344]
    var G2 = [ 1.363, 3.336, 1.157 ]
    var D1 = [ 3.222, 2.656, -1.048 ]
    var D2 = [ 3.143, 0.904, 0.725 ]
    var E1 = [ 3.645, 3.749, -0.167 ]
    var E2 = [ 2.491, 3.234, -2.249 ]
    var Z  = [ 4.470, 4.717, -0.885 ]
    var H1 = [ 4.450, 6.006, -0.220 ]
    var H2 = [5.833, 4.228, -0.984 ]
    
    var Gp = [ 2.008, 1.24, -0.46 ]
    var Dp = [1.022, 0.213, -1.031 ]
    
    var Gfy  = [ 2.368, 1.471, -0.0152 ]
    var D1fy = [ 3.346, 1.524, 0.921 ]
    var D2fy = [ 2.493, 0.516, -1.151 ]
    var E1fy = [ 4.513, 0.615, 0.8244 ]
    var E2fy = [ 3.528, -0.336, -1.206 ]
    var Zfy  = [ 4.588, -0.285, -0.168 ]
    var Hfy = [ 5.738, -1.245, -0.233 ]
    
    var Ghw  = [ 2.406, 1.626, -0.134 ]
    var D1hw = [3.498, 1.936, 0.675]
    var D2hw = [ 2.713, 0.901, -1.211 ]
    var E1hw = [ 4.160, 0.518, -1.178 ]
    var E2hw = [ 4.622, 1.160, 0.0816 ]
    var E3hw = [ 3.789, 2.523, 1.944 ]
    var Z2hw = [ 5.973, 1.177, 0.689 ]
    var Z3hw = [ 5.014, 2.550, 2.503 ]
    var H2hw = [ 6.153, 1.846, 1.844 ]
    
    #N1 = [ 2.069, -2.122, -0.554]
    
    # Build PDB atom records common to all AAs
    a3 = g3from1[aa]
    if (a3 = "") {
        a3 = "UNK"
    }
    print format("+ %s%d/%d", a3, i, gSeq.count + gResno)
    gA = genAtom(" N  ", a3, i, N0)
    gA += genAtom(" CA ", a3, i, CA)
    gA += genAtom(" C  ", a3, i, C)
    gA += genAtom(" O  ", a3, i, O)
    if ((aa != 'G') && (aa != 'X')) {
        gA += genAtom(" CB ", a3, i, CB)
    }

    # Now add AA specific atom records
    switch (aa) {
    case 'A' :
        break;
    case 'B' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" CD ", a3, i, D1)
        gA += genAtom(" OE1", a3, i, E2)    # GLN with Es switched
        gA += genAtom(" NE2", a3, i, E1)
        break;
    case 'C' :
        gA += genAtom(" SG ", a3, i, G2)
        break;
    case 'D' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" OD1", a3, i, D1)
        gA += genAtom(" OD2", a3, i, D2)
        break;
    case 'E' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" CD ", a3, i, D1)
        gA += genAtom(" OE1", a3, i, E1)
        gA += genAtom(" OE2", a3, i, E2)
        break;
    case 'F' :
        gA += genAtom(" CG ", a3, i, Gfy)
        gA += genAtom(" CD1", a3, i, D1fy)
        gA += genAtom(" CD2", a3, i, D2fy)
        gA += genAtom(" CE1", a3, i, E1fy)
        gA += genAtom(" CE2", a3, i, E2fy)
        gA += genAtom(" CZ ", a3, i, Zfy)
        break;
    case 'G' :
        break;
    case 'H' :
        gA += genAtom(" CG ", a3, i, Ghw)
        gA += genAtom(" ND1", a3, i, D1hw)
        gA += genAtom(" CD2", a3, i, D2hw)
        gA += genAtom(" CE1", a3, i, E2hw)
        gA += genAtom(" NE2", a3, i, E1hw)
        break;
    case 'I' :
        gA += genAtom(" CG1", a3, i, G1)
        gA += genAtom(" CG2", a3, i, G2)
        gA += genAtom(" CD1", a3, i, D1)
        break;
    case 'K' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" CD ", a3, i, D1)
        gA += genAtom(" CE ", a3, i, E1)
        gA += genAtom(" NZ ", a3, i, Z)
        break;
    case 'L' :
        gA += genAtom(" CG1", a3, i, G1)
        gA += genAtom(" CD1", a3, i, D1)
        gA += genAtom(" CD2", a3, i, D2)
        break;
    case 'M' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" SD ", a3, i, D1)
        gA += genAtom(" CE ", a3, i, E1)
        break;
    case 'N' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" OD1", a3, i, D1)
        gA += genAtom(" ND2", a3, i, D2)
        break;
    case 'P' :
        gA += genAtom(" CG ", a3, i, GP)
        gA += genAtom(" CD ", a3, i, DP)
        break;
    case 'Q' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" CD ", a3, i, D1)
        gA += genAtom(" OE1", a3, i, E1)
        gA += genAtom(" NE2", a3, i, E2)
        break;
    case 'R' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" CD ", a3, i, D1)
        gA += genAtom(" NE ", a3, i, E1)
        gA += genAtom(" CZ ", a3, i, Z)
        gA += genAtom(" NH1", a3, i, H1)
        gA += genAtom(" NH2", a3, i, H2)
        break;
    case 'S' :
        gA += genAtom(" OG ", a3, i, G1)
        break;
    case 'T' :
        gA += genAtom(" OG1", a3, i, G1)
        gA += genAtom(" CG2", a3, i, G2)
        break;
    case 'U' :
        gA += genAtom("SeG ", a3, i, G1)
        break;
    case 'V' :
        gA += genAtom(" CG1", a3, i, G1)
        gA += genAtom(" CG2", a3, i, G2)
        break;
    case 'W' :
        gA += genAtom(" CG ", a3, i, Ghw)
        gA += genAtom(" CD1", a3, i, D1hw)
        gA += genAtom(" CD2", a3, i, D2hw)
        gA += genAtom(" CE2", a3, i, E2hw)
        gA += genAtom(" NE1", a3, i, E1hw)
        gA += genAtom(" CE3", a3, i, E3hw)
        gA += genAtom(" CZ2", a3, i, Z2hw)
        gA += genAtom(" CZ3", a3, i, Z3hw)
        gA += genAtom(" CH2", a3, i, H2hw)
        break;
    case 'X' :
        gA += genAtom(" Xx ", a3, i, CB)
        break;
    case 'Y' :
        gA += genAtom(" CG ", a3, i, Gfy)
        gA += genAtom(" CD1", a3, i, D1fy)
        gA += genAtom(" CD2", a3, i, D2fy)
        gA += genAtom(" CE1", a3, i, E1fy)
        gA += genAtom(" CE2", a3, i, E2fy)
        gA += genAtom(" CZ ", a3, i, Zfy)
        gA += genAtom(" OH ", a3, i, Hfy)
        break;
    case 'Z' :
        gA += genAtom(" CG ", a3, i, G1)
        gA += genAtom(" OD1", a3, i, D2)    # ASN with Ds switched
        gA += genAtom(" ND2", a3, i, D1)
        break;
    default :
        break;
    }

    return gA
};

# Generate an alpha helix
function genAlpha(gSeq) {

    gN = all.count    + 1 # global new N atom index

    # Find last linkable N if any
    gResno = 0    # global pre-existing AA count
    var pn = 1    # previous gN
    for (var i = all.count-1; i  > 0; i--) {
    
        # If found
        if (distance({atomno=i}, {0,0,0})  < 0.1) {
            pn = i
            
            # If new chain, separate from existing chain
            if ({atomno=i}.chain != gCHAIN) {
                select all
                translateselected {2.069, -2.122, -0.554 } #N1
            }
            else {
                gResno = {atomno=i}.resno
            }
            break;
        }
    }

    # For each aa
    set appendnew false
    var nn = gN    # new N
    for (var i = 1; i <= gSeq.count; i++) {

        # Move polypeptide C to bond distance from new AA N
        select all
        fix none
        translateselected {2.069, -2.122, -0.554 } #N1

        # Gen AA
        gA = "data \"append aa\"\n"    # global PDB atom record
        gA += genAA(i + gResno, gSeq[i]);    # gN is updated in subroutine
        gA += "end \"append aa\""
        script inline @{gA}

        # If PRO
        var pb = 0
        if ((gSeq.count - i) >= 2) {
            if (gSeq[i + 2] == 'P') {
                pb = gPRO_BUMP
            }
        }	

        # If not first AA
        if (nn > 1) {

            # Gen axis on new N perpendicular to the plane
            # containing atoms nn, pn+2 and nn+1
            var v1={atomno = @{pn+2}}.xyz - {atomno = nn}.xyz
            var v2={atomno = @{nn+1}}.xyz - {atomno = nn}.xyz
            var axis = cross(v1, v2)
            
            # Center on atom previous C
            axis += {atomno = @{pn+2}}.xyz

            # Rotate the polypeptide N on the new AA C to tetrahedral (nominally 110)
            select atomno < nn
            fix atomno >= nn
            rotateselected @axis {atomno = nn} @{gPEPTIDE_ANGLE - 69.3}
            
            # Make omega dihedral = gOMEGA (nominally 180)
            rotateselected {atomno=@{pn+2}} {atomno=nn} @{gOMEGA - 147.4}

            # Make the new phi dihedral = gPHI (nominally -57)
            rotateselected {atomno = nn} {atomno = @{nn+1}} @{gPHI - 8.7}

            # Make the old psi dihedral = gPSI (nominally -47)
            select atomno < @{pn+2} && atomno != @{pn+3}
            rotateselected {atomno=@{pn+1}} {atomno=@{pn+2}} @{gPSI + 33.4 + pb}
        }
        
        # Step new and previous N
        pn = nn
        nn = gN

        # Make the peptide bond
        connect
    }
    
    # Clean up
    connect ([UNK].CA) ([UNK].Xx and within(group, _1))
    select all
    fix none
    print format("%d atoms generated", gN)
}

echo Generating Alpha Helix

# Get the sequence from the user
gSeq = prompt("Enter AA sequence (1 char coded)", "")%9999%0
if (gSeq.count > 0) {
    if (gSeq[2] == ':') {
        gCHAIN = gSeq[1]
        gSeq[1] = ' '
        gSeq[2] = ' '
        gSeq = gSeq%0
    }
    print format ("Sequence=%s  phi=%d  psi=%d", gSeq, gPHI, gPSI)
    print format ("chain=%s peptide angle=%d  pro bump=%d", gCHAIN, gPEPTIDE_ANGLE, gPRO_BUMP)
    genAlpha(gSeq)
}

Contributors

Remig