#!/usr/local/bin/gawk -f # name : predict.awk # author : Eiso AB ; eisoab@gmail.com #@ purpose : make list of predicted peaks from shift-lists # input : assignments ( shift.list ) # correlations : [x]cor-file: spec.cor spec.xcor # # example: # predict.awk out=sparky /software/src/PREDICT/COR/hnca.cor prot.shifts # predict.awk out=xeasy spec=XH.cor $name.shifts # # # for examples of corerlation files see: # /software/src/PREDICT/COR/*.cor # or: ./scripts/predict/COR/*.cor (from scripts.tgz) # # # disregard comments # # recognized input: # # # correlate nucl1 nucl2 nucl3 ref1 ref2 ref3 [ filter1 [filter2 [..]]] # correlation type # correlate exit # skip rest of file # dom 1 3 2 # after this line, # switch spectral domains ; also with cmdline option: -v dom=1,3,2 # order: 1st column,3rd column, 2nd column # # # ... comments # all chars after # are skipped # # nodiag 1,2 # sets var nodiag=1 # remove diagonal peaks from plain 1,2 [ default] # also achieved with option: -v nodiag=1 # # keepdiag # keep diagonal peaks # sets var nodiag=0 # # CORRELATION FORMAT: # # correlate nucl1 nucl2 nucl3 ref1 ref2 ref3 [ filter1 [filter2 [..]]] # # the nucl. and filter keywords should match atom-names # and are interpreted as: # 1. regular expresions if given as /string/ # 2. exact matches if given as string # NB strings should not contain spaces # # examples # expression matches # ha -> ha # /ha/ -> ha1, ha2 # /as./ -> asp, asn ; # /gl[un]/ -> glu, gln # /[qh]b[123]?/ -> hb, hb1 hb2 hb3 qb3 # /[qh]b/ -> hb, hb1 hb2 hb3 qb3 # /^hn?$/ -> h , hn # # the ref values are relative sequence numbers # the filters refer to relative sequence number 0 # # that means that for this correlation the h and n atoms should # be from an asn or gln: # # correlate h c n 0 -1 0 /asn|gln/ # # and in the next the c comes from and asn or gln. # # correlate h c n 1 0 1 /asn|gln/ # # # SHIFT LIST FORMAT: [free-format, input is converted to lower-case] # resnum resnam atnam shift shift-error # # example: # 20 ala ha 4.696 0.014 # # xeasy .prot files are also accepted # # hn is converted to h unless the commandline option # -v keephn=1 is used [???check] # # # DEBUGGING INFO # cmdline options # -v debug=1 # # -v out="" show peak types [ pktyp specname peaknr nucl1 nucl2 .... int ] # # -v out=ppm give peak positions in ppm format [will print errors too] # # -v out=asg give peak positions + assignments # # -v out=noe give peak positions + assignments # # -v out=nv equal to out=xpk # -v out=xpk give nmrview (xpk) file with predicted peaks # if this option is given a *.xpk file is expected as additional input file # for the header # # will be set automatically if the file # of the last argument is '.xpk' # # -v out=xeasy equal to out=peaks # -v out=peaks give xeasy (.peaks) file with predicted peaks # if this option is given a *.peaks file belonging to the spectrum # is expected as additional input file for the header # # will be set automatically if the file # of the last argument is '.peaks' # # # -v cyaf cyanaformat, e.g. # #CYANAFORMAT HNh # # [-v] errout=1 print ppm errors as in 45.343~1.230 # # [-v] dom=1,2,3 set domains # # [-v] spec=name default:XH # read from cor-file /software/src/PREDICT/COR/name.cor # [ spectrum name to insert in peaklist ] # XH gives 2d hsqc (C and/or N) # # -v nodiag=1 [ no diagonals ] # -v keepdiag=1 [ keep diagonals ] # -v nocharge=1 [ remove charge of residues ] # # -v keepunassigned=0 [ if 1, keep unassigned shifts (ppm>900) ] # # or one of the following strings in the input: # DEBUG # sets debug=1 # NODEBUG # sets debug=0 # # # a .peaks can be given as input file in order to get the peaks headers # a .xpk can be given as input file in order to get the peaks headers # # gawk '$4>0.1 && $11>0.1 && $18 > 1{next}1' hbhacbcaconh_pred_stat.xpk > qq.xpk BEGIN { IGNORECASE=1 if (!stderr) stderr="/dev/stderr" progname="predict.awk" if ( ARGV[1]~/^(h|help)$/ || help!="" ) { print ARGV[0] system("gawk '/^#/{print}/^BEGIN/{exit}' `which "progname"`") help=1 exit } keepnh=1 if (keepdiag) nodiag=0 if (nodiag!="") { if ( 1 stderr nodiag="1,2" split(nodiag,deldiag,/,/) } } if (out!="") { if (out=="xeasy") out="peaks" if (out=="nv") out="xpk" } else { if (ARGC-1 in ARGV) { lastarg=ARGV[ARGC-1] if (lastarg~/[.]xpk$/) { out="xpk" } else if (lastarg~/[.]peaks/) { out="peaks" } print "## setting output format to ",out > stderr } } if (out=="") out="peaks" if (cyaf=="") cyaf="NHc" } FNR==1 { if (!dom) dom ="1,2,3" if (spec=="") spec="XH" for (f in ARGV) { if (toupper(ARGV[f])~/[.]COR$/) corfile=ARGV[f] } if (spec && corfile=="") { home=ENVIRON["HOME"] corfile=home"/software/PREDICT/COR/"spec".cor" sub(".cor.cor",".cor",corfile) print ".. corfile:",corfile>stderr ARGV[ARGC++]=corfile } } FILENAME ~ /[.]seq$/ { seq = FILENAME if (NF==2) resn=$2 ; else resn++ residue[resn]=toupper($1) next } FILENAME ~ /[.]xpk$/ { out=="xpk" #label dataset sw sf #D0 13C 15N #if2c1_hnca.nv #{5000.00 } {5000.00 } {2000.00 } #{700.1330 } {176.0000 } {70.9520 } # D0.L D0.P D0.W D0.B D0.E D0.J D0.U 13C.L 13C.P 13C.W 13C.B 13C.E 13C.J 13C.U 15N.L 15N.P 15N.W 15N.B 15N.E 15N.J 15N.U vol int stat comment flag0 # 5 {5.HN} 8.080 0.000 0.000 ++ 0.000 {?} {5.N} 122.860 0.000 0.000 ++ 0.000 {?} {5.CA} 53.050 0.000 0.000 ++ 0.000 {?} 1 0.000 0 {?} 0 if (FNR>6) next if (xpkheader!="") xpkheader=xpkheader "\n" $0 else xpkheader= $0 if (FNR==1) { print "## reading header from xpk-file: ",FILENAME > stderr if (!/label +dataset +sw +sf/) { print "error in xpk-header:",$0;exit } } if (FNR==2) { xpkdim=split($0,nvnuc) } if (FNR==6) { xpklabels=$0 } print next } FILENAME ~ /[.]peaks$/ && !peaksheaderfinished { out=="peaks" if ($1 ~ /^#/ ) { peaksheader=peaksheader "\n" $0 } else { peaksheaderfinished = 1 } } /^correlate +exit/ { skipfile=FILENAME print "## skipping rest of :",FILENAME > stderr } skipfile==FILENAME { next } /^#/ { next } / #/ && FILENAME!~/[.]prot$/ { #print "<<",$0 $0=substr($0,1,index($0," #")-1) #print " >",$0 } /^DEBUG/ { debug=1;next } /^NODEBUG/ { debug=0;next } # read correlation types # correlate h c n 0 -1 0 # correlate exit /dom/ { print "## Setting spectral domains to ",$0 > stderr dom = $2","$3","$4 } /nodiag/ { nodiag=1 if (NF==1) { nodiag="1,2" } else if (NF==2) { nodiag=$2 } else if ( NF==3 ) nodiag=$2","$3 split(nodiag,deldiag,/,/) } /keepdiag/ { nodiag=0 } /^correlate / { ncor++ specfile=FILENAME if (ncor==1) print "## Reading correlation type from ",FILENAME > stderr corsrc[ncor]=$0 if (dom!="") cdom[ncor]=dom # nu1[ncor]=$(ddom[1]+1);nu2[ncor]=$(ddom[2]+1);nu3[ncor]=$(ddom[3]+1) # pos1[ncor]=$(ddom[1]+4);pos2[ncor]=$(ddom[2]+4);pos3[ncor]=$(ddom[3]+4) nu1[ncor] =$2; nu2[ncor]=$3; nu3[ncor]=$4 pos1[ncor]=$5;pos2[ncor]=$6;pos3[ncor]=$7 numcor=ncor numfilt[ncor]=NF-7 if (NF>7) { for (f=8;f<=NF;f++) { filter[ncor,f-7]=$f filter[ncor]=filter[ncor]" "$f } } next } # select residue format for reading chemical shifts (shift header?) # grepping line with eg:10 ala hh21 # FILENAME ~ /[.]prot$/ { print "##",$0 } FILENAME ~ /[.]prot$/ { resn=$5 resname="" if (resn in residue) { resname=residue[resn] } else if ($6=="#" && $7~/^[a-z][a-z][a-z][-+]?$/) { resname=$7 } else { print "ERROR: no residue name found for residue:'",resname,"' ["$0"]" > stderr print " give sequence file or shiftlist with residue names" > stderr } CYATNR[resn" "resname" "$4]=$1 $0=resn " " resname " " $4 " " $2 " " $3 # print "##",$0 } FILENAME ~ /[.]prot$/ && $6!="#" && $7~/^[a-z][a-z][a-z][-+]?$/ { CYATNR[$5" "$7" "$4]=$1 #if ($5==2) print $0 > stderr $0=$5 " " $7 " " $4 " " $2 " " $3 #if ($1==2) print "|",$0 > stderr # print "##",$0 } { if (verbose>1) print "---",$0 >stderr } /^ *[0-9]+ +[a-z]+[-+]? +[a-z0-9#]+ / { shiftsread++ if ($3~/h[a-z1-3]+#$/ && !keepwildcard) { $3="Q" substr($3,2,length($3)-2) } if ($4 > 900 && keepunassigned+0==0 ) next match($0,/^ *[0-9]+ +[a-z]+[-+]? +[a-z0-9#]+ /) if (verbose>1) print ">>>shift ",substr($0,RSTART,RLENGTH) >stderr #if (/^ *2 /) print "###"$0> stderr if (!shiftfile++) print "## reading shiftfile",FILENAME > stderr if (maxres<$1) maxres=$1 if (minres>$1) minres=$1 $1=$1+0 # make numerical 01 --> 1 if (!keepnh && tolower($3)=="hn") $3=substr($3,1,1) tid=$1" "$2" "$3; resnum[tid]=$1 resnam[tid]=$2 resnam[$1]=$2 resat[tid]=$3 res[tid]=$1" "$2 shift[tid]=$4 shifterr[tid]=$5 #if (tid~/^ *2 /) print "tid:"tid,resnam[tid],resnam[$1],$1 > stderr if (atomlist[$1]) { if (!(match(atomlist[$1]," "$3" "))) atomlist[$1]=atomlist[$1]" "$3" " } else atomlist[$1]=" "$3" " } END { if (help) exit if (dom) print "dom",dom > stderr if (!shiftsread) { print "## give shifts file! aborting";exit } if (debug==1) printf "\n## debuglevel %s",debug > stderr # if var spec is not set, use name of .cor-file if (!spec && specfile ) { spec=specfile sub(/[.]x?cor/,"",spec) gsub(/.*[/]/,"",spec) } print "\n## processing\n"> stderr printf "## number of correlation types %i\n",numcor > stderr printf "## residues between %i and %i\n",minres,maxres > stderr tkn="";tk1="nucl";tk3="residue";tk2="height" header=sprintf("%-8s %11s1 %11s2 %11s3 %8s c %11s1 %11s2 %11s3", " #pk",tk1,tk1,tk1,tk2,tk3,tk3,tk3) print "## output format:",out > stderr if ( out=="sparky" ) { print "\n Assignment w1 w2 w3\n" } if ( out=="peaks" ) if (peaksheader!="" ) { print peaksheader } else { #print "peakheader" ttdom=dom;gsub(/,/," ",ttdom) print "# Number of dimensions "(xdimens=split(ttdom,tttddom)) print "#CYANAFORMAT " substr(cyaf,1,xdimens) } for ( cor=1;cor<=numcor;cor++) { printf "\b\b\b\b%4i",cor > "/dev/stderr" if (cor % 10 == 0 ) printf "\b\b\b\b%4i",cor > stderr nu[1]=nu1[cor];nu[2]=nu2[cor];nu[3]=nu3[cor] pos[1]=pos1[cor];pos[2]=pos2[cor];pos[3]=pos3[cor] if (out!="xpk" && out !="sparky") { printf "# dom %s \n",cdom[cor] printf "# %s %s %s %i %i %i", nu[1],nu[2],nu[3],pos[1],pos[2],pos[3] } for (f in ff) delete ff[f] #for (invf in iff) delete iff[invf] sf="" for (f=1;f<=numfilt[cor];f++) { ff[f]=filter[cor,f] #iff[filter[cor,f]]=f sf=sf" "ff[f] if (out!="xpk" && out !="sparky") printf " %s",ff[f] } if (out!="xpk") print "" if (out!="xpk" && out !="peaks" && out !="sparky") { print header printf "##filter %s \n",sf print "# dom ",cdom[ncor] } ndom=split(cdom[cor],ddom,",") if (debug>1) for ( d in ddom ) print d,ddom[d] #if (cdom[cor]!="1,2,3") # print "dom!",cdom[cor] for ( i=minres;i<=maxres;i++ ) { if (i % 10 == 0 || i==maxres) printf "%4i\b\b\b\b",i > stderr if (!( i in atomlist )) continue if (debug>2) print resnam[i],filter[cor],gematch(resnam[i],filter[cor]) >stderr dcon=0 if ( !gematch(resnam[i],filter[cor]) ) { if (debug>0) print "## skipping:",resnam[i]," by filter:",filter[cor] dcon=1 } else if (!gematch(substr(resnam[i],1,3),filter[cor])) { if (debug>0) print "## skipping:",substr(resnam[i],1,3)," by filter:",filter[cor] dcon=1 } if (dcon==1) continue nk=split(atomlist[i+pos[1]],list1) nl=split(atomlist[i+pos[2]],list2) nm=split(atomlist[i+pos[3]],list3) if (debug==1) { printf "# for residue %i: %s \n",i,corsrc[cor] printf "# searching %i x %i x %i space \n#",nk,nl,nm for (k=1;k<=nk;k++) {printf " %s ",list1[k]} printf "\n#" for (l=1;l<=nl;l++) {printf " %s ",list2[l]} printf "\n#" for (m=1;m<=nm;m++) {printf " %s ",list3[m]} printf "\n" } num1=i+pos[1] num2=i+pos[2] num3=i+pos[3] tnu[1]=nu[1];tnu[2]=nu[2];tnu[3]=nu[3] delete ttt for (k=1;k<=nk;k++) { ta[1]=list1[k] if (resnam[num1]!~/[A-Z]/) print "????",resnam[num1],num1 > stderr ttt[1]=num1" "resnam[num1]" "ta[1] if (ematch(ta[1],nu[1]) == 0) continue tnu[2]=nupp2(nu[2],ta) for (l=1;l<=nl;l++) { if (ematch(list2[l],tnu[2]) == 0) continue ttt[2]=num2" "resnam[num2]" "list2[l] ta[2]=list2[l] if (match(nu[3],"^%[0-9]+$")) { #?? count[i]++; ttt[3]=ttt[1] } if (debug>2) { for ( ita in ta ) print "#ta["ita"]="ta[ita] } tnu[3]=nupp2(nu[3],ta) ; # tnu[3]="/"tnu[3]"/" for (m=1;m<=nm;m++) { if (ematch(list3[m],tnu[3]) == 0) continue # remove diagonal # print nodiag,deldiag[1],ttt[deldiag[2]] if (nodiag!="" && ttt[deldiag[1]]==ttt[deldiag[2]] ) continue # print the predicted peak: ttt[3]=num3" "resnam[num3]" "list3[m] rn=ttt[ddom[1]]+0 count[rn]++ prpknr=rn+(count[rn]-1)*1000 dbkey=ttt[1] SUBSEP ttt[2] SUBSEP ttt[3] if (!( dbkey in corrdb )) { corrdb[dbkey]++ if (out=="ppm") { printf "pkppm %8i %8s %8s %8s 1.0 # pktyp %s %10i %12s . %12s .", prpknr, shift[ttt[ddom[1]]],shift[ttt[ddom[2]]],shift[ttt[ddom[3]]], spec,prpknr,ttt[ddom[1]],ttt[ddom[2]] if (ttt[ddom[3]]!="") printf " %12s . ",ttt[ddom[3]] printf " 1.0" } else if (out=="asg" || out=="noe") { printf out " %s %8i %12s %8s %12s %8s %12s %8s 1.0", spec,prpknr, ttt[ddom[1]],shift[ttt[ddom[1]]], ttt[ddom[2]],shift[ttt[ddom[2]]], ttt[ddom[3]],shift[ttt[ddom[3]]] } else if (out=="peaks") { #cyana peaks printf " %8i %8s%s %8s%s %8s%s 1 U %s %s a 0 %i %i %i # %s", prpknr, shift[ttt[ddom[1]]],(errout?"~"shifterr[ttt[ddom[1]]]:""), shift[ttt[ddom[2]]],(errout?"~"shifterr[ttt[ddom[2]]]:""), shift[ttt[ddom[3]]],(errout?"~"shifterr[ttt[ddom[3]]]:""), "1.0e+00","0.00e+00", CYATNR[ttt[ddom[1]]]+0, CYATNR[ttt[ddom[2]]]+0, CYATNR[ttt[ddom[3]]]+0,ttt[ddom[1]] " " ttt[ddom[2]] " " ttt[ddom[3]] } else if (out=="sparky") { #sparky peaklist [.list] spasg[1]=sparkyasg(ttt[ddom[1]]) spasg[2]=sparkyasg(ttt[ddom[2]]) spasg[3]=sparkyasg(ttt[ddom[3]]) spasgs=spasg[1] "-" spasg[2] ( spasg[3]=="" ? "" : "-" spasg[3]) printf "%17s %10s %10s %10s %s", spasgs, shift[ttt[ddom[1]]], shift[ttt[ddom[2]]], shift[ttt[ddom[3]]],"1.0" } else if (out=="xpk") { #nmrview peaks if (!printedxpkheader) { #print xpkheader #printedxpkheader++ } XPKSTR="" for (nvd=1;nvd<=xpkdim;nvd++) { aaa=ttt[ddom[nvd]]; sub(/ +([A-Z][A-Z][A-Z])[-+]? +/,".",aaa) if (aaa~/[-]/) print aaa > stderr aaa="{"aaa"}" ppm=shift[ttt[ddom[nvd]]] dppm=shifterr[ttt[ddom[nvd]]] xpkstr[nvd]=sprintf("%10s %.3f %5.3f %5.3f ? 0.000 {?}",aaa,ppm,dppm,dppm*2) XPKSTR=XPKSTR " " xpkstr[nvd] } # "vol int stat comment " XPKSTR = sprintf("%-4s %s %s",xpkpknr++,XPKSTR," 1.00000 0.00000 0 {?} 0 ") print XPKSTR | "sort -n" } else { printf "pktyp %s %10i %12s . %12s .", spec,prpknr, ttt[ddom[1]],ttt[ddom[2]] if (ttt[ddom[3]]!="") printf " %12s .",ttt[ddom[3]] printf " 1.0" } if (out!="xpk") printf "\n" if (out=="ppm" ) { printf "pkdppm %7i %12s %12s %12s\n", prpknr, shifterr[ttt[ddom[1]]],shifterr[ttt[ddom[2]]], shifterr[ttt[ddom[3]]] } } } } } } } print "" > "/dev/stderr" } # ematch function ematch(str,reg, retval) { if ( substr(reg,1,1)=="/" && substr(reg,length(reg),1)=="/" ) retval = match(str,substr(reg,2,length(reg)-2)) else retval = (toupper(str)==toupper(reg)) if (debug>2 && retval) print "ematch",toupper(str),toupper(reg),resnam[num1],retval > stderr return retval } # nematch function nematch(str,reg) { if (debug>2) print "nematch",str,reg > stderr if (substr(reg,1,1)=="-") { reg=substr(reg,2) if ( substr(reg,1,1)=="/" && substr(reg,length(reg),1)=="/" ) return !match(str,substr(reg,2,length(reg)-2)) return !(toupper(str)==toupper(reg)) } } # gematch function gematch(str,filt) { # ,f,filters) split(filt,filters," ") for ( f in filters ) { reg=filters[f] if (substr(reg,1,1)=="-") if (ematch(str,substr(reg,2))) return 0 } nf=0 for ( f in filters ) { reg=filters[f] if (substr(reg,1,1)!="-") { nf++ if (ematch(str,reg)) return 1 } } return !(nf) } # nupp (str1) function nupp(rexp,ar) { # %1: take nucleus at pos 1 if (match(rexp,"%[0-9]+")) { if (debug>=1) printf "# nucleus preprocessor[l=%i] : %s",l,rexp sub("%[0-9]+",ar[substr(rexp,RSTART+1,RLENGTH-1)],rexp) if (debug>=1) printf " --> %s ",rexp } if (debug>=1 && !match(rexp,"@")) printf "\n" if (!match(rexp,"@")) return rexp # c@hd1 / c@%1 replace first char of hd1 with 'c' lhs=substr(rexp,1,RSTART-1) rhs=substr(rexp,RSTART+1) if (debug>=1) printf " --> %s",lhs # substitute first alfanumeric in rhs for lhs # special case: hn -> h [ prevent rhs->nn ] if (lhs~/^n$/ && rhs~/^hn$/) rhs = substr(rhs,1,1) sub("[a-zA-Z0-9]",lhs,rhs) # remove last digit from rhs shr=revstr(rhs);sub("[0-9]","",shr); rhs=revstr(shr) if (debug>=1) printf " --> %s\n","[?"lhs"]"rhs return rhs } # nupp (str1) function nupp2(rexp,ar) { # %n: take nucleus at pos n if (match(rexp,"%[0-9]+")) { if (debug>=1) printf "# nucleus preprocessor[l=%i] : %s",l,rexp sub("%[0-9]+",ar[substr(rexp,RSTART+1,RLENGTH-1)],rexp) if (debug>=1) printf " --> %s ",rexp } if (debug>=1 && !match(rexp,"@")) printf "\n" if (!match(rexp,"@")) return rexp # c@hd1 / c@%1 replace first char of hd1 with 'c' lhs=substr(rexp,1,RSTART-1) rhs=substr(rexp,RSTART+1) #if (debug>=1) printf " --> %s",lhs # substitute first alfanumeric in rhs for lhs # special case: hn -> h [ prevent rhs->nn ] if (lhs~/^n$/ && rhs~/^hn$/) rhs = substr(rhs,1,1) sub("[a-zA-Z0-9#]",lhs,rhs) # remove last digit from rhs shr=revstr(rhs);sub("[0-9]","?&",shr); rhs=revstr(shr) rhs="/^"rhs"$/" if (debug>=1) printf " --> %s\n",rhs return rhs } function revstr(str,i) { rts="" for (i=1;i<=length(str);i++) { rts=substr(str,i,1)rts } return rts } function AAtranslate(str,res) { res=str if (str=="A") res="ALA" else if (str=="C") res="CYS" else if (str=="D") res="ASP-" else if (str=="E") res="GLU-" else if (str=="F") res="PHE" else if (str=="G") res="GLY" else if (str=="H") res="HIS+" else if (str=="I") res="ILE" else if (str=="K") res="LYS+" else if (str=="L") res="LEU" else if (str=="M") res="MET" else if (str=="N") res="ASN" else if (str=="P") res="PRO" else if (str=="Q") res="GLN" else if (str=="R") res="ARG+" else if (str=="S") res="SER" else if (str=="T") res="THR" else if (str=="V") res="VAL" else if (str=="W") res="TRP" else if (str=="Y") res="TYR" # else if (str=="X") res="" # else if (str=="Z") res="" return res } function AAoneletter(res,str) { str=res if (res=="ALA") str="A" else if (res=="CYS") str="C" else if (res=="ASP") str="D" else if (res=="GLU") str="E" else if (res=="PHE") str="F" else if (res=="GLY") str="G" else if (res=="HIS") str="H" else if (res=="ILE") str="I" else if (res=="LYS") str="K" else if (res=="LEU") str="L" else if (res=="MET") str="M" else if (res=="ASN") str="N" else if (res=="PRO") str="P" else if (res=="GLN") str="Q" else if (res=="ARG") str="R" else if (res=="SER") str="S" else if (res=="THR") str="T" else if (res=="VAL") str="V" else if (res=="TRP") str="W" else if (res=="TYR") str="Y" else if (res~"^LYS") str="K" else if (res~"^ARG") str="R" else if (res~"^HIS") str="H" else if (res~"^ASP") str="D" else if (res~"^GLU") str="E" # else if (str=="X") res="" # else if (str=="Z") res="" return str } function sparkyasg(spin,ret,ss,ns) { if (spin=="") return "" else if (split(spin,ss)!=3) { print "ERROR in spin:",spin,"; expect three words" exit } else { ret = AAoneletter(ss[2]) ss[1] ss[3] } return ret }