#!/usr/local/bin/gawk -f
#
#@ convert (ambiguous) distance restraints in
#@ xplor *.tbl file to xeasy/cyana .upl/lol file
#@ Use:
#@     tbl2upl.awk name.(seq|pdb) unambig.tbl > out.upl
#
# command line options:
#  [-v] lower=1            : print out lower bounds instead of upper
#  [-v] echoall=1          : echo all input
#  [-v] keepnames=1        : if not set, names with [%#+*] will be adjusted
#                            so that they can be read with cyana [version>2.0]
#                            that means that any number of [#*%+] at the end
#                            of an atname will be removed.
#  [-v] distformat="%.2f"  : format for the distance [default:" %6.2f"]
#                            (distanceformat=... is accepted as well)
#  [-v] verbose=0          : verbosity level [default:""]
#
BEGIN {
    # NOTE: the help action below prints every leading '#' line of this file
    # up to the BEGIN line, so the header above doubles as the usage text.
    stderr     = "/dev/stderr"
    IGNORECASE = 1              # regex matches AND string comparisons ignore case
    RS         = "ASSI"         # input is cut into records at every "ASSI"
    progname   = "tbl2upl.awk"

    if (ARGV[1] ~ /^(h|help)$/ || help != "") {
        # because of the which command, help is only available if
        # this script is in the PATH
        system("gawk 'FNR>1&&/^#/{print}/^BEGIN/{exit}' `which "progname"`")
        help = 1
        exit
    }
    if (!distanceformat)
        distanceformat = " %6.2f"
}

# Count the input files seen so far.
FNR == 1 { f++ }

# Sequence file (*.seqs): one residue per line, "NAME [NUMBER]".
# A line with exactly two fields sets the residue number explicitly; any
# other non-blank line takes the previous number plus one.
FILENAME ~ /[.]seqs$/ {
    if (verbose) print "FILENAME",FILENAME > stderr
    seq = FILENAME
    nr = split($0, rrll, "\n")
    for (l = 1; l <= nr; l++) {
        nf = split(rrll[l], an, " ")
        # Blank lines (including the empty piece after the final newline)
        # must not create a residue with an empty name.
        if (nf == 0) continue
        store_seq_residue(nf, an[1], an[2])
        if (verbose) print "#",resnum,residue[resnum] > stderr
        track_residue_range(resnum)
    }
    next
}

# Coordinate file (*.pdb) or sequence file (*.seq).
# For *.pdb the residue name/number are cut from fixed columns of ATOM lines;
# for *.seq the same "NAME [NUMBER]" convention as for *.seqs applies.
FILENAME ~ /[.](pdb|seq)$/ {
    if (verbose) print "FILENAME",FILENAME > stderr
    seq = FILENAME
    nr = split($0, rrll, "\n")
    for (l = 1; l <= nr; l++) {
        nf = split(rrll[l], an, " ")
        if (nf == 0) continue           # skip blank lines, see *.seqs rule
        if (an[1] == "ATOM" && FILENAME ~ /[.]pdb$/) {
            resnam = substr(rrll[l], 18, 4); gsub(/ /, "", resnam)
            resnum = substr(rrll[l], 23, 4); gsub(/ /, "", resnum)
            residue[resnum] = toupper(resnam)
        } else if (FILENAME ~ /[.]seq$/) {
            store_seq_residue(nf, an[1], an[2])
        }
        if (verbose) print "#",resnum,residue[resnum] > stderr
        track_residue_range(resnum)
    }
    next
}

# Restraint table (*.tbl): one record per "ASSI"-separated statement.
# Each record may hold several alternatives joined by OR; one output line is
# written per alternative.  Only the first alternative carries the distance
# triple; later alternatives are written with an upper limit of 0.
FILENAME ~ /[.]tbl$/ {
    tbl = FILENAME

    # Normalise the record: isolate parentheses, flatten to one line and
    # squeeze all whitespace to single blanks.
    gsub(/[()]/, " & ")
    gsub(/\n/, " ")
    $1 = $1; $0 = $0

    # The help text documents "distformat" while older invocations may pass
    # "distanceformat"; honour both (distformat wins when given).
    atomformat = "%3i %-4s %-4s"
    distfmt    = (distformat != "" ? distformat : distanceformat)
    format     = atomformat " " atomformat " " distfmt "\n"

    # Split on the stand-alone token only.  A bare /OR/ would, with
    # IGNORECASE set, also cut inside words such as "for".
    na = split($0, rr, / OR /)
    for (a = 1; a <= na; a++) {
        # Give back the blanks consumed by the separator so that every piece
        # looks exactly as it would after splitting on the bare keyword.
        if (a > 1)  rr[a] = " " rr[a]
        if (a < na) rr[a] = rr[a] " "

        res1 = res2 = at1 = at2 = ""
        ro = rr[a]

        # residue numbers of both selections
        match(rr[a], /.*resid? +([0-9]+) .* resid? +([0-9]+) /, rres)
        if (1 in rres) res1 = rres[1] + 0
        if (2 in rres) res2 = rres[2] + 0
        track_residue_range(res1)
        track_residue_range(res2)
        if (res1 == "" || res2 == "") {
            # unparsable piece: report it and give up on the whole record
            print "???",FNR":[",ro"]"
            next
        }

        # atom names of both selections
        match(rr[a], /.*(name|atom) +([a-z]+[0-9#*%+]*) .* (name|atom) +([a-z]+[0-9#*%+]*) /, rrat)
        if (1 in rrat) at1 = rrat[2]
        if (2 in rrat) at2 = rrat[4]

        if (a == 1) {
            # "d dminus dplus" following the closing parenthesis
            match(rr[1], /[)] +([0-9.]+) +([0-9.]+) +([0-9.]+)/, ddd)
            if (1 in ddd) {
                dist = ddd[1]
                if (2 in ddd) lol = dist - ddd[2]
                if (3 in ddd) upl = dist + ddd[3]
            } else {
                # Values of the previous record are still in dist/lol/upl and
                # will be printed; at least tell the user about it.
                print progname ": no distances found in record " FNR > stderr
            }
        } else {
            # NOTE(review): only upl is zeroed for further alternatives; with
            # lower=1 they repeat the lower limit of the first alternative.
            # Confirm whether that is intended for .lol output.
            upl = 0
        }
        if (a > 1 || echoall) print "#",rr[a]
        dr = (lower == 1 ? lol : upl)

        # see if there are residue names
        nam1 = (res1 in residue ? residue[res1] : "xxx")
        nam2 = (res2 in residue ? residue[res2] : "xxx")

        if (!keepnames) {
            sub(/[%#*+]+$/, "", at1)
            sub(/[%#*+]+$/, "", at2)
        }
        printf format, res1, nam1, at1, res2, nam2, at2, dr
    }
    next
}

END {
    if (help) exit
}

# Record a residue read from a sequence-file line in the global residue[]
# table and update the global resnum.
#   nfields : number of fields on the line
#   name    : residue name (stored upper-cased)
#   num     : residue number, used only when the line has exactly two fields;
#             otherwise the previous resnum is incremented
function store_seq_residue(nfields, name, num) {
    if (nfields == 2) resnum = num
    else resnum++
    residue[resnum] = toupper(name)
}

# Widen the global [minres, maxres] interval so that it contains num.
# The subtraction forces a numeric comparison.
function track_residue_range(num) {
    if (minres == "") minres = num
    minres = (minres - num > 0 ? num : minres)
    if (maxres == "") maxres = num
    maxres = (maxres - num < 0 ? num : maxres)
}

# Absolute value of x, truncated to an integer.
function abs(x) {
    return int(x < 0 ? -x : x)
}

# Translate a one-letter amino acid code into the three-letter code.
# Unknown input is returned unchanged.  An if-chain (rather than a lookup
# table) is used on purpose: with IGNORECASE set, "==" ignores case, array
# subscripts would not.
#   str : one-letter code
#   res : local variable
function AAtranslate(str,    res) {
    res = str
    if (str == "A") res = "ALA"
    else if (str == "C") res = "CYS"
    else if (str == "D") res = "ASP"
    else if (str == "E") res = "GLU"
    else if (str == "F") res = "PHE"
    else if (str == "G") res = "GLY"
    else if (str == "H") res = "HIS"
    else if (str == "I") res = "ILE"
    else if (str == "K") res = "LYS"
    else if (str == "L") res = "LEU"
    else if (str == "M") res = "MET"
    else if (str == "N") res = "ASN"
    else if (str == "P") res = "PRO"
    else if (str == "Q") res = "GLN"
    else if (str == "R") res = "ARG"
    else if (str == "S") res = "SER"
    else if (str == "T") res = "THR"
    else if (str == "V") res = "VAL"
    else if (str == "W") res = "TRP"
    else if (str == "Y") res = "TYR"
    else if (str == "U") res = "UNK"    # unknown residue
    else if (str == "B") res = "UPR"    # unknown i-1 neighbour
    return res
}

# Translate a three-letter residue name into the one-letter code.
# Names that merely start with LYS/ARG/HIS/ASP/GLU (e.g. charged variants)
# map to the parent residue.  Unknown input is returned unchanged.
#   res : residue name
#   str : local variable
function AAoneletter(res,    str) {
    str = res
    if (res == "ALA") str = "A"
    else if (res == "CYS") str = "C"
    else if (res == "ASP") str = "D"
    else if (res == "GLU") str = "E"
    else if (res == "PHE") str = "F"
    else if (res == "GLY") str = "G"
    else if (res == "HIS") str = "H"
    else if (res == "ILE") str = "I"
    else if (res == "LYS") str = "K"
    else if (res == "LEU") str = "L"
    else if (res == "MET") str = "M"
    else if (res == "ASN") str = "N"
    else if (res == "PRO") str = "P"
    else if (res == "GLN") str = "Q"
    else if (res == "ARG") str = "R"
    else if (res == "SER") str = "S"
    else if (res == "THR") str = "T"
    else if (res == "VAL") str = "V"
    else if (res == "TRP") str = "W"
    else if (res == "TYR") str = "Y"
    else if (res ~ /^LYS/) str = "K"
    else if (res ~ /^ARG/) str = "R"
    else if (res ~ /^HIS/) str = "H"
    else if (res ~ /^ASP/) str = "D"
    else if (res ~ /^GLU/) str = "E"
    # Plain string comparison: "res == /UNK/" would compare res with the
    # 0/1 result of matching $0 against the regex and never be true.
    else if (res == "UNK") str = "U"
    else if (res == "UPR") str = "B"
    return str
}