#!/usr/local/bin/gawk -f # #@ fill shift list with chemical shift statistics #@ from a database , currently the cyana lib file. # # a sequance file [ 3 letter[+/-] , first line may contain res number # ALA 12 # ASP- # etc.. # # # # default/other # [-v] keepcharge = 0|1 # keep charge of residue in input [LYS+ etc] # [-v] addcharge = 0|1 # keep charge of residue in input [LYS+ etc] # [-v] nocharge = 0|1 # remove charge # [-v] expandmethyls = 0|1 # expand methyl group peudo atoms # [-v] removepseudo = 1|0 # expand methyl group peudo atoms # [-v] keeppseudo = 0|1 # expand methyl group peudo atoms # # [-v] sigma=2 # multiply rms with sigma for output ppm margins # # [-v] ref=dss # ppm reference dss,bruk # [-v] dc=-2.66 # for dss: dc=0 ; bruk # # for bruk: dc=-2.66 # [-v] bmrb = 0|1 # read shifts from shiftstatfile otherwise use cyanalib # [-v] showlib = 0|1 # show lbirary values that were read in # [-v] shiftstatfile=filename # default: shiftstatfile="/home/spine/DOCS/reference_data/statsel.txt" # # [-v] cyana = cyana # [-v] out=aria|cyana|dyana|xeasy # [cd]yana|xeasy : hb2,hb3 nomenclature [default] # aria : hb1,hb2 nomenclature BEGIN { stderr="/dev/stderr" progname="seq2shift.awk" if ( ARGV[1]~/^(h|help)$/ || help!="" ) { print ARGV[0] # because of the which command, help is only available if # this script is in the PATH system("gawk '/^#/{print}/^BEGIN/{exit}' `which "progname"`") help=1 exit } } NR==1 { # settings , rerad at beginning of every file. if (keepcharge=="") keepcharge=0 if (keepcharge ) nocharge=0 if (!keeppseudo) removepseudo=1 if (removepseudo) keeppseudo=0 if (cyana=="") cyana="cyana" if (cyana=="") cyana="cyana2" # CSTABLE changed!! if (sigma=="") sigma=2 # CSTABLE changed!! if (!out) out="" if (ref=="") ref= "dss" if (ref=="dss") { dc=0 print "...",progname, ": output shifts relative to DSS reference: ref="ref > stderr } else if (ref="bruk") { dc=-2.66 print "...",progname, ": output shifts relative to bruker reference: ref="ref > stderr } else { print "***",progname,"WARNING, unknown reference: ref="ref,"dc=",dc+0 > stderr } if (bmrb) { if (shiftstatfile=="") shiftstatfile="/home/spine/DOCS/reference_data/statsel.txt" read_shiftstat() } else { read_cyanalib() #read_cyanalib2() } } $1~/^#/ { print > stderr ; next } /#/ { sub(/#.*$/,"") } { resread++ } resread==1 { rinit = r = ( $2=="" ? 1 : $2 ) } $1 != "link" && NF>0{ res=$1 if (!keepcharge) res=conv_res(res) if (nocharge) sub(/[-+]$/,"",res) if (NF==2) r=$2 rlast=r resnum[resread]=r resnam[r++]=res } END { print "residues from",rinit,rlast >stderr for (r=rinit;r<=rlast;r++) { #print "residue",r >stderr # print r,resnum[r-rinit+1],resnam[r],atnum[resnam[r]] for ( a=1 ; a<=atnum[resnam[r]] ; a++ ) { #print "residue",r,a >stderr res=resnam[r] at=atlist[res,a] printf "%5i %5s %5s", r,resnam[r],at printf "% 8.3f %7.3f # %4i~ %8.3f %8.3f %8.3f %8.3f\n", cslib[res,at,"ave"],cslib[res,at,"rms"]*sigma, cslib[res,at,"nobs"], cslib[res,at,"ave"],cslib[res,at,"min"], cslib[res,at,"max"],cslib[res,at,"wid"] if (at~/Q/) at=expandpseudo(at,res) } } } function expandpseudo(at,res, at0,n) { at=toupper(at) at0=at if (at!~/^Q/) return at nps="1,2,3" if (res == "VAL" && at == "QQG" ) at = "QG1 QG2" else if (res == "LEU" && at == "QQD" ) at = "QD1 QD2" else if ( at == "QG" || ( res == "GLY" && at == "QA" ) || ( res != "ALA" && at == "QB" ) || ( res == "ILE" && at == "QG1" ) || ( res ~ /LYS|PRO|ARG/ && at == "QD" ) || ( res ~ /LYS/ && at == "QE" ) ) { if ( out=="aria" ) nps="1,2" else if (out~/^([cd]yana|xeasy)$/) nps="2,3" else nps="2,3" } else if ( ( res ~ /ASN/ && at == "QD2" ) || ( res ~ /GLN/ && at == "QE2" ) || ( res ~ /ARG/ && at ~ /^QH/ ) ) nps="1,2" if (!expandmethyls && nps=="1,2,3") return at if (!expandaromats && ( res ~ /PHE|TYR/ && at ~ /^Q[DE]/ ) ) return at at=exp_at(at,nps) return at } function exp_at(at,nh, n,hh,a,na,outat,aa) { # at [atomname] : "QB" # nh [pseudoatom xpansion] : "1,2" # returns outat -> "HB1 HB2" (split it for y'self) na=split(at,aa) ; # print "###",na gsub(/,/," ",nh);n=split(nh,hh) if (!removepseudo) outat=" "at for ( b=1;b<=na;b++ ) for (a=1;a<=n;a++) { atin= aa[b] sub(/^Q/,"H",aa[b]) ; atout= aa[b] outat=outat" "aa[b] hh[a] #if (atin~/Q/) print "##",atin,"->",atout,hh[1],hh[2]>stderr } return substr(outat,2) } # convert residue names to CYANA format lys -> lys+ etc # # ----------------------------------------------------- # function conv_res(res) { res=toupper(res) if (addcharge && res ~ /^(ARG|LYS|HIS)$/ ) res=res"+" if (addcharge && res ~ /^(ASP|GLU)$/ ) res=res"-" return res } function read_cyanalib( res,at) { # a bit quirky way to get the location of the dyana-directory # by looking inside the dyana startup script. # assuming shifts are relative to DSS print "cyana="cyana>stderr com=cyana " 'cyanalib'|grep 'Library file'|tail -1" com | getline line split(line,ss) cyanalib=ss[3] gsub("\"","",cyanalib) print "reading from file : ["cyanalib"]" > stderr # read until CSTABLE header while ((getline str0 < cyanalib) > 0 && str0!~/^CSTABLE/ ) {} split(str0,str) if (str[1]=="CSTABLE") ncs=str[2] print "reading chemical shift statistics: ["ncs"]" > stderr # make db # cslib[res,at,"ave"]=average chemical shift etc. for (cs=1;cs<=ncs;cs++) { getline str0 < cyanalib #print str0 > stderr split(str0,str) res = str[2] ; at = str[3] Qcarbon = ( at ~ /^C/ ) if (Qcarbon) { str[5]+=dc str[7]+=dc str[8]+=dc } nx=split(expandpseudo(at,res),atx) for (x=1;x<=nx;x++) { atlist[res,++atnum[res]]=atx[x] cslib[res,atx[x],"nobs"]=str[4] cslib[res,atx[x],"ave"]=str[5] cslib[res,atx[x],"rms"]=str[6] cslib[res,atx[x],"min"]=str[7] cslib[res,atx[x],"max"]=str[8] cslib[res,atx[x],"mid"]=(str[7]+str[8])/2 cslib[res,atx[x],"wid"]=(str[8]-str[7]) #if (at~/Q/ && res~/ARG/) print "##",at,res,str[5]>stderr } if (showlib) { print "#",res,at,cslib[res,at,"ave"],cslib[res,at,"rms"] >stderr # cslib[res,at,"min"], # cslib[res,at,"max"],cslib[res,at,"mid"],cslib[res,at,"wid"] } } close(cyanalib) } function read_cyanalib2( res,at) { # a bit quirky way to get the location of the dyana-directory # by looking inside the dyana startup script. # assuming shifts are relative to DSS print "cyana="cyana>stderr com=cyana " './init;write tmp.lib' " cyanalib="tmp.lib" print "reading from file : ["cyanalib"]" > stderr # read until CSTABLE header while ((getline str0 < cyanalib) > 0 && str0!~/^CSTABLE/ ) {} split(str0,str) if (str[1]=="CSTABLE") ncs=str[2] print "reading chemical shift statistics: ["ncs"]" > stderr # make db # cslib[res,at,"ave"]=average chemical shift etc. for (cs=1;cs<=ncs;cs++) { getline str0 < cyanalib #print str0 > stderr split(str0,str) res = str[2] ; at = str[3] Qcarbon = ( at ~ /^C/ ) if (Qcarbon) { str[5]+=dc str[7]+=dc str[8]+=dc } nx=split(expandpseudo(at,res),atx) for (x=1;x<=nx;x++) { atlist[res,++atnum[res]]=atx[x] cslib[res,atx[x],"nobs"]=str[4] cslib[res,atx[x],"ave"]=str[5] cslib[res,atx[x],"rms"]=str[6] cslib[res,atx[x],"min"]=str[7] cslib[res,atx[x],"max"]=str[8] cslib[res,atx[x],"mid"]=(str[7]+str[8])/2 cslib[res,atx[x],"wid"]=(str[8]-str[7]) #if (at~/Q/ && res~/ARG/) print "##",at,res,str[5]>stderr } if (showlib) { print "#",res,at,cslib[res,at,"ave"],cslib[res,at,"rms"] >stderr # cslib[res,at,"min"], # cslib[res,at,"max"],cslib[res,at,"mid"],cslib[res,at,"wid"] } } close(cyanalib) } function read_shiftstat( res,at) { # the file is located in: # definition moved to top of script # shiftstatfile="/home/spine/DOCS/reference_data/statsel.txt" # and is generated from # http://www.bmrb.wisc.edu/ref_info/statful.htm # # format: (free) # # Amino Atom Atom Number Minimum Maximum Average Standard # Acid Name Type of-Shifts Shift Shift Shift Deviation # ------------------------------------------------------------ # ALA H H 12099 3.53 11.58 8.19 0.61 # ALA HA H 10190 1.50 6.51 4.26 0.44 # ALA HB H 9402 -0.73 2.70 1.37 0.25 # ALA C C 6198 164.48 185.42 177.77 2.22 # ALA CA C 8891 40.30 65.52 53.18 2.03 # ALA CB C 8130 0.00 38.70 18.97 1.87 # ALA N N 10017 99.44 142.81 123.16 3.68 # while ( (getline str0 < shiftstatfile)>0 ) { nf=split(str0,str) res = str[1] ; at = str[2] nx=split(expandpseudo(at,res),atx) if (str[1]~/^#/) continue if (nf==0) continue for (x=1;x<=nx;x++) { atlist[res,++atnum[res]]=atx[x] cslib[res,atx[x],"nobs"]=str[4] cslib[res,atx[x],"min"]=str[5] cslib[res,atx[x],"max"]=str[6] cslib[res,atx[x],"ave"]=str[7] cslib[res,atx[x],"rms"]=str[8] cslib[res,atx[x],"mid"]=(str[5]+str[6])/2 cslib[res,atx[x],"wid"]=(str[6]-str[5]) } if (showlib) { print "#",res,at,cslib[res,at,"ave"],cslib[res,at,"rms"] >stderr # cslib[res,at,"min"], # cslib[res,at,"max"],cslib[res,at,"mid"],cslib[res,at,"wid"] } } close(shiftstatfile) }