#!/usr/local/bin/gawk -f # # run as: # complete_assignment.awk xxx.seq xxx.prot CcH_.peaks # addheavyatom2peaks.awk xxx.seq xxx.prot CcH_.peaks # #@ adds assignments in xeasy peaklist where one atom #@ of a proton-heteroatom couple is assigned and the #@ remaining assignment is clear. # # useful to keep the assignments in candid for pseudo 4D version of # cCH (->hcCH) and CNH (->hcNH) noesy spectra. also for pseudo 3D # versions of filtered 2Dnoesy spectra e.g. noN-noesy ( no N15 bound # protons in one dimension hH -> hHC) # BEGIN { stderr="/dev/stderr" progname="complete_assignments.awk" #for ( i in ARGV ) if (ARGV[i]=="help") help=1 if ( ARGV[1]~/^(h|help)$/ || help!="" ) { print ARGV[0] # because of the which command, help is only available if # this script is in the PATH system("gawk '/^#/{print}/^BEGIN/{exit}' `which "progname"`") help=1 exit } } FNR==NR && FILENAME!~/[.]seq$/ { print "give sequence file first [*.seq]" >stderr print "halt" >stderr exit } FILENAME ~ /[.]seq$/ { seq = FILENAME if (NF==2) resn=$2 ; else resn++ residue[resn]=toupper($1) next } FILENAME ~/[.]prot$/ && $1!~/^#/ { atnr=$1 atnam=$4 resnr=$5 shift=$2 dshift=$3 atom[atnr]=atnam resnum[atnr]=resnr resnam[atnr]=residue[resnr] atnum[resnr,atnam]=atnr id[atnr]=residue[resnr]"-"resnr"-"atnam atppm[atnr]=shift atdppm[atnr]=dshift next } /^# *Number of dimensions/ { type[FILENAME]="peaks" } /^# *CYANAFORMAT/ { type[FILENAME]="peaks" } FILENAME ~ /[.]peaks$/ || type[FILENAME]=="peaks" { if (/^#/&&/Number of dimensions/) { dim=$NF ;print ;next } if (/# *CYANAFORMAT/) { cyaf=$NF ndim=length(cyaf) if ( hdom=index(cyaf,"h") ) { if ( xdom=index(cyaf,"n") ) xtyp="n" else if ( xdom=index(cyaf,"c") ) xtyp="c" } if ( Hdom=index(cyaf,"H") ) { if ( Xdom=index(cyaf,"N") ) Xtyp="N" else if ( Xdom=index(cyaf,"C") ) Xtyp="C" } if (index(cyaf,"h")) hdom=index(cyaf,"h") if (index(cyaf,"H")) Hdom=index(cyaf,"H") if (index(cyaf,"c")) xdom=index(cyaf,"c") if (index(cyaf,"n")) xdom=index(cyaf,"n") if (index(cyaf,"C")) Xdom=index(cyaf,"C") if (index(cyaf,"N")) Xdom=index(cyaf,"N") print "##Hdom,Xdom,Xtyp:",Hdom,Xdom,Xtyp >stderr print "##hdom,xdom,typ:",hdom,xdom,xtyp >stderr print next } if ( index($0,$1)>20 && $1~/^[0-9]+$/ ){ # whitespace ... index # ambiguous assignment $0=substr(fulline,1,index($0,$1)-1) substr($0,index($0,$1)) } if ($1~/^[0-9]+$/ ) { fulline=$0 if (""==rp++) { if (dim!=ndim) {print "##ERROR dimensions do not match";dim"!="ndim;error++;exit} if (cyaf=="") {print "##ERROR #CYANAFORMAT not found";error++;exit} } # fill in h if (hdom) nh=$(ndim+7+hdom) ; else nh=0 if (xdom) nx=$(ndim+7+xdom) ; else nx=0 if (Hdom) nH=$(ndim+7+Hdom) ; else nH=0 if (Xdom) nX=$(ndim+7+Xdom) ; else nX=0 if (verbose) { print "#[1]ndim,Xdom,Hdom,nH,nX",ndim,Xdom,Hdom,nH,nX #>stderr print "#[2]ndim,xdom,hdom,nh,nx",ndim,xdom,hdom,nh,nx #>stderr } #if (hdom && xdom && nx!=0 && nh==0) { # nm=find_prot_num(nx,hh) # if (nm==1) nh=hh[1]; else if (nm>1) nh=hh[1] # $(ndim+7+hdom)=nh #} # fill in H #if (verbose && Hdom && Xdom && xh) print "#[2]Xdom,Hdom,nh,nx", # Xdom,Hdom,nh,nx >stderr if (Hdom && Xdom && nX!=0 && nH==0) { nm=find_prot_num(nX,hh) if (nm==1) nH=hh[1]; else if (nm>1) nH=hh[1] if (verbose) print "#setting H[X]="nh"["nX"]", "in col",(ndim+7+Hdom),"was",$(ndim+7+Hdom) $(ndim+7+Hdom)=nH if (setppm==$(Hdom+1) ) $(Hdom+1)=atppm[nm] } # fill in h #if (verbose && Hdom && Xdom && xh) print "#[2]Xdom,Hdom,nh,nx", # Xdom,Hdom,nh,nx >stderr if (hdom && xdom && nx!=0 && nh==0) { nm=find_prot_num(nx,hh) if (nm==1) nh=hh[1]; else if (nm>1) nh=hh[1] if (verbose) print "#setting h[x]="nh"["nx"]", "in col",(ndim+7+hdom),"was",$(ndim+7+hdom) $(ndim+7+hdom)=nh if (setppm==$(hdom+1) ) $(hdom+1)=atppm[nm] } # fill in c if (hdom) nh=$(ndim+7+hdom) ; else nh=0 if (xdom) nx=$(ndim+7+xdom) ; else nx=0 if (hdom && xdom && nx==0 && nh!=0) { nx=find_heavy_num(nh) #print "x",nx,nh,id[nx],atppm[nx],xdom+1,(nx in atppm) $(ndim+7+xdom)=nx if ( setppm== $(xdom+1) ) $(xdom+1)=atppm[nx] } # fill in C if (Hdom) nh=$(ndim+7+Hdom) ; else nh=0 if (Xdom) nx=$(ndim+7+Xdom) ; else nx=0 if (Hdom && Xdom && nx==0 && nh!=0) { nx=find_heavy_num(nh) #print "X",nx,nh,id[nx],atppm[nx],Xdom+1,(nx in atppm) $(ndim+7+Xdom)=nx if ( setppm==$(Xdom+1) ) $(Xdom+1)=atppm[nx] } # build up the output string str=sprintf("%6i",$1) for (d=1;d<=dim;d++) { ppm=$(d+1) str=str sprintf(" %7.3f",ppm) } for (c=ndim+1+1;c<=ndim+1+6;c++) { str=str sprintf(" %s",$c) } for (c=ndim+7+1;c<=ndim+7+ndim;c++) { str=str sprintf(" %4i",$c) } for (c=ndim+7+ndim+1;c<=NF;c++) { str=str sprintf(" %s",$c) } if (verbose) { for (d=1;d<=ndim;d++) { printf "%4s:%-4s - ",resnum[$(ndim+7+d)],atom[$(ndim+7+d)] } printf "\n" } print str } else { print } } function to_heavy(res,atom, hetero) { # for NH's gsub(/ /,"",res) gsub(/ /,"",atom) if (atom~/^[HQ]N?[123]?$/) hetero="N" else if (res~/^ASN/ && atom ~ /^[HQ]D/ ) hetero="ND2" else if (res~/^GLN/ && atom ~ /^[HQ]E/ ) hetero="NE2" else if (res~/^ARG/ && atom == "HE" ) hetero="NE" else if (res~/^ARG/ && atom ~ /^HH/ ) hetero="NH"substr(atom,3,1) else if (res~/^LYS/ && atom ~ /^HZ/ ) hetero="NZ" else if (res~/^HIS/ && atom ~ /^H(D1|E2)$/ ) hetero="N"substr(atom,2,2) else if (res~/^TRP/ && atom ~ "HE1" ) hetero="N"substr(atom,2,2) else { # for CH's hetero=atom sub(/^[HQM]/,"C",hetero) ho=hetero if (hetero~/^C[AB]/) hetero=substr(hetero,1,2) else if (hetero~/^CG/ && res!~/VAL|ILE|THR/ ) hetero="CG" else if (hetero~/^CD/ && res !~ /(HIS|PHE|TYR|ILE|LEU|TRP)/ ) hetero="CD" else if (hetero~/^CE/ && res !~ /^(HIS|PHE|TYR|TRP)/ ) hetero="CE" else if (hetero~/^C[DE]$/ && res ~ /^(PHE|TYR)/ ) hetero=hetero"1" else if (hetero~/^C[ZH]/ && res !~ /^TRP/ ) hetero=substr(hetero,1,2) else hetero=substr(hetero,1,3) #if (ho!=hetero) print "##",ho,hetero } if (verbose) print "##to_heavy:",res,atom,"->",hetero return hetero } function find_prot_num(nx,nnhh, nh,ip) { # assume present # atom[atnr] = atnam # resnum[atnr] = resnr # resnam[atnr] = residue[resnr] # atnum[resnr,atnam] = atnr # nx : number of heavy atom # nh : number of proton atom # ip : number of matches # nnhh : array[1:ip] with proton numbers #print "finding protons @ "resnum[nx],atom[nx]>stderr for (nh in atom) if ( resnum[nh]==resnum[nx] && nh!=nx) { #print nx,resnum[nx],atom[nx],to_heavy(resnam[nh],atom[nh]) >stderr if ( atom[nx]==to_heavy(resnam[nh],atom[nh]) ) { nnhh[++ip]=nh } } return ip } function find_heavy_num(nh, xkey,nx) { # assume present # atom[atnr] = atnam # resnum[atnr] = resnr # resnam[atnr] = residue[resnr] # atnum[resnr,atnam] = atnr # nx : number of heavy atom # nh : number of proton atom nx=0 xkey= resnum[nh] SUBSEP to_heavy(resnam[nh],atom[nh]) #print "finding heavy atom @",nh,resnum[nh],resnam[nh],atom[nh],to_heavy(resnam[nh],atom[nh]) >stderr if ( xkey in atnum ) nx = atnum[xkey] return nx }