#!/usr/local/bin/gawk -f
#
# peaks-project.awk [name1.seq] name2.prot name3.peaks
#
#@ make lower dimension projections from xeasy peak files
#
# command-line options:
# dom=134    : write dimensions 1,3,4 to output [default: 123]
#              (commas are ignored, so dom=1,3,4 is equivalent)
# cyaf=NhH   : cyanaformat for output file; also used to check the
#              nucleus of the assigned atoms: character i of cyaf
#              (uppercased) must equal the first letter of the name of
#              the atom assigned in the i-th selected dimension
#              (Q and M pseudo atoms count as H); peaks that do not
#              match in every selected dimension are not written
#
# not used:
## f=3       : use three letter code instead of 1 letter for residue names
## sr=.      : separator for resnam.number.atom [default: ""]
## sn=""     : separator for atom names [default: " "]
#
BEGIN {
  stderr = "/dev/stderr"
  progname = "peaks-project.awk"
  # Print the header comment of this script as help text when asked for
  # it, or when fewer than two command-line arguments were given.
  if (ARGV[1] ~ /^(h|help)$/ || help != "" || ARGC < 3) {
    print ARGV[0]
    # because of the which command, help is only available if
    # this script is in the PATH
    system("gawk '/^#/{print}/^BEGIN/{exit}' `which " progname "`")
    help = 1
    exit
  }
}

# At the start of every input file: (re)parse the dimension selection,
# so a dom=... assignment placed between file names takes effect.
FNR == 1 {
  if (dom == "") dom = "123"
  gsub(",", "", dom)
  # gawk extension: an empty separator splits into single characters
  ndom = split(dom, dims, "")
  # reverse lookup: selected dimension -> position in the output
  for (d in dims) seldims[dims[d]] = d
  # every selector must be a digit 1-9; fail early instead of on the
  # first peak line, and report on stderr with a non-zero exit status
  for (d = 1; d <= ndom; d++) {
    if (dims[d] + 0 == 0) {
      print (progname ": ERROR: invalid dimension \"" dims[d] "\" in dom=" dom) > stderr
      exit 1
    }
  }
}

# Sequence file: one residue name per line, optionally followed by an
# explicit residue number; otherwise residues are numbered consecutively.
FILENAME ~ /[.]seq$/ {
  # currently not used
  seq = FILENAME
  if (NF == 2) _resnum = $2
  else _resnum++
  residue[_resnum] = toupper($1)
  next
}

# Atom list: $1 = atom id, $4 = atom name, $5 = residue number,
# optionally "# RESNAME" in $6/$7.
FILENAME ~ /[.]prot$/ {
  atid = $1
  atnam[atid] = $4
  # the nucleus is the first letter of the atom name;
  # Q and M pseudo atoms are treated as protons
  nucl[atid] = substr(atnam[atid], 1, 1)
  if (nucl[atid] ~ /^[QM]$/) nucl[atid] = "H"
  resnum[atid] = $5
  # take the residue name from the trailing comment unless the
  # residue is already known (e.g. from the .seq file)
  if ($6 == "#" && $7 ~ /[A-Z][A-Z0-9]+[-+]?/ && !(resnum[atid] in residue))
    residue[resnum[atid]] = $7
  atres[atid] = residue[resnum[atid]]
  atres[atid, 1] = AAoneletter(atres[atid])
  ID[atid] = (f == 3 ? atres[atid] : atres[atid, 1]) sr resnum[atid] sr atnam[atid]
}

# A file is handled as a peak list if it is named *.peaks or contains
# the "# Number of dimensions" header line.
FILENAME ~ /[.]peaks$/ { filetype = "peaks" }
/^# Number of dimensions / { filetype = "peaks" }

filetype == "peaks" {
  if (/^ *# *Number of dimensions/) {
    ndim = $NF
    # a selector beyond the dimensionality of the file would silently
    # pick up non-ppm columns, so reject it here
    for (d = 1; d <= ndom; d++) {
      if (dims[d] + 0 > ndim + 0) {
        print (progname ": ERROR: dimension " dims[d] " selected by dom=" dom " but " FILENAME " has only " ndim " dimensions") > stderr
        exit 1
      }
    }
    print "# Number of dimensions", ndom
    next
  }

  if (/#CYANAFORMAT/) {
    cyanaformat = $NF
    if (cyaf != "") {
      $2 = cyaf
    } else {
      # project the original format string onto the selected dimensions;
      # start from an empty string so that a second #CYANAFORMAT line
      # (e.g. a second peak file) does not inherit the previous result
      newcyaf = ""
      for (d = 1; d <= ndom; d++)
        newcyaf = newcyaf substr(cyanaformat, dims[d], 1)
      $2 = newcyaf
    }
  }

  # all other header/comment lines are copied unchanged
  if ($1 ~ /^#/) { print; next }

  # blank lines carry no peak; copy them instead of formatting empty fields
  if (NF == 0) { print; next }

  # A line whose first field starts at or before column 20 opens a new
  # peak; a line whose first field starts further right is taken to be a
  # continuation line (additional assignment) of the previous peak.
  # NOTE(review): continuation lines are still formatted below with the
  # column layout of a full peak line -- confirm this is intended.
  if (index($0, $1) <= 20) pkid = $1

  # chemical shifts of the selected dimensions (fields 2 .. ndim+1)
  ppm_str = ""
  for (i = 1; i <= ndom; i++)
    ppm_str = ppm_str " " sprintf(" %7s", $(1 + dims[i]))

  # the six fields between the shifts and the assignments
  mid_str = ""
  for (i = ndim + 2; i <= ndim + 7; i++)
    mid_str = mid_str " " $i

  # assignments of the selected dimensions; with cyaf set, count the
  # dimensions whose assigned atom has the requested nucleus
  asg_str = ""
  nufilter = 0
  for (i = 1; i <= ndom; i++) {
    atnum = $(ndim + 7 + dims[i])
    asg_str = asg_str " " sprintf(" %4i", atnum)
    if (cyaf != "") {
      fnucl = toupper(substr(cyaf, i, 1))
      if (fnucl == nucl[atnum]) nufilter++
    }
  }

  # everything after the assignment columns
  rest_str = ""
  for (i = ndim + 7 + ndim + 1; i <= NF; i++)
    rest_str = rest_str " " $i

  # without cyaf every peak is written; with cyaf only peaks whose
  # assignments match the requested nucleus in every selected dimension
  if (cyaf == "" || nufilter == ndom)
    printf " %4s %s %s %s %s\n", $1, ppm_str, mid_str, asg_str, rest_str
}

# AAtranslate(str): return the three-letter residue name for the
# one-letter code str ("U" gives "UNK"); any other input is returned
# unchanged. res is a local variable.
function AAtranslate(str,    res) {
  res = str
  if      (str == "A") res = "ALA"
  else if (str == "C") res = "CYS"
  else if (str == "D") res = "ASP"
  else if (str == "E") res = "GLU"
  else if (str == "F") res = "PHE"
  else if (str == "G") res = "GLY"
  else if (str == "H") res = "HIS"
  else if (str == "I") res = "ILE"
  else if (str == "K") res = "LYS"
  else if (str == "L") res = "LEU"
  else if (str == "M") res = "MET"
  else if (str == "N") res = "ASN"
  else if (str == "P") res = "PRO"
  else if (str == "Q") res = "GLN"
  else if (str == "R") res = "ARG"
  else if (str == "S") res = "SER"
  else if (str == "T") res = "THR"
  else if (str == "V") res = "VAL"
  else if (str == "W") res = "TRP"
  else if (str == "Y") res = "TYR"
  else if (str == "U") res = "UNK"
  return res
}

# AAoneletter(res): return the one-letter code for the three-letter
# residue name res. HIS, ASP, GLU, ARG and LYS are matched as prefixes,
# so suffixed variants such as "HIS+" or "ASP-" are recognised as well;
# any other input is returned unchanged. str is a local variable.
function AAoneletter(res,    str) {
  str = res
  if      (res == "ALA") str = "A"
  else if (res == "CYS") str = "C"
  else if (res == "PHE") str = "F"
  else if (res == "GLY") str = "G"
  else if (res == "ILE") str = "I"
  else if (res == "LEU") str = "L"
  else if (res == "MET") str = "M"
  else if (res == "ASN") str = "N"
  else if (res == "PRO") str = "P"
  else if (res == "GLN") str = "Q"
  else if (res == "SER") str = "S"
  else if (res == "THR") str = "T"
  else if (res == "VAL") str = "V"
  else if (res == "TRP") str = "W"
  else if (res == "TYR") str = "Y"
  else if (res ~ "^HIS") str = "H"
  else if (res ~ "^ASP") str = "D"
  else if (res ~ "^GLU") str = "E"
  else if (res ~ "^ARG") str = "R"
  else if (res ~ "^LYS") str = "K"
  return str
}