#!/usr/local/bin/gawk -f # # ppm2prot.awk #@ generate an xeasy .prot file from another shift.list or ppm.out file # this version (ppm2prot) doesn't need molmol, # just cyana (default cyana2.0), to do the nomenclature conversion # # call with a sequence file [uppercase] and # a ppm.out file or other shift list # # # -v keepcharge=1 : keep charges found in *.seq file # otherwise make default i.e. # (+1) for ARG,LYS,HIS and # (-1) for ASP, GLU # -v verbose=1 : be verbose # -v dshift=2 : dshift -> 2 sigma # # -v maxres=199 : skip data with resnr > 199 # # -v keepunassigned=1 : keep atoms in the prot-list # which have no assignment ( shift 999 ) # # -v cyone=1 : assume input follows iupac nomenclature # [ if you get multiple entries HB3 in output ] # !!! unreliable # -v pdbpresent=0 : will make _xplor.pdb and _cyana.pdb and halt # -v pdbpresent=1 : assume _xplor.pdb and _cyana.pdb are present # -v pdbpresent=2 : will make _xplor.pdb and _cyana.pdb and go on [default] # # -v x=1 : set pdbpresent=1 and cyone=1 # -v sleep=1 : sleepvalue [seconds] waiting for creation of pdb's # # -v compact=0|1 : replace identical CH2 chemical shifts by their pseudo atom # # -v showskipped=0|1 : show comment lines in the input # # -v molmol="/home/eiso/local/bin/molmol -t >& m.err" # # -v cyana="/software/bin/cyana >& c.err" # # -v translate=xplor|pdb : 12->32 or 12->23 type renaming # # multiple values for the same atom are treated as follows: # ppm -> middle of range of values # dppm -> spread of values /2 # such that # ppm+dppm is highest observed ppm-val # ppm-dppm is lowest observed ppm-val # # # # user definable tolerances # if (!tol) tol=0.02 # if (!ntolfac) ntolfac=10 # if (!ctolfac) ctolfac=10 # # BEGIN { IGNORECASE=1 if ( ARGV[1]=="h" || ARGV[1] == "help" ) help=1 progname="ppm2prot.awk" if (help) { system("gawk '/^#/{print}/^BEGIN/{exit}' `which "progname"`") help=1 exit } if (pdbpresent=="") pdbpresent=2 if (x==1) { pdbpresent=1 ; cyone=1 } x="" stderr="/dev/stderr" if (!dshift) dshift=1 keephn=1 if (!tol) tol=0.02 if (!ntolfac) ntolfac=10 if (!ctolfac) ctolfac=10 if (cyone=="") cyone=1 if (maxres=="") maxres=199 if (!sleep) sleep=2 if (!showskipped) showskipped=0 if (!cyana) cyana = "cyana >& c.err" if (!translate) translate = "xplor" # com="\\touch " ENVIRON["HOME"] "/molmol; \\rm -r " ENVIRON["HOME"] "/molmol " # print com >stderr # system(com) # array with linker residue, just add if you have others linkers["PL"]++ linkers["LL5"]++ linkers["LL2"]++ linkers["LL"]++ linkers["LP"]++ } ### NOT IMPLEMENTED # -v ch2=cyana : cyana format [ hb2,hb3 ] FNR==NR && FILENAME!~/[.]seq$/ { print "give sequence file first [*.seq]" >stderr print "halt" >stderr exit } (FILENAME ~ /[.]seq$/ || FILENAME==seq) && FNR==1 { root=seqfile=FILENAME sub(/[.]seq$/,"",root) if (keepcharge!="") print "## keeping charges for ASP,GLU(-),ARG,LYS,HIS(+)" > stderr #procseqmm = "_molmol.seq" #procseqcy = "_cyana.seq" nres=0 } FILENAME==seqfile && NF>0 { if (/link/) { next } $0=toupper($0) for(i=1;i<=NF;i++) { field=$i if (field~/^[A-Z][-+A-Z0-9]*$/) { # new residue res=field if (res in linkers) { if (!linkskip[res]++ && verbose) print "## skipping lines with linkers",$0 > stderr next } nres++ if (!( nres-1 in residuenumber)) residuenumber[nres]=1 else residuenumber[nres]=residuenumber[nres-1]+1 residuename[nres]=res # [opt] charges if (keepcharge==1) { re_neg="^(ASP|GLU)$" ; sub(re_neg,"&-",res) re_pos="^(ARG|LYS)$" ; sub(re_pos,"&+",res) #re_pos="^(ARG|LYS|HIS)$" ; sub(re_pos,"&+",res) if (res!=$1 && verbose>0)print "## changed",$1,"to",res > stderr } #print res > procseqmm #print res,residuenumber[nres] > procseqcy } else if (field~/^[0-9]+$/) { residuenumber[nres]=field if (verbose) printf "# residue #%i ->%i\n",nres,residuenumber[nres] > stderr } else if (field~/^#/) { # ignore } else { print "ERROR: sequence cannot be interpreted",field,"in",$0 exit } if (residuenumber[nres]>maxres) maxres=residuenumber[nres]+1 } next } FILENAME!=seqfile && !pdbsread++ { # first file/line after sequence is read if (residuenumber[nres]>maxres) maxres=residuenumber[nres]+1 pdbsread++ procpdbs() if (verbose){ print "## maxres="maxres>stderr print "## maxatnum=",maxatnum+0 >stderr } com= "sleep "sleep if (verbose)print "# sys:",com > stderr system(com) } FILENAME ~ /[.]ppm$/ || FILENAME ~ /.*ppm.*[.]out$/ { if (!ppmread++) { ppmfile=FILENAME if(FILENAME!="-") print "## reading ppm-file:",FILENAME > stderr } if ($2+0>900) next nu=split($1,uu,/[.]/) if (nu!=2) { print "## skipping atom",$1,"->",uu[1],uu[2],uu[3] next } resnr = uu[1] atnam = uu[2] resnam = gresnam[resnr] ppmval = $2 dppmval=0 ambiflag = $3 } FILENAME ~ /shiftlist/ || /^# +nr +res +nucl +median +rms +# +Nobs +mean/ || /^ *[0-9]+ +[a-z][a-z0-9]+[-+]? +[a-z0-9]*[a-z][a-z0-9]* +[0-9]+/ { dgshiftfile = FILENAME if (!prsh++ && verbose) print "## shiftfile:",dgshiftfile >stderr } FILENAME ~ /^shift[.]list[.]/ && /^ *[0-9]+ +[a-z][a-z0-9]+[-+]? +[a-z0-9]*[a-z][a-z0-9]* +[0-9]+/{ eiso_old=1 dgshiftfile = FILENAME if (!prsh++) print "## shiftfile:",dgshiftfile,$0 >stderr } FILENAME == dgshiftfile { if (($1~/^#/ || NF<5 ) && showskipped) { #print "showskipped",showskipped print "## skipping line",FILENAME":"FNR,$0 next } if ($1~/^#/||NF<4) { next } # print $1,NF if (!ppmread++) { ppmfile=FILENAME if(FILENAME!="-")print "## reading dg-shifts-file[1]:",FILENAME > stderr } resnr=$1+0 resnam=$2 atnam=$3 #print "["resnr"|"resnam"|"atnam"]" ppmval = $4 dppmval = $5 #print ppmval,dppmval, $1,$2,$3 printtwice=0 oa=atnam if (eiso_old) { #print "### e_old" > stderr if (atnam~/^Q[A-Z12]+[23]$/) { printf "%s",atnam > stderr sub(/^Q/,"H",atnam) sub(/[23]$/,"#",atnam) print "->",atnam,resnam,ppmval,dppmval> stderr } } if (atnam!=oa) print atnam,"<-",oa > stderr if (!keepall && ppmval+0==0 && dppmval+0==0) next } FILENAME ~ /[.]prot$/ { if ($1~/^#/) next ppmread++ key=$5"."$4 ppmval=$2 dppmval=$3 SOURCE[key]=FILENAME":"$0; PPM[key]=ppmval; DPPM[key]=dppmval * dshift next } FILENAME ~ /[.]shifts$/ && NF==4 { # why NF==4???? first read ppm value? if (!ppmread) ppmfile=FILENAME if (!ppmread) print "## reading shifts-file[1]:",FILENAME > stderr ppmread++ resnr=$2+0 # uses pdb naming except for H -> HN resnam=$1 atnam=$3 ppm= $4 } { RESNAM[resnr]=resnam if( maxres!="" && resnr>maxres) next #print "~~~"resnr,maxres,(resnr>maxres)q if (!resnr || !atnam ) { if (verbose) print "#a#",FILENAME":"FNR,$0,"nr[" resnr "] atom["atnam"]" next } if (!keepcase) { resnam=toupper(resnam) atnam=toupper(atnam) } oldkey=resnr"."atnam # translate Nterminal NH3 (HT1, nmrview) to HN (xeasy) if ( atnam=="HT1" ) atnam="HN" # uses pdb naming except for H -> HN if (!keephn && atnam=="HN") atnam="H" # uses pdb naming except for H -> HN if (!keepco && atnam=="CO") atnam="C" if (resnam~/^GLN/ && atnam~/^NE$/) atnam=atnam "2" if (resnam~/^ASN/ && atnam~/^ND$/) atnam=atnam "2" if (!keepHring) { if (resnam~/^(PHE|TYR)/ && atnam~/^H[DE][12]$/) atnam=substr(atnam,1,2) "#" } # change HG1* etc to QG1 if (atnam~/^H.+[*%#]$/) { sub(/^H/,"Q",atnam) ; sub(/[*%#]$/,"",atnam) } if (resnam~/^THR/ && atnam~/^QB$/) atnam="HB" if (resnam~/^GLY/ && atnam~/^HA1$/) atnam="HA3" else if (resnam~/^VAL/ && atnam~/^CQG$/) atnam= "CG1" # CQG not available else if (resnam~/^(TYR|PHE)/ && atnam~/^C[DE]$/) atnam=atnam else if (resnam~/^(TYR|PHE)/ && atnam~/^CQ[DE]$/) sub("Q","",atnam) else if (resnam~/^ILE/ && atnam~/^[QC]D$/) atnam=atnam "1" else if (resnam~/^LEU/ && atnam~/^CG1$/) atnam= "CG" key=resnr"."atnam if ( oldkey!=key && verbose>0) { print "##",oldkey,"changed to",key > stderr } if (key=="") print "KEY",key,FILENAME":"$0>stderr if (resnam~/^(TYR|PHE)/ && atnam~/^C[DE]$/) { SOURCE[key"1"]=FILENAME":"$0; PPM[key"1"]=ppmval; DPPM[key"1"]=dppmval * dshift SOURCE[key"2"]=FILENAME":"$0; PPM[key"2"]=ppmval; DPPM[key"2"]=dppmval * dshift } else { SOURCE[key]=FILENAME":"$0; PPM[key]=ppmval; DPPM[key]=dppmval * dshift } comment=$0"#" gsub(/^[^#]+#|#$/,"",comment) if (comment~/^( +[-+]?[.0-9]+[~]?)+/) { if (verbose>1)print "comment",resnam,key,comment >stderr if (verbose>1)print "comment[",$0"]" >stderr atstat[key]=comment } next } END{ if (help)exit if (verbose>1) { for (qw in ATIDm) print "#mm# ["ATIDm[qw]"]",qw > "out" for (qw in ATIDc) print "#cy# ["ATIDc[qw]"]",qw > "out" } if (!seqfile) { print "\n##ERROR: no sequence file read;exiting\n" > stderr exit } if (!ppmread) { print "##error: no shifts read;exiting" > stderr exit } #protfile="/dev/stdout" #print "## writing to",protfile for ( resat1 in ispseudo) { split(resat1,resatt,SUBSEP) # resat = resatt[1] # ipse = resatt[2] pshid=ATIDc[ ispseudo[resatt[1],resatt[2]] ] haspseudo[pshid]=resatt[1] if (verbose>0) print "###",pshid,"haspseudo",resatt[1] } # if id with ppm-value has pseudo atom, refer ppm to pseudoatom # and remove ppm-value for original atom [only methyl group, see below] for (id in PPM) { if (id in haspseudo) { #print "##",haspseudo[id],id > "tmp" multppm[haspseudo[id]]++ if ( multppm[haspseudo[id]] > 1 ) { PPM[haspseudo[id]]=PPM[haspseudo[id]]" "PPM[id] DPPM[haspseudo[id]]=DPPM[id] print "##pse",id,haspseudo[id],PPM[haspseudo[id]],multppm[haspseudo[id]] } else { PPM[haspseudo[id]]=PPM[haspseudo[id]]" "PPM[id] DPPM[haspseudo[id]]=DPPM[id] } PPM[id]="999.999" DPPM[id]="999.999" if (id=="") print "EMPTY ID",id,(id in haspseudo) >stderr } } #prochirals CH2 if (compact) { for (id in PPM) { #print "##",id if (id in hasprochiral) { prochiral=hasprochiral[id] if (verbose>1) print "## prochiral", residuename[id+0""],id,prochiral,PPM[id] > stderr IDSRC[prochiral]=IDSRC[prochiral]" "id PROCH_PPM[prochiral]=PROCH_PPM[prochiral]" "PPM[id] PROCH_DPPM[prochiral]=fmax( PROCH_DPPM[prochiral],DPPM[id]) } } for (prochiral in PROCH_PPM) { if ( nprochiral[prochiral] > 1 ) { #print "### prid",prochiral,nprochiral[prochiral] >stderr sp=split(PROCH_PPM[prochiral],prpp) if (sp==2) { #print "sp",sp >stderr id1=isprochiral[prochiral,1] ; ppm1=PPM[id1] id2=isprochiral[prochiral,2] ; ppm2=PPM[id2] if (ppm1==ppm2) collapse[prochiral]=2 } else if (sp==3) { id1=isprochiral[prochiral,1] ; ppm1=PPM[id1] id2=isprochiral[prochiral,2] ; ppm2=PPM[id2] id3=isprochiral[prochiral,3] ; ppm2=PPM[id3] if (ppm1==ppm2 && ppm2==ppm3) collapse[prochiral]=3 } else { # continue } if (collapse[prochiral]>1 && DPPM[id1]stderr print "## collapsing identical ppm-values; first case:", IDSRC[prochiral],PROCH_PPM[prochiral]," --> ",prochiral > stderr print "## set verbose>0 for a list of all cases" >stderr } else if (verbose>0)print "## identical ppm-values: collapsing", IDSRC[prochiral],PROCH_PPM[prochiral]" --> ",prochiral > stderr PPM[prochiral]=PPM[id1] DPPM[prochiral]=DPPM[id1] PPM[id1]=PPM[id2]="999.999" DPPM[id1]=DPPM[id2]="999.999" if(sp==3) { PPM[id3]=DPPM[id3]="999.999" } } } } } out="sort -n|sort -nk5" #protfile out="sort -nk5" #protfile #out="cat>" protfile for (na=1; na<=maxatnum ; na++) if (na in ATIDc) { # ppmid = ATIDc[na] if ( !(ATIDc[na] in PPM) && !(ATIDm[na] in PPM) && ATIDc[na]!~/[.][OS]/) { PPM[ATIDc[na]]="999.000" if (ATIDc[na]~/O/) print "##",na,ATIDc[na],PPM[ATIDc[na]],DPPM[ATIDc[na]]> stderr if (verbose) print "##",na,ATIDc[na],PPM[ATIDc[na]],DPPM[ATIDc[na]] } } # for (a=1;a in ATIDc;a++) { # ppmid=ATIDc[a] for (ppmid in PPM) { if (verbose) print "##ppmid",ppmid,PPM[ppmid],DPPM[ppmid],"m",multppm[ppmid]> stderr if (multppm[ppmid]>1) { split(PPM[ppmid],ppms) ppm=(amax(ppms)+amin(ppms))/2 dppm=(amax(ppms)-amin(ppms))/2 print "##multi:", ppmid,PPM[ppmid],"mean:",ppm,"~"dppm,amax(ppms),amin(ppms) > stderr } else { ppm=PPM[ppmid] dppm=DPPM[ppmid] } i=-1 # changes if ppmid is found in ATNUMc/m if (ppmid in ATIDc) { if (verbose>1) print ppmid,"in cyana[1]" found_ppmid_with_shift_in_pdb[ppmid]++ i=ATNUMc[ppmid] } else if (ppmid in ATIDm) { if (verbose>1) print ppmid,"in xplor[2]" found_ppmid_with_shift_in_pdb[ppmid]++ i=ATNUMm[ppmid] } else { if (!atomnotfoundmsg++ && verbose>0) { print "## atom[s] not found in pdb's" > stderr print SOURCE[ppmid] > stderr } } if (i != -1) if (keepunassigned || ppm+0<900) { if (dppm==0) { if (ATNAMc[i]~/^[HQ]/) dppm=tol else if (ATNAMc[i]~/^[N]/) dppm=tol*ntolfac else if (ATNAMc[i]~/^[C]/) dppm=tol*ctolfac } pseppmid=ppmid if (!(pseppmid in atstat) && pseppmid~/[.]Q/) { pseat=pseppmid sub(/.+[.]/,"",pseat) hpseat="H" substr(pseat,2) "1" sub(pseat,hpseat,pseppmid) # CLEANUP ! #print pseppmid,ppmid,pseat,hpseat,pseppmid in atstat >stderr } if (d2o!="" && isexchangeable(ATNAMc[i],RESNAMc[i]) ) { print "##exchanging",ATNAMc[i],RESNAMc[i] | out } else { printf "%4i %7.3f %6.3f %4s %3i # %-4s%s\n" , i,ppm,dppm,ATNAMc[i],RESNUMc[i],RESNAMc[i], (pseppmid in atstat ?" #stat:"atstat[pseppmid]:"") | out } } } #for (atidi in PPM) { # just a check # if (!(atidi in found_ppmid_with_shift_in_pdb)) { # print "WARNING:"progname":"atidi,"not in found_ppmid_with_shift_in_pdb; PPM=",PPM[atidi] > stderr # } #} for (atidi in PPM) { #printf " .">stderr if (atidi=="") print "PPM['']="PPM[atidi],"src="SOURCE[atidi]>stderr if (!(atidi in found_ppmid_with_shift_in_pdb)) { amatch=0 for (k=1;k<=3;k++) { if ((atidi k) in found_ppmid_with_shift_in_pdb ) { amatch=1 anammatch[atidi]=anammatch[atidi] " " atidi "" k } } if (amatch==1) { if (verbose>0) printf "## found partial match in PDB files: %-8s --> %-8s\n", atidi,anammatch[atidi] > stderr } else { misid=atidi misidnum=atidi+0 misidat=substr(misid,index(misid,".")+1) misres[misidnum]++ misresat[misidnum] = misresat[misidnum]" "misidat if (verbose>0)print "##missing in found_ppmid_with_shift_in_pdb:",atidi,(atidi in found_ppmid_with_shift_in_pdb)>stderr } } } #printf "">stderr for (i in misres) { printf "## missing in PDB files: %4i %-5s : %s\n",i,RESNAM[i],misresat[i] >stderr printf "## missing in PDB files: %4i %-5s : %s\n",i,RESNAM[i],misresat[i] | out } if (!(cleanup==-1)) { } } function isexchangeable(atom,res) { if (atom=="HN" || atom=="H") retval=1 else if (res~/^(ASN|GLN)$/ && atom~/^[HQ][DE]2[12]?$/) retval=1 else if (res~/^(HIS)/ && atom ~/^H(D1|E2)$/ ) retval=1 else if (res~/^(TRP)/ && atom=="HE1" ) retval=1 else if (res~/^ARG/ && atom ~/H[EH]/ ) retval=1 else if (res~/^LYS/ && atom ~/^[HQ]Z/ ) retval=1 else if (atom ~/^N/ ) retval=1 else retval=0 return retval } function abs(a) {return x<0 ? -x : x } function procpdbs() { cyanapdb = "_cyana.pdb" xplorpdb = "_xplor.pdb" if (!pdbpresent || pdbpresent==2) { com = "cyanalib\n" if (proxylib) com=com "read "proxylib " append\n" com = com "read seq "seqfile";pseudo=1;\nwrite " cyanapdb com = com ";translate xplor;write _xplor.pdb;quit" print "## generating "cyanapdb >stderr print "## generating "xplorpdb >stderr print com>stderr print com > "cyana-in.cya" print com | cyana close(cyana) if (!pdbpresent) exit print "## reading pdb-files:",xplorpdb,cyanapdb > stderr } resnuminc="" ; prevresnumpdb="" while ( (getline pdbline < xplorpdb)>0 ) { if (pdbline~/^(ATOM|HETATM)/) procpdb(ATNAMm,RESNUMm,RESNAMm,ATIDm,ATNUMm) else if (verbose) print "#notproc",pdbline } #print "lastline",lastline >"out" #print "## reading pdb-file:",cyanapdb > stderr resnuminc="" ; prevresnumpdb="" ; while ( (getline pdbline < cyanapdb)>0 ) { if (pdbline~/^(ATOM|HETATM)/) procpdb(ATNAMc,RESNUMc,RESNAMc,ATIDc,ATNUMc) else if (verbose) print "#notproc",pdbline } } function procpdb(ATNAM,RESNUM,RESNAM,ATID,ATNUM, id) { s0=pdbline split(s0,ss) atnum = 0+substr(s0,7,5) #;print "["atnum"]" atnam = substr(s0,13,4) #;print "["atnam"]" atnam = gensub(/ *([1-3]?)([A-Z0-9]+) */,"\\2\\1","?",atnam) resnam = substr(s0,18,4) sub(/ +$/,"",resnam) if (resnam~/^(LP|PL|LL)/) return 0 if (resnam~/DU/ && atnam~/^(Q[0-9]+|QCA)$/) return 0 resnumpdb=substr(s0,23,4)+0 if (resnumpdb!=prevresnumpdb) { resnuminc++ prevresnumpdb=resnumpdb if (verbose) print "pdbproc:",resnam,atnam,"nums",resnumpdb, resnuminc,(resnuminc in residuenumber), residuenumber[resnuminc] > stderr } resnum=residuenumber[resnuminc] if (verbose>1)print "resnum="resnum "=?=" "resnumpdb="resnumpdb>stderr ATNAM[atnum]=atnam RESNUM[atnum]=resnum RESNAM[atnum]=resnam id = resnumpdb"."atnam if (verbose>1) print " ###resnumpdb",atnum,resnumpdb,atnam,resnam>stderr if (verbose>1) print " ###["resnam"]"atnam,resnumpdb,id,atnum>stderr if (verbose>1) print " ###ATID["id"]["atnum"]",resnam,resnum,atnum>stderr ATID[id]=atnum ATID[atnum]=id ATNUM[id]=atnum gresnam[resnum]=resnam resat=resnam"."atnam #print "####",resat pse="" #if (resat~/ALA/)print "########",resnum,resat > stderr if (resat ~ /^ALA[.]HB[123]*/) pse=resnum".QB" else if (resat ~ /^VAL[.]HG1[123]?$/) pse=resnum".QG1" else if (resat ~ /^VAL[.]HG2[123]?$/) pse=resnum".QG2" else if (resat ~ /^LEU[.]HD1[123]?$/) pse=resnum".QD1" else if (resat ~ /^LEU[.]HD2[123]?$/) pse=resnum".QD2" else if (resat ~ /^ILE[.]HG2[123]?$/) pse=resnum".QG2" else if (resat ~ /^ILE[.]HD1[123]?$/) pse=resnum".QD1" else if (resat ~ /^THR[.]HG2[123]?$/) pse=resnum".QG2" else if (resat ~ /^MET[.]HE[123]?$/) pse=resnum".QE" else if (resat ~ /^LYS[-+]?[.]HZ[123]?$/) pse=resnum".QZ" #else if (resat ~ /^VAL[.]QG[12]$/) pse=resnum".QQG" #else if (resat ~ /^LEU[.]QD[12]$/) pse=resnum".QQD" else if (resat == /^QDUF[.]HD[12]$/) pse=resnum".QD" else if (resat == /^QDUF[.]HE[12]$/) pse=resnum".QE" else if (resat == /^QDUT[.]HA[123]$/) pse=resnum".QA" else if (resat == /^QDUB[.]HA[12]$/) pse=resnum".QA" #if (resat ~/^LYS/) { print "####",resat,pse > stderr } # pse = "12.QD2" (example) if (pse) { if (!(id in haspseudo) ) { ispseudo[pse,++npse[pse]]=atnum haspseudo[id]=pse # print pse,npse[pse],atnum,id } } # generate list of protchirals (CH2 groups/ C(CH3)2 groups) prochiral="" if ( resat ~ /^GLY[.]HA[12]*/ ) prochiral=resnum".QA" else if ( resnam !~ /ALA/ && atnam~/^HB[123]$/) prochiral=resnum".QB" else if ( atnam~/^HG[123]$/ ) prochiral=resnum".QG" else if ( resnam !~ /(HIS|TRP)/ && atnam~/^HD[123]$/ ) prochiral=resnum".QD" else if ( resnam !~ /MET/ && atnam~/^HE[123]$/ ) prochiral=resnum".QE" else if ( resnam ~ /ILE/ && atnam~/^HG1[123]$/) prochiral=resnum".QG1" else if ( resnam ~ /THR/ && atnam~/^HG1[123]$/) prochiral=resnum".QG1" else if ( resnam ~ /VAL/ && atnam~/^QG[12]$/ ) prochiral=resnum".QQG" else if ( resnam ~ /LEU/ && atnam~/^QD[12]$/ ) prochiral=resnum".QQD" if (prochiral) if (!(id in hasprochiral) ) { nprochiral[prochiral]++ if(verbose==1)print "## defined prochiral:", resnam,id,prochiral,nprochiral[prochiral] >stderr isprochiral[prochiral,nprochiral[prochiral]]=id hasprochiral[id]=prochiral } lastline=s0 maxatnum= ( maxatnum>atnum ? maxatnum : atnum ) } function mean(dat, i, n, m) { for ( i in dat ) { m+=dat[i];n++ } if (n==0) { print "#", m, n;return m} return m/n } function fmax(a,b) { return (a >= b ? a : b) } function amax(dat, i, n, m) { for ( i in dat ) { if (!n++) m=dat[i] if ( dat[i] > m ) m=dat[i] } if (n==0) { print "# empty array", m, n;return m} return m } function amin(dat, i, n, m) { for ( i in dat ) { if (!n++) m=dat[i] if ( dat[i] < m ) m=dat[i] } if (n==0) { print "# empty array", m, n;return m} return m } function median(dat,ni) { # assume sorted array return (dat[int(ni/2+0.5)]+dat[int(ni/2+1)])/2 } function sort(dat,ni, i, f) { if (ni!=1) do { f=1; for (i=1;idat[i+1]) { f=0;switch(dat,i, i+1) } } } while (f==0) } function switch(dat, a, b, t) { t=dat[a];dat[a]=dat[b];dat[b]=t } function rmsdev(dat,m, i,out) { # returns root-mean-square deviation of dat from m ; # returns -1 if dat has no elements. for ( i in dat ) out+=sqr(m-dat[i]) return (size(dat)>0 ? sqrt(out/size(dat)) : -1) } function size(dat, i,ni) { # return number of elements of arr : -1 if no elements for ( i in dat ) ni++ return ( ni ? ni : -1) } function sqr(a) { return a*a }