#!/usr/local/bin/gawk -f
#
# pdb2iupac.awk
#
# change pdb nomenclature to iupac nomenclature
#
# possible input formats
#   .shifts
#   .upl / .lol
#   .prot
#   .xpk        (needs a .seq file given before it)
#   ppm*.out    (needs a .seq file given before it)
#
# optional variables (set with -v name=value)
#   verbose=1    report every renamed atom on stderr
#   out=ppm|nv|pseudo   alternative output flavour for .prot input
#   pedantic=1   abort on residues missing from the .seq file (ppm*.out)
#   filetype=shifts|prot   force the input type regardless of file name
#   help=1       print this header and exit
#
# output is on stdout, diagnostics are on stderr
#
#
BEGIN {
    progname = "pdb2iupac.awk"

    # Only the exact words "h" / "help" ask for help.  The pattern must be
    # anchored on both sides, otherwise any input file whose name starts
    # with "h" (e.g. hsqc.xpk) would print the help text instead of being
    # converted.
    if (ARGV[1] ~ /^(h|help)$/ || help != "") {
        print ARGV[0]
        # The help text is the leading comment block of this very script.
        system("gawk '/^#/{print}/^BEGIN/{exit}' `which "progname"`")
        help = 1
        exit
    }

    stderr = "/dev/stderr"
    IGNORECASE = 1
}

# ---------------------------------------------------------------------------
# .seq : one residue name per record, optionally followed by its number.
# Fills residue[number] = name; unnumbered records continue from the
# previous residue number.
# ---------------------------------------------------------------------------
FILENAME ~ /[.]seq$/ {
    seqread++
    numres = prevres + 1
    if (NF == 2 && $2 ~ /^[0-9]+$/)
        numres = $2
    residue[numres] = $1
    prevres = numres
    next
}

# ---------------------------------------------------------------------------
# .shifts : residue name in field 2, atom name in field 3.
# Once a .shifts file has been seen, filetype stays "shifts" for all
# following input.
# ---------------------------------------------------------------------------
FILENAME ~ /[.]shifts$/ { filetype = "shifts" }

filetype == "shifts" {
    res = $2
    atom = $3
    # Replaces the first occurrence of the atom name in the record, which
    # keeps the original column layout intact.
    sub(atom, to_iupac(res, atom))
    print
    next
}

# ---------------------------------------------------------------------------
# .upl / .lol : two residue/atom pairs, in fields 2/3 and 5/6.
# NOTE(review): the "_" markers and the "____" unwrapping below are kept
# exactly as found; the intended record layout cannot be confirmed from
# this file alone -- verify against real .upl/.lol data.
# ---------------------------------------------------------------------------
FILENAME ~ /[.](upl|lol)$/ {
    res = $2
    atom = $3
    sub(" "atom" ", "_"to_iupac(res, atom)"_")

    res = $5
    atom = $6
    sub(" "atom" ", "_"to_iupac(res, atom)"_")

    ff = "____"
    rex = "^"ff".*"ff"$"
    if ($3 ~ rex)
        sub($3, substr($3, length(ff) - 1, length($3) - length(ff) * 2))
    if ($6 ~ rex)
        sub($6, substr($6, length(ff) - 1, length($6) - length(ff) * 2))

    print
    next
}

# ---------------------------------------------------------------------------
# .prot : atom name in field 4.  The residue name is picked up from field 7
# of records whose field 6 is "#", and is remembered for later records.
# The (possibly rewritten) record is printed by the catch-all rule below.
# ---------------------------------------------------------------------------
(FILENAME ~ /[.]prot$/ || filetype == "prot") && $1 ~ /^[^#]/ {
    if ($6 == "#")
        res = $7
    atom = $4
    newatom = to_iupac(res, atom)
    if (verbose && atom != newatom)
        print atom, "->", newatom > stderr
    sub(atom" ", newatom" ")

    if (out == "ppm") {
        # A leading Q becomes H and the ambiguity count is set to 2.
        namb = 1
        if ($4 ~ /^Q/) {
            sub("^Q", "H", $4)
            namb = 2
        }
        $0 = sprintf("%-8s %8.3f %i", $5"."$4"#", $2, namb)
    }

    if (out == "nv") {
        if ($4 ~ /^Q/) {
            qqq = "H" substr($4, 2) "1"
            sub(" "$4, qqq)
        }
    }

    if (out == "pseudo") {
        newatom = to_pseudo(res, atom)
        if (verbose && atom != newatom)
            print atom, "->", newatom > stderr
        # Left-pad the shorter name so the substitution keeps column width.
        while (length(atom) > length(newatom))
            newatom = " "newatom
        while (length(atom) < length(newatom))
            atom = " "atom
        sub(atom" ", newatom" ")
    }
}

# ---------------------------------------------------------------------------
# ppm*.out : field 1 is "<residue number>.<atom>", e.g. "123.HA 5.678 2".
# Residue names come from a .seq file that must precede this file.
# ---------------------------------------------------------------------------
FILENAME ~ /^ppm.*[.]out$/ {
    if (split($1, aa, ".") == 2) {
        res = aa[1]
        atom = aa[2]
        if (seqread == "") {
            print "ERROR: need sequence file [*.seq]" > stderr
            exit 1
        }
        if (res in residue) {
            newatom = to_iupax(residue[res], atom)
            # Width-preserving replacement (e.g. "HN" -> "H ").
            $0 = subst_padded(atom, newatom, $0)
        } else {
            # Warn only once per unknown residue.
            if (!misreswarn[res]++)
                print "ERROR: residue", res, "not defined", (pedantic ? "FATAL: exiting " : "") > stderr
            if (pedantic)
                exit 1
        }
    } else {
        # Diagnostics go to stderr so they never mix with converted data.
        print "FATAL ERROR: unable to interpret", $0, " (123.HA 5.678 2)" > stderr
        exit 1
    }
}

# ---------------------------------------------------------------------------
# .xpk : record 6 is the column header; every header label ending in ".L"
# marks an assignment column, which sits one field further right in the
# data records (FNR > 6).  Assignments look like {12.HB1} or {12.HB1 13.HA}.
# ---------------------------------------------------------------------------
FILENAME ~ /[.]xpk$/ {
    if (FNR == 2) {
        xpkdim = 2      # not read anywhere in this file
    }

    if (FNR == 6) {
        for (i = 1; i <= NF; i++)
            if ($i ~ /[.]L$/)
                asgcol[i + 1] = $i
    }

    if (FNR > 6) {      # data lines
        gsub(/[{][ \t]+/, "{")
        gsub(/[ \t]+[}]/, "}")
        $0 = toupper($0)

        # Blanks inside a {...} group would split it over several fields;
        # temporarily turn them into "|" so each group is a single field.
        while (match($0, /[{][^{}]*[ \t]+[^{}]*[}]/)) {
            lstr = substr($0, 1, RSTART - 1)
            mstr = substr($0, RSTART, RLENGTH)
            rstr = substr($0, RSTART + RLENGTH)
            if (verbose) print mstr > stderr
            gsub(/[ \t]+/, "|", mstr)
            $0 = lstr mstr rstr
            if (verbose) print mstr > stderr
        }

        for (c in asgcol) {
            asgs = $c
            gsub(/(^{|}$|[|])/, " ", asgs)
            nasg = split(asgs, aasgs)
            newasg = "{"
            for (n = 1; n <= nasg; n++) {
                if (split(aasgs[n], aa, ".") == 2) {
                    res = aa[1]
                    atom = aa[2]
                    if (res in residue) {
                        newatom = to_iupax(residue[res], atom)
                        newatom = to_pseudo(residue[res], newatom)
                    } else {
                        print "FATAL ERROR: residue", res, "not defined [exit]" > stderr
                        exit 1
                    }
                    if (verbose && atom != newatom)
                        print atom, "->", newatom > stderr
                    newasg = newasg (n == 1 ? "" : "|") res"."newatom
                    if (verbose && atom != newatom)
                        print res, atom, newatom > stderr
                } else {
                    # Not of the form <res>.<atom>: copy through unchanged.
                    newasg = newasg (n == 1 ? "" : "|") aasgs[n]
                }
            }
            newasg = newasg"}"
            gsub("[|]", " ", newasg)
            $0 = strsub($c, newasg, $0)
        }
    }
}

# Catch-all: print every record that was not already printed and skipped.
1

# ---------------------------------------------------------------------------
# strsub(s1, s2, t)
#   Literal (non-regex) replacement of the first occurrence of s1 in t by
#   s2.  t defaults to $0 when empty.  Returns t unchanged if s1 is absent.
# ---------------------------------------------------------------------------
function strsub(s1, s2, t,    pos, len, retval) {
    if (t == "")
        t = $0
    retval = t
    pos = index(t, s1)
    len = length(s1)
    if (pos)
        retval = substr(t, 1, pos - 1) s2 substr(t, pos + len)
    return retval
}

# ---------------------------------------------------------------------------
# subst_padded(oldname, newname, line)
#   Replace the first literal occurrence of oldname in line by newname
#   while keeping the record width: the shorter name is padded on the
#   right with blanks, so "HN" -> "H" turns "12.HN 8.1" into "12.H  8.1".
#   When newname is longer, the blanks appended to oldname must be present
#   in line for the replacement to happen.
# ---------------------------------------------------------------------------
function subst_padded(oldname, newname, line) {
    while (length(newname) < length(oldname))
        newname = newname " "
    while (length(newname) > length(oldname))
        oldname = oldname " "
    return strsub(oldname, newname, line)
}

# ---------------------------------------------------------------------------
# to_iupac(res, atom)
#   Shift proton numbering up by one for the residues listed per rule:
#   HB1->HB2, HB2->HB3, likewise HG, HD, HE and HG11/HG12 (ILE).
#   res and atom are compared case-insensitively; blanks in atom are
#   dropped.  Returns the (upper-case) new atom name, or the normalised
#   input name when no rule applies.
# ---------------------------------------------------------------------------
function to_iupac(res, atom,    outatom, resrxp, lres) {
    res = toupper(res)
    atom = toupper(atom)
    gsub(/ /, "", atom)
    outatom = atom
    # The residue patterns are lower case; matching on a lower-cased copy
    # makes the function independent of the IGNORECASE setting.
    lres = tolower(res)

    #hb1 hb2
    resrxp = "(arg|as[np]|cy[ps]|gl[nu]|met|phe|"
    resrxp = resrxp "pro|ser|tyr|trp|leu|lys|his|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HB1") outatom = "HB2"
        if (atom == "HB2") outatom = "HB3"
    }

    #hg1 hg2
    resrxp = "(arg|gl[nu]|met|pro|lys|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HG1") outatom = "HG2"
        if (atom == "HG2") outatom = "HG3"
    }

    #hg11 hg12 (ile)
    resrxp = "(ile|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HG11") outatom = "HG12"
        if (atom == "HG12") outatom = "HG13"
    }

    #hd1 hd2
    resrxp = "(arg|pro|lys|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HD1") outatom = "HD2"
        if (atom == "HD2") outatom = "HD3"
    }

    #he1 he2
    resrxp = "(lys)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HE1") outatom = "HE2"
        if (atom == "HE2") outatom = "HE3"
    }

    return outatom
}

# ---------------------------------------------------------------------------
# to_iupax(res, atom)
#   Same as to_iupac, plus two extra renames: HN -> H for every residue
#   and HA1/HA2 -> HA2/HA3 for GLY.  Neither of those atom names is
#   touched by any to_iupac rule, so everything else is delegated.
# ---------------------------------------------------------------------------
function to_iupax(res, atom,    outatom) {
    res = toupper(res)
    atom = toupper(atom)
    gsub(/ /, "", atom)

    if (atom == "HN")
        return "H"
    if (res == "GLY") {
        if (atom == "HA1") return "HA2"
        if (atom == "HA2") return "HA3"
    }
    return to_iupac(res, atom)
}

# ---------------------------------------------------------------------------
# to_pseudo(res, atom)
#   Map the listed proton names of ALA, VAL, LEU, ILE, THR, MET and LYS
#   onto their Q pseudo-atom name (e.g. ALA HB1/2/3 -> QB).  Returns the
#   normalised (upper-case, blank-free) atom name when no rule applies.
# ---------------------------------------------------------------------------
function to_pseudo(res, atom,    outatom) {
    res = toupper(res)
    atom = toupper(atom)
    gsub(/ /, "", atom)
    outatom = atom

    if      (res ~ /^ALA/ && atom ~ /^HB[123]*$/ ) outatom = "QB"
    else if (res ~ /^VAL/ && atom ~ /^HG1[123]?$/) outatom = "QG1"
    else if (res ~ /^VAL/ && atom ~ /^HG2[123]?$/) outatom = "QG2"
    else if (res ~ /^LEU/ && atom ~ /^HD1[123]?$/) outatom = "QD1"
    else if (res ~ /^LEU/ && atom ~ /^HD2[123]?$/) outatom = "QD2"
    else if (res ~ /^ILE/ && atom ~ /^HG2[123]?$/) outatom = "QG2"
    else if (res ~ /^ILE/ && atom ~ /^HD1[123]?$/) outatom = "QD1"
    else if (res ~ /^THR/ && atom ~ /^HG2[123]?$/) outatom = "QG2"
    else if (res ~ /^MET/ && atom ~ /^HE[123]?$/ ) outatom = "QE"
    else if (res ~ /^LYS/ && atom ~ /^HZ[123]?$/ ) outatom = "QZ"

    return outatom
}

# ---------------------------------------------------------------------------
# from_iupac(res, atom)
#   Inverse numbering shift of to_iupac: HB2->HB1, HB3->HB2, likewise HG,
#   HD, HE and HG12/HG13 (ILE).  GLY HA3 is renamed to HA2 with a warning
#   on stderr; GLY HA2 is left alone.
# ---------------------------------------------------------------------------
function from_iupac(res, atom,    outatom, resrxp, lres) {
    res = toupper(res)
    atom = toupper(atom)
    gsub(/ /, "", atom)
    outatom = atom
    # Lower-cased copy so the lower-case residue patterns match without
    # relying on IGNORECASE.
    lres = tolower(res)

    if (res == "GLY") {
        if (atom == "HA3") {
            outatom = "HA2"
            # The message names the value that is actually assigned.
            print "warning -- unexpected GLY HA3 -> converted to HA2" > stderr
        }
    }

    #hb1 hb2
    resrxp = "(arg|as[np]|cy[ps]|gl[nu]|met|phe|"
    resrxp = resrxp "pro|ser|tyr|trp|leu|lys|his|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HB2") outatom = "HB1"
        if (atom == "HB3") outatom = "HB2"
    }

    #hg1 hg2
    resrxp = "(arg|gl[nu]|met|pro|lys|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HG2") outatom = "HG1"
        if (atom == "HG3") outatom = "HG2"
    }

    #hg11 hg12 (ile)
    resrxp = "(ile|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HG12") outatom = "HG11"
        if (atom == "HG13") outatom = "HG12"
    }

    #hd1 hd2
    resrxp = "(arg|pro|lys|unk|pru)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HD2") outatom = "HD1"
        if (atom == "HD3") outatom = "HD2"
    }

    #he1 he2
    resrxp = "(lys)([-0-9+]?)"
    if (lres ~ resrxp) {
        if (atom == "HE2") outatom = "HE1"
        if (atom == "HE3") outatom = "HE2"
    }

    return outatom
}

# ---------------------------------------------------------------------------
# from_iupax(res, atom)
#   Applies exactly the same rules as from_iupac (there is no H -> HN
#   counterpart to to_iupax), so it simply delegates.
# ---------------------------------------------------------------------------
function from_iupax(res, atom,    outatom) {
    return from_iupac(res, atom)
}