************************************************************************* * This program is to compare two structures and find the superposition * that has the maximum TM-scale. * The program can be freely copied or modified. * * Reference: Yang Zhang, Jeffrey Skolnick, Proteins, 2004 57:702-710. * For comments, please email to: zhang6@buffalo.edu ************************************************************************* program TMscore PARAMETER(nmax=3000) common/stru/xt(nmax),yt(nmax),zt(nmax),xb(nmax),yb(nmax),zb(nmax) common/nres/nresA(nmax),nresB(nmax),nseqA,nseqB common/para/d,d0,d0_fix common/align/n_ali,iA(nmax),iB(nmax) common/nscore/i_ali(nmax),n_cut ![1,n_ali],align residues for the score dimension k_ali(nmax),k_ali0(nmax) character*100 fnam,pdb(100),outname character*3 aa(-1:20),seqA(nmax),seqB(nmax) character*100 s,du character seq1A(nmax),seq1B(nmax),ali(nmax) character sequenceA(nmax),sequenceB(nmax),sequenceM(nmax) dimension L_ini(100),iq(nmax) common/scores/score,score_maxsub,score_fix common/GDT/n_GDT1,n_GDT2,n_GDT4,n_GDT8 double precision score,score_max,score_fix,score_fix_max double precision score_maxsub dimension xa(nmax),ya(nmax),za(nmax) ccc RMSD: double precision r_1(3,nmax),r_2(3,nmax),r_3(3,nmax),w(nmax) double precision u(3,3),t(3),rms,drms !armsd is real data w /nmax*1.0/ ccc data aa/ 'BCK','GLY','ALA','SER','CYS','VAL','THR','ILE', & 'PRO','MET','ASP','ASN','LEU', & 'LYS','GLU','GLN','ARG', & 'HIS','PHE','TYR','TRP','CYX'/ character*1 slc(-1:20) data slc/'X','G','A','S','C','V','T','I', & 'P','M','D','N','L','K','E','Q','R', & 'H','F','Y','W','C'/ *****instructions -----------------> call getarg(1,fnam) if(fnam.eq.' '.or.fnam.eq.'?'.or.fnam.eq.'-h')then write(*,*) write(*,*)'Brief instructure for running TM-score program:' write(*,*) write(*,*)'1. Run TM-score to compare ''model'' and ''native'':' write(*,*)' TMscore model native' write(*,*) write(*,*)'2. Run TM-score with an assigned d0, e.g. 5 Angstroms:' write(*,*)' TMscore model native -d 5' write(*,*) write(*,*)'3. Run TM-score with superposition output, e.g. ', & '''TM.sup'':' write(*,*)' TMscore model native -o TM.sup' write(*,*) write(*,*)'4, To view the superposition structures by rasmol:' write(*,*)' rasmol -script TM.sup' write(*,*) goto 9999 endif ******* options -----------> m_out=-1 m_fix=-1 narg=iargc() i=0 j=0 115 continue i=i+1 call getarg(i,fnam) if(fnam.eq.'-o')then m_out=1 i=i+1 call getarg(i,outname) elseif(fnam.eq.'-d')then m_fix=1 i=i+1 call getarg(i,fnam) read(fnam,*)d0_fix else j=j+1 pdb(j)=fnam endif if(i.lt.narg)goto 115 ccccccccc read data from first CA file: open(unit=10,file=pdb(1),status='old') i=0 101 read(10,104,end=102) s if(s(1:3).eq.'TER') goto 102 if(s(1:4).eq.'ATOM')then if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '.or.s(13:16). & eq.' CA')then i=i+1 read(s,103)du,seqA(i),du,nresA(i),du,xa(i),ya(i),za(i) do j=-1,20 if(seqA(i).eq.aa(j))seq1A(i)=slc(j) enddo endif endif goto 101 102 continue 103 format(A17,A3,A2,i4,A4,3F8.3) 104 format(A100) close(10) nseqA=i c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ccccccccc read data from first CA file: open(unit=10,file=pdb(2),status='old') i=0 201 read(10,204,end=202) s if(s(1:3).eq.'TER') goto 202 if(s(1:4).eq.'ATOM')then if(s(13:16).eq.'CA '.or.s(13:16).eq.' CA '.or.s(13:16). & eq.' CA')then i=i+1 read(s,203)du,seqB(i),du,nresB(i),du,xb(i),yb(i),zb(i) do j=-1,20 if(seqB(i).eq.aa(j))seq1B(i)=slc(j) enddo endif endif goto 201 202 continue 203 format(A17,A3,A2,i4,A4,3F8.3) 204 format(A100) close(10) nseqB=i c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ****************************************************************** * pickup the aligned residues: ****************************************************************** k=0 do i=1,nseqA do j=1,nseqB if(nresA(i).eq.nresB(j))then k=k+1 iA(k)=i iB(k)=j goto 205 endif enddo 205 continue enddo n_ali=k !number of aligned residues if(n_ali.lt.1)then write(*,*)'There is no common residues in the input structures' goto 9999 endif ************///// * parameters: ***************** *** d0-------------> d0=1.24*(nseqB-15)**(1.0/3.0)-1.8 if(d0.lt.0.5)d0=0.5 if(m_fix.eq.1)d0=d0_fix *** d0_search -----> d0_search=d0 if(d0_search.gt.8)d0_search=8 if(d0_search.lt.4.5)d0_search=4.5 *** iterative parameters -----> n_it=20 !maximum number of iterations d_output=5 !for output alignment if(m_fix.eq.1)d_output=d0_fix n_init_max=6 !maximum number of L_init n_init=0 L_ini_min=4 if(n_ali.lt.4)L_ini_min=n_ali do i=1,n_init_max-1 n_init=n_init+1 L_ini(n_init)=n_ali/2**(n_init-1) if(L_ini(n_init).le.L_ini_min)then L_ini(n_init)=L_ini_min goto 402 endif enddo n_init=n_init+1 L_ini(n_init)=L_ini_min 402 continue ****************************************************************** * find the maximum score starting from local structures superposition ****************************************************************** score_max=-1 !TM-score score_maxsub_max=-1 !MaxSub-score n_GDT1_max=-1 !number of residues<1 n_GDT2_max=-1 !number of residues<2 n_GDT4_max=-1 !number of residues<4 n_GDT8_max=-1 !number of residues<8 do 333 i_init=1,n_init L_init=L_ini(i_init) iL_max=n_ali-L_init+1 do 300 iL=1,iL_max !on aligned residues, [1,nseqA] LL=0 ka=0 do i=1,L_init k=iL+i-1 ![1,n_ali] common aligned r_1(1,i)=xa(iA(k)) r_1(2,i)=ya(iA(k)) r_1(3,i)=za(iA(k)) r_2(1,i)=xb(iB(k)) r_2(2,i)=yb(iB(k)) r_2(3,i)=zb(iB(k)) ka=ka+1 k_ali(ka)=k LL=LL+1 enddo call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 if(i_init.eq.1)then !global superposition armsd=dsqrt(rms/LL) rmsd_ali=armsd endif do j=1,nseqA xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo d=d0_search-1 call score_fun !init, get scores, n_cut+i_ali(i) for iteration if(score_max.lt.score)then score_max=score ka0=ka do i=1,ka0 k_ali0(i)=k_ali(i) enddo endif if(score_maxsub_max.lt.score_maxsub)score_maxsub_max= & score_maxsub if(n_GDT1_max.lt.n_GDT1)n_GDT1_max=n_GDT1 if(n_GDT2_max.lt.n_GDT2)n_GDT2_max=n_GDT2 if(n_GDT4_max.lt.n_GDT4)n_GDT4_max=n_GDT4 if(n_GDT8_max.lt.n_GDT8)n_GDT8_max=n_GDT8 *** iteration for extending ----------------------------------> d=d0_search+1 do 301 it=1,n_it LL=0 ka=0 do i=1,n_cut m=i_ali(i) ![1,n_ali] r_1(1,i)=xa(iA(m)) r_1(2,i)=ya(iA(m)) r_1(3,i)=za(iA(m)) r_2(1,i)=xb(iB(m)) r_2(2,i)=yb(iB(m)) r_2(3,i)=zb(iB(m)) ka=ka+1 k_ali(ka)=m LL=LL+1 enddo call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 do j=1,nseqA xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo call score_fun !get scores, n_cut+i_ali(i) for iteration if(score_max.lt.score)then score_max=score ka0=ka do i=1,ka k_ali0(i)=k_ali(i) enddo endif if(score_maxsub_max.lt.score_maxsub)score_maxsub_max & =score_maxsub if(n_GDT1_max.lt.n_GDT1)n_GDT1_max=n_GDT1 if(n_GDT2_max.lt.n_GDT2)n_GDT2_max=n_GDT2 if(n_GDT4_max.lt.n_GDT4)n_GDT4_max=n_GDT4 if(n_GDT8_max.lt.n_GDT8)n_GDT8_max=n_GDT8 303 format(i5,i5,i5,f17.14,f17.14,i5,f7.3) if(it.eq.n_it)goto 302 if(n_cut.eq.ka)then neq=0 do i=1,n_cut if(i_ali(i).eq.k_ali(i))neq=neq+1 enddo if(n_cut.eq.neq)goto 302 endif 301 continue !for iteration 302 continue 300 continue !for shift 333 continue !for initial length, L_ali/M ****************************************************************** * Output ****************************************************************** *** output TM-scale ----------------------------> write(*,*) write(*,*)'*****************************************************', & '************************' write(*,*)'* TM-SCORE ', & ' *' write(*,*)'* A scoring function to assess the quality of protein', & ' structure predictions *' write(*,*)'* Based on statistics: ', & ' *' write(*,*)'* 0.0 < TM-score < 0.17, Random predictions ', & ' *' write(*,*)'* 0.4 < TM-score < 1.00, Meaningful predictions', & ' *' write(*,*)'* Reference: Yang Zhang and Jeffrey Skolnick, ', & 'Proteins 2004 57: 702-710 *' write(*,*)'* For comments, please email to: zhang6@buffalo.edu ', & ' *' write(*,*)'*****************************************************', & '************************' write(*,*) write(*,501)pdb(1),nseqA 501 format('Structure1: ',A10,' Length= ',I4) write(*,502)pdb(2),nseqB 502 format('Strucutre2: ',A10,' Length= ',I4, & ' (by which all scores are normalized)') write(*,503)n_ali 503 format('Number of residues in common= ',I4) write(*,513)rmsd_ali 513 format('RMSD of the common residues= ',F8.3) write(*,*) write(*,504)score_max,d0 504 format('TM-score = ',f6.4,' (d0=',f5.2,')') write(*,505)score_maxsub_max 505 format('MaxSub-score= ',f6.4,' (d0= 3.50)') score_GDT=(n_GDT1_max+n_GDT2_max+n_GDT4_max+n_GDT8_max) & /float(4*nseqB) write(*,506)score_GDT,n_GDT1_max/float(nseqB),n_GDT2_max & /float(nseqB),n_GDT4_max/float(nseqB),n_GDT8_max/float(nseqB) 506 format('GDT-score = ',f6.4,' %(d<1)=',f6.4,' %(d<2)=',f6.4, $ ' %(d<4)=',f6.4,' %(d<8)=',f6.4) write(*,*) *** recall and output the superposition of maxiumum TM-score: LL=0 do i=1,ka0 m=k_ali0(i) !record of the best alignment r_1(1,i)=xa(iA(m)) r_1(2,i)=ya(iA(m)) r_1(3,i)=za(iA(m)) r_2(1,i)=xb(iB(m)) r_2(2,i)=yb(iB(m)) r_2(3,i)=zb(iB(m)) LL=LL+1 enddo call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 do j=1,nseqA xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo d=d_output !for output call score_fun() !give i_ali(i), score_max=score now LL=0 do i=1,n_cut m=i_ali(i) ![1,nseqA] r_1(1,i)=xa(iA(m)) r_1(2,i)=ya(iA(m)) r_1(3,i)=za(iA(m)) r_2(1,i)=xb(iB(m)) r_2(2,i)=yb(iB(m)) r_2(3,i)=zb(iB(m)) LL=LL+1 enddo call u3b(w,r_1,r_2,LL,0,rms,u,t,ier) armsd=dsqrt(rms/LL) rmsd=armsd *** output rotated chain1 + chain2-----> if(m_out.ne.1)goto 999 OPEN(unit=7,file=outname,status='unknown') !pdb1.aln + pdb2.aln 900 format(A) 901 format('select ',I4) write(7,900)'load inline' write(7,900)'select atomno<1000' write(7,900)'color [255,20,147]' write(7,900)'wireframe .45' write(7,900)'select none' write(7,900)'select atomno>1000' write(7,900)'color [100,149,237]' write(7,900)'wireframe .20' write(7,900)'#color group' do i=1,n_cut write(7,901)nresA(iA(i_ali(i))) write(7,900)'color red' enddo write(7,900)'select all' write(7,900)'exit' do i=1,nseqA write(7,1237)nresA(i),seqA(i),nresA(i),xt(i),yt(i),zt(i) enddo write(7,1238) do i=2,nseqA write(7,1239)nresA(i-1),nresA(i) enddo do i=1,nseqB write(7,1237)2000+nresB(i),seqB(i),nresB(i),xb(i),yb(i),zb(i) enddo write(7,1238) do i=2,nseqB write(7,1239)2000+nresB(i-1),2000+nresB(i) enddo 1237 format('ATOM ',i5,' CA ',A3,I6,4X,3F8.3) 1238 format('TER') 1239 format('CONECT',I5,I5) 999 continue *** record aligned residues by i=[1,nseqA], for sequenceM()------------> do i=1,nseqA iq(i)=0 enddo do i=1,n_cut j=iA(i_ali(i)) ![1,nseqA] k=iB(i_ali(i)) ![1,nseqB] dis=sqrt((xt(j)-xb(k))**2+(yt(j)-yb(k))**2+(zt(j)-zb(k))**2) if(dis.lt.d_output)then iq(j)=1 endif enddo ******************************************************************* *** output aligned sequences k=0 i=1 j=1 800 continue if(i.gt.nseqA.and.j.gt.nseqB)goto 802 if(i.gt.nseqA.and.j.le.nseqB)then k=k+1 sequenceA(k)='-' sequenceB(k)=seq1B(j) sequenceM(k)=' ' j=j+1 goto 800 endif if(i.le.nseqA.and.j.gt.nseqB)then k=k+1 sequenceA(k)=seq1A(i) sequenceB(k)='-' sequenceM(k)=' ' i=i+1 goto 800 endif if(nresA(i).eq.nresB(j))then k=k+1 sequenceA(k)=seq1A(i) sequenceB(k)=seq1B(j) if(iq(i).eq.1)then sequenceM(k)=':' else sequenceM(k)=' ' endif i=i+1 j=j+1 goto 800 elseif(nresA(i).lt.nresB(j))then k=k+1 sequenceA(k)=seq1A(i) sequenceB(k)='-' sequenceM(k)=' ' i=i+1 goto 800 elseif(nresB(j).lt.nresA(i))then k=k+1 sequenceA(k)='-' sequenceB(k)=seq1B(j) sequenceM(k)=' ' j=j+1 goto 800 endif 802 continue write(*,600)d_output,n_cut,rmsd 600 format('Superposition in the TM-score: Length(d<',f3.1, $ ')=',i3,' RMSD=',f6.2) write(*,603)d_output 603 format('(":" denotes the residue pairs of distance < ',f3.1, & ' Angstrom)') write(*,601)(sequenceA(i),i=1,k) write(*,601)(sequenceM(i),i=1,k) write(*,601)(sequenceB(i),i=1,k) write(*,602)(mod(i,10),i=1,k) 601 format(2000A1) 602 format(2000I1) write(*,*) c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 9999 END ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c 1, collect those residues with dis