******************************************************************************* * SPICKER_3.0, released on April 21, 2016 * * This program, Structure-PICKER (SPICKER), aims at selecting the * best fold of lowest free-energy by clustering structural decoys * generated by I-TASSER or other protein structure assembly simulations. * Comments and bug reports should be addressed to zhng@umich.edu * * Reference: * Y Zhang, J Skolnick, Journal of Computational Chemistry, 2004 25: 865-871 * * Permission to use, copy, modify, and distribute this program for * any purpose, with or without fee, is hereby granted, provided that * the notices on the head, the reference information, and this * copyright notice appear in all copies or substantial portions of * the Software. It is provided "as is" without express or implied * warranty. ******************* Updating history ************************************ * 2009/01/24: A bug with regard to the input format is fixed * 2010/08/02: SPICKER V2.0 is released. A new option is added to * automatically tune RMSD cutoff based on variation of * decoy distributions. This option works best for decoys * generated by ab initio simulations, such as QUARK. * 2010/09/14: modified 'seq.dat' to a simplied format * 2010/12/29: modified 'rmsinp' to a simplied format * 2016/04/21: added an option to cluster decoys using 1/TM-score to replace * RMSD as the distance scale. This part is written by * Dr. Jens Thomas. ************************************************************************* * * Input files includes: * 'rmsinp'---Mandatory, length of protein & piece for RMSD calculation * 'seq.dat'--Mandatory, sequence file, for output of PDB models * 'tra.in'---Mandatory, list of trajectory names used for clustering * In the first line of 'tra.in', there are 3 parameters: * par1: number of decoy files * par2: 1, default cutoff, best for clustering decoys from * template-based modeling; * -1, cutoff based on variation, best for clustering * decoys from ab initio modeling. * -2, cluster based on 1/TM-scores * par3: 1, select closc from all decoys; * -1, closc from clustered decoys (slighly faster) * From second lines are file names which contain coordinates * of 3D structure decoys. All these files are mandatory. See * attached 'rep1.tra1' etc for the format of decoys. * 'rep1.tra1', 'rep2.tra1', ... ---decoy files which should have the * same name as those listed in 'tra.in'. In the first line, * the first number is the length of the decoy; the second * number is the energy of the decoy (if you donot know the * energy you can put any number there); the third and fourth * numbers are not necessary and useless. * Starting from the second line, the coordinates (x,y,z) of * C-alpha atoms are listed. * 'CA'-------Optional, native structure, for comparison to native. * * Output files includes: * 'str.txt'-----list of structure in cluster; * 'combo*.pdb'--PDB format of cluster centroids; * 'closc*.pdb'--PDB format of structures closest to centroids; * 'rst.dat'-----summary of clustering results; * * Important data and explanations in 'rst.dat': * ------------ summary of clusers ------------------- * i Size R_cut density R_cl_co E_model #str Trajectory * B-------------------------------------------------- * 1 109 5.90 18. 1.67 -2200.5 -2272.2 57 rep2.tra1 * 2 12 5.90 2. 0.70 505.6 580.4 5 rep1.tra1 * 3 11 5.90 2. 0.97 742.9 926.9 1 rep1.tra1 * 4 19 5.90 3. 0.66 -2062.5 -2057.1 26 rep1.tra1 * 5 6 5.90 1. 0.44 219.5 215.0 13 rep1.tra1 * --------------------- --------------------- * i N_in N_ex * C--------------------------------------------- * 1 109 3.31 2.75 109 3.31 2.75 * 2 12 1.54 0.92 12 1.54 0.92 * 3 11 1.72 1.19 11 1.72 1.19 * 4 19 1.27 1.15 9 1.10 1.03 * 5 6 0.96 0.69 6 0.96 0.69 * * Size------number of decoy structures in the cluster * R_cut-----RMSD cutoff for defining the clusters * density---size/R_cut * R_cl_co---RMSD distance between combo and closc * -------average energy of decoys in the cluster * #str & Trajectory----in this example, the cluster center of the first * cluster comes from 57th decoy in 'rep2.tra1' * N_in------number of structure decoys in the cluster (=size) * ----average RMSD of decoys to cluster center * ---average RMSD of decoys to cluster centroid * (in I-TASSER, cluster_density=N_in/) * N_ex------number of structure decoys in the cluster, after excluding * decoys from previous clustering * ----average RMSD of decoys to cluster center, after excluding * decoys from previous clustering * ---average RMSD of decoys to cluster centroid, after excluding * decoys from previous clustering * ******************************************************************************* c 1 2 3 4 5 6 7 ! c 345678901234567890123456789012345678901234567890123456789012345678901234567 program cluster parameter(ndim=2000) !Length parameter(nst=13000) !number of used structure, maximum allowed c parameter(nst=100) !number of used structure, maximum allowed parameter(ntr=20000) !number of trajectories parameter(ncl=100) !number of clusters character filen(ntr)*72,c2*6,seq(ndim)*3 character txt1*70,txt2*70 ccc RMSD: double precision r_1(3,ndim),r_2(3,ndim),w(ndim) double precision u(3,3),t(3),rms !armsd is real data w /ndim*1.0/ ccc dimension xt(ndim),yt(ndim),zt(ndim) !temporal coordinate dimension x_n(ndim),y_n(ndim),z_n(ndim) !native structure dimension x(ndim,nst),y(ndim,nst),z(ndim,nst) !used structures dimension amat(nst,nst) !RMSD matrics dimension mark(nst) !for removing used structures dimension n_str_near(nst) !numbef of neighboring structures dimension itra(nst),istr(nst),E(nst) dimension n_str_cl(ncl),n_str_cl_ex(ncl) dimension i_cl(ncl),rmsd_cl_cut(ncl) dimension E_combo(ncl) dimension i_str_cl(ncl,nst),i_str_cl_ex(ncl,nst) dimension rmsd_str(nst) dimension xs(ndim),ys(ndim),zs(ndim) dimension xc(ncl,ndim),yc(ncl,ndim),zc(ncl,ndim) dimension xcl(ncl,ndim),ycl(ncl,ndim),zcl(ncl,ndim) dimension rmsd_close_min(ncl) !RMSD between 'combo.pdb' and 'closm.pdb' dimension R_in(100),R_ex(100),Rc_in(100),Rc_ex(100) dimension order(nst) dimension ires(ndim) double precision rmsd_a dimension x1(ndim),y1(ndim),z1(ndim),nn1(ndim) dimension x2(ndim),y2(ndim),z2(ndim),nn2(ndim) ********************************************************************** ********************************************************************** **** input: open(1,file='rmsinp',status='old') !read Lch open(2,file='seq.dat',status='old') !read SEQ for model.pdb open(3,file='CA',status='unknown') !for comparison to native open(4,file='tra.in',status='old') !information for trajectories **** output: open(20,file='rst.dat',status='unknown') open(21,file='str.txt',status='unknown') !structures in cluster open(22,file='RMSD.list',status='unknown') !structures in cluster ********************************************************************** read(1,*)n1,n2 !calculate RMSD between [n1,n2] read(1,*)Lch close(1) ******** following parameter has been optimized ******************** nc_max=10 RMSD_cut_initial=8 RMSD_cut_min=3.5 RMSD_cut_max=12 ratio1=0.7 ratio2=0.15 *** n_closc>0: closc from all decoys; n_closc<0, closc from 13000 read(4,*)n_tra,n_para,n_closc if(n_tra.gt.20000)then write(*,*)'There are too many trajectory files (>20000)' write(*,*)'To fix the problem, you can merge multiple' write(*,*)'trajectory files into a single trajectory ' write(*,*)'file. See example of attached "rep1.tra1"' stop endif c if(n_para.lt.0)then c RMSD_cut_initial=5.5 c RMSD_cut_min=3.5 c RMSD_cut_max=8.0 c ratio1=0.5 c ratio2=0.15 c endif ******************************************************************** *** find out the total number of structures & structure closest to template: *** The total number of structure will be used for picking clustering struct. R_temp_min=10000 i_str_all=0 do i_tra=1,n_tra i_str_tra=0 read(4,'(A72)')filen(i_tra) open(10,file=filen(i_tra),status='old') 10 read(10,*,end=19,err=19)Lch1,energ do i=1,Lch1 read(10,*,end=19,err=19)xt(i),yt(i),zt(i) enddo i_str_tra=i_str_tra+1 i_str_all=i_str_all+1 goto 10 19 close(10) enddo n_str_all=i_str_all c write(*,*)'Total number structures=',n_str_all close(4) c^^^^^^^^^^^^^^^^^^^^^^^^ n_str_all done ^^^^^^^^^^^^^^^^^^^^^^^^ cccccccccccc read native structure cccccccccccccccccccc i=0 11 read(3,1239,end=5)tx,ii,tx,tx,ii,tx,a1,a2,a3 if(ii.ge.1.and.ii.le.Lch)then i=i+1 ires(i)=ii x_n(i)=a1 y_n(i)=a2 z_n(i)=a3 r_1(1,i)=a1 r_1(2,i)=a2 r_1(3,i)=a3 endif goto 11 5 continue Lch_n=i !length of native 1239 format(A4,I7,A4,A7,I4,A4,3F8.3) c^^^^^^^^^^^^^^^ read native done ^^^^^^^^^^^^^^^^^^^^^^ ccc read structures from trajectories and find best structure in 13000 cccccccc i_str_tra_a=0 ! rmsd_min_all=100 rmsd_min_use=100 n_max=nst !maximum structures to handle delta=float(n_str_all)/float(n_max) !take a structure in each delta if(delta.lt.1)delta=1 i_str=1 !number of structure used for clustering i_str_all=0 E_min=100000 E_min_all=100000 write(22,*)'RMSD Energy #str trajectory' do 100 i_tra=1,n_tra open(10,file=filen(i_tra),status='old') i_str_tra=0 !order number of the structure in this file 20 read(10,*,end=29,err=29)Lch1,energ do i=1,Lch1 read(10,*,end=29,err=29)xt(i),yt(i),zt(i) enddo i_str_tra=i_str_tra+1 i_str_all=i_str_all+1 *** calculate RMSD of structure to native-----------> if(Lch_n.gt.1)then do i=1,Lch_n r_2(1,i)=xt(ires(i)) r_2(2,i)=yt(ires(i)) r_2(3,i)=zt(ires(i)) enddo call u3b(w,r_1,r_2,Lch_n,0,rms,u,t,ier) armsd=dsqrt(rms/Lch_n) if(rmsd_min_all.gt.armsd)then rmsd_min_all=armsd E_rma=energ itra_rmsd_min_all=i_tra istr_rmsd_min_all=i_str_tra endif if(E_min_all.gt.energ)then rmsd_E_min_all=armsd E_min_all=energ itra_E_min_all=i_tra istr_E_min_all=i_str_tra endif endif *** select structure for clustering--------> if(i_str_all.ge.i_str*delta)then if(i_str.gt.n_max) goto 29 order(i_str_tra)=order(i_str_tra)+1 i_str_tra_a=i_str_tra_a+i_str_tra itra(i_str)=i_tra istr(i_str)=i_str_tra E(i_str)=energ n_str=i_str do i=1,Lch x(i,i_str)=xt(i) y(i,i_str)=yt(i) z(i,i_str)=zt(i) enddo if(Lch_n.gt.1)then if(rmsd_min_use.gt.armsd)then rmsd_min_use=armsd E_rmu=energ itra_rmsd_min_use=i_tra istr_rmsd_min_use=i_str_tra endif if(E_min.gt.energ)then E_min=energ rmsd_E_min=armsd itra_E_min=i_tra istr_E_min=i_str_tra endif rmsd_str(i_str)=armsd write(22,61)armsd,energ,i_str_tra,filen(i_tra) 61 format(f8.3,1x,f11.2,1x,i10,1x,a18) else rmsd_str(i_str)=-1 endif i_str=i_str+1 endif *** goto 20 29 close(10) 100 continue close(22) c^^^^^^^^^^^^^^^^^read structures finished ^^^^^^^^^^^^^^^^^^^^ c write(*,*)i_str,n_str,itra(n_str),istr(n_str),n_max ccc output distribution of i_str_tra -------> i_str_tra_a=float(i_str_tra_a)/float(n_str) write(21,*)'delta=',delta write(21,*)'=',i_str_tra_a write(21,*)'distribution------->' do i=1,nst if(order(i).gt.0.5)then write(21,*)i,order(i) endif enddo c Fill mark array do i=1,n_str mark(i)=1 enddo cjmht Calculate the amat with the all-by-all scores if (n_para.eq.-2) then call tm_score_all(ndim,nst,n_str,Lch,x,y,z,amat,rmsd_delta, & rmsd_a) elseif (n_para.eq.-3) then call read_score_matrix(nst,n_str,amat,rmsd_delta, rmsd_a) else call rmsd_score_all(ndim,nst,w,n_str,Lch,x,y,z,amat, & rmsd_delta,rmsd_a) endif cjmht write out the score matrix of TM scores if (n_para.eq.-2) then open(30,file='tm.matrix',status='unknown') !scores do i=1,n_str do j=i,n_str write(30,2000)i-1,j-1,amat(i,j) enddo enddo close(30) 2000 format(i4,2x,i4,2x,f9.4) endif ******** reset RMSD_cutoff parameters: if(n_para.le.-1)then !parameters for ab inito modeling RMSD_cut_initial=rmsd_a*0.5 if(RMSD_cut_initial.lt.2)RMSD_cut_initial=2 RMSD_cut_min=RMSD_cut_initial-0.8*rmsd_delta RMSD_cut_max=RMSD_cut_initial+0.8*rmsd_delta if(RMSD_cut_min.lt.1)RMSD_cut_min=1 endif write(*,*)RMSD_cut_initial,RMSD_cut_min,RMSD_cut_max ************************************************************** * find out the biggest cluster and decide the RMSD_cut_min ************************************************************** RMSD_cut=RMSD_cut_initial 140 do j=1,n_str n_str_near(j)=0 !number of structure in cluster do k=1,n_str if(amat(j,k).lt.RMSD_cut)then n_str_near(j)=n_str_near(j)+1 endif enddo enddo *** find out the biggest cluster -----------------> n_str_cl_max=0 !maximum number of structures in cluster do j=1,n_str if(n_str_near(j).gt.n_str_cl_max)then n_str_cl_max=n_str_near(j) endif enddo *** check the size of the cluster -------------> ratio=float(n_str_cl_max)/float(n_str) if(ratio.gt.ratio1.and.RMSD_cut.gt.RMSD_cut_min)then RMSD_cut=RMSD_cut-0.1 goto 140 endif if(ratio.lt.ratio2.and.RMSD_cut.lt.RMSD_cut_max)then RMSD_cut=RMSD_cut+0.2 goto 140 endif if(RMSD_cut.lt.RMSD_cut_min)RMSD_cut=RMSD_cut_min if(RMSD_cut.gt.RMSD_cut_max)RMSD_cut=RMSD_cut_max ***** RMSD_cut of first cluster, i.e. RMSD_cut_min, is decided *** cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc cccccccccccc find out nc_max clusters cccccccccccccccccccccccc cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc nc=0 !number of clusters do i=1,nc_max *** calculate nc(j), number of neightboring structures -----------------> do j=1,n_str n_str_near(j)=0 !number of structure in cluster do k=1,n_str if(mark(j).eq.1.and.mark(k).eq.1)then if(amat(j,k).lt.RMSD_cut)then n_str_near(j)=n_str_near(j)+1 endif endif enddo enddo *** find out the biggest cluster -----------------> n_str_cl_max=0 !maximum number of structures in cluster do j=1,n_str if(n_str_near(j).gt.n_str_cl_max)then n_str_cl_max=n_str_near(j) i_cl(i)=j !structure center of i'th cluster endif enddo *** check the size of the cluster -------------> if(n_str_cl_max.lt.1) goto 41 !no remaining clusters. *** remove the structures in the cluster--------> n_str_cl(i)=0 !# of structure including some used struct n_str_cl_ex(i)=0 !# of structure excluding used structure R_in(i)=0 !average pairwise RMSD including R_ex(i)=0 !average pairwise RMSD excluding do j=1,n_str c if(mark(j).eq.1)then if(amat(j,i_cl(i)).lt.RMSD_cut)then if(mark(j).ne.0)then n_str_cl_ex(i)=n_str_cl_ex(i)+1 R_ex(i)=R_ex(i)+amat(j,i_cl(i)) i_str_cl_ex(i,n_str_cl_ex(i))=j endif mark(j)=0 n_str_cl(i)=n_str_cl(i)+1 i_str_cl(i,n_str_cl(i))=j R_in(i)=R_in(i)+amat(j,i_cl(i)) endif c endif enddo R_in(i)=R_in(i)/n_str_cl(i) R_ex(i)=R_ex(i)/n_str_cl_ex(i) rmsd_cl_cut(i)=rmsd_cut nc=i c write(*,*)i,n_str_cl_ex(i),n_str_cl_max enddo c^^^^^^^^^^^^^^^^5 cluster find out ^^^^^^^^^^^^^^^^^^^^^^ cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc 41 continue ***************************************************************** c calculate 'combo.pdb' and E_combo COMBO************************************************************ do i=1,nc ns=0 E_combo(i)=0 rmsd_nat_a=0 rmsd_cen_a=0 do ii=1,Lch xs(ii)=0 ys(ii)=0 zs(ii)=0 enddo *** k=i_cl(i) !center structure of the cluster do ii=1,Lch r_2(1,ii)=x(ii,k) !center structure r_2(2,ii)=y(ii,k) r_2(3,ii)=z(ii,k) enddo nn=n_str_cl(i) !number of structures in cluster (including) *** write(21,1200)i,rmsd_str(k),istr(k),filen(itra(k)) 1200 format('#Cluster',i3,f8.3,i8,2x,a15) write(21,*)'i_cl i_str R_nat R_cen E #str traj' write(21,*)'-----------------------------------------------' write(21,*)'Nstr=',nn do l=1,nn m=i_str_cl(i,l) !l'th structure in i'th cluster write(21,1201)i,l,rmsd_str(m),amat(k,m),E(m), $ istr(m),filen(itra(m)) 1201 format(i3,i8,2f7.3,f9.1,i8,2x,a15) *** E_combo: E_combo(i)=E_combo(i)+E(m) ns=ns+1 rmsd_nat_a=rmsd_nat_a+rmsd_str(m) rmsd_cen_a=rmsd_cen_a+amat(k,m) *** rotate nearby structure into the center structure----> do n=1,Lch r_1(1,n)=x(n,m) r_1(2,n)=y(n,m) r_1(3,n)=z(n,m) enddo call u3b(w,r_1,r_2,Lch,1,rms,u,t,ier) !u rotate r_1 to r_2 do j=1,Lch xx=t(1)+u(1,1)*r_1(1,j)+u(1,2)*r_1(2,j)+u(1,3)*r_1(3,j) yy=t(2)+u(2,1)*r_1(1,j)+u(2,2)*r_1(2,j)+u(2,3)*r_1(3,j) zz=t(3)+u(3,1)*r_1(1,j)+u(3,2)*r_1(2,j)+u(3,3)*r_1(3,j) xs(j)=xs(j)+xx ys(j)=ys(j)+yy zs(j)=zs(j)+zz enddo enddo *** averaging: do j=1,Lch xc(i,j)=xs(j)/float(ns) yc(i,j)=ys(j)/float(ns) zc(i,j)=zs(j)/float(ns) enddo E_combo(i)=E_combo(i)/float(ns) rmsd_nat_a=rmsd_nat_a/ns rmsd_cen_a=rmsd_cen_a/ns write(21,*)'-----------------------------------------' write(21,1222)rmsd_nat_a,rmsd_cen_a,E_combo(i) 1222 format(' average=',2f7.3,f9.1) enddo c^^^^^^^^^^^^^^^^^^^^ combo finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ***************************************************************** c calculate & find closc ***************************************************************** do i=1,nc do k=1,Lch r_1(1,k)=xc(i,k) r_1(2,k)=yc(i,k) r_1(3,k)=zc(i,k) enddo nn=0 Rc_in(i)=0 !RMSD to centroid including rmsd_close_min(i)=100. !minimum RMSD between closc and combo do j=1,n_str_cl(i) m=i_str_cl(i,j) do k=1,Lch r_2(1,k)=x(k,m) r_2(2,k)=y(k,m) r_2(3,k)=z(k,m) enddo call u3b(w,r_1,r_2,Lch,0,rms,u,t,ier) armsd=dsqrt(rms/Lch) !RMSD12 Rc_in(i)=Rc_in(i)+armsd nn=nn+1 ccc if(armsd.lt.rmsd_close_min(i))then rmsd_close_min(i)=armsd do k=1,Lch xcl(i,k)=r_2(1,k) ycl(i,k)=r_2(2,k) zcl(i,k)=r_2(3,k) enddo endif ccc enddo Rc_in(i)=Rc_in(i)/nn enddo do i=1,nc do k=1,Lch r_1(1,k)=xc(i,k) r_1(2,k)=yc(i,k) r_1(3,k)=zc(i,k) enddo nn=0 Rc_ex(i)=0 !RMSD to centroid including do j=1,n_str_cl_ex(i) m=i_str_cl_ex(i,j) do k=1,Lch r_2(1,k)=x(k,m) r_2(2,k)=y(k,m) r_2(3,k)=z(k,m) enddo call u3b(w,r_1,r_2,Lch,0,rms,u,t,ier) armsd=dsqrt(rms/Lch) !RMSD12 Rc_ex(i)=Rc_ex(i)+armsd nn=nn+1 enddo Rc_ex(i)=Rc_ex(i)/nn enddo ***************************************************************** c calculate 'closc.pdb', closest structure to combo CLOSC************************************************************ if(n_closc.lt.0)goto 130 !use decoys in the same cluster do i=1,nc rmsd_close_min(i)=100. enddo do i_tra=1,n_tra open(10,file=filen(i_tra),status='old') 110 read(10,*,end=119,err=119)Lch1,energ do i=1,Lch1 read(10,*,end=119,err=119)xt(i),yt(i),zt(i) enddo do i=1,Lch r_1(1,i)=xt(i) r_1(2,i)=yt(i) r_1(3,i)=zt(i) enddo *** check whether this structure close to 'combo.pdb': do j=1,nc do i=1,Lch r_2(1,i)=xc(j,i) r_2(2,i)=yc(j,i) r_2(3,i)=zc(j,i) enddo call u3b(w,r_1,r_2,Lch,0,rms,u,t,ier) armsd=dsqrt(rms/Lch) if(armsd.lt.rmsd_close_min(j))then rmsd_close_min(j)=armsd do i=1,Lch xcl(j,i)=r_1(1,i) ycl(j,i)=r_1(2,i) zcl(j,i)=r_1(3,i) enddo endif enddo *** goto 110 119 close(10) enddo 130 continue c^^^^^^^^^^^^^^^^^^^^ closc finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ccccccccccccccoutput the 5 clusters in PDB format ccccccccccccccc do i=1,Lch read(2,*)ii,seq(i) enddo do i=1,nc if(i.lt.10)then c2=char(48+i)//'.pdb' else c2=char(48+i/10)//char(48+i-10*(i/10))//'.pdb' endif *** 'combo.pdb'->average of structures of cluster--------> open(10,file='combo'//c2,status='unknown') do j=1,Lch xx=xc(i,j) yy=yc(i,j) zz=zc(i,j) write(10,1237)'ATOM',j,'CA',seq(j),j,'',xx,yy,zz enddo write(10,322) 322 format('TER') do j=2,Lch write(10,1238)'CONECT',j-1,j enddo close(10) *** 'closc.pdb'->structure closest to combo.pdb--------> open(10,file='closc'//c2,status='unknown') do j=1,Lch xx=xcl(i,j) yy=ycl(i,j) zz=zcl(i,j) write(10,1237)'ATOM',j,'CA',seq(j),j,'',xx,yy,zz enddo write(10,322) do j=2,Lch write(10,1238)'CONECT',j-1,j enddo close(10) enddo 1237 format(A4,I7,A4,A5,I6,A4,3F8.3) 1238 format(A6,i5,i5) c^^^^^^^^^^^^^^output PDB structures finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ write(20,*)'Summary of SPICKER clustering:' write(20,*)'Modeling Length: ',Lch write(20,*)'Native Length: ',Lch_n write(20,*)'Number of clusters: ',nc write(20,*)'Total number of structures=',n_str_all write(20,*)'Number of structure in use=',n_str write(20,*) cccccccRMSD and TM-score of cluster to native cccccccccccccccccccccccccc if(Lch_n.gt.1)then write(20,*)'--- comparison to native structure-------' write(20,*)' i R_combo TM_combo R_closc TM_closc' write(20,*)'A----------------------------------------' do i=1,nc do j=1,Lch_n r_1(1,j)=x_n(j) !CA r_1(2,j)=y_n(j) r_1(3,j)=z_n(j) nn2(j)=j x2(j)=x_n(j) y2(j)=y_n(j) z2(j)=z_n(j) enddo *** combo-> do j=1,Lch_n r_2(1,j)=xc(i,ires(j)) r_2(2,j)=yc(i,ires(j)) r_2(3,j)=zc(i,ires(j)) nn1(j)=j x1(j)=xc(i,ires(j)) y1(j)=yc(i,ires(j)) z1(j)=zc(i,ires(j)) enddo call u3b(w,r_1,r_2,Lch_n,0,rms,u,t,ier) armsd=dsqrt(rms/Lch_n) !RMSD12 rmsd_combo=armsd call TMscore(Lch_n,x1,y1,z1,nn1,Lch_n,x2,y2,z2,nn2, & TM1,Rcomm,Lcomm) *** closc-> do j=1,Lch_n r_2(1,j)=xcl(i,ires(j)) r_2(2,j)=ycl(i,ires(j)) r_2(3,j)=zcl(i,ires(j)) nn1(j)=j x1(j)=xcl(i,ires(j)) y1(j)=ycl(i,ires(j)) z1(j)=zcl(i,ires(j)) enddo call u3b(w,r_1,r_2,Lch_n,0,rms,u,t,ier) armsd=dsqrt(rms/Lch_n) !RMSD12 rmsd_closc=armsd call TMscore(Lch_n,x1,y1,z1,nn1,Lch_n,x2,y2,z2,nn2, & TM2,Rcomm,Lcomm) *** write(20,50)i,rmsd_combo,TM1,rmsd_closc,TM2 50 format(i5,f9.3,f9.4,f9.3,f9.4) enddo write(20,*) txt1=' ' txt2='RMSD Energy #str Trajectory' write(20,*)txt1(1:23)//txt2(1:35) write(20,*)'-------------------------------------------' i=itra_rmsd_min_all j=itra_rmsd_min_use k=itra_E_min l=itra_E_min_all write(20,124)rmsd_min_all,E_rma,istr_rmsd_min_all,filen(i) write(20,125)rmsd_min_use,E_rmu,istr_rmsd_min_use,filen(j) write(20,126)rmsd_E_min,E_min,istr_E_min,filen(k) write(20,127)rmsd_E_min_all,E_min_all,istr_E_min_all,filen(l) 124 format('Minimum RMSD in all=',f8.3,f9.1,i8,2x,a20) 125 format('Minimum RMSD in use=',f8.3,f9.1,i8,2x,a20) 126 format('Minimum E in all=',f8.3,f9.1,i8,2x,a20) 127 format('Minimum E in use=',f8.3,f9.1,i8,2x,a20) endif write(20,*) ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc ccccccccccccccccccoutput cluster analysis cccccccccccccccccccc write(20,*)'------------ summary of clusers -------------------' txt1='i Size R_cut density R_cl_co ' txt2=' E_model #str Trajectory' write(20,*)txt1(1:31)//txt2(1:35) write(20,*)'B--------------------------------------------------' do i=1,nc k=i_cl(i) density=n_str_cl(i)/rmsd_cl_cut(i) write(20,123)i,n_str_cl(i),rmsd_cl_cut(i),density, $ rmsd_close_min(i),E_combo(i),E(k),istr(k),filen(itra(k)) enddo 123 format(i2,i6,f6.2,f7.0,f6.2,2f9.1,i6,1x,a13) ccccccccccccccccccoutput cluster analysis cccccccccccccccccccc write(20,*) write(20,*)' include used structure exclude used structre' write(20,*)' --------------------- ---------------------' write(20,*)'i N_in N_ex ' write(20,*)'C---------------------------------------------' do i=1,nc write(20,133)i,n_str_cl(i),R_in(i),Rc_in(i), $ n_str_cl_ex(i),R_ex(i),Rc_ex(i) enddo 133 format(i2,i6,f7.2,f7.2,i8,f7.2,f7.2) write(20,*) write(20,*)'RMSD_cut_initial=',RMSD_cut_initial write(20,*)'RMSD_cut_min=',RMSD_cut_min write(20,*)'RMSD_cut_max=',RMSD_cut_max write(20,*)'ratio_min=',ratio1 write(20,*)'ratio_max=',ratio2 write(20,*) write(20,*)'trajectories used:',n_tra do i=1,n_tra write(20,*)filen(i) enddo c^^^^^^^^^^^^^^^^^^^^output done ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ stop end cccccccccccccccc Calculate sum of (r_d-r_m)^2 cccccccccccccccccccccccccc c w - w(m) is weight for atom pair c m (given) c x - x(i,m) are coordinates of atom c m in set x (given) c y - y(i,m) are coordinates of atom c m in set y (given) c n - n is number of atom pairs (given) c mode - 0:calculate rms only (given) c 1:calculate rms,u,t (takes longer) c rms - sum of w*(ux+t-y)**2 over all atom pairs (result) c u - u(i,j) is rotation matrix for best superposition (result) c t - t(i) is translation vector for best superposition (result) c ier - 0: a unique optimal superposition has been determined(result) c -1: superposition is not unique but optimal c -2: no result obtained because of negative weights w c or all weights equal to zero. cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc subroutine rmsd_score_all(ndim,nst,w,n_str,Lch,x,y,z,amat, & rmsd_delta,rmsd_a) implicit none c Intent In integer ndim,nst ! dimenson of arrays integer n_str,Lch double precision w(ndim) real x(ndim,nst),y(ndim,nst),z(ndim,nst) c Intent Out real amat(nst,nst) real rmsd_delta double precision rmsd_a c Local variables real rmsd2_a,armsd integer i,j,k,n_rmsd,ier double precision u(3,3),t(3),rms,r_1(3,ndim),r_2(3,ndim) real mina,maxa mina=1000.0 maxa=0.0 cccccccccccccc calculate RMSD matrics ccccccccccccccccccccccccccc c write(*,*)'number of used structures=',n_str write(*,*)"Calculating RMSD scores" rmsd_a=0 rmsd2_a=0 n_rmsd=0 do i=1,n_str do j=i,n_str do k=1,Lch r_1(1,k)=x(k,i) r_1(2,k)=y(k,i) r_1(3,k)=z(k,i) r_2(1,k)=x(k,j) r_2(2,k)=y(k,j) r_2(3,k)=z(k,j) enddo call u3b(w,r_1,r_2,Lch,0,rms,u,t,ier) armsd=dsqrt(rms/Lch) !RMSD12 mina=min(mina,armsd) maxa=max(maxa,armsd) amat(i,j)=armsd amat(j,i)=armsd n_rmsd=n_rmsd+1 rmsd_a=rmsd_a+armsd rmsd2_a=rmsd2_a+armsd*armsd enddo enddo rmsd_a=rmsd_a/n_rmsd rmsd2_a=rmsd2_a/n_rmsd rmsd_delta=sqrt(rmsd2_a-rmsd_a**2) ! 68.2% is in [-d,+d] c^^^^^^^^^^^^RMSD matrics finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ c write(*,*)"RETURNING rmsd_a ",rmsd_a c write(*,*)"RETURNING rmsd_delta ",rmsd_delta c write(*,*)"MINA MAXA ",mina,maxa return end subroutine tm_score_all(ndim,nst,n_str,Lch,x,y,z,amat,rmsd_delta, & rmsd_a) implicit none c Intent In integer ndim integer n_str ! Number of structures in use integer nst ! Max number of structures integer Lch ! Chain length of the structures (all same) real x(ndim,nst),y(ndim,nst),z(ndim,nst) c Intent Out real amat(nst,nst) real rmsd_delta double precision rmsd_a c Local variables real RMSD_min, RMSD_max parameter(RMSD_min=0) parameter(RMSD_max=50) real rmsd,rmsd2_a,TM,Rcomm integer i,j,k,n_rmsd,Lcomm c Arrays to pass data into TMscore routine integer nn1, nn2 ! resseq of structures - assume all same real x1,y1,z1,x2,y2,z2 ! coordinates dimension x1(ndim),y1(ndim),z1(ndim),nn1(ndim) dimension x2(ndim),y2(ndim),z2(ndim),nn2(ndim) WRITE(*,*)'* SPICKER CALCULATING TM SCORE MATRIX *' cccccccccccccc calculate TM matrices ccccccccccccccccccccccccccc c write(*,*)'number of used structures=',n_str rmsd_a=0 rmsd2_a=0 n_rmsd=0 !$OMP PARALLEL DEFAULT(NONE) !$OMP& SHARED(n_str,Lch,x,y,z,amat,n_rmsd,rmsd_a,rmsd2_a) !$OMP& PRIVATE(i,j,k,x1,y1,z1,x2,y2,z2,nn1,nn2,TM,Rcomm,Lcomm,rmsd) !$OMP DO c which is the correct place? c!$OMP& PRIVATE(i,j,k,x1,y1,z1,x2,y2,z2,nn1,nn2,TM,Rcomm,Lcomm,rmsd) !$OMP& REDUCTION(+:n_rmsd,rmsd_a,rmsd2_a) !$OMP& COLLAPSE(2) do i=1,n_str do j=1,n_str c OMP the COLLAPSE(2) directive unrolls both loops so we need to loop c over all indices. continue doesn't seem to work so we just protect the c j lt i with the if statement if (j .ge. i) then if (j .eq. i) then rmsd = 0.0 else do k=1,Lch x1(k)=x(k,i) y1(k)=y(k,i) z1(k)=z(k,i) nn1(k)=k x2(k)=x(k,j) y2(k)=y(k,j) z2(k)=z(k,j) nn2(k)=k enddo call TMscore(Lch,x1,y1,z1,nn1,Lch,x2,y2,z2,nn2,TM, & Rcomm,Lcomm) ! armsd=dsqrt(rms/Lch) !RMSD12 if (TM .eq. 0.0) then rmsd = RMSD_max else rmsd = 1.0/TM endif ! Make sure rmsd fits in bounds rmsd=min(rmsd,RMSD_max) rmsd=max(rmsd,RMSD_min) endif amat(i,j)=rmsd amat(j,i)=rmsd n_rmsd=n_rmsd+1 rmsd_a=rmsd_a+rmsd rmsd2_a=rmsd2_a+rmsd*rmsd endif enddo enddo !$OMP END DO !$OMP END PARALLEL rmsd_a=rmsd_a/n_rmsd rmsd2_a=rmsd2_a/n_rmsd rmsd_delta=sqrt(rmsd2_a-rmsd_a**2) ! 68.2% is in [-d,+d] c^^^^^^^^^^^^RMSD matrics finished ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ c write(*,*)"RETURNING rmsd_a ",rmsd_a c write(*,*)"RETURNING rmsd_delta ",rmsd_delta c write(*,*)"MINA MAXA ",mina,maxa return end subroutine read_score_matrix(nst,n_str,amat,rmsd_delta, & rmsd_a) * This routine reads in a matrix of scores in a scale 0-1 and * uses the reciprocal to convert them into an rmsd-like score implicit none ! Intent In integer nst,n_str ! Intent Out real amat dimension amat(nst,nst) real rmsd_delta double precision rmsd_a ! local variables real scored,RMSD_min,RMSD_max parameter(RMSD_min=0) parameter(RMSD_max=50.0) real armsd,rmsd2_a integer i,j,k,n_rmsd rmsd_a=0 rmsd2_a=0 n_rmsd=0 WRITE(*,*)'* SPICKER READING SCORE MATRIX *' open(1,file='score.matrix',status='old') do i=1,n_str*n_str c read(1,'(i4,1x,i4,1x,1x,f8.3)',end=3,err=2) j, k, scored read(1,*,end=3,err=2) j, k, scored if (scored .gt. 1.0 .or. scored .lt. 0.0 .or. & j .gt. n_str .or. k .gt. n_str) then write(*,*)"Invalid score line: ", j ,k ,scored stop endif c matrix indexing starts from 1 j=j+1 k=k+1 if (scored .eq. 0.0) then scored = RMSD_max else scored = 1.0/scored endif scored=min(scored,RMSD_max) scored=max(scored,RMSD_min) armsd = scored amat(j,k) = armsd amat(k,j) = armsd n_rmsd = n_rmsd+1 rmsd_a = rmsd_a+armsd rmsd2_a = rmsd2_a+armsd*armsd enddo 3 continue close(1) if (i .le. n_str*n_str/2 - n_str/2) then write(*,*),"NOT ENOUGH SCORE MATRIX VALUES!" stop 1 endif rmsd_a=rmsd_a/n_rmsd rmsd2_a=rmsd2_a/n_rmsd rmsd_delta=sqrt(rmsd2_a-rmsd_a**2) ! 68.2% is in [-d,+d] c write(*,*)"RETURNING rmsd_a ",rmsd_a c write(*,*)"RETURNING rmsd_delta ",rmsd_delta return 2 write(*,*),"SPICKER ERROR READING SCORE MATRIX LINE ",i stop 1 end subroutine u3b(w, x, y, n, mode, rms, u, t, ier) ! subroutine parameters - in double precision w(*), x(3,*), y(3,*) integer n, mode ! subroutine parameters - out double precision rms, u(3,3), t(3) integer ier ! subroutine variables integer i, j, k, l, m1, m integer ip(9), ip2312(4) double precision r(3,3), xc(3), yc(3), wc double precision a(3,3), b(3,3), e(3), rr(6), ss(6) double precision e0, d, spur, det, cof, h, g double precision cth, sth, sqrth, p, sigma ! these three variables are constant double precision sqrt3, tol, zero data sqrt3 / 1.73205080756888d+00 / data tol / 1.0d-2 / data zero / 0.0d+00 / data ip / 1, 2, 4, 2, 3, 5, 4, 5, 6 / data ip2312 / 2, 3, 1, 2 / ! zero all arrays and matrices, set u and a = identity matrix wc = zero rms = zero e0 = zero do i=1, 3 xc(i) = zero yc(i) = zero t(i) = zero do j=1, 3 r(i,j) = zero u(i,j) = zero a(i,j) = zero if( i .eq. j ) then u(i,j) = 1.0 a(i,j) = 1.0 end if end do end do ! ! Get centroids of the two point lists ! ier = -1 if( n .lt. 1 ) return ier = -2 do m=1, n if( w(m) .lt. 0.0 ) return wc = wc + w(m) do i=1, 3 xc(i) = xc(i) + w(m)*x(i,m) yc(i) = yc(i) + w(m)*y(i,m) end do end do if( wc .le. zero ) return do i=1, 3 xc(i) = xc(i) / wc yc(i) = yc(i) / wc end do ! ! Correlation matrix of 2 point sets ! do m=1, n do i=1, 3 e0=e0+w(m)*((x(i,m)-xc(i))**2+(y(i,m)-yc(i))**2) d = w(m) * ( y(i,m) - yc(i) ) do j=1, 3 r(i,j) = r(i,j) + d*( x(j,m) - xc(j) ) end do end do end do ! ! Determinant of correlation matrix ! det = r(1,1) * ( (r(2,2)*r(3,3)) - (r(2,3)*r(3,2)) ) & - r(1,2) * ( (r(2,1)*r(3,3)) - (r(2,3)*r(3,1)) ) & + r(1,3) * ( (r(2,1)*r(3,2)) - (r(2,2)*r(3,1)) ) sigma = det ! ! rr is the correlation matrix multiplied by itself; as any matrix multiplied ! by itself is symmetrical, we only store the upper triangle. ! m = 0; do j=1, 3 do i=1, j m = m+1 rr(m) = r(1,i)*r(1,j) + r(2,i)*r(2,j) + r(3,i)*r(3,j) end do end do ! ! Calculate some useful info, and get the eigenvalues by solving the cubic equation: ! Y**3 - 3HY + 2G = 0 ! via setting X=Y+SPUR spur = (rr(1)+rr(3)+rr(6)) / 3.0 cof = (((((rr(3)*rr(6) - rr(5)*rr(5)) + rr(1)*rr(6)) & - rr(4)*rr(4)) + rr(1)*rr(3)) - rr(2)*rr(2)) / 3.0 det = det*det do i=1, 3 e(i) = spur end do ! JUMP POINT HERE => build translation vector and get rms if( spur .le. zero ) goto 40 d = spur*spur h = d - cof g = (spur*cof - det)/2.0 - spur*h if( h .le. zero ) then if( mode .eq. 0 ) then ! JUMP POINT HERE => get rms goto 50 else ! JUMP POINT HERE => skip part of the generation of matrix a goto 30 end if end if sqrth = dsqrt(h) d = h*h*h - g*g if( d .lt. zero ) d = zero d = datan2( dsqrt(d), -g ) / 3.0 cth = sqrth * dcos(d) sth = sqrth*sqrt3*dsin(d) e(1) = (spur + cth) + cth e(2) = (spur - cth) + sth e(3) = (spur - cth) - sth ! ! Do we need to calculate the rotation and translation vector etc, or can we ! make do with only the eigenvalues? If so, skip the time consuming eigenvector ! generations, and jump right to the final rms code. ! if( mode .eq. 0 ) then ! JUMP POINT HERE => get rms goto 50 end if ! ! Eigenvector time! ! do l=1, 3, 2 d = e(l) ss(1) = (d-rr(3)) * (d-rr(6)) - rr(5)*rr(5) ss(2) = (d-rr(6)) * rr(2) + rr(4)*rr(5) ss(3) = (d-rr(1)) * (d-rr(6)) - rr(4)*rr(4) ss(4) = (d-rr(3)) * rr(4) + rr(2)*rr(5) ss(5) = (d-rr(1)) * rr(5) + rr(2)*rr(4) ss(6) = (d-rr(1)) * (d-rr(3)) - rr(2)*rr(2) if( dabs(ss(1)) .ge. dabs(ss(3)) ) then j=1 if( dabs(ss(1)) .lt. dabs(ss(6)) ) j = 3 else if( dabs(ss(3)) .ge. dabs(ss(6)) ) then j = 2 else j = 3 end if d = zero j = 3 * (j - 1) do i=1, 3 k = ip(i+j) a(i,l) = ss(k) d = d + ss(k)*ss(k) end do if( d .gt. zero ) d = 1.0 / dsqrt(d) do i=1, 3 a(i,l) = a(i,l) * d end do end do ! ! Construct matrix a ! d = a(1,1)*a(1,3) + a(2,1)*a(2,3) + a(3,1)*a(3,3) if ((e(1) - e(2)) .gt. (e(2) - e(3))) then m1 = 3 m = 1 else m1 = 1 m = 3 endif p = zero do i=1, 3 a(i,m1) = a(i,m1) - d*a(i,m) p = p + a(i,m1)**2 end do if( p .le. tol ) then p = 1.0 do i=1, 3 ! was continue, but continue would be wrong in the new code; we want CYCLE! continue does not mean what it does in c-style languages, it's a form of NOP if (p .lt. dabs(a(i,m))) cycle p = dabs( a(i,m) ) j = i end do k = ip2312(j) l = ip2312(j+1) p = dsqrt( a(k,m)**2 + a(l,m)**2 ) ! JUMP POINT HERE => build translation vector and get rms. if( p .le. tol ) goto 40 a(j,m1) = zero a(k,m1) = -a(l,m)/p a(l,m1) = a(k,m)/p else p = 1.0 / dsqrt(p) do i=1, 3 a(i,m1) = a(i,m1)*p end do end if a(1,2) = a(2,3)*a(3,1) - a(2,1)*a(3,3) a(2,2) = a(3,3)*a(1,1) - a(3,1)*a(1,3) a(3,2) = a(1,3)*a(2,1) - a(1,1)*a(2,3) ! JUMP LABEL 30 IS HERE! 30 do l=1, 2 d = zero do i=1, 3 b(i,l) = r(i,1)*a(1,l) + r(i,2)*a(2,l) + r(i,3)*a(3,l) d = d + b(i,l)**2 end do if( d .gt. zero ) d = 1.0 / dsqrt(d) do i=1, 3 b(i,l) = b(i,l)*d end do end do ! ! Construct matrix b. ! Note that although this appears similar to the construction of a, ! it is not the same - different indices etc are used! ! d = b(1,1)*b(1,2) + b(2,1)*b(2,2) + b(3,1)*b(3,2) p = zero do i=1, 3 b(i,2) = b(i,2) - d*b(i,1) p = p + b(i,2)**2 end do if( p .le. tol ) then p = 1.0 do i=1, 3 ! was continue, but continue would be wrong in the new code; we want CYCLE! continue does not mean what it does in c-style languages, it's a form of NOP if( p .lt. dabs(b(i,1)) ) cycle p = dabs( b(i,1) ) j = i end do k = ip2312(j) l = ip2312(j+1) p = dsqrt( b(k,1)**2 + b(l,1)**2 ) ! JUMP POINT HERE => build translation vector and get rms. if( p .le. tol ) goto 40 b(j,2) = zero b(k,2) = -b(l,1)/p b(l,2) = b(k,1)/p else p = 1.0 / dsqrt(p) do i=1, 3 b(i,2) = b(i,2)*p end do end if b(1,3) = b(2,1)*b(3,2) - b(2,2)*b(3,1) b(2,3) = b(3,1)*b(1,2) - b(3,2)*b(1,1) b(3,3) = b(1,1)*b(2,2) - b(1,2)*b(2,1) ! ! Construct final rotation matrix from a and b ! do i=1, 3 do j=1, 3 u(i,j) = b(i,1)*a(j,1) + b(i,2)*a(j,2) + b(i,3)*a(j,3) end do end do ! ! Build translation vector - JUMP LABEL 40 IS HERE! ! 40 do i=1, 3 t(i) = ((yc(i) - u(i,1)*xc(1)) - u(i,2)*xc(2)) - u(i,3)*xc(3) end do ! ! Get RMS error, having checked eigenvalues (we may have jumped here from the start) ! JUMP LABEL 50 IS HERE! ! 50 do i=1, 3 if( e(i) .lt. zero ) e(i) = zero e(i) = dsqrt( e(i) ) end do ier = 0 ! FIX THIS! LITERAL TOLERANCE VALUE if( e(2) .le. (e(1) * 1.0d-05) ) ier = -1 d = e(3) if( sigma .lt. 0.0 ) then d = - d ! FIX THIS! LITERAL TOLERANCE VALUE if( (e(2) - e(3)) .le. (e(1) * 1.0d-05) ) ier = -1 end if d = (d + e(2)) + e(1) rms = (e0 - d) - d if( rms .lt. 0.0 ) rms = 0.0 return end ************************************************************************* ************************************************************************* * This is a subroutine to compare two structures and find the * superposition that has the maximum TM-score. * Reference: Yang Zhang, Jeffrey Skolnick, Proteins 2004 57:702-10. * * Explanations: * L1--Length of the first structure * (x1(i),y1(i),z1(i))--coordinates of i'th residue at the first structure * n1(i)--Residue sequence number of i'th residue at the first structure * L2--Length of the second structure * (x2(i),y2(i),z2(i))--coordinates of i'th residue at the second structure * n2(i)--Residue sequence number of i'th residue at the second structure * TM--TM-score of the comparison * Rcomm--RMSD of two structures in the common aligned residues * Lcomm--Length of the common aligned regions * * Note: * 1, Always put native as the second structure, by which TM-score * is normalized. * 2, The returned (x1(i),y1(i),z1(i)) are the rotated structure after * TM-score superposition. ************************************************************************* ************************************************************************* subroutine TMscore(L1,x1,y1,z1,n1,L2,x2,y2,z2,n2,TM,Rcomm,Lcomm) PARAMETER(nmax=2000) common/stru/xt(nmax),yt(nmax),zt(nmax),xb(nmax),yb(nmax),zb(nmax) common/nres/nresA(nmax),nresB(nmax),nseqA,nseqB common/para/d,d0 common/align/n_ali,iA(nmax),iB(nmax) common/nscore/i_ali(nmax),n_cut ![1,n_ali],align residues for the score dimension k_ali(nmax),k_ali0(nmax) dimension L_ini(100) common/scores/score double precision score,score_max dimension xa(nmax),ya(nmax),za(nmax) !$OMP THREADPRIVATE(/stru/,/nres/,/para/,/align/,/nscore/,/scores/) dimension x1(nmax),y1(nmax),z1(nmax),n1(nmax) dimension x2(nmax),y2(nmax),z2(nmax),n2(nmax) ccc RMSD: double precision r_1(3,nmax),r_2(3,nmax),w(nmax) double precision u(3,3),t(3),rms !armsd is real data w /nmax*1.0/ ccc ********* convert input data **************** nseqA=L1 do i=1,nseqA xa(i)=x1(i) ya(i)=y1(i) za(i)=z1(i) nresA(i)=n1(i) enddo nseqB=L2 do i=1,L2 xb(i)=x2(i) yb(i)=y2(i) zb(i)=z2(i) nresB(i)=n2(i) enddo ****************************************************************** * pickup the aligned residues: ****************************************************************** k=0 do i=1,nseqA do j=1,nseqB if(nresA(i).eq.nresB(j))then k=k+1 iA(k)=i iB(k)=j goto 205 endif enddo 205 continue enddo n_ali=k !number of aligned residues Lcomm=n_ali if(n_ali.lt.1)then c write(*,*)'There is no common residues in the input structures' TM=0 Rcomm=0 return endif ************///// * parameters: ***************** *** d0-------------> d0=1.24*(nseqB-15)**(1.0/3.0)-1.8 if(d0.lt.0.5)d0=0.5 *** d0_search -----> d0_search=d0 if(d0_search.gt.8)d0_search=8 if(d0_search.lt.4.5)d0_search=4.5 *** iterative parameters -----> n_it=20 !maximum number of iterations d_output=5 !for output alignment n_init_max=6 !maximum number of L_init n_init=0 L_ini_min=4 if(n_ali.lt.4)L_ini_min=n_ali do i=1,n_init_max-1 n_init=n_init+1 L_ini(n_init)=n_ali/2**(n_init-1) if(L_ini(n_init).le.L_ini_min)then L_ini(n_init)=L_ini_min goto 402 endif enddo n_init=n_init+1 L_ini(n_init)=L_ini_min 402 continue ****************************************************************** * find the maximum score starting from local structures superposition ****************************************************************** score_max=-1 !TM-score do 333 i_init=1,n_init L_init=L_ini(i_init) iL_max=n_ali-L_init+1 do 300 iL=1,iL_max !on aligned residues, [1,nseqA] LL=0 ka=0 do i=1,L_init k=iL+i-1 ![1,n_ali] common aligned r_1(1,i)=xa(iA(k)) r_1(2,i)=ya(iA(k)) r_1(3,i)=za(iA(k)) r_2(1,i)=xb(iB(k)) r_2(2,i)=yb(iB(k)) r_2(3,i)=zb(iB(k)) LL=LL+1 ka=ka+1 k_ali(ka)=k enddo call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 if(i_init.eq.1)then !global superposition armsd=dsqrt(rms/LL) Rcomm=armsd endif do j=1,nseqA xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo d=d0_search-1 call score_fun !init, get scores, n_cut+i_ali(i) for iteration if(score_max.lt.score)then score_max=score ka0=ka do i=1,ka0 k_ali0(i)=k_ali(i) enddo endif *** iteration for extending ----------------------------------> d=d0_search+1 do 301 it=1,n_it LL=0 ka=0 do i=1,n_cut m=i_ali(i) ![1,n_ali] r_1(1,i)=xa(iA(m)) r_1(2,i)=ya(iA(m)) r_1(3,i)=za(iA(m)) r_2(1,i)=xb(iB(m)) r_2(2,i)=yb(iB(m)) r_2(3,i)=zb(iB(m)) ka=ka+1 k_ali(ka)=m LL=LL+1 enddo call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 do j=1,nseqA xt(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) yt(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) zt(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo call score_fun !get scores, n_cut+i_ali(i) for iteration if(score_max.lt.score)then score_max=score ka0=ka do i=1,ka k_ali0(i)=k_ali(i) enddo endif if(it.eq.n_it)goto 302 if(n_cut.eq.ka)then neq=0 do i=1,n_cut if(i_ali(i).eq.k_ali(i))neq=neq+1 enddo if(n_cut.eq.neq)goto 302 endif 301 continue !for iteration 302 continue 300 continue !for shift 333 continue !for initial length, L_ali/M ******** return the final rotation **************** LL=0 do i=1,ka0 m=k_ali0(i) !record of the best alignment r_1(1,i)=xa(iA(m)) r_1(2,i)=ya(iA(m)) r_1(3,i)=za(iA(m)) r_2(1,i)=xb(iB(m)) r_2(2,i)=yb(iB(m)) r_2(3,i)=zb(iB(m)) LL=LL+1 enddo call u3b(w,r_1,r_2,LL,1,rms,u,t,ier) !u rotate r_1 to r_2 do j=1,nseqA x1(j)=t(1)+u(1,1)*xa(j)+u(1,2)*ya(j)+u(1,3)*za(j) y1(j)=t(2)+u(2,1)*xa(j)+u(2,2)*ya(j)+u(2,3)*za(j) z1(j)=t(3)+u(3,1)*xa(j)+u(3,2)*ya(j)+u(3,3)*za(j) enddo TM=score_max c^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ return END ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c 1, collect those residues with dis