/***************************************************************************************
    Copyright (C) 1997-2019 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of
    this software and associated documentation files (the "Software"), to deal in the
    Software without restriction, including without limitation the rights to use, copy,
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the
    following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
***************************************************************************************/


#include "ihd_typ.h"
#include "spc_usage.h"

/**************************************************************************
  Release the resources acquired by Init() and report the elapsed time.
  Before freeing, the "<nameA>_<nameB>_<ChnA><ChnB>.het" summary written
  through outfp is closed, reopened for reading and echoed to stderr.
 **************************************************************************/
void	ihd_typ::Free()
{
	// Close the summary so that everything written so far is on disk.
	fclose(outfp);
	// Str was allocated with LINESIZE+3 bytes in Init(); bound the write.
	snprintf(Str,LINESIZE+3,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	outfp=open_file(Str,".het","r");
	while (fgets(Str,LINESIZE,outfp) != 0) { fprintf(stderr,"%s",Str); }
	fclose(outfp); free(Str);
	NilSeq(KeySqA); NilSeq(KeySqB); NilPDB(pdb);
	if(pdbAB) NilPDB(pdbAB);
	TotalNilCMSA(cmaA); TotalNilCMSA(cmaB);
	NilAlpha(AB);
	// Sample the clock once and convert explicitly: handing a time
	// difference to "%d" is a format/argument mismatch wherever the
	// difference is wider than int.
	long	secs=(long)(time(NULL)-time1);
	fprintf(stderr,"\ttime: %ld seconds (%0.2f minutes)\n",
		secs,(float)secs/60.0);
}

void	ihd_typ::Init()
{
	if(argc < 6){
	   print_error(USAGE_SPARC_HETMER);
	   // print_error("usage: sparc hetmer msa1 msa2 pdb_file chains");
	}
	time1=time(NULL);
	Int4	x;
	float	f;
	device=0; thrds=1; MaxDist=0.0;
	NEW(Str,LINESIZE +3, char);
	ChnA=argv[5][0]; ChnB=argv[5][1];
	if(!(isupper(ChnA) && isupper(ChnB))) print_error(USAGE_SPARC_HETMER);
	if(argv[5][2]!=0) print_error(USAGE_SPARC_HETMER);
	for(Int4 a=6; a < argc; a++){
	   switch(argv[a][1]) {
		case 'D': {
		  if(sscanf(argv[a],"-D=%f",&f) == 1){ MaxDist=f; }
		  else print_error(USAGE_SPARC_HETMER); 
		} break;
		case 'd': {
		  if(sscanf(argv[a],"-dev=%d",&x) == 1){ device = x; }
		  else print_error(USAGE_SPARC_HETMER); 
		} break;
		case 't': {
		  if(sscanf(argv[a],"-thrds=%d",&x) == 1){ thrds= x; }
		  else print_error(USAGE_SPARC_HETMER); 
		} break;
		default: print_error(USAGE_SPARC_HETMER); break;
	   }
	}
	//	print_error("usage: sparc hetmer msa1 msa2 pdb_file chains");
	nsd=1.0;
#if 0	// pass in the number of stdev below the mean for ortholog cutoff.
	double dd;
	for(Int4 arg=6; arg < argc; arg++){
	   if(sscanf(argv[arg],"-stdev=%lf",&dd) ==1){
		if(dd < 0.0 &&  dd > 10.0) print_error(USAGE_SPARC_HETMER);
		else nsd=dd;
	   }
	}
#endif
	nameA = argv[2]; nameB = argv[3];
	AB=MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
	FILE *fp=open_file(argv[2],"","r");
	cmaA=ReadCMSA(fp,AB); fclose(fp);
	if(cmaA == 0) print_error("input error");
// PutConfigCMSA(stderr,cmaA); PutConsensusCMSA(stderr,cmaA);
	fp=open_file(argv[3],"","r");
	cmaB=ReadCMSA(fp,AB); fclose(fp);
	if(cmaB == 0) print_error("input error");
// PutConfigCMSA(stderr,cmaB); PutConsensusCMSA(stderr,cmaB);
	pdb=MakePDB(argv[4]);
        CA=GetChainNumberPDB(pdb,ChnA);
        CB=GetChainNumberPDB(pdb,ChnB);
	//RenumberChainPDB(ChnB,1, pdb);
	KeySqA=GetPDBSeq(CA, pdb);
	KeySqB=GetPDBSeq(CB, pdb);
	pdbAB=0;
	sprintf(Str,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	outfp=open_file(Str,".het","w");
}

/**************************************************************************
  Select one sequence per taxonomic group from one of the two alignments.
    run == 1: use cmaA with key sequence KeySqA (chain ChnA);
    run == 2: use cmaB with key sequence KeySqB (chain ChnB).
  Sequences sharing a tax id are linked into disjoint sets.  For each set
  the member with the same sequence id as the key sequence is chosen if
  there is one, otherwise the member with the highest FastAlnSeqSW() score
  against the key sequence.  A set is dropped when its chosen member
  scores higher against the other chain's key sequence than the set's
  best score against its own key.  Score histograms are written to stderr
  and to outfp.
  Returns the set of chosen sequence indices; the caller releases it
  (FindProteinPairs() does so with NilSet()).
 **************************************************************************/
set_typ	ihd_typ::GetDSets(Int4 run)
{
	// Initialised so that nothing below reads an indeterminate value
	// should print_error() ever return.
	cma_typ	cma=0;
	e_type	KeySq=0,KeySqX=0;
	char	Chn=0;
	if(run==1){ cma=cmaA; KeySq=KeySqA; KeySqX=KeySqB; Chn=ChnA; }
	else if(run==2){ cma=cmaB; KeySq=KeySqB; KeySqX=KeySqA; Chn=ChnB; }
	else print_error("ihd->GetDSets() input error");
	// Self score of the key sequence: upper end of the score histograms.
	Int4	MaxScrB=FastAlnSeqSW(12,4,KeySq,KeySq,AB);
	Int4	N=NumSeqsCMSA(cma);
	set_typ	UseSet=MakeSet(N+3); ClearSet(UseSet);
	ds_type setsB=DSets(N);
	e_type	SqI,SqJ;
	UInt4	txI,txJ,i,j,n,seti,setj,*TxB;
	UInt4	*LenB,*xB;
	double	dd=round((double)MaxScrB/30.0);
	// round() yields 0 for a self score below 15; keep the bin width usable.
	if(dd < 1.0) dd=1.0;
	char	str[100];
	sprintf(str,"maxscores vs chain %c",Chn); 
	h_type	HG=Histogram(str,0,MaxScrB,dd);
	sprintf(str,"all scores vs chain %c",Chn); 
	h_type	HGA=Histogram(str,0,MaxScrB,dd);
	h_type	HG2=Histogram("disjoint set sizes",0,500,10.0);
	NEW(TxB,N+3,UInt4); NEW(LenB,N+3,UInt4); NEW(xB,N+3,UInt4);
	FILE	*efp=0; // efp=stderr;

	//======== 1. Cache the per-sequence properties. ========
	for(i=1; i <= N; i++){
	   SqI=TrueSeqCMSA(i,cma);
	   TxB[i]=TaxIdentSeq(SqI); LenB[i]=LenSeq(SqI);
	   xB[i]=NumXSeq(SqI);
// fprintf(stderr,"TxB[%d]=%d; seti=%d\n",i,TxB[i],seti);
// PutSeq(stderr,SqI,AB);
	}

	//======== 2. Link sequences that share a tax id. ========
	for(i=1; i <= N; i++){
	   txI=TxB[i]; seti=findDSets(i,setsB);
	   for(j=i+1; j <= N; j++){
	      setj=findDSets(j,setsB);
	      if(seti != setj){
	        txJ=TxB[j];
	        if(txJ == txI){
		   seti=linkDSets(setj,seti,setsB);
	        }
	      }
	   }
	} // PutDSets(stderr,setsB);

	//======== 3. Choose one member from each set. ========
	Int4 score,scoreX,maxscore=0,maxJ=0,KeyJ=0;
// fprintf(stderr,"DEBUG...XXX\n");
	// PutDSets(stdout,setsB);
	for(i=1; i <= N; i++){
	    if(TxB[i]==0) continue;	// if no tax_id then skip
	    seti=findDSets(i,setsB);
// fprintf(stderr,"TxB[%d]=%d; seti=%d\n",i,TxB[i],seti);
	    if(seti == i){	// representative seq...
	      SqI=TrueSeqCMSA(i,cma);
	      char king=KingdomSeq(SqI);
	      char *phylum=PhylumSeq(SqI);
	      if(efp) fprintf(efp,"\nSet %lu (%lu...%s(%c)):\n",
			i,TxB[i],phylum,king);
	      // n = number of sequences in this set.
	      for(n=0,j=1; j <= N; j++){
	        setj=findDSets(j,setsB);
		if(setj == seti) n++;
	      }
	      // maxJ is reset for every set.  It used to carry over from the
	      // previous set, so a set whose members all scored 0 (and were
	      // not pdb sequences) contributed the previous set's index --
	      // or index 0 when it was the first set.
	      for(maxscore=0,maxJ=0,KeyJ=0,j=1; j <= N; j++){
	        setj=findDSets(j,setsB);
		if(setj == seti){
	          SqJ=TrueSeqCMSA(j,cma);
		  score=FastAlnSeqSW(12,4,SqJ,KeySq,AB);
// fprintf(stderr,"Score %d vs %d = %d\n",i,j,score);
IncdHist((double)score,HGA);
		  if(score == maxscore && PdbSeq(SqJ)){
		     // On a tie, prefer a pdb sequence.
		     // if(IsSameSeqID(SqJ,KeySq)) KeyJ=j;
		     maxJ=j;
		  } else if(score > maxscore || maxJ == 0){
		     // "maxJ == 0": the first member is always taken, so
		     // that maxJ names a member of this set afterwards.
		     maxscore=score; maxJ=j;
		     // if(IsSameSeqID(SqJ,KeySq)) KeyJ=j;
		  }
#if 0
		  char TheSame='F',IsSub='F';
		  if(FastIdentSeqs(KeySq,SqJ)) TheSame='T';
		  if(IsSubSeq(KeySq,SqJ)) IsSub='T'; // PdbSeq(SqJ);
		  // PutDSet(stdout,j,setsB);
		  fprintf(stdout,"%lu\t%lu\t%lu(%lu)\t%c%c\t%ld\n",
				j,i,LenB[j],xB[j],TheSame,IsSub,score);
#else
		  if(IsSameSeqID(SqJ,KeySq)) KeyJ=j;
		  if(0) fprintf(stdout,"%lu\t%lu\t%lu(%lu)\t%ld\n",
				j,i,LenB[j],xB[j],score);
#endif
		}
	      } if(efp && n > 1) fprintf(efp,"  %lu seqs\n",n);
#if 1
	      // Drop the set when its chosen member looks more like the
	      // other chain's key sequence than like this chain's.
	      if(KeySqX != 0){
		  if(KeyJ > 0) j=KeyJ; else j=maxJ;  
	          SqJ=TrueSeqCMSA(j,cma);
		  scoreX=FastAlnSeqSW(12,4,SqJ,KeySqX,AB);
	          if(scoreX > maxscore) continue;
	      }
#endif
	      if(KeyJ > 0) AddSet(KeyJ,UseSet);
	      else AddSet(maxJ,UseSet);
	      IncdHist((double)maxscore,HG);
	      IncdHist((double)n,HG2);
	    }
	} NilDSets(setsB);
	if(efp) PutHist(efp,60,HG2);
	NilHist(HG2);
	if(0) PutHist(stderr,60,HGA);
	NilHist(HGA);
#if 0	// this does not seem to work...
	sprintf(str,"pruned scores vs chain %c",Chn); 
	HG2=Histogram(str,0,MaxScrB,dd);
	double	mean=MeanHist(HG);
	double	stdev=sqrt(VarianceHist(HG));
	Int4 Cut=(Int4) round(mean-(nsd*stdev));
	fprintf(stderr,"Cut=%d; mean=%lf\n",Cut,mean);
	for(j=1; j <= N; j++){
	    if(!MemberSet(j,UseSet)) continue;
	    SqJ=TrueSeqCMSA(j,cma);
	    score=FastAlnSeqSW(12,4,SqJ,KeySq,AB);
	    if(score < Cut) DeleteSet(j,UseSet);    
	    else IncdHist((double)score,HG2);
	}
	if(1) PutHist(stderr,60,HG2); NilHist(HG2); 
#else
	//======== 4. Report the scores of the chosen sequences. ========
	double	mean=MeanHist(HG);
	double	stdev=sqrt(VarianceHist(HG));
	double	DD=round(mean+3.0*stdev);
fprintf(stderr,"MinimumHist = %lf; NumDataHist=%d\n",MinimumHist(HG),NumDataHist(HG));
fprintf(stderr,"mean=%lf; stdev=%lf; DD=%lf; dd=%lf\n",mean,stdev,DD,dd);
	if(1) PutHist(stderr,60,HG);
	sprintf(str,"scores vs heteromeric chain %c",Chn); 
	HG2=Histogram(str,0,DD,dd);
	for(j=1; j <= N; j++){
	    if(MemberSet(j,UseSet)){
	    	SqJ=TrueSeqCMSA(j,cma); score=FastAlnSeqSW(12,4,SqJ,KeySq,AB);
	    	IncdHist((double)score,HG2);
	    }
	}
	PutHist(stderr,60,HG2);
	PutHist(outfp,60,HG2); NilHist(HG2); 
#endif
	// PutHist(stderr,50,HG); PutHist(outfp,50,HG);
	NilHist(HG);
// exit(1);
	free(TxB); free(LenB); free(xB);
	return UseSet;
}

/**************************************************************************
  Pair the sequences chosen by GetDSets() for the two alignments by tax id
  and write the paired sequences of each alignment, in matching order, to
  the files "<nameA>_<ChnA>" and "<nameB>_<ChnB>".  Always returns 0.
 **************************************************************************/
Int4	ihd_typ::FindProteinPairs( )
{
	// Self scores of the two key sequences (diagnostic only).
	Int4	selfScoreA=FastAlnSeqSW(12,4,KeySqA,KeySqA,AB);
	Int4	selfScoreB=FastAlnSeqSW(12,4,KeySqB,KeySqB,AB);
	if(0) fprintf(stderr,"Max SW score=%ld\n",selfScoreB);

	//=== 1. One chosen sequence per tax id for each alignment. ===
	set_typ	chosenA=GetDSets(1);
	set_typ	chosenB=GetDSets(2);

	//=== 2. Allocate the pair lists and the skip flags. ===
	Int4	numA=NumSeqsCMSA(cmaA);
	Int4	numB=NumSeqsCMSA(cmaB);
	Int4	numPairs=0;
	Int4	*orderA,*orderB;	// orderA[k] and orderB[k] form pair k.
	BooLean	*omitA,*omitB;		// TRUE for sequences left out of the output.
	NEW(orderA,numA+3,Int4);
	NEW(orderB,numB+3,Int4);
	NEW(omitA,numA+3,BooLean);
	NEW(omitB,numB+3,BooLean);
	UInt4	a,b;
	for(a=1; a <= numA; a++) omitA[a]=TRUE;
	for(b=1; b <= numB; b++) omitB[b]=TRUE;

	//=== 3. Match each chosen A sequence with the first chosen B ===
	//===    sequence that has the same (non-zero) tax id.         ===
	for(a=1; a <= numA; a++){
	   if(!MemberSet(a,chosenA)) continue;
	   e_type seqA=TrueSeqCMSA(a,cmaA);
	   UInt4  taxA=TaxIdentSeq(seqA);
	   if(taxA==0) continue;
	   for(b=1; b <= numB; b++){
	      if(!MemberSet(b,chosenB)) continue;
	      e_type seqB=TrueSeqCMSA(b,cmaB);
	      UInt4  taxB=TaxIdentSeq(seqB);
	      if(taxB==0) continue;
	      if(taxB != taxA) continue;
	      numPairs++;
	      orderA[numPairs]=a; omitA[a]=FALSE;
	      orderB[numPairs]=b; omitB[b]=FALSE;
	      break;	// at most one partner for each A sequence.
	   }
	}
	fprintf(stdout,"%d pairs found\n",numPairs);

	//=== 4. Write the selected sequences of each alignment. ===
	FILE	*fp;
	sprintf(Str,"_%c",ChnA);
	fp=open_file(nameA,Str,"w");
	PutSelectOneCMSA(fp,omitA,orderA,cmaA);
	fclose(fp);

	sprintf(Str,"_%c",ChnB);
	fp=open_file(nameB,Str,"w");
	PutSelectOneCMSA(fp,omitB,orderB,cmaB);
	fclose(fp);

	//=== 5. Clean up. ===
	free(omitA); free(omitB);
	free(orderA); free(orderB);
	NilSet(chosenA); NilSet(chosenB);
	return 0;
}

/**************************************************************************
  Prepare the joint input alignment and run ccmpred on it.
   1. run_cma2aln() is called on "<nameA>_<ChnA>" and "<nameB>_<ChnB>";
      it presumably writes the "*.in" files read below (TODO confirm).
      Both runs must report the same number of sequences.
   2. The two "*.in" files are merged line by line into
      "<nameA>_<nameB>_<ChnA><ChnB>.in": the first whitespace-delimited
      token of each A line is followed directly by that of the B line.
   3. run_ccmpred() is invoked as
      "ccmpred -t <thrds> -d <device> <joint>.in <joint>.dca".
 **************************************************************************/
void	ihd_typ::RunCCMpred()
{
	//==== get ccmpred input alignments. =============
	char	*ArgV[10];
	Int4	ArgC=0,N=0,M=0,i,lenA,lenB;
	char	str[200];
	ArgV[0]=AllocString("cma2aln");
        snprintf(str,sizeof(str),"%s_%c",nameA,ChnA);
	ArgV[1]=AllocString(str); ArgC=2;
fprintf(stderr,"Running cma2aln on file %s\n",str);
        char **pdbIDA=run_cma2aln(ArgC,ArgV,M);
	for(i=1; pdbIDA[i]; i++){
	   fprintf(stderr,"%d. %s\n",i,pdbIDA[i]); free(pdbIDA[i]);
	} free(pdbIDA);

	free(ArgV[1]);
        snprintf(str,sizeof(str),"%s_%c",nameB,ChnB);
	ArgV[1]=AllocString(str);
fprintf(stderr,"Running cma2aln on file %s\n",str);
        char **pdbIDB=run_cma2aln(ArgC,ArgV,N);
	for(i=1; pdbIDB[i]; i++){
	    fprintf(stderr,"%d. %s\n",i,pdbIDB[i]); free(pdbIDB[i]);
	} free(pdbIDB);
	free(ArgV[0]); free(ArgV[1]);
	if(M != N){
		fprintf(stderr,"M=%ld; N=%ld\n",M,N);
		print_error("ERROR: the MSAs are inconsistent.");
	}
	assert(M==N);

	//==== concatenate the two *.in alignments. =============
	lenA=TotalLenCMSA(cmaA);
	lenB=TotalLenCMSA(cmaB);
	char	*rtnA,*rtnB;
	char	*strA,*strB;
	// fgets() below may store up to len+2 bytes; len*2 alone is smaller
	// than that whenever len < 2.
	NEW(strA,lenA*2+3,char); NEW(strB,lenB*2+3,char);
	
	FILE	*fpA,*fpB,*fpAB;
        snprintf(str,sizeof(str),"%s_%c",nameA,ChnA); 
fprintf(stderr,"Concatenating files %s and",str);
	fpA=open_file(str,".in","r");
        snprintf(str,sizeof(str),"%s_%c",nameB,ChnB); 
fprintf(stderr," %s.\n\n",str);
	fpB=open_file(str,".in","r");
        snprintf(str,sizeof(str),"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	fpAB=open_file(str,".in","w");
	do {
		rtnA=fgets(strA,lenA+2,fpA);
		rtnB=fgets(strB,lenB+2,fpB);
		if(rtnA == NULL || rtnB == NULL) break;
		// Copy up to the first whitespace character.  The NUL test
		// matters: fgets() returns a line without a trailing newline
		// when the line fills the buffer or the file ends without
		// one, and isspace('\0') is false.
		for(i=0; strA[i] != 0 && !isspace((unsigned char)strA[i]); i++)
			fputc(strA[i],fpAB);
		for(i=0; strB[i] != 0 && !isspace((unsigned char)strB[i]); i++)
			fputc(strB[i],fpAB);
		fputc('\n',fpAB);
	} while(TRUE);
	fclose(fpA); fclose(fpB); fclose(fpAB);
	free(strA); free(strB);

#if 1	//================ no need to turn this off when reading in the *.dca and *.mst files.
	//==== run ccmpred on this input file.
	ArgV[0]=AllocString("ccmpred"); ArgC=1;
	ArgV[ArgC]=AllocString("-t"); ArgC++;
	// The casts make the arguments match "%d" whatever the declared
	// integer types of thrds and device are.
        snprintf(str,sizeof(str),"%d",(int)thrds); 
	ArgV[ArgC]=AllocString(str); ArgC++;
	ArgV[ArgC]=AllocString("-d"); ArgC++;
        snprintf(str,sizeof(str),"%d",(int)device);
	ArgV[ArgC]=AllocString(str); ArgC++;
#if 0
        if(gpu_num >= 0){
	  sprintf(str,"-d %d",gpu_num);
	  ArgV[ArgC]=AllocString(str); ArgC++; 
	}
#endif
        snprintf(str,sizeof(str),"%s_%s_%c%c.in",nameA,nameB,ChnA,ChnB);
	ArgV[ArgC]=AllocString(str); ArgC++;
        snprintf(str,sizeof(str),"%s_%s_%c%c.dca",nameA,nameB,ChnA,ChnB);
        ArgV[ArgC]=AllocString(str); ArgC++;
        run_ccmpred(ArgC,ArgV);  fflush(stdout); fflush(stderr);
        for(i=0; i < ArgC; i++){ free(ArgV[i]); ArgV[i]=0; } ArgC=0;
        // sprintf(str,"%s.in",nameA); remove(str);
        // sprintf(str,"%s.in",nameB); remove(str);
#endif
}

// Message handed to print_error() by ihd_typ::GetSubAln() when the pdb
// chain sequence and the first sequence of the *.mst alignment do not
// overlap.
#define ihd_warning " ===> SPARC hetmer input error!\n\
  Make sure that the two MSAs correspond to the two interacting chains in\n\
  the pdb structure, the sequences of which need to be included in their\n\
  corresponding MSAs and labeled with their NCBI pdb_identifiers\n\
  (e.g., 5M4O_C and 5M4O_D for chains C and D in pdb file 5m4o).\n\n"


char	*ihd_typ::GetSubAln(char *filename,e_type SqX,BooLean Left)
{
   Int4	strt,z,y,x,a,s;
   FILE	*fp=0;
   char	c,C,Str[5000],buffer[5000],*Rtn,*ALN=0,*SEQ=0;
   e_type SqY;

   //=========== Get the sequence for pdb seq. ==============
   SeqToString(buffer,SqX,AB);
   SEQ=AllocString(buffer);
   //=========== Get the alignment for *.mst seq. ==============
   if(Left) sprintf(Str,"%s_%c_X",filename,ChnA); 
   else sprintf(Str,"%s_%c_X",filename,ChnB); 
   fp=open_file(Str,".mst","r");
   for(x=0; (c=fgetc(fp)) != ':'; x++){ 
	if(c == EOF || isspace(c)) print_error("mst file input error");
   } assert(isspace(c=fgetc(fp)));

   for(z=y=0; (c=fgetc(fp)) != '\n'; y++){ 
	if(isalpha(c)){ buffer[z]=toupper(c); z++; } Str[y]=c;
   } buffer[z]=0; Str[y]=0; fclose(fp);
   ALN=AllocString(Str);
   e_type sqY=StringToSeq(buffer,"SeqY mst",1,AB);

   //=========== Find overlap between the pdb and alignment seqs ========
   C=OverlappingSeqs(strt,sqY,SqX,25,0); // allows for 'X' residues!!
   if(C==FALSE){
	  fprintf(stderr,"C=%d; strt=%ld\n",C,strt);
	  PutSeq(stderr,SqX,AB); PutSeq(stderr,sqY,AB);
	  AlnSeqSW(stderr,11,1,SqX,sqY,AB);
	  fprintf(stderr,"\nihd_typ::GetSubAln() error!\n");
	  // fprintf(stderr,"%s\n",Str);
	  // fprintf(stderr,"%s\n",ihd_warning);
	  print_error(ihd_warning);
   } NilSeq(sqY);

   x=a=s=0;
   if(strt < 0) s=-strt; else if(strt > 0) a=strt; 
   if(Left){ 	// add additional residues to C-terminal end...
	while((C=ALN[a]) != 0 && (c=SEQ[s]) != 0){
	   Str[x]=C; x++; a++;
	   // fprintf(stdout,"%c%d vs %c%d\n",C,a,c,s);
	   if(C == '-') continue; else s++;
	} 
	while(isalpha(SEQ[s])){ Str[x]=tolower(SEQ[s]); s++; x++; }
	Str[x]=0; 
   } else { //==== add additional residues in pdb file MSA N-terminal end. ====
	// fprintf(stderr,"Str='%s'\nALN='%s'\n  y=%d; z=%d\n",Str,ALN,y,z);
        if(s > a){ 	
	   for(y=0; (y < s); y++,x++){ Str[x]=tolower(SEQ[y]); }
	} 
	while((C=ALN[a]) != 0 && (c=SEQ[s]) != 0){
	   Str[x]=C; x++; a++;
	   if(C == '-') continue; else s++;
	} Str[x]=0;
   } free(ALN); free(SEQ); return AllocString(Str); 
}

/**************************************************************************
  Write the one-line joint alignment "<nameA>_<nameB>_<ChnA><ChnB>.aln":
  the GetSubAln() string for chain chn[0] followed directly by the one
  for chain chn[1].  Always returns 0.
 **************************************************************************/
Int4	ihd_typ::MakeJointAln(char *chn)
{
	FILE	*dbg=0;		// point this at stderr to trace the steps.

	//==== sequences of the two chains ====
	Int4	chain=GetChainNumberPDB(pdb,chn[0]);
	e_type	seqLeft=GetPDBSeq(chain,pdb);
	chain=GetChainNumberPDB(pdb,chn[1]);
	e_type	seqRight=GetPDBSeq(chain,pdb);
	if(dbg){
	   PutSeq(dbg,seqLeft,AB);
	   PutSeq(dbg,seqRight,AB);
	}

	//==== aligned string for each half ====
	char	*left=GetSubAln(nameA,seqLeft,TRUE);
	if(dbg) fprintf(dbg,"%c: %s\n",chn[0],left);
	char	*right=GetSubAln(nameB,seqRight,FALSE);
	if(dbg){
	   fprintf(dbg,"%c: %s\n",chn[1],right);
	   fprintf(dbg,"joint: %s%s\n",left,right);
	}

	//==== write both halves on a single line ====
	char	fname[200];
	sprintf(fname,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	FILE	*out=open_file(fname,".aln","w");
	fputs(left,out);
	fputs(right,out);
	fputc('\n',out);
	fclose(out);

	free(left);
	free(right);
	NilSeq(seqLeft);
	NilSeq(seqRight);
	return 0;
}

pdb_typ	ihd_typ::MergeChainsPDB(char chnA, char chnB, pdb_typ P)
/**************************************************************************
  Merge chnB with chnA. move to pdb_typ.

  Returns a new pdb_typ in which the atoms of chain chnB follow those of
  chain chnA and carry chnA's chain label.  The structure is produced by
  writing both chains to a temporary file and reading it back with
  MakePDB().
  Side effect on P: chain chnB is renumbered so that its first residue
  follows the last residue of chnA, and stays renumbered.  The chain
  labels of its atoms are restored before returning.
  Reports an error through print_error() if either chain is missing or
  the temporary file cannot be created.
 **************************************************************************/
{
        atm_typ atom;
        Int4    cA,cB,a,diff,Start;
	pdb_typ	pdbRtn=0;
	//==== renumber chain B ====
        cA=GetChainNumberPDB(P,chnA);
        if(cA==0){
	   fprintf(stderr,"cA = %d; chnA=%c\n",cA,chnA);
	   print_error("RenumberMergeChainsPDB() input error 1");
	}
        Start=P->maxres[cA] + 1;
        cB=GetChainNumberPDB(P,chnB);
        if(cB==0){
	   fprintf(stderr,"cB = %d; chnB=%c\n",cB,chnB);
	   print_error("RenumberMergeChainsPDB() input error 2");
	}
	// diff = shift that moves chain B's lowest residue to Start.
        diff=P->lowres[cB]-Start;
        if(diff !=0){
          for(a=1; a<= P->natoms[cB]; a++){
           atom = P->atom[cB][a];
	   RenumberAtom(ResAtom(atom)-diff,atom);
          } P->lowres[cB] -= diff; P->maxres[cB] -= diff;
	}
	//==== output chain A ====
	FILE *fp=tmpfile();
	// tmpfile() returns a null pointer when no file can be created.
	if(fp == 0) print_error("MergeChainsPDB(): unable to create temporary file");
        fprintf(fp,"HEADER    %s\n",P->filename);
        for(a=1; a<= P->natoms[cA]; a++){
                PutAtom(fp, P->atom[cA][a]);
        }
	//==== Relabel chnB atoms to chnA ====
        for(a=1; a<= P->natoms[cB]; a++){
           atom = P->atom[cB][a]; ReNameChainAtom(chnA,atom);
	   // Write the atom under chain A's label, then restore its own.
	   PutAtom(fp,atom); ReNameChainAtom(chnB,atom);
        } fprintf(fp,"TER               \n");
        fprintf(fp,"END               \n");
	rewind(fp); pdbRtn=MakePDB(fp); fclose(fp);
        return pdbRtn;
}

int	ihd_typ::Run()
{
	Int4    i,j;

	Int4	rtnF=FindProteinPairs();
	RunCCMpred();
	//==== Get joint aln file from pdb and mst files. ====
	MakeJointAln(argv[5]);

	// combine the two chains into one chain within pdb file.
	// if(argc != 5) print_error("usage: sparc d2d pdb_file chnA chnB");

	int     arg,Argc=0;
        char    *Argv[50],str[200];
	pdb_typ	pdbAB=MergeChainsPDB(ChnA,ChnB,pdb);
	sprintf(str,"%s_%s",argv[2],argv[3]);
	FILE *ofp=open_file(str,".pdb","w");
	PutPDB(ofp,pdbAB); fclose(ofp);
	Int4 split=MaxResPDB(GetChainNumberPDB(pdb,ChnA),pdb);
	// fprintf(stderr,"split = %d\n",split);

	// look only at DC-scores for pairs across the two domains...
	// RunSTARC(): starc AA_BB.pdb A AA_BB_AB
	Argv[Argc]=AllocString("starc"); Argc++;

	sprintf(str,"%s_%s.pdb",nameA,nameB);
	Argv[Argc]=AllocString(str); Argc++;

	sprintf(str,"%c",ChnA);
	Argv[Argc]=AllocString(str); Argc++;
	
	sprintf(str,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	Argv[Argc]=AllocString(str); Argc++;
	
	sprintf(str,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	// ofp=open_file(str,".sprc","w");
	ofp=tmpfile(); 
	// fprintf(stdout,"\npdb file: %s\n",FilenamePDB(pdb));
	fprintf(ofp,"\n");
	for(i=0; i < argc; i++) fprintf(ofp,"%s ",argv[i]);
	fprintf(ofp,"\nall: ");
	for(i=0; i < Argc; i++) fprintf(ofp,"%s ",Argv[i]);
	fprintf(ofp,"\n");

	fprintf(ofp,"%c|%c: ",ChnA,ChnB);
	for(i=0; i < Argc; i++) fprintf(ofp,"%s ",Argv[i]);
	fprintf(ofp,"-split=%d\n",-split);

	fprintf(ofp,"%c2%c: ",ChnA,ChnA);
	for(i=0; i < Argc; i++) fprintf(ofp,"%s ",Argv[i]);
	fprintf(ofp,"-split=%d ",split);
	fprintf(ofp,"-side=-1\n");

	fprintf(ofp,"%c2%c: ",ChnB,ChnB);
	for(i=0; i < Argc; i++) fprintf(ofp,"%s ",Argv[i]);
	fprintf(ofp,"-split=%d ",split);
	fprintf(ofp,"-side=1\n");

	fprintf(ofp,"%c2%c: ",ChnA,ChnB);
	for(i=0; i < Argc; i++) fprintf(ofp,"%s ",Argv[i]);
	fprintf(ofp,"-split=%d\n\n",split);


    char mode='C',mthd[10];
    for(j=0; j <= 4; j++){
	if(j==0){ 
	   sprintf(mthd,"all");
	} else if(j==1){ 
	   sprintf(mthd,"%c|%c",ChnA,ChnB);
	   sprintf(str,"-split=%d",-split);
	   Argv[Argc]=AllocString(str); Argc++;
	} else if(j == 2){
	   sprintf(mthd,"%c2%c",ChnA,ChnA);
	   Argc--; free(Argv[Argc]);
	   sprintf(str,"-split=%d",split);
	   Argv[Argc]=AllocString(str); Argc++;
	   sprintf(str,"-side=-1");
	   Argv[Argc]=AllocString(str); Argc++;
	} else if(j == 3){
	   sprintf(mthd,"%c2%c",ChnB,ChnB);
	   Argc--; free(Argv[Argc]);
	   sprintf(str,"-side=1");
	   Argv[Argc]=AllocString(str); Argc++;
	} else if(j == 4){
	   sprintf(mthd,"%c2%c",ChnA,ChnB);
	   Argc--; free(Argv[Argc]);
	   Argv[Argc]=AllocString("-P=30"); Argc++;
	}
	if(MaxDist > 0.0){ 
	   sprintf(str,"-D=%.1f",MaxDist); Argv[Argc]=AllocString(str); Argc++;
	}
	edc_typ *edc= new edc_typ(Argc,Argv);
	dci_typ *dci=edc->RunDCA( );
	if(MaxDist > 0.0){ Argc--; free(Argv[Argc]); }
	if(dci){
            // if(edc->DoShowPairs()) dci->Put(stdout);
            char *Chns=edc->RtnChains();
            set_typ SetB=0;
	    dci->PutResults(ofp,mode,Chns,mthd);
            if(edc->DoShowPairs()){
                fprintf(outfp,"\nhighest DCA scoring pairs between %s & %s\n",
			nameA,nameB);
		sprintf(str,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
	        FILE *pfp=open_file(str,".pml","w");
		fprintf(pfp,"cmd.load(\"./%s_%s.pdb\")\n",nameA,nameB);
		fprintf(pfp,"cmd.color(\"palegreen\",\"chain %c\")\n",ChnA);
		fprintf(pfp,"sele resi 1-%d\n",split);
		fprintf(pfp,"cmd.color(\"lightblue\",\"sele\")\n");
                dci->PutBest(outfp,edc->ShowPairs,SetB,ChnA,pfp,FALSE);
		fclose(pfp);
            }
            free(Chns); delete dci;
        } else print_error("Input error");
        delete edc;
    } 
    rewind(ofp);
    sprintf(str,"%s_%s_%c%c",nameA,nameB,ChnA,ChnB);
    for(char c=0; (c=fgetc(ofp)) != EOF; ) fprintf(outfp,"%c",c);
    fprintf(outfp,"\n"); fclose(ofp);
    for(i=0; i < Argc; i++) free(Argv[i]);
    return 0;
}

