/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/
#include "drc_typ.h"

// ChainVSI is defined in another translation unit.  In this file it is called with an
// argument vector (built by string2argv), an input stream and an output stream; the
// output stream is passed as 0 on the second call in MkPyMOL().
extern   int ChainVSI(int argc,char *argv[],FILE *ifp, FILE *ofp);

Int4	drc_typ::run_bpps(FILE *mmafp, FILE *hptfp, FILE *ptrnfp)
// Runs BPPS in query ('q') mode and writes the foreground (and optionally the
// background) alignment files plus a section of the <prefix>.darc report.
// WARNING: Argv & Argc are set in drc_init.cc; two options (-Focus and -root) are
// appended here for the duration of the call and removed again before returning.
// Always returns 0.
{
	// ---- temporarily append the query-mode options ----
	sprintf(str,"-Focus=1:%d",query_id);
	Argv[Argc++]=AllocString(str);
	sprintf(str,"-root");
	Argv[Argc++]=AllocString(str);
	// (SecretCode=3 is deliberately not used: it prevents hptfp output.)

	// ---- an existing <prefix>.chk file means the sampler run can be skipped ----
	sprintf(str,"%s.chk",argv[1]);
	FILE	*chkfp=fopen(str,"r");
	const bool haveCheckpoint=(chkfp != NULL);
	if(haveCheckpoint) fclose(chkfp);

	omc_typ *omc=new omc_typ('q',Argc,Argv);
	omc->VerboseOff();
	if(haveCheckpoint){
		omc->PutDARC(mmafp,hptfp,ptrnfp);
	} else {
		omc->Run(mmafp,hptfp,ptrnfp);
		omc->PrintTime(stderr);
	}

	// ---- foreground alignment (<prefix>_fg) and report section ----
	FILE	*fgfp=open_file(argv[1],"_fg","w");
	sprintf(str,"%s_fg",argv[1]);
	FILE	*rptfp=open_file(argv[1],".darc","a+");
	fprintf(rptfp,"###################### 1.BPPS Q (Cp/Sp) output #########################\n");
	omc->PutHyperPartition(rptfp);
	fprintf(rptfp,"Pattern residues:\n");
	omc->OutPutMMA_PTRN(fgfp,rptfp,2,str);
	fprintf(rptfp,"\n");
	fclose(fgfp);
	fflush(rptfp);
	fclose(rptfp);

	// ---- optional background alignment (<prefix>_bg cma file) ----
	if(UseBG){
		FILE *bgfp=open_file(argv[1],"_bg","w");
		sprintf(str,"%s_bg",argv[1]);
		omc->OutPutMMA_PTRN(bgfp,0,1,str);
		fclose(bgfp);
	}

	// ---- remove the -root and -Focus options again ----
	Argc--;
	free(Argv[Argc]);
	Argc--;
	free(Argv[Argc]);
	Argv[Argc]=0;

	delete omc;
	return 0;
}

Int4	drc_typ::find_lots_of_pdbids(FILE *fp)
// Outputs a list of pdb sequences with lots of structures from the pdbaa file.
// For every pdbaa header line the pdb identifiers following each NCBI delimiter
// (char value 1) are collected; sequences with >= 60 identifiers whose header
// does not contain "ibosomal" are written to 'fp'.  A histogram of identifier
// counts and a ranking taken from the heap are written to stdout.
// Returns 0 (no heap-size estimate is computed here).
{
  char	*STR=0,Tmp[10],**pdb_id=0,c,*sp,chn;
  Int4	m=0,n=0,i,j;
  FILE	*pdbfp=0;
  h_type  HG=Histogram("number of pdb_ids",0,1000,10.0);
  dh_type dH=dheap(1002,4);
  set_typ SetZ[4];
  for(i=0; i < 4; i++) SetZ[i]=MakeSet(259);

  if(pdbaa_path){
fprintf(stderr,"path=%s\n",pdbaa_path);
  	NEWP(pdb_id,509,char); NEW(STR,20003,char);
	pdbfp=open_file(pdbaa_path,"","r");
	for(m=n=0 ; fgets(STR,20000,pdbfp) != NULL; ){	// read pdbaa line
	   if(sscanf(STR,">%4s_%c:",str,&chn) != 2) continue;
fprintf(stderr,"\t%d. =%s_%c\n",m,str,chn);
	   sprintf(str1,"%s_%c:",str,chn);
	   sp=&STR[1]; c=1;	// c == 1 is the NCBI delimiter between pdbids.
	   for(n=0; (sp=strchr(sp,c)) != NULL; ){
		sp++;  strncpy(Tmp,sp,8); Tmp[6]=0; 	// keep at most 6 characters.
		n++; pdb_id[n]=AllocString(Tmp);
		if(n >= 500) break;	// pdb_id[] holds 509 entries.
	   } 
	   if(n > 1) IncdHist(n,HG);
	   if(n >= 60){
	     m++;
	     if(strstr(STR,"ibosomal") == 0){
		STR[80]=0;	// truncate the header for printing.
  		for(i=0; i <= 3; i++) ClearSet(SetZ[i]);
		for(i=1; i <= n; i++){ 
		  for(j=1; j <= 3; j++){ 
		    AddSet(pdb_id[i][j],SetZ[j]);	// distinct characters at id positions 1..3.
		  }
		}
		double key=0.0;
		for(j=1; j <= 3; j++){ 
		    key -= (double)CardSet(SetZ[j]);
		    // NOTE(review): item m is inserted once per j with a decreasing key;
		    // presumably only the last insertion matters -- confirm against dheap.
		    insrtHeap(m,(keytyp)key,dH);
		}
		fprintf(fp,"%d.%s\n%s_%c: ",m,STR,str,chn);
		for(i=1; i <= n; i++) fprintf(fp,"%s ",pdb_id[i]);
		fprintf(fp,"\n");
	     }
	   } for(i=1; i <= n; i++){ free(pdb_id[i]); pdb_id[i]=0; }
	} free(pdb_id); free(STR); fclose(pdbfp);	// STR was previously leaked.
  } 
  for(i=1; !emptyHeap(dH); i++){
	Int4 minKey=(Int4) minkeyHeap(dH);
	// Keep the removal outside assert() so it still happens when NDEBUG is defined.
	m=delminHeap(dH); assert(m != 0);
	fprintf(stdout,"rank %d: %d(%d)\n",i,m,minKey);
  }
  PutHist(stdout,60,HG); NilHist(HG); Nildheap(dH);
  for(i=0; i < 4; i++) NilSet(SetZ[i]);	// the sets were previously leaked.
  return 0;
}

Int4	drc_typ::expand_pdbids(char *mstfile)
// Looks at 'raw' mst file without BPPS patterns or identical chain pdb_ids.
// Creates a list of All relevant pdb identifies from pdbaa file.
//================ look for additional pdb_ids if pdbaa file provided ===
// For the S-th line of 'mstfile', pdbAllID[S][0] holds the "<pdb>_<chain>" id from
// that line and pdbAllID[S][1..] hold the other ids listed on the matching pdbaa
// header line (if a pdbaa file was given).  Any previously built pdbAllID is freed.
// Returns 0 if 'mstfile' cannot be opened; otherwise 12 x the number of ids counted,
// which callers in this file use to size arrays.
{
  char	id[50],*STR,ID[8],Tmp[10],**pdb_id,c,*sp;
  Int4	m,n,i,j,S,hpsz=0,NN=0;
  FILE	*tfp=0,*pdbfp=0;

#if 0	// use this to find pdb seqs with lots of structures.
	this->find_lots_of_pdbids(stdout); exit(1);
#endif
 
  tfp=fopen(mstfile,"r");
  if(tfp==0){ fprintf(stderr,"No proteins of known structure found.\n"); return 0; }
  // First pass: count the lines so that pdbAllID can be sized.
  for(NN=0; fgets(Str,StrLen,tfp) != NULL; ){ NN++; } rewind(tfp);
  if(pdbAllID){	// try modifying code to avoid calling this more than once.
          for(j=1; pdbAllID[j]; j++){
             for(i=0; pdbAllID[j][i]; i++){ free(pdbAllID[j][i]); } free(pdbAllID[j]);
          } free(pdbAllID); pdbAllID=0;
  }
// if(1) fprintf(stderr,"DEBUG: NN=%d; pdbaa_path=%s\n",NN,pdbaa_path);
  NEWPP(pdbAllID,NN +3,char); NEW(STR,20009,char); NEWP(pdb_id,509,char);
  if(pdbaa_path) pdbfp=open_file(pdbaa_path,"","r");	// path to pdbaa file...
  for(S=0,hpsz=0; fgets(Str,StrLen,tfp) != NULL; ){	// read mst line...
     if(sscanf(Str,"%[^_:]_%c:",str,&c) != 2) print_error("expand_pdb_ids() input error");
     // Bounded formatting: an identifier longer than the usual 4 characters used to
     // overflow ID[8] (and, in the extreme, id[50]).  Only the first 7 characters of
     // ID are compared below, so truncation does not change matching.
     S++; snprintf(id,sizeof(id),"%s_%c",str,c); // fprintf(stderr,"%s\n",id);
     snprintf(ID,sizeof(ID),">%6s",id); // fprintf(stderr,"%s\n",ID);
     if(pdbaa_path){
       for(n=0 ; fgets(STR,20000,pdbfp) != NULL; ){	// read pdbaa line
	 if(strncmp(STR,ID,7)==0){	// found sequence.
	   // fprintf(stderr,"%s",STR);
	   sp=&STR[1]; c=1;	// char c == 1 is the NCBI delimiter between pdbids.
	   for(n=0; (sp=strchr(sp,c)) != NULL; ){
		sp++;  strncpy(Tmp,sp,8); Tmp[6]=0; 
		n++; pdb_id[n]=AllocString(Tmp);
		if(0) fprintf(stderr,"%d.%d = '%s'\n",S,n,Tmp);
		if(n >= 500) break;	// pdb_id[] holds 509 entries.
	   } break;	// no need to look further...
	 }
       } rewind(pdbfp);	// rewind for the next mst line.
     } else { n=0; }
     hpsz++; NEWP(pdbAllID[S],n+5,char); pdbAllID[S][0]=AllocString(id); 
     if(n==0) continue;
     hpsz+=n;	// count the total number of entries.
     for(m=0,i=1; i <= n; i++){
	   if(strncmp(id,pdb_id[i],6) == 0) continue;	// skip mst id if present.
	   m++; pdbAllID[S][m]=AllocString(pdb_id[i]);
	   if(0) fprintf(stderr,"Added %s to group %d '%s'.\n",pdbAllID[S][m],S,pdbAllID[S][0]);
     }
     //============== pdbaa_path output... ==============
     for(i=1; i <= n; i++){ free(pdb_id[i]); pdb_id[i]=0; }
  } free(pdb_id); free(STR); fclose(tfp);
  if(pdbfp) fclose(pdbfp); 
  hpsz *= 12;	// assume no more than 12 adjacent identical subunits: dodecahedron.
  if(0) fprintf(stderr,"hpsz=%d\n",hpsz);
  return hpsz;
}


Int4	drc_typ::expand_mstfile(char *mstfile)
// Rewrites 'mstfile' (an mst file without BPPS patterns or identical-chain pdb ids)
// in place.  Each original line is copied and followed by
//   - the bpps_ptrns strings of the vsi file mapped (via mst2vsi) to that pdb id, and
//   - an "&id;id;...id." line listing the extra ids stored in pdbAllID.
// Requires pdbAllID, pdbID, and (when patterns exist) mst2vsi.  Always returns 0.
{
  char	id[50],chain;
  Int4	row=0;

  if(pdbAllID == 0) print_error("Analysis not performed; no structures found.");

  // Copy the current contents to a temporary file, then reopen the mst file for writing.
  FILE	*cpyfp=tmpfile();
  FILE	*mstfp=open_file(mstfile,"","r");
  while(fgets(Str,StrLen,mstfp) != NULL){
	fprintf(cpyfp,"%s",Str);
  }
  fclose(mstfp);
  rewind(cpyfp);
  mstfp=open_file(mstfile,"","w");

  while(fgets(Str,StrLen,cpyfp) != NULL){	// one mst line per iteration.
	if(sscanf(Str,"%[^_:]_%c:",str,&chain) != 2){
		print_error("expand_mstfile() input error");
	}
	sprintf(id,"%s_%c",str,chain);
	fprintf(mstfp,"%s",Str);

	//---------- BPPS pattern lines ----------
	assert(pdbID);   // c2a_typ created mst file & SeqID, so both should match.
	Int4 grp=0;
	for(Int4 g=1; pdbID[g]; g++){
		if(strncmp(pdbID[g],id,6) == 0){ grp=g; break; }
	}
	if(grp > 0){
		assert(grp <= nMst);
		if(bpps_ptrns){
			Int4 vsi=mst2vsi[grp];
			if(bpps_ptrns[vsi]){
				for(Int4 p=1; bpps_ptrns[vsi][p] != 0; p++){
					fprintf(mstfp,"%s\n",bpps_ptrns[vsi][p]);
				}
			}
		}
	}

	//---------- additional pdb ids from the pdbaa file ----------
	row++;
	assert(pdbAllID[row] != 0);
	assert(strncmp(id,pdbAllID[row][0],6) == 0);
	for(Int4 e=1; pdbAllID[row][e]; e++){
		if(e == 1) fprintf(mstfp,"&");
		fprintf(mstfp,"%s",pdbAllID[row][e]);
		if(pdbAllID[row][e+1]){
			fprintf(mstfp,";");
		} else {
			fprintf(mstfp,".\n");
		}
	}
  }
  fclose(mstfp);
  fclose(cpyfp);
  return 0;
}

Int4	drc_typ::run_c2a()
// ========= Create CCMpred input & *.dca + *.mst output files ============
// Runs cma2aln on <prefix>_fg and stores the returned pdb ids in pdbID (and their
// count in NumPdbID); the ids are listed on stderr, ten per line.
// If UseBG is set, cma2aln is run again on <prefix>_bg with "-ccm" and "-M=<n>".
// All ArgV entries created here are freed before returning.  Always returns 0.
{
	Int4    i,A,NumFG=0,NumBG=0,MaxBG;

	ArgV[0]=AllocString("cma2aln");
	sprintf(str,"%s_fg",argv[1]); ArgV[1]=AllocString(str); ArgC=2;
	pdbID=run_cma2aln(ArgC,ArgV,NumFG);	// pdbID=pdbids in *_fg and in *.mst files.
	sprintf(str,"%s_fg",argv[1]);
	fprintf(stderr,"PDB sequences found in the query partition ('%s') of the input MSA.\n",str);
	if(pdbID == 0) print_error("No pdb sequences found"); 
	else {
	   for(NumPdbID=0,i=1; pdbID[i]; i++){  
		NumPdbID++;
		if(i%10 == 1) fprintf(stderr," %s",pdbID[i]);
		else fprintf(stderr,", %s",pdbID[i]);
		if(i%10 == 0) fprintf(stderr,"\n");
	   } fprintf(stderr,"\n");
	}
	// output background aln file; creates *_bg.in; use cma2aln to prune *_bg.in file.
	if(UseBG){
	  free(ArgV[1]); sprintf(str,"%s_bg",argv[1]); ArgV[1]=AllocString(str); 
	  ArgV[ArgC]=AllocString("-ccm"); ArgC++;	//create *.in file only.
	  // NOTE(review): MAXIMUM yields at least 100,000 (or NumFG if larger), whereas the
	  // original remark said "at most 100,000 sequences" -- confirm the intended bound.
	  MaxBG=MAXIMUM(Int4,NumFG,100000);
	  sprintf(str,"-M=%d",MaxBG); ArgV[ArgC]=AllocString(str); ArgC++;
	  // Print out representative set of BG sequences...option for cma2aln
	  char **dummy=run_cma2aln(ArgC,ArgV,NumBG);
	  // run_cma2aln can return 0 (see the foreground check above); the list is not
	  // needed here, so just release it when one was returned.
	  if(dummy != 0){
	    for(i=1; dummy[i]; i++) free(dummy[i]);
	    free(dummy);
	  }
	}
	for(A=0; A < ArgC; A++){ free(ArgV[A]); ArgV[A]=0; }
	return 0;
}


Int4	drc_typ::run_ccm()
// 4. ========= Run CCMpred ============
// Runs ccmpred on <prefix>_fg.in (output <prefix>_fg_X.dca) and, when UseBG is set,
// on <prefix>_bg.in (output <prefix>_bg_X.dca).  A run is skipped when its output
// file already exists; the foreground run is only skipped when SbSmplNum == 0.
// Option arguments ("-t <n>" under OPENMP, "-d <gpu>") are shared by both runs.
// All ArgV entries created here are freed before returning.  Always returns 0.
{
	Int4    A,nOpt;
	FILE	*fp=0;
	BooLean	SkipRunCCM=FALSE;

	if(SbSmplNum ==0){
	  sprintf(str,"%s_fg_X.dca",argv[1]); fp=fopen(str,"r");
	  if(fp != NULL){ fclose(fp); SkipRunCCM=TRUE; }
	}

	ArgV[0]=AllocString("ccmpred"); ArgC=1;
#ifdef OPENMP
	if(num_thrds > 1){
	  ArgV[ArgC]=AllocString("-t"); ArgC++;
	  sprintf(str,"%d",num_thrds); ArgV[ArgC]=AllocString(str); ArgC++;
	}
#endif
	if(gpu_num >= 0){
	  sprintf(str,"-d %d",gpu_num); ArgV[ArgC]=AllocString(str); ArgC++; 
	}
	// Index of the first file argument.  The file arguments used to be placed at
	// the fixed slots 3 and 4 for the background run, which is only correct when
	// exactly two option arguments precede them.
	nOpt=ArgC;
	if(!SkipRunCCM){
fprintf(stderr,"############ running ccmpred #################\n");
	  sprintf(str,"%s_fg.in",argv[1]); ArgV[ArgC]=AllocString(str); ArgC++;
	  sprintf(str,"%s_fg_X.dca",argv[1]); ArgV[ArgC]=AllocString(str); ArgC++;
	  run_ccmpred(ArgC,ArgV);  fflush(stdout);
fprintf(stderr,"############ Done running ccmpred #################\n");
	}
	// Run CCMpred on BG alignment; creates *_bg_X.dca
	if(UseBG){
	  sprintf(str,"%s_bg_X.dca",argv[1]); fp=fopen(str,"r");
	  if(fp != NULL){ fclose(fp); SkipRunCCM=TRUE; } else SkipRunCCM=FALSE; 
	  if(!SkipRunCCM){ 
	    // Drop the foreground file arguments (if any) but keep the options.
	    for(A=nOpt; A < ArgC; A++){ free(ArgV[A]); ArgV[A]=0; }
	    ArgC=nOpt;
	    sprintf(str,"%s_bg.in",argv[1]); ArgV[ArgC]=AllocString(str);  ArgC++;
	    sprintf(str,"%s_bg_X.dca",argv[1]); ArgV[ArgC]=AllocString(str); ArgC++;
	    run_ccmpred(ArgC,ArgV);  fflush(stdout);
	  }
	}
	for(A=0; A < ArgC; A++){ free(ArgV[A]); ArgV[A]=0; }
	return 0;
}

Int4	drc_typ::create_pdb_paths(Int4 maxNum)
// Builds the <prefix>.paths file listing the pdb coordinate files to analyze.
// Reads the pdb_paths file named by argv[2]; every line containing "<id>_H.pdb"
// whose 4-character id (upper-cased) matches the first 4 characters of some
// pdbID[] entry is copied to <prefix>.paths.  For each pdbAllID group whose key id
// shares those 4 characters, a path in the same directory is also written for each
// additional id in the group that has not appeared earlier in that group.
// Allocates pdbUnique, X2id and X2file with maxNum+3 entries and fills pdbUnique.
// Returns the number of paths written; calls print_error() when there are none.
{
	Int4    i,j,k,nPaths,S,X;
	char	c,id[10],*pdbI,*pdbJ,*pdbK,found;   
	char	debug=0; // debug=1;

	assert(pdbAllID != 0);
	// select pdb files corresponding to sequences present in the msa...
	FILE *pofp=NULL; 	// output file; opened lazily on the first match.
	FILE *pifp=open_file(argv[2],"","r"); // argv[2] = input pdb_paths
	NEWP(pdbUnique, maxNum +3, char);
	NEW(X2id,maxNum +3, Int4); NEW(X2file,maxNum +3, Int4);
	for(nPaths=0,X=0; fgets(str,len_str,pifp) != NULL; ){
	   char *sp=strstr(str,"_H.pdb");
	   if(sp == NULL) continue; 
	   else {
	     // NOTE(review): assumes at least 4 characters precede "_H.pdb" on the line.
	     sp -= 4; 
	     if(sscanf(sp,"%4s_H.pdb",str0) == 1) {
		for(i=0; i < 4; i++) str2[i]=toupper(str0[i]);  str2[i]=0;
	        for(i=1; (pdbI=pdbID[i]); i++){  // additional pdb files (if any).
		   if(strncmp(str2,pdbI,4) == 0){	// found corresponding file.
		     if(pofp==NULL){ pofp=open_file(argv[1],".paths","w"); }
		     nPaths++; fprintf(pofp,"%s",str); strcpy(str1,str);
		     // Cut str1 just before the file name so that it holds the directory part.
	             sprintf(Str,"%4s_H.pdb",str0); sp=strstr(str1,Str); *sp=0;
	     	     if(debug) fprintf(stderr,"%d. %s",X,str);
	     	     if(debug) fprintf(stderr,"%d. %s + %s\n",X,str1,Str);
		     for(S=1; pdbAllID[S]; S++){
		        if(strncmp(pdbAllID[S][0],pdbI,4) == 0){  // == found file.
			   X++; pdbUnique[X]=AllocString(pdbAllID[S][0]);
			   for(j=1; (pdbJ=pdbAllID[S][j]); j++){
			     // Compare against entries 0..j-1 of the same group.
			     for(found=0,k=0; k < j; k++){	// skip redundant files.
				pdbK=pdbAllID[S][k];
				if(strncmp(pdbK,pdbJ,4) == 0){ found=1; break; }
			     }
			     if(!found){
				X++; pdbUnique[X]=AllocString(pdbJ);
				// File names use the lower-case 4-character id.
				for(k=0; k < 4; k++) id[k]=tolower(pdbJ[k]); id[k]=0;
		     		nPaths++; fprintf(pofp,"%s%s_H.pdb\n",str1,id); 
		     		if(debug) fprintf(stderr,"\t%s_H.pdb\n",id); 
			     }
			   }
			}
		     }
	     	     if(debug) fprintf(stderr,"\n");
		   }
		}
	     }
	   }
	} fclose(pifp); 
	if(pofp != NULL){ fprintf(pofp,"\n"); fflush(pofp); fclose(pofp); }
	if(debug) for(X=1; pdbUnique[X]; X++) fprintf(stderr,"%d. %s unique.\n",X,pdbUnique[X]);
	if(nPaths < 1){ print_error("No matches in pdb_paths file."); }
	return nPaths;
}

Int4	drc_typ::run_sarp(FILE *vsifp,FILE *mmafp,FILE *hptfp,FILE *ptrnfp)
//========= Map BPPS pattern residues to input structures (vsi file) ============
//============== run sarp program ============
// Constructs an hsc_typ from "sarp <prefix>.paths <prefix>" plus the three input
// streams and runs it, writing vsi output to 'vsifp'.  The pattern lines returned in
// a temporary file (format "<file#><class>=<n,n,...>") are stored as "#<class>=<list>"
// strings in bpps_ptrns[file#][1..numF[file#]]; maxF is raised to the largest file
// number seen.  SeqID is set from hsc.RtnSeqIDs() and listed on stderr.
// ArgV entries created here are freed before returning.  Always returns 0.
{
	Int4    i,j,k,f,a,A,x;
	char	chn,c,Clss=0;

	
	ArgV[0]=AllocString("sarp"); ArgC=1;
	sprintf(str,"%s.paths",argv[1]); ArgV[ArgC]=AllocString(str); ArgC++;
	ArgV[ArgC]=AllocString(argv[1]); ArgC++;
	if(0){ fprintf(stderr,"sarp cmds: ");
	  for(A=0; A < ArgC; A++) fprintf(stderr,"%s ",ArgV[A]); fprintf(stderr,"\n");
	}
	hsc_typ hsc(ArgC,ArgV,mmafp,hptfp,ptrnfp); hsc.VerboseOff();
   	// mmafp, ptrnfp and hptfp files are closed by hsc_typ.
	char mode;
	if(MkSIPRIS) mode=0; else mode='x';	// 'x' will skip sipris file.
	// run SARP analysis...
	FILE	*xfp=tmpfile(),*dmfp=tmpfile();	// dmfp returns (for bpps option in mstarc):
	hsc.Run(mode,3,xfp,vsifp,dmfp); rewind(dmfp); fclose(xfp);
	// Upper bound on the number of pattern strings stored per vsi file.
	Int4 MaxNumF=MaxDepth*hsc.MaxNumFullRpts();
#if 0	// DEBUG
	while(fgets(str,len_str,dmfp) != NULL){ fprintf(stderr,"%s",str); } rewind(dmfp); 
	/************** dmfp file format:
	1Y=104,80,64,3,11,57,62,48,87,76,115,75,77,22,55,6,107,19,99,23,108,60,59,21
	1R=39,83,60,35,52,30,33,49,40,90,94,9,86,56,8,99
	2Y=108,84,69,8,16,62,67,53,91,80,79,21,81,27,60,11,111,24,103,28,112,65,64,26
	2R=44,43,95,87,65,40,49,39,51,50,57,35,38,54,45,94,98,41,14,90,61,13,103,78,42
	***************/
#endif
	// Find the maximum number of vsi files generated.
	// (First pass over dmfp; needed to size bpps_ptrns and numF below.)
	while(fgets(str,len_str,dmfp) != NULL){
	   if(sscanf(str,"%d%c=%[0-9,]\n",&f,&Clss,str1)==3){ maxF=MAXIMUM(Int4,f,maxF); }
	} rewind(dmfp);
#if 0	// DEBUG
	FILE	*fp=open_file("junk",".dmfp","w");
	while(fgets(str,len_str,dmfp) != NULL) fprintf(fp,"%s",str); fclose(fp);
	rewind(dmfp);
#endif
	// retrieve bpps pattern information.
	if(0){	// DEBUG...
	  // FILE *dxfp=open_file("junk",".x","w");
	  FILE *dxfp=0; // dxfp=open_file(argv[1],".vsi_cmd","w");	// vsi commands
	  FILE *dvfp=open_file(argv[1],".vsi","w");
	  FILE *dzfp=0; // dzfp=open_file(argv[1],".dmf","w");	// for bpps option in mstarc.
	  hsc.Run(mode,3,dxfp,dvfp,dzfp); 
	  if(dxfp) fclose(dxfp); 
	  if(dvfp) fclose(dvfp); 
	  if(dzfp) fclose(dzfp);
	  // second SeqID freed up by hsc...
	}
	NEWPP(bpps_ptrns, maxF+3, char); NEW(numF,maxF+3,Int4); 
	// Second pass over dmfp: store one "#<class>=<list>" string per pattern line.
	for( ; fgets(str,len_str,dmfp) != NULL; ){
	   // fprintf(stderr,"%s",str);
	   if(sscanf(str,"%d%c=%[0-9,]\n",&f,&Clss,str1) ==3){
		// str (the line buffer) is reused here to build the stored string.
		sprintf(str,"#%c=%s",Clss,str1);  // fprintf(stderr,"  %s\n",str1);
		if(bpps_ptrns[f]==0) NEWP(bpps_ptrns[f],MaxNumF +2,char);
		numF[f]++;	// number of patterns for file f.
		assert(bpps_ptrns[f][numF[f]]==0);
		bpps_ptrns[f][numF[f]]=AllocString(str);
#if 0
if(numF[f] > MaxDepth){
	fprintf(stderr,"numF[%d]=%d; bpps_ptnrs[f][numF[f]]=%s\n",
		f,numF[f],bpps_ptrns[f][numF[f]]);
	rewind(dmfp); 
	while(fgets(str,len_str,dmfp) != NULL){ fprintf(stderr,"%s",str); }
	
}
#endif
		// NOTE(review): these bounds are only checked after the store above.
		assert(numF[f] <= MaxNumF);	// 'Y', 'R', 'O', etc...
		assert(f <= maxF);
	   }
	} fclose(dmfp);
	if(0){	// DEBUG...
	  FILE *dxfp=open_file("junk",".x","w");
	  FILE *dvfp=open_file(argv[1],".vsi","w");
	  FILE *dzfp=open_file(argv[1],".dmf","w");
	  hsc.Run(mode,3,dxfp,dvfp,dzfp); fclose(dxfp); fclose(dvfp); fclose(dzfp);
	  // second SeqID freed up by hsc...
	  exit(1);
	}
	SeqID=hsc.RtnSeqIDs();	// for passing in below...
#if 1	// DEBUG...
	fprintf(stderr,"\nSeqIDs:\n");
	for(k=0,i=1; SeqID[i]; i++){
	   for(j=1; SeqID[i][j]; j++){
#if 1
	        k++; fprintf(stderr,"SeqID[%d][%d]=%s\n",i,j,SeqID[i][j]);
#else
	        k++; fprintf(stderr,"%s, ",SeqID[i][j]);
		if(k % 10 == 0) fprintf(stderr,"\n");
#endif
	   }
	} fprintf(stderr,"\n");
#endif
	for(A=0; A < ArgC; A++){ free(ArgV[A]); ArgV[A]=0; }
	return 0;
}

Int4	drc_typ::SetUpPyMOL(FILE *vsifp,Int4 hpsz)
//========= Map BPPS pattern residues to structures (for PyMOL files) ============
// Scans the vsi stream.  "~$=<n>" lines start vsi file n; each "File<id>=<path>" line
// is reduced to an upper-case "<pdb>_<chain>" id that is looked up in pdbUnique[].
// For the first hit of each pdbUnique entry the (id,file) pair and the directory part
// of the path are appended to IDs[], File[] and PdbPath[], and X2id/X2file are set.
// Entries whose SeqID matches that of the preceding entry in the first 6 characters
// are then removed and the three arrays are replaced by compacted copies.
// Returns the number of entries kept.  'hpsz' sizes the arrays (hpsz+5).
{
	Int4    i,k,f,x,X,file,id,End;
	char	chn,c,*s,ID[10];
	set_typ	Used=0;
	FILE	*efp=0; // efp=stderr;

	//----------- Count the number of groups in the vsi input file. ----------
	NEW(IDs,hpsz+ 5, Int4); NEW(File,hpsz+ 5, Int4); NEWP(PdbPath,hpsz+ 5, char);
	// Find the id and file numbers for the pdb_ids in the query family partition.
	Used=MakeSet(hpsz+5); ClearSet(Used);
	// fprintf(stderr,"======= %d proteins of known structure found in MSA =======\n",NumPML);
	for(End=file=0; fgets(str,len_str,vsifp) != NULL; ){
	     if(strncmp(str,"~$=",3)==0){ 	// start of file 'f'.
		file++; 
		if(sscanf(str,"~$=%d",&f) != 1) print_error("vsi file syntax error");
		assert(f==file);	// vsi files must be numbered consecutively from 1.
	     } else if(strncmp(str,"File",4)==0){
	        if(efp) fprintf(stderr,"str = %s",str);
		if(sscanf(str,"File%d=%s",&id,str2) != 2) print_error("vsi file syntax error");
		// str2 should end at first space character.
		// Read pdb file identifier and compare with SeqID[file][id]
		char *strX=strstr(str,"_H.pdb:");
		if(strX == NULL) print_error("pdb_paths input file syntax error");
		// Step back to the 4-character pdb id preceding "_H.pdb:".
		strX -= 4;
		if(sscanf(strX,"%4s_H.pdb:%c",str0,&chn) == 2) {
			for(i=0; i < 4; i++){
			    if(islower(str0[i])) str0[i]=toupper(str0[i]);
			} sprintf(ID,"%s_%c",str0,chn);
		} else print_error("pdb_paths input file syntax error");
		for(X=1; pdbUnique[X]; X++){
		   if(strncmp(pdbUnique[X],ID,6) == 0){
		     if(MemberSet(X,Used)){ 
			fprintf(stderr,"duplicate = %s\n",pdbUnique[X]);
		     } else {
		        AddSet(X,Used);
		        End++; IDs[End]=id; File[End]=file; X2id[X]=id; X2file[X]=file;
			if(efp) fprintf(stderr,"%d.%s: file=%d; id=%d\n",X,pdbUnique[X],file,id);
			// Keep only the directory part of the path ("./" when there is none).
		        if(strchr(str2,'/') == NULL) PdbPath[End]=AllocString("./");
		        else { 
			  PdbPath[End]=AllocString(str2);  s=strrchr(PdbPath[End],'/');
			  s++; *s=0;   // end string after last slash character.
		        } if(efp) fprintf(stderr," End=%d; %s\n",End,str2);
		     } fprintf(stderr,"%d. %s == %s\n",X,pdbUnique[X],ID);
		   } else if(efp) fprintf(stderr,"%d. %s != %s\n",X,pdbUnique[X],ID);
		}
	     }
        } NilSet(Used);
#if 1   // eliminate redundant analyses.
        FILE *jnkfp=stderr; // jnkfp=open_file(argv[1],".list","w");
	Int4 j,jm1,jd,fjle;
	char	*last;
	set_typ SetD=MakeSet(End+5);	// indices of entries to be dropped.
	// jm1 always equals j-1: each entry is compared with its immediate predecessor.
        for(j=1,jm1=0; j <= End; j++,jm1++){
	     id=IDs[j]; file=File[j];
if(efp) fprintf(stderr,"%d. id=%d; file=%d; End=%d\n",j,id,file,End);
	     assert(id!=0 && file != 0);
	     assert(SeqID[file] && SeqID[file][id]);
if(efp) fprintf(stderr,"%d. SeqID[%d][%d]=%s\n",j,file,id,SeqID[file][id]);
	     if(j > 1){
	        jd=IDs[jm1]; fjle=File[jm1]; // duplicate found??
		// NOTE(review): with asserts enabled the predecessor already passed the
		// checks above, so this branch presumably only matters under NDEBUG.
		if(SeqID[fjle]==0 || SeqID[fjle][jd]==0){
			 if(jm1 != 0) AddSet(jm1,SetD); continue;
		}
		if(strncmp(SeqID[file][id],SeqID[fjle][jd],6) == 0){
			 AddSet(j,SetD); continue;
		}
	     } fprintf(jnkfp,"%d. %s\n",j,SeqID[file][id]);
	     last=SeqID[file][id];	// assigned but not read afterwards.
        } if(jnkfp != stderr) fclose(jnkfp); 
	char **TmpPath;
	Int4	*JDs,*Fjle,end;
	NEW(JDs,End+ 5, Int4); NEW(Fjle,End+ 5, Int4); NEWP(TmpPath,End+ 5, char);
	// Compact the surviving entries into fresh arrays; free the paths of dropped ones.
        for(i=1,j=0; i <= End; i++){
	     if(MemberSet(i,SetD)){ if(PdbPath[i]) free(PdbPath[i]); PdbPath[i]=0; }
	     else { j++; JDs[j]=IDs[i]; Fjle[j]=File[i]; TmpPath[j]=PdbPath[i]; }
	} NilSet(SetD); free(PdbPath); free(IDs); free(File);
	end=j; PdbPath=TmpPath; IDs=JDs; File=Fjle;
	return end;
#else
	return End;
#endif
}

Int4	drc_typ::MkPyMOL(FILE *vsifp, Int4 end)
//----------- Create pymol scripts showing top DCA pairs & BPPS residues. ----------
// For each entry j in 1..end (IDs[j], File[j]) ChainVSI is called twice:
//   1. with "-T" on the vsi stream, writing to a temporary file, and
//   2. with "-pml=<prefix>_<j>" reading that temporary file back.
// 'vsifp' is rewound after each first call and is not closed.  Always returns 0.
{
	Int4    j,file,id;

	for(j=1; j <= end; j++){
	     id=IDs[j]; file=File[j]; 
	     assert(id!=0 && file != 0);
	     assert(SeqID[file] && SeqID[file][id]);
             sprintf(str1,"chn_vsi %s_pdb.VSI %d %d -T -skip=W -d2.5 -D",argv[1],id,file);
             ArgC=string2argv(ArgV,str1);       // mode == 'T'
             FILE *ifp,*ofp=tmpfile();
	     // tmpfile() returns NULL on failure; stop here rather than hand a null
	     // stream to ChainVSI.
	     if(ofp == NULL) print_error("MkPyMOL(): unable to create a temporary file");
             ChainVSI(ArgC,ArgV,vsifp,ofp); rewind(ofp); ifp=ofp; rewind(vsifp);
	     // Clear each slot after freeing so that the shared ArgV array never holds
	     // dangling pointers.  ArgC ends at -1, as before.
             for(ArgC-- ; ArgC >= 0; ArgC--){ free(ArgV[ArgC]); ArgV[ArgC]=0; }
	     // NOTE(review): str0 is a shared buffer that is not set in this function;
	     // presumably the "<str0>.crs" name is a placeholder since input comes from ifp.
       	     sprintf(str1,"chn_vsi %s.crs %d -d2.5 -c -D -pml=%s_%d",str0,id,argv[1],j);
             ArgC=string2argv(ArgV,str1);       // mode == 'p'
	     ChainVSI(ArgC,ArgV,ifp,0); fclose(ifp);
             for(ArgC-- ; ArgC >= 0; ArgC--){ free(ArgV[ArgC]); ArgV[ArgC]=0; }
        } return 0;
}

dch_typ	*drc_typ::run_starc(Int4 hpsz,Int4 end)
//========= run STARC on DCA pairs and add couplings to PyMOL scripts. ============
// For each entry j in 1..end (IDs[j], File[j]) a sprc_typ is built from
// "sparc <PdbPath[j]> <prefix>_fg_X -DC=<num_dc> -D=<Dmax>" and run on a sub-mst
// file extracted by GetSubMST(); its output is appended to <prefix>_<j>.pml and to
// the <prefix>.darc report, and the script is then passed to ConvertPML2PSE().
// Entries with a zero id/group, without a key pdb id, or without a SeqID are skipped.
// Returns the dch_typ (constructed with 'hpsz') that was handed to every sprc_typ;
// it is allocated with new here and not deleted in this function.
{
      Int4  i,j,x,A,id,grp,key;
      FILE  *xfp=0,*infp=0,*efp=0; efp=stderr;

      dch_typ *dch= new dch_typ(hpsz);
      // fprintf(stdout,"pdbid\tchn(s)\tMaxDist\tScore\tBiU\tL\tD\tX\td\tdS\n");
      FILE *dcfp=NULL;	// <prefix>.darc; opened lazily for the first processed entry.
      for(j=1; j <= end; j++){
	id=IDs[j]; grp=File[j]; 
	if(id==0 || grp== 0) continue;	// chain is not the right one.
	key=KeyPdbID(grp); 
	if(key == 0 ){
	   fprintf(stdout,"WARNING: pdb file may start at residue < 1.\n");
	   continue;	// need to check this out and debug...why splitting up?
	}
	if(SeqID[grp] == 0 || SeqID[grp][id] == 0) continue;
	assert(SeqID[grp] && SeqID[grp][id]);
	// run mstarc on query sequence only
	// fprintf(stderr,"%d. %s\n",j,SeqID[grp][id]);
	ArgV[0]=AllocString("sparc"); ArgC=1;
	assert(PdbPath[j]); ArgV[ArgC]=AllocString(PdbPath[j]); ArgC++;
	sprintf(str,"%s_fg_X",argv[1]); ArgV[ArgC]=AllocString(str); ArgC++;
	// ArgV[2] = dcafile
	sprintf(str,"-DC=%d",num_dc); ArgV[ArgC]=AllocString(str); ArgC++;
	// ArgV[ArgC]=AllocString("-no_evc_file"); ArgC++;
	// ArgV[ArgC]=AllocString("-F"); ArgC++;
	sprintf(str,"-D=%.3f",Dmax); ArgV[ArgC]=AllocString(str); ArgC++;
	// fprintf(stderr,"DmaX=%.3lf\n",Dmax);
	// NOTE(review): pdbUnique/X2id/X2file are indexed by j here although j indexes
	// IDs[]/File[]; diagnostic output only -- confirm the intended index.
	if(efp) fprintf(efp,"%d. '%s' (%d.%d) == '%s' (%d.%d)?\n",
			j,pdbUnique[j],X2id[j],X2file[j],SeqID[grp][id],id,grp);
	// create a tmpfile to pass in mst for one pdbid only...
	// assert(Dmax >= 4.0);
	if(efp){ 
	  fprintf(efp,"\ngroup %d. %s: %s\n",grp,SeqID[grp][key],SeqID[grp][id]);
	  this->GetSubMST(efp,SeqID[grp][key],SeqID[grp][id]); 
	}
	infp=tmpfile();
	x=this->GetSubMST(infp,SeqID[grp][key],SeqID[grp][id]); 
	rewind(infp); 
	if(x == 0){	// key id not found in the mst file: report before the assert fires.
	   fprintf(stderr,"\ngroup %d. %s: %s\n",grp,SeqID[grp][key],SeqID[grp][id]);
	   this->GetSubMST(stderr,SeqID[grp][key],SeqID[grp][id]); 
	} assert(x > 0);
	sprintf(str,"_%d.pml",j); xfp=open_file(argv[1],str,"a"); // append DCA to pymol script.
	// fprintf(xfp,"cmd.show(\"cartoon\",\"all\")\n");
	fprintf(xfp,"set dash_width, 1\nset dash_round_ends, off\n\n");
	{	// scope limits the lifetime of the sprc_typ object.
	    if(dcfp==NULL){
		dcfp=open_file(argv[1],".darc","a+");
		fprintf(dcfp,"############## 3.Highest DC-scoring pairs (in PyMOL scripts) ##############\n");
	    }
	    sprc_typ sprc(ArgC,ArgV,dch);
	    if(UseBG){ sprintf(str,"%s_bg_X",argv[1]); sprc.SetBGfile(str); } // *_bg_X.dca 
// fprintf(stderr,"==== DEBUG 3: sprc_X ====> %s\n",ArgV[2]);
	    sprc.run_mstarc(xfp,SeqID[grp][id],dcfp,infp); fprintf(xfp,"\n"); 
	} 
        fprintf(xfp,"cmd.show(\"spheres\",\"metals\")\n"); 
        fprintf(xfp,"util.cbay metals\n"); 
        fprintf(xfp,"set cartoon_gap_cutoff, 0\n"); 
        // fprintf(xfp,"set sphere_scale, 0.25, metals\n"); 
        fprintf(xfp,"set precomputed_lighting\n"); 
	fprintf(xfp,"cmd.hide(\"cartoon\",\"ligand\")\n");
	fclose(xfp); fclose(infp);
// Convert *.pml files to *.pse files
       	sprintf(str,"%s_%d",argv[1],j);
	if(efp) fprintf(stderr,"===> %s <====\n",str);
	this->ConvertPML2PSE(str);

	for(A=0; A < ArgC; A++){ free(ArgV[A]); ArgV[A]=0; }
      } if(dcfp != NULL){ fprintf(dcfp,"\n"); fflush(dcfp); fclose(dcfp); }
      return dch;
}

Int4	drc_typ::KeyPdbID(Int4 grp)
// Returns the index within SeqID[grp] of the first sequence id whose leading 6
// characters match some entry of pdbID[] (the key id for *.mst group 'grp').
// When no entry matches, the ids of the group are listed on stderr and 0 is returned.
{
	FILE	*dbgfp=0; // dbgfp=stderr;

	// 'grp' must be smaller than the index of the terminating null entry of SeqID[].
	Int4	nGrp=1;
	while(SeqID[nGrp]) nGrp++;
	assert(grp < nGrp);

	for(Int4 s=1; SeqID[grp][s]; s++){
		for(Int4 p=1; pdbID[p]; p++){
			if(dbgfp){
				fprintf(stderr,"pdbID[%d]='%s'\n",p,pdbID[p]);
				fprintf(stderr,"!!!--> %d.%d. %s (%s)\n",grp,s,SeqID[grp][s],SbSmplKey);
			}
			if(strncmp(pdbID[p],SeqID[grp][s],6) == 0){
				return s;
			}
		}
	}

	// No key found: report the members of the group.
	fprintf(stderr,"grp=%d: ",grp);
	for(Int4 s=1; SeqID[grp][s]; s++){
		fprintf(stderr,"%s ",SeqID[grp][s]);
	}
	fprintf(stderr,"\n");
	return 0;
}

Int4	drc_typ::GetSubMST(FILE *ofp,char *keypdb, char *twinpdb)
// print out a sub-mstarc input file from main mst file.
// The mst file read depends on Mode: <prefix>_X.mst ('B'), <prefix>_sprc_X.mst ('S'),
// otherwise <prefix>_fg_X.mst.  The first "<pdb>_<chain>: <aln>" line whose id matches
// 'keypdb' in its first 6 characters is written to 'ofp' as "<twinpdb>: <aln>",
// followed by the lines immediately after it that start with '#'.
// Returns 1 when such a line was found and 0 otherwise.
// Calls print_error() when an input line does not fit into the read limit.
{
	const Int4 MaxLine=5000;	// read limit passed to fgets().
        char    PDBID[9],chn,aln[5009];
	FILE *mstfp=0;
	if(Mode=='B') mstfp=open_file(argv[1],"_X.mst","r"); 
	else if(Mode=='S') mstfp=open_file(argv[1],"_sprc_X.mst","r"); 
	else mstfp=open_file(argv[1],"_fg_X.mst","r");
	while(fgets(Str,MaxLine,mstfp) != NULL){
	  // fgets() stores at most MaxLine-1 characters followed by a NUL, so testing
	  // Str[MaxLine-1] (as was done before) can never detect an over-long line.
	  // A completely filled buffer that does not end in a newline was truncated.
	  size_t len=strlen(Str);
	  if(len == (size_t)(MaxLine-1) && Str[len-1] != '\n')
		print_error("mst aln-formatted input too long");
	  if(sscanf(Str,"%4s_%c: %s\n",PDBID,&chn,aln) != 3) continue;
	  sprintf(str,"%4s_%c",PDBID,chn);
	  if(strncmp(str,keypdb,6) != 0) continue;
	  // found a match: print it under the twin's name, then copy its '#' lines.
	  fprintf(ofp,"%s: %s\n",twinpdb,aln); 
	  while(fgets(Str,MaxLine,mstfp) != NULL && Str[0] == '#') fprintf(ofp,"%s",Str);
	  fclose(mstfp); return 1;
	} fclose(mstfp); return 0;
}

Int4	drc_typ::MapMST2VSI(FILE *vsifp)
// Maps each pdbID[] entry (mst group) to the number of the vsi file that lists it.
// Sets nMst to the number of pdbID[] entries, allocates mst2vsi (nMst+5 entries) and
// sets mst2vsi[x] to the vsi file number of each "File" line whose upper-case
// "<pdb>_<chain>" id matches pdbID[x].  Entries that stay unmapped are listed in
// the <prefix>.darc report, ten per line.  Returns nMst.
{
	Int4	fileNum=0,declared,fileId;
	char	chain;

	//----------- Count the number of groups in the mst file. ----------
	nMst=1;
	while(pdbID[nMst]) nMst++;
	nMst--;
	NEW(mst2vsi,nMst+5, Int4);

	//----------- Scan the vsi stream. ----------
	while(fgets(str,len_str,vsifp) != NULL){
		if(strncmp(str,"~$=",3) == 0){		// start of the next vsi file.
			if(sscanf(str,"~$=%d",&declared) != 1){
				print_error("vsi file syntax error");
			}
			fileNum++;
			assert(declared==fileNum);
			continue;
		}
		if(strncmp(str,"File",4) != 0) continue;

		if(sscanf(str,"File%d=%s",&fileId,str2) != 2){
			print_error("vsi file syntax error");
		}
		char *tag=strstr(str,"_H.pdb:");
		if(tag == NULL) print_error("pdb_paths input file syntax error");
		tag -= 4;	// back up to the 4-character pdb id.
		if(sscanf(tag,"%4s_H.pdb:%c",str1,&chain) == 2){
			for(Int4 k=0; k < 4; k++){
				if(islower(str1[k])) str1[k]=toupper(str1[k]);
			}
			sprintf(str0,"%s_%c",str1,chain);
		} else {
			print_error("pdb_paths input file syntax error");
		}
		for(Int4 g=1; pdbID[g]; g++){
			if(strncmp(pdbID[g],str0,6) == 0){
				mst2vsi[g]=fileNum;
				break;
			}
		}
	}

	//----------- Report the groups without a vsi file. ----------
	FILE	*rptfp=0;
	Int4	nMissing=0;
	for(Int4 g=1; g <= nMst; g++){
		if(mst2vsi[g] != 0) continue;
		if(rptfp == 0){
			rptfp=open_file(argv[1],".darc","a+");
			fprintf(rptfp,"\nSubgroup sequences of known structure ");
			fprintf(rptfp,"with errors or not in pdb_paths:\n ");
			// AFN: 12/18/2019. e.g., 3HB9 has residue inconsistencies
			// (ASN717A and ILE717A) and is skipped.
		}
		nMissing++;
		fprintf(rptfp,"%s ",pdbID[g]);
		fflush(rptfp);
		if(nMissing % 10 == 0) fprintf(rptfp,"\n ");
	}
	if(rptfp){
		if(nMissing % 10 != 0) fprintf(rptfp,"\n");
		fprintf(rptfp,"\n");
		fclose(rptfp);
	}
	return nMst;
}

Int4	drc_typ::Run()
{
	Int4 time1=time(NULL);
	FILE *xfp=NULL;
    	assert(PathSPARC==0);	// don't use SPARC mode here. 
        FILE *mmafp=tmpfile(),*hptfp=tmpfile(),*ptrnfp=tmpfile();
	this->run_bpps(mmafp,hptfp,ptrnfp);
	// for(rewind(hptfp) ; fgets(str,len_str,hptfp) != NULL; ){ fprintf(stderr,"%s",str); }
        rewind(mmafp); rewind(hptfp); rewind(ptrnfp);
	this->run_c2a();
        sprintf(str,"%s_fg_X.mst",argv[1]);
	Int4 hpsz=this->expand_pdbids(str); 	// requires mst file; creates pdbAllID.
	Int4 NumPaths=0;
	if(hpsz > 0) NumPaths=this->create_pdb_paths(hpsz);	// creates pdb list for sarp.
  if(NumPaths > 0){	// then pdb files are available...
	FILE *vsifp=tmpfile(); 
	this->run_sarp(vsifp,mmafp,hptfp,ptrnfp); 
	rewind(vsifp); // makes bpps_ptrns.
char k;
FILE *dbgfp=open_file(argv[1],".vsi","w");
while((k=fgetc(vsifp)) != EOF) fprintf(dbgfp,"%c",k); rewind(vsifp); fclose(dbgfp);
	// mmafp, ptrnfp and hptfp files are closed by run_sarp.
	// run_sarp creats the *.sprs file.
	{	// Run SIPRIS within darc 
		char usg[]="darc sipris runtime error\n";
		char vrsn[]="sipris called by darc\n";
		ArgV[0]=AllocString("sipris"); ArgC=1;
		sprintf(str,"%s.sprs",argv[1]);
		FILE *tmpfp=fopen(str,"r");
		if(tmpfp!=NULL){
		  fclose(tmpfp);
		  ArgV[ArgC]=AllocString(str); ArgC++;
		  ArgV[ArgC]=AllocString("S"); ArgC++;
		  // sprintf(str,"-out=%s_sprs.out",argv[1]);
		  // ArgV[ArgC]=AllocString(str); ArgC++;
		  scl_typ scl(ArgC,ArgV,' ',usg,vrsn);
		  // Int4 rtn=scl.Run();
		  xfp=open_file(argv[1],".darc","a+"); 
		  fprintf(xfp,
		    "##################### 2.SIPRIS 'S' mode (clCp/clSp) ########################\n");
		  fprintf(xfp," -log10(p) estimates for family (R) ");
		  fprintf(xfp,"& superfamily (Y) pattern residues 3D clusters\n");
		  Int4 rtn=scl.Run(xfp); 
		  fprintf(xfp,"  (class_R = family; class_Y = superfamily)\n\n");
		  fflush(xfp); fclose(xfp);
		}
		for(Int4 arg=0; arg < ArgC; arg++){ free(ArgV[arg]); ArgV[arg]=0; }
	}
	this->MapMST2VSI(vsifp); rewind(vsifp);		// creates mst2vsi array...
        sprintf(str,"%s_fg_X.mst",argv[1]);
	this->expand_mstfile(str); 		// requires bpps_ptrns & mst2vsi.
#if 1
	Int4 end=this->SetUpPyMOL(vsifp,hpsz); rewind(vsifp); 
	this->MkPyMOL(vsifp,end); fclose(vsifp); 
#else
	Int4 end=this->SetUpPyMOL(vsifp,hpsz); fclose(vsifp); 
#endif
	this->run_ccm();

	dch_typ	*dch=this->run_starc(hpsz,end);
	if(SbSmplNum > 0){	// DCA ranks with various subsampled MSAs.
	  Int4 JJ,j,Grp,grp,zz;
	  for(JJ=Grp=0,grp=1; Grp==0 && SeqID[grp]; grp++){
	   for(j=1; Grp==0 && SeqID[grp][j]; j++){
		// fprintf(stderr,"--> %d.%d. %s (%s)\n",grp,j,SeqID[grp][j],SbSmplKey);
		if(strncmp(SbSmplKey,this->SeqID[grp][j],6)==0){
			fprintf(stderr,"%d.%d. %s\n",grp,j,SeqID[grp][j]);
			Grp=grp; JJ=j; break;
		}
	   }
	  } if(Grp==0){ 
	     fprintf(stderr,"Subsampling query structure \"%s\" not found\n",SbSmplKey);
	     fprintf(stderr,"  make sure that the coordinates are in <pdb_paths>.\n");
	     fprintf(stderr,"  ...skipping subsampling.\n\n");
	  } else {
	      if(PdbPath[Grp]==0){	// add paths to seq IDs that lack one.
		for(Int4 g=1; SeqID[g]; g++){
		  if(PdbPath[g]==0) continue;
		  for(j=1; SeqID[g][j]; j++){
		    if(strncmp(SeqID[Grp][JJ],SeqID[g][j],4) == 0){
			PdbPath[Grp]=AllocString(PdbPath[g]); break; 
	   	    }
		  } if(PdbPath[Grp]) break;
		} if(PdbPath[Grp]==0) print_error("drc_typ::Run() pdb file input error");
	     }
	     assert(JJ!=0);	// chain is not the right one.
	     assert(SeqID[Grp] != 0 && SeqID[Grp][JJ] != 0);
	     Int4 key=KeyPdbID(Grp); 
	     if(key == 0){
	        xfp=open_file(argv[1],".darc","a+"); 
		fprintf(xfp,"####### INPUT ERROR #######\n");
		fprintf(xfp,"!!! Invalid PDB ID '%s' !!!\n",
						SeqID[Grp][JJ]);
		fprintf(xfp,"   ..skipping DCA sub-sampling\n");
		fclose(xfp);
	     } else {
	       run_key_starc(SeqID[Grp][JJ],PdbPath[Grp]);
	       xfp=open_file(argv[1],".darc","a+"); 
	       fprintf(xfp,"############## 3. DCA sub-sampling MSAs. #############\n");
	       Int4 Cut=1; sprintf(str,"%s_fg",argv[1]);
	       lsd_typ *lsd=this->SubSampleDCA(Cut,SbSmplSize,SbSmplNum,xfp); 
	       // ^ prints out ranks after sampling...
	       FILE *pmlfp=0;
	       Int4 **colpair=lsd->Put(xfp,pmlfp,0,0); delete lsd; 
	       {
                sprintf(str,"%s_fg",argv[1]);
		rdc_typ rdc(str);
		for(Int4 i=1; colpair[1][i] != 0; i++){
                   Int4 x=colpair[1][i],y=colpair[2][i];
                   assert(y > 0 && x > 0); rdc.Run(xfp,x,y,i);
		} 
	       } free(colpair[1]); free(colpair[2]); free(colpair); fclose(xfp);
	     }
	  }
	}
	xfp=open_file(argv[1],".darc","a+"); 
	fprintf(xfp,"############### 4.STARC (3dCdc/3dSdc, dcCp/dcSp, 3dCp/3dSp) ###############\n");
	fprintf(xfp,"   Statistical Tool for Analysis of Residue Couplings\n");
	fprintf(xfp,"   S-scores correspond to -log10(p) of the correspondence \n");
	fprintf(xfp,"    between DC-scores & 3D structure or\n");
	fprintf(xfp,"    between BPPS family(R) residues & DC-scores (or 3D structure).\n");
	dch->Put(xfp); 
	fprintf(xfp,"  Values (-log10(p)) correspond to 3dSdc, dcSp & 3dSp scores:\n");
	fprintf(xfp,"  <pdbid>  chn     r(A)    3dSdc    BiU    ...\n");
	fprintf(xfp,"    bpps   (X)     DC(3D)  dcSp    (3dSp)  ...\n");
	fprintf(xfp,"    where X is the foreground symbol (typically an 'R')\n");
	fprintf(xfp,"    and where BiU is a ball-in-urn ranking score.\n");
	fprintf(xfp,"  For definitions, see DARC and STARC papers:\n");
	fprintf(xfp,"       PMID: 32015389 and PMID: 30596639, respectively.\n\n");
	fflush(xfp); fclose(xfp); delete dch; 
   } else {
	xfp=open_file(argv[1],".darc","a+"); 
	fprintf(xfp,"\n!!!!!!!!!!!! no structures found for SIPRIS, DCA, and STARC analyses. !!!!!!!!!!!\n\n");
	fflush(xfp); fclose(xfp);
   }
	fprintf(stderr,"\nDARC run completed.\n");
        fprintf(stderr,"\ttime: %d seconds (%0.2f minutes)\n",
               time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
	return 0;
}

Int4	drc_typ::run_key_starc(char *PdbSeqID,char *pdb_path,FILE *osfp)
//========= run STARC on DCA pairs and add couplings to PyMOL scripts. ============
// Builds a "sparc" argument vector in the ArgV/ArgC members, writes the sub-MST
// for PdbSeqID to a temporary file via GetSubMST(), and runs
// sprc_typ::run_mstarc() on it.
//
//   PdbSeqID : sequence/PDB identifier; passed to GetSubMST() (twice) and to
//              run_mstarc().
//   pdb_path : path placed in ArgV[1]; must be non-NULL (asserted).
//   osfp     : forwarded unchanged to run_mstarc().
//
// Returns whatever run_mstarc() returns.  All ArgV entries allocated here are
// freed and reset to 0 before returning; ArgC is left at its final count.
{
	assert(pdb_path);	// check before allocating anything.

	dch_typ *dch=new dch_typ(12);
        // fprintf(stdout,"pdbid\tchn(s)\tMaxDist\tScore\tBiU\tL\tD\tX\td\tdS\n");

	// ArgV[0] = program name; ArgV[1] = pdb path.
	ArgV[0]=AllocString("sparc"); ArgC=1;
	ArgV[ArgC]=AllocString(pdb_path); ArgC++;

	// ArgV[2] = dcafile prefix; which file is used depends on Mode.
	if(Mode =='B') sprintf(str,"%s_X",argv[1]); 
	else if(Mode =='S') sprintf(str,"%s_sprc_X",argv[1]); 
	else sprintf(str,"%s_fg_X",argv[1]); 
	ArgV[ArgC]=AllocString(str); ArgC++;
	sprintf(str,"-DC=%d",num_dc); ArgV[ArgC]=AllocString(str); ArgC++;
	sprintf(str,"-D=%.3f",Dmax); ArgV[ArgC]=AllocString(str); ArgC++;

	// tmpfile() returns NULL on failure; using that stream below would be
	// undefined behavior, so report it through the same error path used
	// elsewhere in this function (print_error is presumably fatal -- confirm).
	FILE	*infp=tmpfile(); 
	if(infp==NULL) print_error("drc_typ::run_key_starc(): unable to create temporary file");
	Int4	x=this->GetSubMST(infp,PdbSeqID,PdbSeqID); rewind(infp); 
	if(Mode=='B' && x == 0){	// only treated as an error in 'B' mode.
		fprintf(stderr,"File %s not found in %s_X.mst file\n",PdbSeqID,argv[1]);
		print_error("fatal input error");
	}

	Int4 RtnOS=0;
	{	// scoped so that sprc is destroyed before infp is closed and ArgV is freed.
	    sprc_typ sprc(ArgC,ArgV,dch);
	    if(UseBG){ sprintf(str,"%s_bg_X",argv[1]); sprc.SetBGfile(str); } // *_bg_X.dca 
	    RtnOS=sprc.run_mstarc(0,PdbSeqID,0,infp,0,osfp); 
	} fclose(infp);

	for(Int4 A=0; A < ArgC; A++){ free(ArgV[A]); ArgV[A]=0; }
	delete dch;
	return RtnOS;
}

