/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "bp2dc_typ.h"

// Usage text for a program invoked as "sarp".  Within the visible part of this
// file nothing references USAGE_START (GetArg() reports P2C_USAGE_START instead).
// NOTE(review): possibly a leftover from another tool -- confirm before removing.
// NOTE(review): the "(files needed: ..." line has no closing parenthesis.
#define USAGE_START \
"USAGE: sarp <pdb_name_file> <mcBPPS_prefix> [-options]\n\
    (files needed: <mcBPPS_prefix>_new.hpt <mcBPPS_prefix>.pttrns <mcBPPS_prefix>_new.mma\n\
    -color=<str>    - Designate repeat colors to use (e.g., <str>=\"MROYGC\")\n\
    -Color=<str>    - Designate sidechain colors to use (e.g., <str>=\"MROYGC\")\n\
    -dummy \n\
\n\n"

#if 0	// input files:
        SDR_4CB.pttrns  // all patterns (FILE *fp1)
        SDR_4CB.hpt     // hpt          call as "SDR_4CB", assumes "SDR_4CB.hpt"
        SDR_4CB_new.mma // subgroup alignments; open all of these...  (FILE *fp2)
           // pull out cma file for vsi.
        pdb_infile      // pdb file paths
#endif

void	bp2dc_typ::InitRead(FILE *mmafp, FILE *hptfp)
// Loads the three inputs this object works from -- the subgroup alignments
// (IN_CMA / Number), the hyperpartition (hpt) and the patterns (ptrn) -- and
// then calls Validate().
//   mmafp: if non-null the alignments are read from it; otherwise the ".mma"
//          file for "<mcBPPS_prefix>_new" is opened through open_file().
//   hptfp: if non-null the hyperpartition is read from it; otherwise
//          "<mcBPPS_prefix>_new.hpt" is tried first and, if fopen() fails,
//          the ".hpt" file for "<mcBPPS_prefix>_chk" is opened instead.
// Both handles, when supplied, are fclose()d here, so the caller must not
// close them again.  mcBPPS_prefix must already be set (asserted below).
// mmafp and hptfp are passed in by hierview to avoid too many input files!!
{
	// read in hyperpartition...

	// read in subgroup alignments...
	FILE *fp=0;
	assert(mcBPPS_prefix != 0);
	if(mmafp){ IN_CMA=MultiReadCMSA(mmafp,&Number,AB); fclose(mmafp); }
        else {
	   // 'string' is a member scratch buffer; CheckString() is called after every
	   // sprintf into it (presumably an overflow check -- TODO confirm).
	   sprintf(string,"%s_new",mcBPPS_prefix); CheckString();
	   fp = open_file(string,".mma","r");
	   IN_CMA=MultiReadCMSA(fp,&Number,AB); fclose(fp);
	} assert(IN_CMA != 0);
#if 1
	if(hptfp){ hpt=new hpt_typ(hptfp); fclose(hptfp); } // for hierview... 
	else {
	  sprintf(string,"%s_new.hpt",mcBPPS_prefix); CheckString();
	  if((fp=fopen(string,"r")) == 0){
	     // fall back to the "_chk" hyperpartition when the "_new" one is absent.
	     sprintf(string,"%s_chk",mcBPPS_prefix); CheckString();
	     fp = open_file(string,".hpt","r"); 
	  } hpt=new hpt_typ(fp); fclose(fp);
	}
#else
	// disabled alternative: single open_file() call without the "_chk" fallback.
	fp = open_file(string,".hpt","r"); hpt = new hpt_typ(fp); fclose(fp);
#endif
#if 1	// lengthen IN_CMA array 
	// The hyperpartition may name more sets than alignments were read.  Rebuild the
	// array so that index x corresponds to hpt set x; x walks the hpt sets while y
	// walks the alignments that were actually read.
	if(hpt->NumSets() > Number) {	// then lengthen the IN_CMA array using Null's
	   Int4	x,y;
	   // NOTE(review): entries never assigned below are later tested against 0, so
	   // this relies on NEW() zero-filling the array -- confirm.
	   cma_typ *in_cma; NEW(in_cma,hpt->NumSets() +3, cma_typ);
	   for(x=1,y=1; x <= hpt->NumSets(); x++){
#if 1
		// Last hpt set with no alignment left: synthesize one random-sequence
		// alignment through a temporary file, store it in both arrays, count it
		// in Number and name it "Random".
		// NOTE(review): this reads and writes IN_CMA[y] for a y that may exceed the
		// count returned by MultiReadCMSA(); assumes that array has spare,
		// zeroed slots -- confirm.  The tmpfile() result is not checked.
	        if(IN_CMA[y] == 0 && x == hpt->NumSets()){
	     	  fp = tmpfile(); 
	          PutRandomCMA(fp,tFreqCMSA(IN_CMA[1]),LengthCMSA(1,IN_CMA[1]),1,AB);
	          rewind(fp); in_cma[x]=IN_CMA[y]=ReadCMSA(fp,AB); fclose(fp); Number++;
	          RenameCMSA("Random",IN_CMA[y]); y++; continue;
		}
#endif
		// NOTE(review): IN_CMA[y] is passed to NameCMSA() without a null check here.
		if(strcmp(NameCMSA(IN_CMA[y]),hpt->ElmntSetName(x)) == 0){ // --> same set.
			in_cma[x]=IN_CMA[y];  y++;
		} else if(y == 1 || hpt->TypeOfSet(x) != '?'){
			// only sets of type '?' (other than the first) may lack an alignment.
			print_error("*.hpt and *.mma files are fatally inconsistent");
		} 
	   } y--; 
	   // every alignment that was read must have been matched to some hpt set.
	   if(y != Number) print_error("*.hpt and *.mma files are fatally inconsistent");
	   Number = hpt->NumSets(); free(IN_CMA); IN_CMA=in_cma; 
// diagnostic output of the first set name and the final counts.
fprintf(stderr,"%s; y = %d; NumSets=%d\n",NameCMSA(IN_CMA[1]),y,Number);
	}
#endif
	// read in patterns ...
	fp = open_file(mcBPPS_prefix,".pttrns","r");
	ptrn = new pat_typ(fp); fclose(fp);
	// cross-check hpt, alignments and patterns against each other.
	Validate();
}

void	bp2dc_typ::Free()
{
	Int4 i;
	delete ptrn;
	delete hpt;
	free(mcBPPS_prefix);
	for(i=1; i <= Number; i++){ if(IN_CMA[i]) TotalNilCMSA(IN_CMA[i]); } free(IN_CMA);
	NilAlpha(AB);
}

void	bp2dc_typ::Validate()
// Cross-checks the three inputs loaded by InitRead() and aborts through
// print_error() on the first inconsistency:
//   1. hpt vs alignments: same number of sets, matching names row by row
//      (a missing alignment is tolerated only for sets of type '?'), and the
//      last alignment must be named "Random".
//   2. hpt vs patterns: same number of BPPS columns / patterns, and every
//      pattern category must map to a row in 1..NumBPPS.
//   3. patterns vs alignments: every alignment must be at least as long as the
//      largest pattern position.
{
	Int4 Row,Col;
	// hpt vs cma
	if(hpt->NumSets() != Number) print_error("inconsistent *.hpt and *.mma input files");
	for(Row = 1; Row <= hpt->NumSets(); Row++){
	   if(IN_CMA[Row] == 0){
		// A missing alignment is legal only for '?' sets; otherwise report it
		// instead of handing a null alignment to NameCMSA().
		if(hpt->TypeOfSet(Row) != '?'){
		   print_error("bp2dc_typ::Validate(): *.hpt and *.mma files are inconsistent");
		}
		continue;
	   }
	   if(strcmp(NameCMSA(IN_CMA[Row]),hpt->ElmntSetName(Row))){
		print_error("bp2dc_typ::Validate(): *.hpt and *.mma files are inconsistent");
	   }
	}
	if(IN_CMA[Number] == 0 || strcmp(NameCMSA(IN_CMA[Number]),"Random") != 0){
		print_error("*.mma file lacks a Random sequence set.");
	}
	// hpt vs ptrn.
	if(hpt->NumBPPS() != ptrn->NumPttrns){
		// the value printed is NumBPPS(), so label it as such.
		fprintf(stderr,"hpt->NumBPPS = %d != ptrn->NumPttrns = %d\n",hpt->NumBPPS(),ptrn->NumPttrns);
		print_error("hyperpartition and pttrns input files are inconsistent");
	}
	for(Col = 1; Col <= hpt->NumBPPS(); Col++){
	   Row= ptrn->NumPttrns - ptrn->PttrnCategory[Col] + 1;	// reversed order for *.pttrns file.
	   // Row < 1 means the category exceeds the number of patterns; PrintKLST_Files()
	   // later uses the category as an hpt column index, so reject it here as well.
	   if(Row < 1 || Row > hpt->NumBPPS()) print_error("*.hpt and *.pttrns files are inconsistent");
	}
	// patterns vs cma
	for(Int4 i=1; i <= Number; i++){
	   // null slots were already vetted above (only '?' sets may be empty).
	   if(IN_CMA[i] == 0) continue;
	   if(LengthCMSA(1,IN_CMA[i]) < ptrn->MaxPttrnPos){
		print_error("cma alignment files and pttrns input files are inconsistent");
	   }
	}
	   // char symbol=hpt->Cell(Row,Col);
	   // if(symbol != '+') print_error("*.hpt and *.pttrns files are inconsistent");
}


void	bp2dc_typ::Init()
// Resets the run-time options to their defaults and creates the amino-acid
// alphabet (AB) that GetArg() later needs for reading the sequence file.
// NOTE(review): UseBeta (set by "-beta" in GetArg) is not given a default
// here -- confirm it is initialized elsewhere.
// NOTE(review): the defaults below for MaxMeanDist, MinDistInSeq and MaxSqDist
// differ from those quoted in P2C_USAGE_START (40, 4 and 200).
{
	// terminate the scratch buffer near its end.
	string[298]=0;

	// bookkeeping and output verbosity.
	LastNumData=0;
	verbose=TRUE;

	// pair-search behaviour.
	FindBest=FALSE;
	K1=0;
	K2=0;
	KeyCol=0;
	SideColors=0;

	// distance thresholds.
	MaxMeanDist=20;
	MaxSqDist=1000;
	MinDistInSeq=8;
	MinDist=3;      // what about inserts in some proteins?
	HA_dmax=0.0;
	dmax=0.0;

	// column ranges ("-range" / "-Range").
	begin=0;
	end=0;
	Begin=0;
	End=0;

	// statistics / histogram settings.
	MinVar=0.0;
	bin_size=1.0;
	Target=40;
	MaxDataPoints=2000;

	AB = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
}

// Disabled (compiled out by '#if 0'): two AddColToSeq() overloads that record a
// column-to-sequence map for a repeat.  They refer to names (esc, NumRpts,
// Col2pdbSeq, RptCategory, ...) that nothing else in the visible file defines.
// NOTE(review): in the first overload R is used as an index without being
// assigned -- fix before re-enabling.
#if 0
BooLean	bp2dc_typ::AddColToSeq(Int4 S, Int4 A, Int4 *ColToSeq)
// returns true if new repeat was found and added; false if already present.
{
	Int4 num_cols=NumCol( ),col;
	Int4 R;
	RptCategory[S][R]=A;
	Col2FullSeq[S][R]=ColToSeq;
	AddSet(A,RelevantSet[S]);
	return TRUE;
}

BooLean	bp2dc_typ::AddColToSeq(Int4 i, Int4 S, Int4 A, Int4 *ColToSeq)
// returns true if new repeat was found and added; false if already present.
{
	Int4 num_cols=NumCol( ),col;
	assert(i > 0 && i <= esc->NumPDB_Set[S]);
	Int4 R,I=esc->PDB_SetI[S][i],C=esc->PDB_SetC[S][i];
	// compare the candidate map against every repeat already stored for (I,C).
	for(R=1; R <= NumRpts[I][C]; R++){
	  	Int4 *Tmp=Col2pdbSeq[I][C][R];
		BooLean	different=FALSE;
		for(Int4 col=1; col <= num_cols; col++){
		   if(ColToSeq[col] && Tmp[col] && ColToSeq[col] != Tmp[col]){
			different=TRUE; break;
		   } // ignores 'X' residue in column positions.
		} if(!different){ return FALSE; } // if 
	} NumRpts[I][C] = R;
	if(R > MAX_NUMBER_INTERNAL_RPTS) print_error("Too many internal repeats"); 
	Col2pdbSeq[I][C][R]=ColToSeq;
	// AddSet(A,RelevantSet[S]);
	RelevantSeq[I][C][R]=A;
	return TRUE;
}
#endif

// Usage/help text that bp2dc_typ::GetArg() passes to print_error() on any
// argument error.  The defaults quoted for -maxdist and -seqdist follow the
// values assigned in bp2dc_typ::Init() (MaxMeanDist=20, MinDistInSeq=8), and
// -minvar is shown as <real> because GetArg() parses it with "%lf".
// NOTE(review): -srch, -P and -maxsqdist are listed here, but GetArg() in this
// file has no branch that accepts them (they end in a usage error); confirm
// whether they should be implemented or removed from this text.
// NOTE(review): -d quotes a 2.5 Angstrom default while Init() sets HA_dmax=0.0;
// presumably 0 selects the built-in default downstream -- confirm.
#define P2C_USAGE_START \
"USAGE: bp2dc <mcBPPS_prefix> <fasta_seq> <options>\n\
   (Bayesian partitioning to direct coupling)\n\
   argument [-options]\n\
    -best                       Find the atom pairs that deviate least (default: deviate most)\n\
    -beta                       Use beta carbons for non-glycine residues (default: alpha carbons only)\n\
    -col=<int1>                 compute all pairs that include columns <int1>\n\
    -D<float>                   dmax for non-classical H-bonds (default: case dependent)\n\
    -d<float>                   dmax in Angstroms for classical H-bonds (default: 2.5 Angstroms)\n\
    -show=<int1>:<int2>         Show residues in columns <int1> and <int2>\n\
    -srch=<int1>:<int2>         Search for residues in sequence <int1> that are a better 3D-fit to \n\
                                average distances of other residues within 10 Angstroms of \n\
                                column <int2> residues \n\
    -range=<int1>:<int2>        Only look at columns <int1> to <int2>\n\
    -Range=<int1>:<int2>        Focus on columns <int1> to <int2>\n\
    -bin=<real>                 Set the bin size for histogram (default: 1.0)\n\
    -P=<filename>               Pattern file corresponding to sequence subgroup.\n\
    -maxdist=<int>              Set the Maximum Mean distance to consider (default: 20)\n\
    -seqdist=<int>              Set the minimum distance between residues to consider (default: 8)\n\
    -maxsqdist=<int>            Set the maximum distance between residues to consider (default: 200) \n\
    -mindist=<int>              Set the minimum distance between aligned columns to compare (default: 3)\n\
    -minvar=<real>              Set the minimum variance to consider (default: 0)\n\
\n\n"

void	bp2dc_typ::GetArg(Int4 argc, char *argv[])
// Parses the command line: argv[1] is the mcBPPS prefix (also used as OutFile),
// argv[2] the sequence file (loaded into dcaP with the alphabet AB), and any
// later argument beginning with '-' is an option.  Every malformed or unknown
// option ends in print_error(P2C_USAGE_START).  The full command line is
// echoed to stdout first.
// Arguments after argv[2] that do not start with '-' are silently skipped.
{
	Int4 n;

	if(argc < 3) print_error(P2C_USAGE_START);
	if(argv[1][0] == '-' || argv[2][0] == '-') print_error(P2C_USAGE_START);

	// echo the invocation.
	for(n = 0; n < argc; n++) fprintf(stdout,"%s ",argv[n]);
	fprintf(stdout,"\n");

	mcBPPS_prefix=AllocString(argv[1]);
	OutFile=mcBPPS_prefix;		// same pointer, not a copy.
	SeqFile=AllocString(argv[2]);
	dcaP=MkSeqSet(argv[2],AB);

	// The scan starts at argv[2]; that entry was verified above not to begin
	// with '-', so it is skipped like any other non-option argument.
	for(n = 2; n < argc; n++){
	   char *opt=argv[n];
	   if(opt[0] != '-') continue;

	   switch(opt[1]) {
	     case 'b':		// -bin=<real>, -best, -beta
		if(sscanf(opt,"-bin=%lf",&bin_size) == 1){
		   if(bin_size < 0.001 || bin_size > 4.0) print_error(P2C_USAGE_START);
		} else if(strcmp("-best",opt)==0) {
		   FindBest=TRUE;
		} else if(strcmp("-beta",opt)==0) {
		   UseBeta=TRUE;
		} else {
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 'c':		// -col=<int>
		if(sscanf(opt,"-col=%d",&KeyCol) != 1 || KeyCol < 1){
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 'D':		// -D<float>
		if(sscanf(opt,"-D%f",&dmax) != 1 || dmax > 100 || dmax < 0){
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 'd':		// -d<float>
		if(sscanf(opt,"-d%f",&HA_dmax) != 1 || HA_dmax > 100 || HA_dmax < 0){
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 'm':		// -minvar=, -maxdist=, -mindist= (tried in that order)
		if(sscanf(opt,"-minvar=%lf",&MinVar) == 1){
		   if(MinVar < 0.0) print_error(P2C_USAGE_START);
		} else if(sscanf(opt,"-maxdist=%d",&MaxMeanDist) == 1){
		   if(MaxMeanDist < 1) print_error(P2C_USAGE_START);
		} else if(sscanf(opt,"-mindist=%d",&MinDist) == 1){
		   if(MinDist < 1) print_error(P2C_USAGE_START);
		} else {
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 'R':		// -Range=<int1>:<int2>  (Begin may equal End)
		if(sscanf(opt,"-Range=%d:%d",&Begin,&End) != 2 || Begin > End || Begin <= 0){
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 'r':		// -range=<int1>:<int2>  (begin must be below end)
		if(sscanf(opt,"-range=%d:%d",&begin,&end) != 2 || begin >= end || begin <= 0){
		   print_error(P2C_USAGE_START);
		}
		break;

	     case 's':		// -seqdist=<int>, -show=<int1>:<int2>
		if(sscanf(opt,"-seqdist=%d",&MinDistInSeq) == 1){
		   if(MinDistInSeq < 1) print_error(P2C_USAGE_START);
		} else if(sscanf(opt,"-show=%d:%d",&K1,&K2) == 2){
		   if(K1 > K2){
			// keep the pair ordered so that K1 < K2.
			Int4 swap_tmp=K1; K1=K2; K2=swap_tmp;
		   } else if(K1 == K2){
			print_error(P2C_USAGE_START);
		   }
		} else {
		   print_error(P2C_USAGE_START);
		}
		break;

	     default:
		print_error(P2C_USAGE_START);
	   }
	}
}

Int4	*bp2dc_typ::MapSeqToSqAln(Int4 &Row)
//************************ 3. Find dca sequences in cma file ***************************
//************************ and get mapping between seqs ***************************
// Looks for the first sequence of dcaP among the sequences of the input
// alignments IN_CMA[Number-1] ... IN_CMA[1] (the last alignment, IN_CMA[Number],
// is never searched) and, on the first match, builds a map from alignment
// column to residue position in that dca sequence.
//   Row (out): set to the index of the alignment that matched, or 0 if none.
//   returns:   an array allocated with NEW() (NumCol+3 entries) in which entry
//              'col' holds the dca sequence position for that column; the caller
//              owns it.  Returns 0 if the sequence is missing, shorter than 75% of
//              the alignment length, or matches nothing.
// The mapping is also printed to stderr before returning.
// Note that only the first sequence in dcaP is examined (S=1).
{
   // NOTE(review): several of these locals (I,C,R,dca_real,os_dca,s,Score,...) are
   // not used by the enabled code.
   Int4	I,C,i,j,c1,c2,col,real,R,dca_real,os,os_cma,os_dca,N,S,s,sq,A;
   Int4	NumCol=LengthCMSA(1,IN_CMA[1]);
   // a candidate must cover at least 75% of the alignment length.
   Int4 MinSeqOverlap = (Int4) ceil(LengthCMSA(1,IN_CMA[1])* 0.75);
   e_type dcaE,cmaE,*csq;
   char str[58];
   Int4 Score;

#if 0
   	NEW(csq,Number +3, e_type);
	for(A=1; A < Number; A++){ if(IN_CMA[A]) csq[A]=MkConsensusCMSA(IN_CMA[A]); }
#endif
	S=1; Row=0;
	//======================== Find column residue positions in Full dca seq =============
	dcaE=SeqSetE(S,dcaP);
	if(dcaE==0 || LenSeq(dcaE) < MinSeqOverlap){ return 0; }
	// fprintf(stderr,"******************** Set %d (dca[%d][%d]) ********************\n",S,I,C);
// PutSeq(stderr,dcaE,AB);
	//*************** check over all CMA files except last, reject subgroup. ****************
	for(A=Number-1; A > 0; A--)	// search backwards to favor subset assignments...
	{
	     // note: 'cma' is not declared in this function (a member, presumably).
	     cma=IN_CMA[A]; 
	     if(cma==0){ continue; } // there were no sequences in this Misc set; no need to check.
	     assert(LengthCMSA(1,cma) == NumCol); N = NumSeqsCMSA(cma);
	     // if(FastAlnSeqSW(12,4,csq[A],dcaE,AB) < 10) continue;	// if 
	     for(sq=1; sq <= N; sq++){	//========== looking through sequences in alignment. ======
	  	cmaE = TrueSeqCMSA(sq,cma);	
		if(LenSeq(cmaE) < MinSeqOverlap){ continue; }
		Int4 NumX,adjust=0,MaxNumX;
		// char rtn=IsSameSeqFast(dcaE,cmaE,&os,&NumX,MinSeqOverlap); // ignores 'X' residues...
		// rtn: 0 = no match; 1 or 2 = which sequence's N-terminus lies inside the
		// other (see the 'adjust' assignments below); os = the offset between them.
		char rtn=IsSameSeqFastX(dcaE,cmaE,&os,&NumX,MinSeqOverlap); // ignores 'X' residues...
		if(rtn==0) continue; 
		MaxNumX=(Int4)floor(((double) MinSeqOverlap*0.33));
		// fprintf(stderr," !!!!!!! IsSameSeq(): Set = %d; sq=%d; os=%d; NumX = %d (%d) !!!!!!!\n",
		//	A,sq,os,NumX,MaxNumX);
		// NOTE(review): the threshold is 33% of MinSeqOverlap, not the two thirds
		// the next comment mentions.
		if(NumX > MaxNumX) continue;			// Ignore if 2/3rds of residues are 'X's
		// 'str' is filled here but only used by the commented-out diagnostic below.
		StrSeqID(str,50,dcaE); 
		// fprintf(stderr," a match: %s --> \"%s\"(%d)\n",str,NameCMSA(cma),A);
		// Fixed this so that overhanging regions within sequences are allowed.
		// 'adjust' converts a position in cmaE into the matching position in dcaE.
		if(rtn == 1) adjust = -os;	// dcaE N-terminus starts within cmaE.
		else if(rtn == 2) adjust = os;	// cmaE N-terminus starts within dcaE.
		// NOTE(review): the message below names p2c_typ::MapSqAlnToStruct, not this function.
		else print_error("p2c_typ::MapSqAlnToStruct: this should not happen!");
		os_cma=OffSetSeq(cmaE); // Score=PseudoAlnScoreSqToCMSA(csq[A],sq,cma);
		// Columns skipped below keep their initial value; the caller treats 0 as
		// "unmapped", which assumes NEW() zero-fills -- TODO confirm.
		Int4 *TmpColToSeq; NEW(TmpColToSeq ,NumCol + 3, Int4); // temporary array for dcaE.
		for(col = 1; col <= NumCol; col++){
			if(IsDeletedCMSA(1,sq,col,cma)) { continue; } // assumes single block
// WARNING!!!: need to fix IsDeletedCMSA(sq,col,cma) within cmsa.cc !!!!  afn: 12_27_2010.
			// if(IsDeletedCMSA(sq,col,cma)) continue; // this function has problems!!!
			// NOTE: TruePosCMSA() returns the position in real seq w/o offset
			// col is position in block 1 (assumes only one block) within fake seq
			i=TruePosCMSA(sq,col,cma);	// ignores offset... 
			if(i < 1 || RealToFakeCMSA(sq,os_cma+i,cma) == 0){ continue; }
			          // ^ this == 0 if no corresponding position in fake seq.
			j = i+adjust;         // <-- corresponding position in dcaE;
			if(!(j > 0 && j <= LenSeq(dcaE))){ continue; }
					// ^ implies that dcaE lacks these positions
			// the two sequences must agree at this position unless either residue is 'X'.
			c1=AlphaChar(ResSeq(i,cmaE),AB); c2=AlphaChar(ResSeq(j,dcaE),AB);
			assert(c1 == 'X' || c2 == 'X' || c1 == c2);
			TmpColToSeq[col]=j;	// position within full dca sequence.
			// assert(TmpColToSeq[col]==j); // run this with old method turned on as a check.
		}
		// Diagnostic dump of the map: "<col>: <residue><position>" per column.
		// NOTE(review): for unmapped columns this evaluates ResSeq(0,dcaE).
                for(col = 1; col <= NumCol; col++){
                          c1=AlphaChar(ResSeq(TmpColToSeq[col],dcaE),AB);
                          fprintf(stderr,"%d: %c%d\n",col,c1,TmpColToSeq[col]);
                } Row=A;
		// first match wins: report its alignment index and hand the map to the caller.
		return TmpColToSeq;
#if 0
		if(AddColToSeq(S,A,TmpColToSeq)){	// adds columns to Col2FullSeq[S][R]=TmpColToSeq;
		} else free(TmpColToSeq); 
#endif
	     } 
        }
	//=================== Find column residue positions in each dca structure =============
	// for(A=1; A < Number; A++) if(csq[A]) NilSeq(csq[A]); free(csq);
	return 0;
}

void    bp2dc_typ::ResidueToName(char r, char *str)
// Writes the three-letter residue name for the one-letter amino-acid code r
// into str, or "XXX" for any other character.  Exactly four bytes are
// written (three letters plus the terminating NUL), so str must have room
// for at least four chars.
{
        // one_letter[k] corresponds to three_letter[k].
        static const char one_letter[] = "ACDEFGHIKLMNPQRSTVWY";
        static const char *const three_letter[] = {
            "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
            "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"
        };

        const char *name = "XXX";       // fallback for unrecognized codes.
        for(int k = 0; one_letter[k] != 0; k++){
            if(one_letter[k] == r){ name = three_letter[k]; break; }
        }
        strncpy(str,name,4);
}

void	bp2dc_typ::PrintKLST_Files( )
{
   //************* Print out vsi files for each set.
   Int4 x,p,n,i,j,S,Row,vsi_number=0,NumCol=LengthCMSA(1,IN_CMA[1]);
   char str[150],side_color[]="WMROYGCBDLMROYGCBDLMROYGCBDLDDDDDDDDDDDDDDDDDDD";
   Int4 MaxColor=40;
   double maxdist=4.0;
   set_typ SetP=0;
   e_type dcaE=SeqSetE(1,dcaP);

   if(SideColors){
	for(j=0,i=1; isalpha(SideColors[j]); j++,i++){
		side_color[i]=SideColors[j]; if(i >= MaxColor) break;
	}
   }
   Int4 *ColToSeq=this->MapSeqToSqAln(Row);
   if(ColToSeq==0) print_error("bp2dc failed to find a match\n");
   FILE *fp=open_file(SeqFile,".pdb","w");
   fprintf(fp,"HEADER    junk\n");
   for(i=1; i <= LenSeq(dcaE); i++){
         char Name[9]; ResidueToName(AlphaChar(ResSeq(i,dcaE),AB),Name);
         sprintf(str,"ATOM  %5d  CA  %3s A%4d    %8.3f%8.3f%8.3f  1.00 50.00\n", i,Name,i,0,0,0);
         // MkAtom(str);
         fprintf(fp,"%s",str);
   } fprintf(fp,"TER   %5d\nEND\n",i); fclose(fp);
   PutSeq(stderr,dcaE,AB); 

   FILE *vfp=open_file(SeqFile,"_pdb.klst","w");
   fprintf(vfp,"file: ./%s.pdb\n",SeqFile);
   fprintf(vfp,"chain: A.\n");
#if 1
  	S=1; 
 	char vsifile[200];
	vsi_number++;
	// fprintf(vfp,"~$=%d.\n",vsi_number); fflush(vfp);

	Int4 X;
        for(X=0,x=ptrn->NumPttrns;  x > 0; x--)	// stored backwards...
	{
	    Int4 Col=ptrn->PttrnCategory[x];
	    if(hpt->Cell(Row, Col) != '+') continue; else X++;
	    // print out subgroups before supergroups to ensure proper color.
	    if(X > 40) continue;  // ran out of colors.
	    if(X==1){
	      Int4 start=1,TheEnd=NumCol,num_ins,num_del;
	      for(j=1; ColToSeq[j] == 0 && j <= TheEnd; j++){ } start=j;
	      for(j=TheEnd; ColToSeq[j] == 0 && j > 0; j--){ } TheEnd=j;
	      for(j=1,num_del=0; j <= NumCol; j++){ if(ColToSeq[j] == 0) num_del++; }
	      Int4 nAln,strt=ColToSeq[start],end=ColToSeq[TheEnd];
	      nAln=NumCol - num_del; num_ins=(end-strt+1) - nAln;
#if 1	// Find residue Set...
	      SetP=MakeSet(end +9);
	      for(j=start; j <= TheEnd; j++){ 
		Int4 res_j= ColToSeq[j];
		if(res_j != 0){ assert(res_j >= strt && res_j <= end);  AddSet(res_j,SetP); }
	      }
	      Int4 low,high;
	      char *rtn=RtnStrSet(SetP,low,high);
	      fprintf(stderr,"Set string = \"%s\"; range: %d-%d\n",rtn,low,high);
	      NilSet(SetP); SetP=0;
	      fprintf(vfp,"range: %s(%d;%d).\n",rtn,nAln,num_ins); free(rtn);
#else
	      fprintf(vfp,"range: %d_%d(%d;%d).\n",strt,end,nAln,num_ins);
#endif
	    }
            BooLean first=TRUE;
	    for(p=1; p <= ptrn->NumPttrnRes[x]; p++){
	        // if(p > 10) continue;  // skip less significant.
		Int4 col=ptrn->PosPttrn[x][p];
		Int4 site=ColToSeq[col];
		if(site == 0) continue;  // not visible within structures.
		assert(site <= LenSeq(dcaE));
		Int4 r=ResSeq(site,dcaE);
		char Res=AlphaChar(r,AB);
		if(strchr(ptrn->PttrnRes[x][p],Res)){
		   // Int4 X=ptrn->NumPttrns-x+1; 
		   if(first){ fprintf(vfp,"%c=%d",side_color[X],site); first=FALSE; }
		   else fprintf(vfp,",%d",site);
		} else {	// print mismatches in white.
		   // if no higher level pattern, then print out.
		   if(first){ fprintf(vfp,"%c=%d",side_color[X],site); first=FALSE; }
		   else fprintf(vfp,",%d",site);
	       	} 
	  } fprintf(vfp,"\n");
	} fprintf(vfp,"\n");
#endif
   fclose(vfp);
}

