/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "psc_typ.h"

void	p2c_typ::Init()
{
#if 1
	clr_typ *clr = new clr_typ();
	this->trace_color=clr->RtnTraceColorsPyMOL();
	this->side_color=clr->RtnSideColorsPyMOL();
	delete clr;
#else
	static char color_trace[]="WYROMGCBPDLWWWWWWWWWWWWWWWWWWDDDDDDDDDDDDDDDDDDD";
	static char color_side[]="WYROMGCBPTNLWWWWWWWWWWWWWWWWWWDDDDDDDDDDDDDDDDDDD";
	for(Int4 i=0; color_trace[i]; i++) this->trace_color[i]=color_trace[i];
	for(Int4 i=0; color_side[i]; i++) this->side_color[i]=color_side[i];
#endif
	SeqIDs=0; Diagnostic=0;
	LastARSD=0.0; LastNumData=0;
	FindBest=FALSE; K1=0,K2=0; MaxMeanDist=20; MaxSqDist=1000; MinDistInSeq=8;
        MinDist=3;      // what about inserts in some proteins?
        begin=0,end=0; Begin=0,End=0; KeyCol=0; MinVar=0.0; bin_size=1.0;
        HA_dmax=0.0, dmax=0.0;
	Target=40; MaxDataPoints=2000;
}

char    *p2c_typ::PathPDB2SeqID(char *pdbstr,char chn){  // for VSI file.
        char *strX=strstr(pdbstr,"_H.pdb");
        if(strX == NULL) print_error("pdb_paths input file syntax error");
        strX -= 4;
        char str1[10],str0[15];
        if(sscanf(strX,"%4s_H.pdb",str1) == 1) {
           for(Int4 i=0; i < 4; i++) if(islower(str1[i])) str1[i]=toupper(str1[i]);
           sprintf(str0,"%s_%c",str1,chn);
           // fprintf(stderr," %s\n",str0);
        } else print_error("pdb_paths input file syntax error");
        return AllocString(str0);
}

// Release the SeqIDs table (array of rows of strings) and reset SeqIDs to 0.
// PrintVSI_Files() allocates and fills row SeqIDs[vsi_number] for the
// vsi_number-th *relevant* set, so a row's length is not that of
// esc->PDB_SetI[S] for the same index.  Each row is therefore walked up to its
// own terminating null entry instead of being driven by esc->PDB_SetI[S].
// NOTE(review): relies on NEWP/NEWPP returning zero-filled storage (the
// original null checks made the same assumption) and on rows being filled
// contiguously from index 1 -- confirm no other writer leaves gaps.
void    p2c_typ::FreeSeqIDs()
{
	if(SeqIDs == 0) return;
	for(Int4 S=1; S <=esc->NumPDB_Sets; S++){
	    if(SeqIDs[S] == 0) continue;		// row never allocated.
	    for(Int4 j=1; SeqIDs[S][j] != 0; j++){ free(SeqIDs[S][j]); }
	    free(SeqIDs[S]);
	}
	free(SeqIDs); SeqIDs=0;
}

void    p2c_typ::Free()
{
	Int4 S,R,I,C,j;
	if(Diagnostic) free(Diagnostic);
	for(S=1; S <=esc->NumPDB_Sets; S++){
           for(R=1; R <= NumFullRpts[S]; R++){ free(Col2FullSeq[S][R]); }
           NilSet(RelevantSet[S]); free(RptCategory[S]); free(Col2FullSeq[S]);
	} this->FreeSeqIDs();
	free(RelevantSet); free(RptCategory); free(Col2FullSeq); free(NumFullRpts);
	for(I=1; I <=mpdb->NumPDB; I++){
           for(C=1; C <=nChainsPDB(mpdb->pdb[I]); C++){
              for(R=1; R <= NumRpts[I][C]; R++){
                 if(Col2pdbSeq[I][C][R]) free(Col2pdbSeq[I][C][R]);
              } if(RelevantSeq[I][C]) free(RelevantSeq[I][C]);
              if(Col2pdbSeq[I][C]) free(Col2pdbSeq[I][C]);
           } free(RelevantSeq[I]); free(Col2pdbSeq[I]); free(NumRpts[I]);
	} free(RelevantSeq); free(Col2pdbSeq); free(NumRpts);
}

// Register a column-to-residue map for pdb set S.
//   S        : pdb set index.
//   A        : value stored in RptCategory[S][R] for the new repeat.
//   ColToSeq : array indexed 1..NumCol(); 0 marks an unmapped column.
// The candidate is compared with every map already stored for S; two maps
// conflict only where both are non-zero and differ.  If some stored map has no
// conflict the candidate is treated as already present and FALSE is returned
// (the caller keeps ownership of ColToSeq).  Otherwise the pointer is stored as
// a new repeat, NumFullRpts[S] is advanced, and TRUE is returned.
BooLean	p2c_typ::AddColToSeq(Int4 S, Int4 A, Int4 *ColToSeq)
{
	const Int4 ncols=NumCol( );
	Int4 slot=1;
	while(slot <= NumFullRpts[S]){
		// fprintf(stderr,"============ R = %d ===========\n",slot);
		Int4 *known=Col2FullSeq[S][slot];
		Int4 c=1;
		while(c <= ncols){
		   if(ColToSeq[c] && known[c] && ColToSeq[c] != known[c]) break;
		   c++;
		}
		if(c > ncols){ return FALSE; }	// no conflicting column: same repeat.
		slot++;
	}
	// slot is now one past the previously stored repeats.
	NumFullRpts[S] = slot;
// WARNING; This is recognizing 'phantom' repeats.
	if(slot > MAX_NUMBER_INTERNAL_RPTS){
	    fprintf(stderr,"%d repeats --> ",slot);
	    print_error("Too many internal repeats!"); 
	}
	RptCategory[S][slot]=A;
	Col2FullSeq[S][slot]=ColToSeq;
	return TRUE;
}

// Register a column-to-residue map for the i-th member of pdb set S.
//   i        : member index within set S (1..esc->NumPDB_Set[S]); used to look
//              up the pdb file index and chain index of that member.
//   A        : value stored in RelevantSeq[file][chain][R] for the new repeat.
//   ColToSeq : array indexed 1..NumCol(); 0 marks an unmapped column.
// Returns FALSE (storing nothing) when an already stored map for that chain
// agrees with the candidate wherever both are non-zero; otherwise stores the
// pointer as a new repeat, advances NumRpts[file][chain] and returns TRUE.
BooLean	p2c_typ::AddColToSeq(Int4 i, Int4 S, Int4 A, Int4 *ColToSeq)
{
	const Int4 ncols=NumCol( );
	assert(i > 0 && i <= esc->NumPDB_Set[S]);
	const Int4 file=esc->PDB_SetI[S][i];
	const Int4 chain=esc->PDB_SetC[S][i];
	Int4 slot=1;
	while(slot <= NumRpts[file][chain]){
		Int4 *known=Col2pdbSeq[file][chain][slot];
		Int4 c=1;
		while(c <= ncols){
		   // zero entries (e.g. 'X' residue positions) never count as a conflict.
		   if(ColToSeq[c] && known[c] && ColToSeq[c] != known[c]) break;
		   c++;
		}
		if(c > ncols){ return FALSE; }	// compatible with a stored repeat.
		slot++;
	}
	NumRpts[file][chain] = slot;
	if(slot > MAX_NUMBER_INTERNAL_RPTS) print_error("Too many internal repeats"); 
	Col2pdbSeq[file][chain][slot]=ColToSeq;
	// AddSet(A,RelevantSet[S]);
	RelevantSeq[file][chain][slot]=A;
	return TRUE;
}

// Compare two residue ranges position by position.
//   [sup, sup_end) and [sub, sub_end) are walked in lock step until either
//   range is exhausted.
//   AB : when non-null, every compared pair is traced to stderr.
// Returns -1 as soon as two residues differ and neither is 0 (residue code 0
// is treated as unknown, 'X'); otherwise returns the number of positions at
// which the residues differ because exactly one of them is 0.
// The obsolete 'register' specifiers were dropped: the keyword was removed in
// C++17 and has no effect on modern compilers.
static Int4 is_same_seq_fastY(unsigned char *sup, unsigned char *sup_end,
        unsigned char *sub, unsigned char *sub_end,a_type AB)
{
        Int4 NumX=0;
        Int4 i=1;	// running position, used only for the debug trace.
        while(sup != sup_end && sub != sub_end){
            if(AB) fprintf(stderr,"%d. %c = %c\n",i++,AlphaChar(*sup,AB),AlphaChar(*sub,AB));
            if(*sup != *sub){
                if(*sup!=0 && *sub!=0){ return -1; } else NumX++;
            } sup++,sub++;
        } return NumX;
}


// Decide whether E1 and E2 are the same sequence up to an offset, ignoring
// unknown residues (code 0) at either end and tolerating them inside.
//   Start      : out; on a match, set to the offset between the two sequences
//                (see return values).  Not written when 0 is returned.
//   RtnNumX    : out; number of positions matched only because one side is 0.
//   MinOverlap : minimum overlap length required for a match.
//   AB         : passed to is_same_seq_fastY(); non-null enables tracing.
// Returns 1 if seq E1 lacks an N-terminal extension (E1 starts within E2),
//         2 if seq E2 lacks an N-terminal extension (E2 starts within E1),
//         0 if no acceptable overlap was found.
// An empty sequence, or one consisting only of unknown residues, yields 0
// instead of scanning past the ends of the sequence.
char    IsSameSeqFastY(e_type E1, e_type E2,Int4 *Start,Int4 *RtnNumX,Int4 MinOverlap,a_type AB)
{
        Int4    st,end,NumX;
        unsigned char   *sup,*sub;      // superseq and subseq
	const Int4 len1=LenSeq(E1),len2=LenSeq(E2);

	*RtnNumX=0;

	// Ignore X's on either ends...
	Int4 start1=0,end1=0,start2=0,end2=0;
        for(start1=1; start1 <= len1 && ResSeq(start1,E1)==0; start1++) ;
        for(start2=1; start2 <= len2 && ResSeq(start2,E2)==0; start2++) ;
	if(start1 > len1 || start2 > len2) return 0;	// nothing but X's (or empty).
	// A known residue exists at start1/start2, so these scans stop there at the latest.
        for(end1=len1; ResSeq(end1,E1)==0; end1--) ;
        for(end2=len2; ResSeq(end2,E2)==0; end2--) ;

// fprintf(stderr,"start1=%d; end1=%d; start2=%d; end2=%d\n",start1,end1,start2,end2); 
	if(end1 < MinOverlap || end2 < MinOverlap) return 0;
	
	end=end2-MinOverlap+1;
	//          st=end                            st=1
	//    E2 ----+---------+   <-- to...from <--   +---------+-------  E2
	//           |--MinOL--|  	               |--MinOL--|
	//    E1     +---------+-----                  +---------+------   E1
	if(end > 0){
   	  sup=SeqPtr(E2); sub=SeqPtr(E1);
          for(st=start2; st<= end; st++){
	    NumX=is_same_seq_fastY(sup+st,sup+end2+1,sub+start1,sub+end1+1,AB);
	    if(NumX >=0){			// sequences match!
	      if((end-st < NumX)) continue; // could be due to a string of X residues on end.
// fprintf(stderr,"st=%d; start1=%d\n",st,start1);
	      *Start=(st-start1); *RtnNumX=NumX; return 1; 
	    }
          }
	}
	//  E2  +---------+-------                         +---------+-------  E2 
	//      |--MinOL--|                                |--MinOL--|
	//  E1  +---------+------  --> from..to -->  ------+---------+         E1
	//     st=1                                        st=end
	end=end1-MinOverlap+1;
	if(end > 0){
	  sup=SeqPtr(E1); sub=SeqPtr(E2);
          for(st=start1; st<= end; st++){
	    NumX=is_same_seq_fastY(sup+st,sup+end1+1,sub+start2,sub+end2+1,AB);
	    if(NumX >=0){			// sequences match!
	      if((end-st < NumX)) continue;	// could be due to a string of X residues on end.
// fprintf(stderr,"st=%d; start1=%d\n",st,start2);
	      *Start=(st-start2); *RtnNumX=NumX; return 2; 
	    }
	  }
	} return 0;
}

void    p2c_typ::MapSqAlnToStruct( )
// Call only after calling esc_typ( )
//************************ 3. Find pdb sequences in cma file ***************************
//************************ and get mapping between seqs ***************************
// Allocates the per-set tables (RelevantSet, RptCategory, Col2FullSeq, NumFullRpts)
// and the per-file/per-chain tables (RelevantSeq, Col2pdbSeq, NumRpts), then:
//   1. For every pdb set S, compares the set's full sequence with every sequence
//      of every input alignment IN_CMA[A] (A = Number-1 .. 1).  For each match a
//      column --> full-sequence-position map is built and stored via
//      AddColToSeq(S,A,...); A is added to RelevantSet[S].
//   2. For every stored repeat R of S, transfers that map to each pdb chain of
//      the set whose sequence matches the aligned sub-sequence, via
//      AddColToSeq(id,S,A,...).
// Counts of skipped/accepted positions are summarized in the Diagnostic string.
// Note that the alignment is assumed to consist of a single block (see the
// IsDeletedCMSA() call below).
{
   Int4	I,C,i,j,c1,c2,col,real,R,pdb_real,os,os_cma,os_pdb,N,S,s,sq,A;
   Int4	NumCol=LengthCMSA(1,IN_CMA[1]);	// local; hides the NumCol() method in this function.
   e_type pdbE,cmaE,*csq;
   char str[58];
   Int4 Score;

   NEW(RelevantSet,esc->NumPDB_Sets + 5,set_typ); 
   NEWP(RptCategory,esc->NumPDB_Sets + 5,Int4);
   NEWPP(Col2FullSeq,esc->NumPDB_Sets + 5,Int4);
   NEW(NumFullRpts,esc->NumPDB_Sets + 5,Int4);
   for(S=1; S <=esc->NumPDB_Sets; S++){
        NEW(RptCategory[S],MAX_NUMBER_INTERNAL_RPTS + 5,Int4);
   	RelevantSet[S] = MakeSet(Number + 5); ClearSet(RelevantSet[S]);
   	NEWP(Col2FullSeq[S],MAX_NUMBER_INTERNAL_RPTS+ 5,Int4);
   }
   NEWPP(RelevantSeq,mpdb->NumPDB + 5,Int4); // is there a corresponding seq. in cma file
   NEWP3(Col2pdbSeq,MAX_NUM_PDB_INPUT_FILES + 5,Int4); // mapping of pdb residue # to cma column #
   NEWP(NumRpts,mpdb->NumPDB +5, Int4);
   for(I=1; I <=mpdb->NumPDB; I++){
	NEWP(RelevantSeq[I],nChainsPDB(mpdb->pdb[I]) + 3, Int4);
	NEWPP(Col2pdbSeq[I],nChainsPDB(mpdb->pdb[I]) + 3, Int4);
   	NEW(NumRpts[I],nChainsPDB(mpdb->pdb[I]) +5, Int4);	// NumRpts[I][C] = 0;
   	for(C=1; C <=nChainsPDB(mpdb->pdb[I]); C++){
		pdbE=mpdb->pdbSeq[I][C];
		// Chains shorter than the minimum overlap get no repeat tables.
		if(pdbE && LenSeq(pdbE) >= esc->MinSeqOverlap){
		    NEW(RelevantSeq[I][C],MAX_NUMBER_INTERNAL_RPTS + 3, Int4);
		    NEWP(Col2pdbSeq[I][C],MAX_NUMBER_INTERNAL_RPTS +3, Int4);
		    // NEWP(Col2pdbSeq[I][C],NumCol + 3, Int4);
		}
	}
   }
#if 1	// Diagnostics...
   enum fault { deleted=0, disjoint=1, missing=2, x_over=3, gaps=4, off=5, okay=6, highX=7 };
   Int4	Fault[10]; for(i=0; i <= 9; i++) Fault[i]=0;
#endif
   NEW(csq,Number +3, e_type);
   for(A=1; A < Number; A++){ if(IN_CMA[A]) csq[A]=MkConsensusCMSA(IN_CMA[A]); }
   for(S=1; S <=esc->NumPDB_Sets; S++){
	//======================== Find column residue positions in Full pdb seq =============
	pdbE=esc->FullSeq[S];
#if 0	// debug...
PutShortSeqID(stderr,pdbE);
fprintf(stderr,":\n ******************** Set %d/%d (pdb[%d][%d]) Number=%d. ********************\n",
		S,esc->NumPDB_Sets,I,C,Number);
fprintf(stderr," leng=%d; minoverlap=%d. ********************\n", LenSeq(pdbE),esc->MinSeqOverlap);
#endif
	if(pdbE==0 || LenSeq(pdbE) < esc->MinSeqOverlap){ continue; }
#if 0
	PutShortSeqID(stderr,pdbE);
	fprintf(stderr,": ******************** Set %d/%d (pdb[%d][%d]) Number=%d. ********************\n",
		S,esc->NumPDB_Sets,I,C,Number);
#endif
	//*************** check over all CMA files except last, reject subgroup. ****************
	for(A=Number-1; A > 0; A--)	// search backwards to favor subset assignments...
	{
	     cma=IN_CMA[A]; 
	     if(cma==0){ continue; } // there were no sequences in this Misc set; no need to check.
	     assert(LengthCMSA(1,cma) == NumCol); N = NumSeqsCMSA(cma);
	     // if(FastAlnSeqSW(12,4,csq[A],pdbE,AB) < 10) continue;	// if 
	     for(sq=1; sq <= N; sq++){	//========== looking through sequences in alignment. ======
	  	cmaE = TrueSeqCMSA(sq,cma);	
		if(LenSeq(cmaE) < esc->MinSeqOverlap){ continue; }
		Int4 NumX,adjust=0,MaxNumX;
		char rtn=0;
#if 0	// DEBUG.
		// rtn=IsSameSeqFast(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap); // ignores 'X' residues...
		// rtn=IsSameSeqFastX(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap); // ignores 'X' residues...
if(A==2 && sq==1){
		rtn=IsSameSeqFastY(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap,AB); 
	fprintf(stderr," !!!!!!! IsSameSeq(): Set = %d; sq=%d; os=%d; NumX = %d (%d); rtn=%d !!!!!!!\n",
			A,sq,os,NumX,(Int4)floor(((double) esc->MinSeqOverlap*0.33)),rtn);
	PutSeq(stderr,pdbE,AB); PutSeq(stderr,cmaE,AB);
	if(rtn==2) PutDiagonalSeq(stderr,os,pdbE,cmaE,AB);
       	else PutDiagonalSeq(stderr,os,cmaE,pdbE,AB);
} else {
		rtn=IsSameSeqFastY(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap,AB); 
		// rtn=IsSameSeqFastX(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap);
}
#else
		// rtn=IsSameSeqFastY(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap,AB); 
		rtn=IsSameSeqFastY(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap,0); 
#endif
#if 0
char sTr[10],stR[10]; StrSeqID(sTr,6, pdbE); StrSeqID(stR,6, cmaE);
if(strcmp(sTr,stR)==0){ 
	PutShortSeqID(stderr,pdbE); PutShortSeqID(stderr,cmaE); fprintf(stderr,": rtn=%d.\n",rtn); 
	if(rtn==0){
	   AlnSeqSW(stderr,11,1,cmaE,pdbE,AB);
	   PutSeq(stderr,pdbE,AB);
	}
}
#endif
		if(rtn==0) continue; 
		MaxNumX=(Int4)floor(((double) esc->MinSeqOverlap*0.33));
#if 0	// DEBUG...
		fprintf(stderr," !!!!!!! IsSameSeq(): Set = %d; sq=%d; os=%d; NumX = %d (%d) rtn=%d !!!!!!!\n",
			A,sq,os,NumX,MaxNumX,rtn);
		if(rtn==2) PutDiagonalSeq(stderr,os,pdbE,cmaE,AB);
        	else PutDiagonalSeq(stderr,os,cmaE,pdbE,AB);
#endif
		if(NumX > MaxNumX){
		    PutShortSeqID(stderr,pdbE); fprintf(stderr," --> "); 
		    PutShortSeqID(stderr,cmaE); fprintf(stderr,"\n");
		    fprintf(stderr,"skipping "); PutShortSeqID(stderr,cmaE);
		    fprintf(stderr," due to too many unknown residues\n");
		    continue;			// Ignore if 2/3rds of residues are 'X's
		}
	        if(ColPairSet){
		  assert(Number== 2);
		  if(MapPdb2cmaSq==0){ NEWP(MapPdb2cmaSq,mpdb->NumPDB + 5,Int4); }
		  for(Int4 id=1; id <= esc->NumPDB_Set[S]; id++){	// label the rest of sequences.
		    I=esc->PDB_SetI[S][id]; C=esc->PDB_SetC[S][id]; // C == pdb chain; I = pdb file ID.
		    assert(I <= mpdb->NumPDB && I > 0); 
		    pdb_typ P=mpdb->pdb[I]; assert(C <= nChainsPDB(P) && C > 0);
		    if(MapPdb2cmaSq[I]==0) NEW(MapPdb2cmaSq[I],nChainsPDB(P)+5,Int4);
	   	    e_type pdbIC=mpdb->pdbSeq[I][C]; assert(pdbIC);
		    Int4 os2;
		    // char rtn2=IsSameSeqFastX(pdbIC,cmaE,&os2,&NumX,esc->MinSeqOverlap); // ignoring 'X' residues...
		    // char rtn2=IsSameSeqFastY(pdbIC,cmaE,&os2,&NumX,esc->MinSeqOverlap,AB); // ignoring 'X' residues...
		    char rtn2=IsSameSeqFastY(pdbIC,cmaE,&os2,&NumX,esc->MinSeqOverlap,0); // ignoring 'X' residues...
		    if(rtn2 && NumX <= MaxNumX){ MapPdb2cmaSq[I][C]=sq; }
	          }
	        }
		StrSeqID(str,50,pdbE); 
		// fprintf(stderr," a match: %s --> \"%s\"(%d)\n",str,NameCMSA(cma),A);
		// Fixed this so that overhanging regions within sequences are allowed.
		if(rtn == 1) adjust = -os;	// pdbE N-terminus starts within cmaE.
		else if(rtn == 2) adjust = os;	// cmaE N-terminus starts within pdbE.
		else print_error("p2c_typ::MapSqAlnToStruct: this should not happen!");
		os_cma=OffSetSeq(cmaE); Score=PseudoAlnScoreSqToCMSA(csq[A],sq,cma);
		Int4 *TmpColToSeq; NEW(TmpColToSeq ,NumCol + 3, Int4); // temporary array for pdbE.
		for(col = 1; col <= NumCol; col++){
			if(IsDeletedCMSA(1,sq,col,cma)) { Fault[deleted]++; continue; } // assumes single block
// WARNING!!!: need to fix IsDeletedCMSA(sq,col,cma) within cmsa.cc !!!!  afn: 12_27_2010.
			// if(IsDeletedCMSA(sq,col,cma)) continue; // this function has problems!!!
			// NOTE: TruePosCMSA() returns the position in real seq w/o offset
			// col is position in block 1 (assumes only one block) within fake seq
			i=TruePosCMSA(sq,col,cma);	// ignores offset... 
			if(i < 1 || RealToFakeCMSA(sq,os_cma+i,cma) == 0){ Fault[disjoint]++; continue; }
			          // ^ this == 0 if no corresponding position in fake seq.
			j = i+adjust;         // <-- corresponding position in pdbE;
			if(!(j > 0 && j <= LenSeq(pdbE))){ Fault[missing]++; continue; }
					// ^ implies that pdbE lacks these positions
			c1=AlphaChar(ResSeq(i,cmaE),AB); c2=AlphaChar(ResSeq(j,pdbE),AB);
#if 1	// DEBUG..
			// Dump context before the assert below fires on a residue mismatch.
			if(!(c1 == 'X' || c2 == 'X' || c1 == c2)){
			   fprintf(stderr,"rtn=%d; os=%d; adjust=%d\n",rtn,os,adjust);
			   fprintf(stderr,"sq=%d; col=%d; %c%d..%c%d\n",sq,col,c1,i,c2,j);
			   AlnSeqSW(stderr,11,1,cmaE,pdbE,AB);
			   IsSameSeqFastY(pdbE,cmaE,&os,&NumX,esc->MinSeqOverlap,AB); // ignoring 'X' residues...
			}
#endif
			assert(c1 == 'X' || c2 == 'X' || c1 == c2);
			TmpColToSeq[col]=j;	// position within full pdb sequence.
			Fault[okay]++;
			// assert(TmpColToSeq[col]==j); // run this with old method turned on as a check.
		}
		if(AddColToSeq(S,A,TmpColToSeq)){	// adds columns to Col2FullSeq[S][R]=TmpColToSeq;
			AddSet(A,RelevantSet[S]);
			// StrSeqID(str, 50, cmaE); SeqIDs[S]=AllocString(str);
			// fprintf(stderr,"Added %d %d to RelevantSet\n",S,A);
			// Then A added to RelevantSet[S] & RptCategory[S][R]=A; increments NumFullRpts[S].
			// ^ this returns TRUE iff TmpCols are unique (not found for this pdb before).
			// TRUE implies a repeat has been added.
			// inserts next to deletions are a problem! Run "tweakcma -iron"
#if 0	// for debugging...can all be turned off.
			if(TmpColToSeq[NumCol]==0){
		 	   for(col = 1; col <= NumCol; col++){
				c1=AlphaChar(ResSeq(TmpColToSeq[col],pdbE),AB);
				fprintf(stderr,"%d: %c%d\n",col,c1,TmpColToSeq[col]);
			   }
			}
			c1=AlphaChar(ResSeq(TmpColToSeq[1],pdbE),AB);
			c2=AlphaChar(ResSeq(TmpColToSeq[NumCol],pdbE),AB);
			// PutSubSeq(stderr,TmpColToSeq[1],TmpColToSeq[NumCol],pdbE,AB);
			PutSeq(stderr,pdbE,AB); PutSeq(stderr,cmaE,AB);
			fprintf(stderr,
			    "!!!!!!!! IsSameSeq(): Set=%d; score=%d; os=%d; col(1)=%c%d; col(%d)=%c%d) !!!!!!\n",
				A,Score,os,c1,TmpColToSeq[1],NumCol,c2,TmpColToSeq[NumCol]);
			if(rtn==2) PutDiagonalSeq(stderr,os,pdbE,cmaE,AB);
        		else PutDiagonalSeq(stderr,os,cmaE,pdbE,AB);
#endif
			// break; // ignore any duplicate sequences that might be in cma
		} else {
			// fprintf(stderr,"Not Added: %d %d\n",S,A);
			free(TmpColToSeq); 	// not stored, so still owned here.
		}
	     }	// end sq loop. 
        } // end A loop.
	//=================== Find column residue positions in each pdb structure =============
	if(CardSet(RelevantSet[S]) > 0){	// Add the rest of the sequences in the set.
	   Int4		id,NumX,Rj,Start,End,x,y;	// Start/End are locals here (hide the members).
	   e_type	pdbIC,pdbS;
	   for(R=1; R <= NumFullRpts[S]; R++){
		A=RptCategory[S][R];
		//*************** Find Start and End for repeat R within pdb S ********************
		for(x=1; x <= NumCol; x++){ Start=Col2FullSeq[S][R][x];  if(Start !=0) break; }
 // if(x > NumCol) break;	// Temporary fix; need to see why this is occurring; run purify!!
		if(x > NumCol){ 	// no column of this repeat maps to the sequence.
#if 0
			PutSeq(stderr,pdbE,AB); 
			fprintf(stderr,"x=%d; NumCol=%d; S=%d; R=%d\n",x,NumCol,S,R);
#endif
			Fault[x_over]++; continue;
		} assert(x <= NumCol); 
		for(x=NumCol; x > 0; x--){ End=Col2FullSeq[S][R][x];  if(End!=0) break; }
		if(!(Start < End && End > 0)){
			// Start and End are now passed to match the six conversions in the format.
			fprintf(stderr,"x=%d; NumCol=%d; S=%d; R=%d; Start=%d; End=%d\n",
				x,NumCol,S,R,Start,End);
			Fault[off]++; continue;
		} assert(Start < End && End > 0);
		if(!(Start > 0 && End <= LenSeq(FullSeq[S]))){
			PutSeq(stderr,FullSeq[S],AB);
			fprintf(stderr,"x=%d; NumCol=%d; S=%d; R=%d; Start=%d; End=%d; LenSeq(S)=%d\n",
				x,NumCol,S,R,Start,End,LenSeq(FullSeq[S]));
			assert(Start > 0 && End <= LenSeq(FullSeq[S]));
	 	} pdbS=MkSubSeq(Start,End,FullSeq[S]);	// pdbS = subseq of aligned pdb seq.
		for(id=1; id <= esc->NumPDB_Set[S]; id++){	// label the rest of sequences.
		   I=esc->PDB_SetI[S][id]; C=esc->PDB_SetC[S][id]; // C == pdb chain; I = pdb file ID.
		   pdbIC=mpdb->pdbSeq[I][C]; assert(pdbIC);
		   // char rtn=IsSameSeqFast(pdbS,pdbIC,&os,&NumX,esc->MinSeqOverlap); // ignoring 'X' residues...
		   // char rtn=IsSameSeqFastX(pdbS,pdbIC,&os,&NumX,esc->MinSeqOverlap); // ignoring 'X' residues...
		   // char rtn=IsSameSeqFastY(pdbS,pdbIC,&os,&NumX,esc->MinSeqOverlap,AB); // ignoring 'X' residues...
		   char rtn=IsSameSeqFastY(pdbS,pdbIC,&os,&NumX,esc->MinSeqOverlap,0); // ignoring 'X' residues...
		   Int4 MaxNumX = (Int4) floor(((double) esc->MinSeqOverlap*0.33));
		   // Int4 MaxNumX = (Int4) floor(((double) LenSeq(pdbS)*0.25));
		   if(rtn && NumX <= MaxNumX){
			Int4 *TmpColToSeq; NEW(TmpColToSeq ,NumCol + 3, Int4); // temporary array for pdbIC
			for(col=1; col <= NumCol; col++){	//
				x = Col2FullSeq[S][R][col];
				if(x){
				    y = x - OffSetSeq(pdbIC); 
				    if(y < 1 || y > LenSeq(pdbIC)){ Fault[gaps]++; continue; }  // gaps at end...
				    if(ResSeq(y,pdbIC)){ TmpColToSeq[col]=y; }
				    unsigned char r_x,r_y;
				    r_y=ResSeq(y,pdbIC); r_x=ResSeq(x,FullSeq[S]);
				    if(r_x && r_y && r_x != r_y){
					PutSeq(stderr,FullSeq[S],AB);
					fprintf(stderr,
					  "x=%d;y=%d;NumCol=%d;S=%d;R=%d;Start=%d;End=%d;LenSeq(S)=%d\n",
                                		x,y,NumCol,S,R,Start,End,LenSeq(FullSeq[S]));
					this->DebugMapSqAln2Strct(stderr,S,C,R,I,TmpColToSeq,pdbS,pdbIC,pdbE);
					assert(ResSeq(y,pdbIC) == ResSeq(x,FullSeq[S]));
				    }
				}
		        }
			// assert(AddColToSeq(id,S,A,TmpColToSeq)); 
			// id is passed in in order to retrieve I, C to check for repeats.
			if(!AddColToSeq(id,S,A,TmpColToSeq)){ // sets Col2pdbSeq[I][C][R]= TmpColToSeq;
			   free(TmpColToSeq); continue; // just skip this; not sure that it matters?
			   // The statements below are unreachable (they follow the 'continue').
			   // In 'id' mode: if(TRUE) TmpCol
			   fprintf(stderr,"!!!!!!!!!!!!!!!!!! ERROR (%d:%s) !!!!!!!!!!!!!!!!!!\n",A,NameCMSA(cma));
			   this->DebugMapSqAln2Strct(stderr,S,C,R,I,TmpColToSeq,pdbS,pdbIC,pdbE);
			   print_error("p2c_typ::MapSqAlnToStruct( ) error: Redundant internal repeats?");
			} 
			// assert(AddColToSeq(id,S,A,Col2FullSeq[S][R])); 
		   } else if(rtn && NumX > MaxNumX) Fault[highX]++; 
		} NilSeq(pdbS);
	   }
	}
   }
   // Summarize the fault counters; note okay (Fault[6]) is printed last.
   char Str[300];
   sprintf(Str,"deleted=%d, disjoint=%d, missing=%d, x_over=%d, gaps=%d, off=%d; highX=%d;  okay=%d.\n",
			Fault[0],Fault[1],Fault[2],Fault[3],Fault[4],Fault[5],Fault[7],Fault[6]);
   Diagnostic=AllocString(Str);
   for(A=1; A < Number; A++) if(csq[A]) NilSeq(csq[A]); free(csq);
}

// Usage text printed (via print_error) by p2c_typ::GetArg() on a bad option.
// NOTE(review): -srch, -P and -maxsqdist are listed here but are not parsed by
// the GetArg() in this file, and several "default" values differ from those
// assigned in p2c_typ::Init() -- confirm against the other callers.
#define P2C_USAGE_START \
"USAGE: p2c_typ(argc, *argv[], <pdb_name_file>,<cmafile>)\n\
   argument [-options]\n\
    -best                       Find the atom pairs that deviate least (default: deviate most)\n\
    -beta                       Use beta carbons for non-glycine residues (default: alpha carbons only)\n\
    -col=<int1>                 compute all pairs that include columns <int1>\n\
    -D<float>                   dmax for non-classical H-bonds (default: case dependent)\n\
    -d<float>                   dmax in Angstroms for classical H-bonds (default: 2.5 Angstroms)\n\
    -show=<int>:<int>           Show residues in columns <int1> and <int2>\n\
    -srch=<int1>:<int2>         Search for residues in sequence <int1> that are a better 3D-fit to \n\
                                average distances of other residues within 10 Angstroms of \n\
                                column <int2> residues \n\
    -range=<int1>:<int2>        Only look at columns <int1> to <int2>\n\
    -Range=<int1>:<int2>        Focus on columns <int1> to <int2>\n\
    -bin=<real>                 Set the bin size for histogram (default: 1.0)\n\
    -P=<filename>               Pattern file corresponding to sequence subgroup.\n\
    -maxdist=<int>              Set the Maximum Mean distance to consider (default: 40)\n\
    -seqdist=<int>              Set the minimum distance between residues to consider (default: 4)\n\
    -maxsqdist=<int>            Set the maximum distance between residues to consider (default: 200) \n\
    -mindist=<int>              Set the minimum distance between aligned columns to compare (default: 3)\n\
    -minvar=<real>              Set the minimum variance to consider (default: 0)\n\
\n\n"

void	p2c_typ::GetArg(Int4 argc, char *argv[])
// *************** Get arguments for all program options **********************
// Echoes the whole command line to stdout, then scans argv[1..argc-1].
// Arguments that do not begin with '-' are ignored here.  Recognized options:
//   -bin=<real> -best -beta -col=<int> -D<float> -d<float>
//   -minvar=<real> -maxdist=<int> -mindist=<int>
//   -Range=<int>:<int> -range=<int>:<int> -seqdist=<int> -show=<int>:<int>
// Any other option, or a value outside its accepted range, results in
// print_error(P2C_USAGE_START).
{
	Int4 n;
	for(n = 0; n < argc; n++){ fprintf(stdout,"%s ",argv[n]); }
	fprintf(stdout,"\n");

	for(n = 1; n < argc; n++){
	   char *opt = argv[n];
	   if(opt[0] != '-') continue;
	   switch(opt[1]) {
	     case 'b':
		if(sscanf(opt,"-bin=%lf",&bin_size) == 1){
		     // fprintf(stderr,"binsize = %f\n",bin_size);
		     if(bin_size < 0.001){ print_error(P2C_USAGE_START); }
		     else if(bin_size > 4.0){ print_error(P2C_USAGE_START); }
		} else if(strcmp("-best",opt)==0){
		     FindBest=TRUE;
		} else if(strcmp("-beta",opt)==0){
		     UseBeta=TRUE;
		} else {
		     print_error(P2C_USAGE_START);
		}
		break;
	     case 'c':
		if(sscanf(opt,"-col=%d",&KeyCol) != 1){
		     print_error(P2C_USAGE_START);
		} else if(KeyCol < 1){
		     print_error(P2C_USAGE_START);
		}
		break;
	     case 'D':
		if(sscanf(opt,"-D%f",&dmax) != 1 || dmax > 100 || dmax < 0){
		     print_error(P2C_USAGE_START);
		}
		break;
	     case 'd':
		if(sscanf(opt,"-d%f",&HA_dmax) != 1 || HA_dmax > 100 || HA_dmax < 0){
		     print_error(P2C_USAGE_START);
		}
		break;
	     case 'm':
		// Tried in this order: -minvar, -maxdist, -mindist.
		if(sscanf(opt,"-minvar=%lf",&MinVar) == 1){
		     if(MinVar < 0.0) print_error(P2C_USAGE_START);
		} else if(sscanf(opt,"-maxdist=%d",&MaxMeanDist) == 1){
		     if(MaxMeanDist < 1) print_error(P2C_USAGE_START);
		} else if(sscanf(opt,"-mindist=%d",&MinDist) == 1){
		     if(MinDist < 1) print_error(P2C_USAGE_START);
		} else {
		     print_error(P2C_USAGE_START);
		}
		break;
	     case 'R':
		if(sscanf(opt,"-Range=%d:%d",&Begin,&End) != 2){
		     print_error(P2C_USAGE_START);
		} else if(Begin > End || Begin <= 0){
		     print_error(P2C_USAGE_START);
		}
		// print_error("-R option not yet implemented");
		break;
	     case 'r':
		if(sscanf(opt,"-range=%d:%d",&begin,&end) != 2){
		     print_error(P2C_USAGE_START);
		} else if(begin >= end || begin <= 0){
		     print_error(P2C_USAGE_START);
		}
		break;
	     case 's':
		if(sscanf(opt,"-seqdist=%d",&MinDistInSeq) == 1){
		     if(MinDistInSeq < 1) print_error(P2C_USAGE_START);
		} else if(sscanf(opt,"-show=%d:%d",&K1,&K2) == 2){
		     // Keep K1 < K2; identical columns are rejected.
		     if(K1 > K2){ Int4 swap_tmp = K1; K1 = K2; K2 = swap_tmp; }
		     else if(K1 == K2){ print_error(P2C_USAGE_START); }
		} else {
		     print_error(P2C_USAGE_START);
		}
		break;
	     default :
		print_error(P2C_USAGE_START);
	   }
	}
}

// Write one "<OutFile>.sprs" file describing, for every pdb set with a
// non-empty RelevantSet and every repeat R stored for it:
//   - a "file:" / "chain:" line pair for each member of the set,
//   - a "range:" line (residue set string, number of aligned columns and
//     number of inserted residues), emitted with the first matching pattern,
//   - one line per pattern x whose hpt cell for this repeat's row is '+',
//     listing the full-sequence positions of the pattern columns, prefixed by
//     side_color[X] where X counts matching patterns from the last to the first.
// The 'call' argument is currently unused.
// Returns 1 if a file was opened (and closed), 0 if nothing was written.
int	p2c_typ::PrintKLST_Files(char call )
{
   //************* Print out vsi files for each set.
   Int4 x,p,i,j,I,C,R,S;
   // char side_color[]="WYROMGCBDLWWWWWWWWWWWWWWWWWWDDDDDDDDDDDDDDDDDDD";
   Int4 MaxColor=40;
   set_typ SetP=0;

   // Optional user-supplied colours overwrite side_color[1..MaxColor].
   if(SideColors){
	for(j=0,i=1; isalpha(SideColors[j]); j++,i++){
		side_color[i]=SideColors[j]; if(i >= MaxColor) break;
	}
   }
   FILE *vfp=0;
   for(S=1; S <= esc->NumPDB_Sets; S++){
	if(CardSet(RelevantSet[S]) == 0) continue;	// skip irrelevant files.
	assert(esc->PDB_SetI[S]);
	if(vfp==0){		// opened lazily, on the first relevant set.
#if 0
	    if(call > 0) vfp=open_file(OutFile,".sprs","w");
	    else vfp=open_file(OutFile,"_pdb.klst","w");
#else
	    vfp=open_file(OutFile,".sprs","w");
#endif
	}
	// fprintf(vfp,"~$=%d.\n",vsi_number); fflush(vfp);

	for(R=1; R <= NumFullRpts[S]; R++){
#if 1
          for(j=1; esc->PDB_SetI[S][j]; j++){
		I=esc->PDB_SetI[S][j]; C=esc->PDB_SetC[S][j];
		fprintf(vfp,"file: %s\n",mpdb->pdb_file[I]);
		fprintf(vfp,"chain: %c.\n",ChainCharPDB(C,mpdb->pdb[I]));
	  }
#endif
	  Int4 Row = RptCategory[S][R]; // hpt row...
	  assert(MemberSet(Row,RelevantSet[S]));
	  // if(!MemberSet(A,RelevantSet[S])) continue;	// skip irrelevant files.
	  Int4 X;
          // for(X=0,x=1; x <= ptrn->NumPttrns; x++) 
          for(X=0,x=ptrn->NumPttrns;  x > 0; x--)	// stored backwards...
	  {
	    Int4 Col=ptrn->PttrnCategory[x];
	    if(hpt->Cell(Row, Col) != '+') continue; else X++;
	    // print out subgroups before supergroups to ensure proper color.
	    if(X > MaxColor) continue;  // ran out of colors.
	    if(X==1){
	      Int4 start=1,TheEnd=NumCol(),num_ins,num_del;
	      // Test the index bound before reading the array element.
	      for(j=1; j <= TheEnd && Col2FullSeq[S][R][j] == 0; j++){ } start=j;
	      for(j=TheEnd; j > 0 && Col2FullSeq[S][R][j] == 0; j--){ } TheEnd=j;
	      for(j=1,num_del=0; j <= NumCol(); j++){ if(Col2FullSeq[S][R][j] == 0) num_del++; }
	      Int4 nAln,strt=Col2FullSeq[S][R][start],end=Col2FullSeq[S][R][TheEnd];
	      nAln=NumCol() - num_del; num_ins=(end-strt+1) - nAln;
#if 1	// Find residue Set...
	      SetP=MakeSet(end +9);
	      for(j=start; j <= TheEnd; j++){ 
		Int4 res_j= Col2FullSeq[S][R][j];
		if(res_j != 0){ assert(res_j >= strt && res_j <= end);  AddSet(res_j,SetP); }
	      }
	      Int4 low,high;
	      char *rtn=RtnStrSet(SetP,low,high);
	      if(0) fprintf(stderr,"Set string = \"%s\"; range: %d-%d\n",rtn,low,high);
	      NilSet(SetP); SetP=0;
	      fprintf(vfp,"range: %s(%d;%d).\n",rtn,nAln,num_ins); free(rtn);
#else
	      fprintf(vfp,"range: %d_%d(%d;%d).\n",strt,end,nAln,num_ins);
#endif
	    }
            BooLean first=TRUE;
	    for(p=1; p <= ptrn->NumPttrnRes[x]; p++){
	        // if(p > 10) continue;  // skip less significant.
		Int4 col=ptrn->PosPttrn[x][p];
		Int4 site=Col2FullSeq[S][R][col];
		if(site == 0) continue;  // not visible within structures.
		assert(site <= LenSeq(FullSeq[S]));
		Int4 r=ResSeq(site,FullSeq[S]);
		char Res=AlphaChar(r,AB);
		if(strchr(ptrn->PttrnRes[x][p],Res)){
		   // Int4 X=ptrn->NumPttrns-x+1; 
		   if(first){ fprintf(vfp,"%c=%d",this->side_color[X],site);
		    first=FALSE; 
		   } else fprintf(vfp,",%d",site);
		} else {	// mismatches are currently printed the same way as matches.
#if 0
		   // See whether shows up at a higher level:
		   BooLean IsHigher=FALSE;
		   for(Int4 x2=x+1;  x2 > 0; x2--){
	    	      Int4 Col2=ptrn->PttrnCategory[x2];
	    	      if(hpt->Cell(Row, Col2) != '+') continue;
	    	      for(Int4 p2=1; p2 <= ptrn->NumPttrnRes[x2]; p2++){
			Int4 col2=ptrn->PosPttrn[x2][p2];
			if(col == col2){ IsHigher=TRUE; break; }
		      } if(IsHigher) break;
		   } if(IsHigher) continue;
#endif
		   // if no higher level pattern, then print out.
		   if(first){ fprintf(vfp,"%c=%d",this->side_color[X],site);
			first=FALSE; } else fprintf(vfp,",%d",site);
	       	} 
	    }
	    fprintf(vfp,"\n");		// end of this pattern's line.
	  }
	  fprintf(vfp,"\n");		// blank line after each repeat.
	}
   }
   if(vfp){ fclose(vfp); return 1; } else return 0;
}

void	p2c_typ::PrintVSI_Files(FILE *vsifp,FILE *mstfp)
{
   //************* Print out vsi files for each set.
   Int4 A,x,p,n,i,j=0,I,C,R,S,vsi_number=0;
   Int4 MaxColor=40;
   double maxdist=4.0;
   char **seq_ids=0;
   pdb_typ P;

   if(TraceColors){
	for(j=0,i=1; isalpha(TraceColors[j]); j++,i++){
		this->trace_color[i]=TraceColors[j]; if(i >= MaxColor) break;
	}
   }
   if(SideColors){
	for(j=0,i=1; isalpha(SideColors[j]); j++,i++){
		this->side_color[i]=SideColors[j]; if(i >= MaxColor) break;
	}
   }
   FILE *vfp=0; 
   if(vsifp){ this->FreeSeqIDs(); NEWPP(SeqIDs,esc->NumPDB_Sets +5, char); }
   for(S=1, vsi_number=0; S <= esc->NumPDB_Sets; S++){
	if(vsifp == 0 && vfp==0) vfp=open_file(OutFile,".vsi","w");
	if(CardSet(RelevantSet[S]) == 0){
	        I=esc->PDB_SetI[S][1]; C=esc->PDB_SetC[S][1];
	     	if(0) fprintf(stderr,
		  "%d(%d): %s:%c; valid=%d; (%d chains); seqID[%d][%d] skipped\n",
		   S,vsi_number,FilenamePDB(mpdb->pdb[I]),ChainCharPDB(C,mpdb->pdb[I]),
			CardSet(RelevantSet[S]),esc->NumPDB_Set[S],S,j);
// assert(CardSet(RelevantSet[S]) != 0);
		continue;	// skip irrelevant files.
	}
	vsi_number++;
	if(SeqIDs) NEWP(SeqIDs[vsi_number],esc->NumPDB_Set[S]+5, char); 
	// dummy array to ensure free up of entire array.
	if(vsifp){ fprintf(vsifp,"~$=%d.\n",vsi_number); fflush(vsifp); }
	else { fprintf(vfp,"~$=%d.\n",vsi_number); fflush(vfp); }
        for(j=1; esc->PDB_SetI[S][j]; j++){
	        I=esc->PDB_SetI[S][j]; C=esc->PDB_SetC[S][j];
	        assert(I <= mpdb->NumPDB && I > 0); 
	        P=mpdb->pdb[I]; assert(P); assert(C <= nChainsPDB(P) && C > 0);
		if(vsifp){
		    fprintf(vsifp,"File%d=%s:%c  // \n",j,mpdb->pdb_file[I],
						ChainCharPDB(C,mpdb->pdb[I]));
		    SeqIDs[vsi_number][j]=PathPDB2SeqID(mpdb->pdb_file[I],
						ChainCharPDB(C,mpdb->pdb[I]));
	     	    if(0) fprintf(stderr,"%d: %s; valid=%d; (%d chains); seqID[%d][%d]=%s\n",
			S,FilenamePDB(mpdb->pdb[I]),CardSet(RelevantSet[S]),
			esc->NumPDB_Set[S],S,j,SeqIDs[vsi_number][j]);
		} else {
		    fprintf(vfp,"File%d=%s:%c  // \n",j,mpdb->pdb_file[I],ChainCharPDB(C,mpdb->pdb[I]));
		}
	}
	if(vsifp) fprintf(vsifp,"\n1-10000.W15\n");
	else fprintf(vfp,"\n1-10000.W15\n");
        for(n=1; esc->PDB_SetI[S][n]; n++){
	  I=esc->PDB_SetI[S][n]; C=esc->PDB_SetC[S][n];
#if 0	// DEBUG
	  if(n==1){
	    fprintf(vfp,"\n# NumRpts[%d][%d] = %d\n",I,C,NumRpts[I][C]);
	    fprintf(vfp,"\n# NumFullRpts[%d] = %d\n",S,NumFullRpts[S]);
	  }
#endif
	  for(R=1; R<=NumRpts[I][C]; R++){
	     // Find adjacent, hetero subunits...
	     char **AdjMolecule=AdjacentHeteroMolecule(R,C,I,maxdist);
	     for(j=1; AdjMolecule[j]; j++){
		if(AdjMolecule[j][0] == '!'){	// indicates a single atom (ion).
		   if(vsifp) fprintf(vsifp,"\n%d%s.{X}\n",n,AdjMolecule[j]); 
		   else fprintf(vfp,"\n%d%s.{X}\n",n,AdjMolecule[j]); 
		} else {
		   if(vsifp) fprintf(vsifp,"\n%d!%s.C\n",n,AdjMolecule[j]); 
		   else fprintf(vfp,"\n%d!%s.C\n",n,AdjMolecule[j]); 
		} free(AdjMolecule[j]);
	     } free(AdjMolecule);
	  }
	}
	if(vsifp) fprintf(vsifp,"\n"); else fprintf(vfp,"\n");
        // for(x=ptrn->NumPttrns; x > 0; x--)
#if 1
	BooLean	HptIsTree=FALSE;
	Int4 *Parent=0;
	if(hpt->IsTree(Parent)) HptIsTree=TRUE; 
	if(Parent) free(Parent);
#endif
	for(R=1; R <= NumFullRpts[S]; R++){
	  Int4 Row = RptCategory[S][R]; // hpt row...
	  assert(MemberSet(Row,RelevantSet[S]));
	  // if(!MemberSet(A,RelevantSet[S])) continue;	// skip irrelevant files.
	  Int4 X;
          // for(X=0,x=1; x <= ptrn->NumPttrns; x++) 
char *LastSet=0;
          for(X=0,x=ptrn->NumPttrns;  x > 0; x--)	// stored backwards...
	  {
	    Int4 Col=ptrn->PttrnCategory[x];
	    if(hpt->Cell(Row, Col) != '+') continue; else X++;
LastSet=hpt->GrpName(Col);
	    // print out subgroups before supergroups to ensure proper color.
	    if(X > 40) continue;  // ran out of colors.
            BooLean first=TRUE;
#if 1	// add more info to vsi files; need to fix this so that only one trace is printed
	    if(R < MaxColor){
	      if(HptIsTree){
	       // fprintf(vfp,"\n# %d.%s:\n",Row,hpt->ElmntSetName(Row));
	       if(vsifp) fprintf(vsifp,"\n# %d.%s:\n",Col,hpt->ElmntSetName(Col)); // 
	       else fprintf(vfp,"\n# %d.%s:\n",Col,hpt->ElmntSetName(Col)); // 
	      } else if(vsifp) fprintf(vsifp,"\n# %d.%s:\n",Col,hpt->GrpName(Col));
	      else fprintf(vfp,"\n# %d.%s:\n",Col,hpt->GrpName(Col));
	      Int4 start=1,TheEnd=NumCol();
	      for(j=1; Col2FullSeq[S][R][j] == 0 && j <= TheEnd; j++){ } start=j;
	      for(j=TheEnd; Col2FullSeq[S][R][j] == 0 && j > 0; j--){ } TheEnd=j;
	      if(vsifp) fprintf(vsifp,"%d-%d.%c80\n",Col2FullSeq[S][R][start],
			Col2FullSeq[S][R][TheEnd],this->trace_color[R]);
	      else fprintf(vfp,"%d-%d.%c80\n",Col2FullSeq[S][R][start],
			Col2FullSeq[S][R][TheEnd],this->trace_color[R]);
	    }
#endif
if(mstfp){ fprintf(mstfp,"%d%c=",vsi_number,this->side_color[X]); }
	    for(p=1; p <= ptrn->NumPttrnRes[x]; p++){
	        // if(p > 10) continue;  // skip less significant.
		Int4 col=ptrn->PosPttrn[x][p];
		Int4 site=Col2FullSeq[S][R][col];
		if(site == 0) continue;  // not visible within structures.
		assert(site <= LenSeq(FullSeq[S]));
		Int4 r=ResSeq(site,FullSeq[S]);
		char Res=AlphaChar(r,AB);
// PutSeq(stderr,FullSeq[S],AB); 
// site+=OffSetSeq(FullSeq[S]);
#if 1
	        char Color=toupper(this->side_color[X]);
		if(strchr(ptrn->PttrnRes[x][p],Res)==0){	// A pattern mismatch?
		   Color=tolower(Color);
		}
		if(first) first=FALSE; 
		else {
			if(vsifp) fprintf(vsifp,","); else fprintf(vfp,",");
			if(mstfp) fprintf(mstfp,",");
		}
		if(vsifp) fprintf(vsifp,"%c%d.%c",Res,site,Color); 
		else fprintf(vfp,"%c%d.%c",Res,site,Color);
		if(mstfp) fprintf(mstfp,"%d",site);
#else
		if(strchr(ptrn->PttrnRes[x][p],Res)){	// A pattern match?
		   // Int4 X=ptrn->NumPttrns-x+1; 
		   if(first){ 
			first=FALSE; 
			if(vsifp) fprintf(vsifp,"%c%d.%c",Res,site,this->side_color[X]); 
			else fprintf(vfp,"%c%d.%c",Res,site,this->side_color[X]);
if(mstfp) fprintf(mstfp,"%d",site);
		   } else {
			if(vsifp) fprintf(vsifp,",%c%d.%c",Res,site,this->side_color[X]);
			else fprintf(vfp,",%c%d.%c",Res,site,this->side_color[X]);
if(mstfp) fprintf(mstfp,",%d",site);
		   }
		} else {	// print mismatches in white.
		   // See whether shows up at a higher level:
		   BooLean IsHigher=FALSE;
		   for(Int4 x2=x+1;  x2 > 0; x2--){
	    	      Int4 Col2=ptrn->PttrnCategory[x2];
	    	      if(hpt->Cell(Row, Col2) != '+') continue;
	    	      for(Int4 p2=1; p2 <= ptrn->NumPttrnRes[x2]; p2++){
			Int4 col2=ptrn->PosPttrn[x2][p2];
			if(col == col2){ IsHigher=TRUE; break; }
		      } if(IsHigher) break;
		   } if(IsHigher) continue;
		   // if no higher level pattern, then print out.
		   if(first){
		      first=FALSE; 
		      if(vsifp) fprintf(vsifp,"%c%d.W",Res,site); 
		      else fprintf(vfp,"%c%d.W",Res,site); 
		   } else if(vsifp) fprintf(vsifp,",%c%d.W",Res,site);
		   else fprintf(vfp,",%c%d.W",Res,site);
	       	} 
#endif
	    } if(vsifp) fprintf(vsifp,"\n"); else fprintf(vfp,"\n");
if(mstfp) fprintf(mstfp,"\n");
#if 1	// print column positions as well...
	    for(first=TRUE,p=1; p <= ptrn->NumPttrnRes[x]; p++){
	        // if(p > 10) continue;  // skip less significant.
		Int4 col=ptrn->PosPttrn[x][p];
		Int4 site=Col2FullSeq[S][R][col];
		if(site == 0) continue;  // not visible within structures.
		assert(site <= LenSeq(FullSeq[S]));
		Int4 r=ResSeq(site,FullSeq[S]);
		char Res=AlphaChar(r,AB);
		if(strchr(ptrn->PttrnRes[x][p],Res)){
		   if(first){
			first=FALSE; 
			if(vsifp) fprintf(vsifp,"#%s%d",ptrn->PttrnRes[x][p],col); 
			else fprintf(vfp,"#%s%d",ptrn->PttrnRes[x][p],col);
		   } else if(vsifp) fprintf(vsifp,",%s%d",ptrn->PttrnRes[x][p],col);
		   else fprintf(vfp,",%s%d",ptrn->PttrnRes[x][p],col);
		} else {	// print mismatches in white.
		   if(first){
			first=FALSE; 
			if(vsifp) fprintf(vsifp,"#(%s%d)",ptrn->PttrnRes[x][p],col);
			else fprintf(vfp,"#(%s%d)",ptrn->PttrnRes[x][p],col); 
		   } else if(vsifp) fprintf(vsifp,",(%s%d)",ptrn->PttrnRes[x][p],col);
		   else fprintf(vfp,",(%s%d)",ptrn->PttrnRes[x][p],col);
	       	} 
	    } if(vsifp) fprintf(vsifp,"\n"); else fprintf(vfp,"\n");
#endif
	  }
#if 1	// print cross conserved patterns...
	   assert(LastSet);
	   if(0) fprintf(stderr,"last=%s\n",LastSet);
	   if(R==1){ 
	      FILE *xfp=vsifp; if(xfp == 0) xfp=vfp;
	      xcs_typ xcs(OutFile);
	      if(0) xcs.Put(stderr);
	      Int4 site,*pos=xcs.PttrnPos(LastSet);
	      char *xres=xcs.PttrnRes(LastSet);
	      if(pos && xres){
		fprintf(xfp,"\n# %d.XCS:\n",xcs.GetNum(LastSet));
		for(Int4 x=1; pos[x]; x++){
	           site=Col2FullSeq[S][R][pos[x]];
		   if(site == 0) continue;  // not visible within structures.
		   assert(site <= LenSeq(FullSeq[S]));
		   Int4 r=ResSeq(site,FullSeq[S]);
		   char Res=AlphaChar(r,AB);
		   if(x ==1) fprintf(xfp,"%c%d.W",Res,site);
		   else fprintf(xfp,",%c%d.W",Res,site);
		} fprintf(xfp,"\n#");
		for(Int4 x=1; pos[x]; x++){
		   if(x > 1) fprintf(xfp,",");
		   fprintf(xfp,"%c%d",xres[x],pos[x]);
		} fprintf(xfp,"\n\n");
	        free(pos); free(xres);
	      } else if(0) fprintf(stderr,"pos == 0)\n");
	      if(0){ xcs.PutRow(stderr,LastSet); fprintf(stderr,"\n"); }
	   }
#endif
	} if(vsifp) fprintf(vsifp,"\n"); else fprintf(vfp,"\n"); // fclose(vfp);
   } if(vfp) fclose(vfp); if(vsifp) fflush(vsifp);
}

char	**p2c_typ::AdjacentHeteroMolecule(Int4 RR, Int4 CC, Int4 II, double maxdist)
// Return a string array naming the hetero molecules that are adjacent to the
// pattern residues of repeat RR in chain CC of structure II.
//
// A molecule is reported when one of its non-water hetero atoms (in any
// non-protein chain other than CC) lies within maxdist of a non-water atom
// of a pattern residue.  Each entry is formatted as "[res_name]res:chain",
// with a leading '!' when the hetero residue consists of a single atom.
//
// Entries occupy OutPut[1..NumMol] and are unique; the caller scans from
// index 1 until a null entry and must free() every string and the array.
// At most MaxNumMol entries are returned.
{
        Int4    C,a,aa,i,j,jj,x,p;
        atm_typ atm,aatm;
        char	**OutPut,str[100];
	Int4	NumMol=0,MaxNumMol=1000;

	assert(II <= mpdb->NumPDB && II > 0);
	pdb_typ P=mpdb->pdb[II];
	assert(CC <= nChainsPDB(P) && CC > 0);
        NEWP(OutPut,MaxNumMol+3,char);

	for(x=ptrn->NumPttrns; x > 0; x--){
	  for(p=1; p <= ptrn->NumPttrnRes[x]; p++){
	    Int4 pos=ptrn->PosPttrn[x][p];
	    // NOTE(review): if Col2pdbSeq uses 0 for "column not in structure"
	    // (as Col2FullSeq appears to elsewhere in this file), Res is then
	    // just the sequence offset -- confirm whether such columns should
	    // be skipped here.
	    Int4 Res=Col2pdbSeq[II][CC][RR][pos] + OffSetSeq(mpdb->pdbSeq[II][CC]);
	    res_typ *ResP=mpdb->ResALL[II][CC];
	    for(jj=1; jj <= mpdb->num_resALL[II][CC]; jj++){
	      if(ResidueID(ResP[jj]) != Res) continue;  // find the pattern residue (Res).
	      for(aa=1; aa <= ResidueAtomNumber(ResP[jj]); aa++){
		aatm=AtomResidue(aa,ResP[jj]);
	        if(IsWaterAtom(aatm)) continue;
                for(C=1; C <= nChainsPDB(P); C++){
		  if(C == CC) continue;
		  if(IsProteinPDB(C,P)) continue;
		  res_typ *ResH=mpdb->ResALL[II][C];
		  for(j=1; j <= mpdb->num_resALL[II][C]; j++){
		      for(a=1; a <= ResidueAtomNumber(ResH[j]); a++){
			atm = AtomResidue(a,ResH[j]);
	     		if(IsWaterAtom(atm)) continue;
             		if(!IsHeteroAtom(atm)) break;	// all residue atoms are either hetero or not.
                        if(DistanceAtoms(atm, aatm) <= maxdist){
			   // Strip leading blanks from the residue name.  The
			   // <ctype.h> classifiers are undefined for negative
			   // char values, hence the unsigned char casts.
			   char *mole_name=AtomResName(atm);
			   while(isspace((unsigned char) mole_name[0])){
			      mole_name++;
			      if(!isprint((unsigned char) mole_name[0])){
				print_error("AdjacentHeteroMolecule() atom error");
			      }
			   }
			   // Bounded formatting: never write past the end of str.
			   if(AtomsInResidue(ResH[j]) == 1) {
			     snprintf(str,sizeof(str),"![%s]%d:%c",
					mole_name,ResAtom(atm),AtomChain(atm));
			   } else {
			     snprintf(str,sizeof(str),"[%s]%d:%c",
					mole_name,ResAtom(atm),AtomChain(atm));
			   }
			   // Record each molecule only once.
			   BooLean is_new=TRUE;
			   for(i=1; i <= NumMol; i++){
				if(strcmp(str,OutPut[i]) == 0){ is_new=FALSE; break; }
			   }
			   if(is_new){
				NumMol++; OutPut[NumMol]=AllocString(str);
				if(NumMol >= MaxNumMol) return OutPut;	// array is full.
			   }
			   break;	// No need to look further within this residue...
			}
		      }
		  }
		}
	      }
	    }
	  }
        }
	return OutPut;
}

