/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "rpm_typ.h"

// Legacy usage text for the three-argument command line
// (<multicma_file> <hpt_file> <pttrn_file>).  The functions visible in this
// file report errors with USAGE_START instead; this macro appears to be kept
// for reference only (NOTE(review): confirm no other file uses it).
#define	USAGE_START_OLD	"USAGE: pttrn_mtch <multicma_file> <hpt_file> <pttrn_file> [options]\n\
   options:\n\
     -C         Count simulated sequences in each of the input cma files (output from -S).\n\
     -H      	Output Files for viewing a heat map of matching sequences\n\
     -M=<int>	Output a File of sequences with <= <int> mismatches\n\
     -M      	Output a File of perfectly matching sequences\n\
     -Q=<str>   Input Pattern corresponds to cma file named <str>.cma\n\
     -Min=<int> minimum number of sequences in cma file to output results\n\
     -same      Don't sort input sets by score; output order same as input order\n\
     -S         Emit simulated sequences for each of the input cma files.\n\
     -Print     print out sequences in input file as multiple NameCMA.seq files\n\
     -print     print out sets for input file as multiple NameCMA.seq files\n\
     -x		dummy option\n\
\n\n"

// Usage text printed (via print_error) whenever the command line is rejected.
// The single positional argument is a file-name prefix; Init() opens
// <prefix>_new.mma, <prefix>_new.hpt and <prefix>.pttrns.
// The -Print option is documented as -Print=<str> because Init() only accepts
// that form (it parses the argument with "-Print=%s").
#define	USAGE_START	"USAGE: pttrn_mtch <prefix> [options]\n\
   Input files: <prefix>_new.mma <prefix>_new.hpt <prefix>.pttrns \n\
                 These are bpps and darc output files.\n\
   options:\n\
     -C         Count simulated sequences in each of the input cma files (output from -S).\n\
     -H      	Output Files for viewing a heat map of matching sequences\n\
     -M=<int>	Output a File of sequences with <= <int> mismatches\n\
     -M      	Output a File of perfectly matching sequences\n\
     -Q=<str>   Input Pattern corresponds to cma file named <str>.cma\n\
     -Min=<int> minimum number of sequences in cma file to output results\n\
     -same      Don't sort input sets by score; output order same as input order\n\
     -S         Emit simulated sequences for each of the input cma files.\n\
     -Print=<str> print out sequences in input file as multiple NameCMA.seq files\n\
     -print     print out sets for input file as multiple NameCMA.seq files\n\
     -x		dummy option\n\
\n\n"


/*********************************************************
Key superfamilies:

AAA+
P-loop GTPases
EEP
GNAT
RecA (??)


RhoD
vWFA (VWA)
Helicase_Ct (DEAD)

 *********************************************************/

void    rpm_typ::Init()
{
	if(argc < 2) print_error(USAGE_START);
	time1=time(NULL); 
	TurnOffLicenseStatement();
	AB=MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
	FILE *fp = open_file(argv[1],"_new.mma","r");
	IN_CMA=MultiReadCMSA(fp,&Number,AB); fclose(fp);
	if(Number < 1) print_error("input error: less than one cma file");
	for(Int4 i=1; i <= Number; i++){
	  if(nBlksCMSA(IN_CMA[i]) != 1) print_error("cma error; multiple blocks.");
	  if(LengthCMSA(1,IN_CMA[i]) != LengthCMSA(1,IN_CMA[1]))
	  	print_error("FATAL: cma lengths differ!");
	}
	fp = open_file(argv[1],"_new.hpt","r");
	Hpt=new hpt_typ(fp); fclose(fp);
// Hpt->Put(stderr,TRUE,FALSE,TRUE);
#if 1	// sort patterns by significance...
	Int4	i,j,x,row,Nptrn=0;
	char	*S,Str[1005],ptrn[1005],**Ptrn;
	Str[999]=0; ptrn[999]=0;
	NEWP(Ptrn,Hpt->NumBPPS() +3,char);
	fp = open_file(argv[1],".pttrns","r");
	if((S=fgets(Str,1000,fp)) != 0 && sscanf(Str,"%d: %[A-Z]%d,",&row,ptrn,&x)==3){
	     assert(Str[999]==0 && ptrn[999]==0);
	     for(Nptrn=i=0; S != 0; i++){
		if(sscanf(Str,"%d: %[A-Z]%d,",&j,ptrn,&x)==3){
		   if(j < 1 || j > Hpt->NumBPPS() || Ptrn[j] != 0){
			fprintf(stderr,"ptrn (%d:%d) = '%s%ld'\n",i,j,ptrn,x);
			fprintf(stderr,"%d =? %s",row,Str);
			print_error("rpm_typ: fatal input error 1");
		   } else {
			S=strchr(Str,':');
			while(!isalpha(*S)) S++;
			sprintf(ptrn,"-P=%s",S);
			Ptrn[j] = AllocString(ptrn); Nptrn++;
			// fprintf(stderr,"%d: %s\n",j,Ptrn[j]);
		   }
		} S=fgets(Str,1000,fp); 
	    }
	} fclose(fp);
	if(Nptrn != Hpt->NumBPPS()){
	      fprintf(stderr,"Nprtn=%d\n",Nptrn);
	      print_error("rpm_typ: fatal input error 2");
	} else for(i=1; i<=Hpt->NumBPPS(); i++){
	      Hpt->ReSetPttrnArgv(i,Ptrn[i]); free(Ptrn[i]);
	} free(Ptrn);
#endif
// Hpt->Put(stderr,TRUE,FALSE,TRUE); exit(1);
	HptPttrn=0; NumPttrns=this->GetHptPttrns();
	seed=7061950; MinNumSeq=0;
	SortByScore=TRUE; PrintSets=HeatMap=FALSE; mode=' '; HM=0;
	MaxMisMatches=0;Simulated=0;
	PrintCMA=FALSE; 
	query_cma[0]=0; qcma=0;
	for(int arg = 2; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(USAGE_START);
	   switch(argv[arg][1]) {
             case 'C': 
		if(argv[arg][2]==0) mode = 'C'; else print_error(USAGE_START);
		break;
             case 'H': 
		if(argv[arg][2]==0){ HeatMap=TRUE; }
		break;
#if 0
             case 'h': 
		if(sscanf(argv[arg],"-hm=%s",str)==1){
		   HM=AllocString(str);
		} else print_error(USAGE_START);
		break;
#endif
             case 'M': 
		if(sscanf(argv[arg],"-Min=%d",&MinNumSeq)==1){
			if(MinNumSeq < 1) print_error(USAGE_START);
		} else if(argv[arg][2]==0){ mode = 'M';
		} else if(argv[arg][2]=='='){ mode = 'M'; 
		   if(sscanf(argv[arg],"-M=%d",&MaxMisMatches)!=1) 
				print_error(USAGE_START);
		} else print_error(USAGE_START);
		break;
             case 'P': 
		if(sscanf(argv[arg],"-Print=%s",print_file_name)!=1) 
			print_error(USAGE_START);
		PrintCMA=TRUE; 
		break;
             case 'p': 
		if(strcmp("-print",argv[arg]) == 0){ PrintSets=TRUE; }
		else print_error(USAGE_START);
		break;
             case 'Q': 
		if(sscanf(argv[arg],"-Q=%s",query_cma)!=1) print_error(USAGE_START);
		break;
             case 'S': 
		if(argv[arg][2]==0){ mode = 'S'; PrintCMA=TRUE; }
		else print_error(USAGE_START);
		// if(sscanf(argv[arg],"-S=%d",&Simulated)!=1) print_error(USAGE_START);
		// if(Simulated < 1) print_error(USAGE_START);
		// mode='S'; PrintCMA=TRUE;
		break;
             case 's': 
		if(strcmp("-same",argv[arg]) == 0){ SortByScore=FALSE; }
		else print_error(USAGE_START);
		break;
             case 'x': mode = 'x'; break;
	     default: print_error(USAGE_START);
	   }
	}
	if(seed == 7061950) seed = (UInt4) time(NULL);
	sRandom(seed);
}

Int4	rpm_typ::GetHptPttrns()
// Allocate this->HptPttrn and, for each Hpt column 1..NumBPPS(), store a copy
// of the text following "-P=" in the first argument that starts with "-P=".
// Columns without such an argument keep a null entry.
// Returns the number of columns for which a pattern was stored.
{
	Int4	nFound=0;
	NEWP(this->HptPttrn,Hpt->NumBPPS() +3,char);
	for(Int4 col=1; col <= Hpt->NumBPPS(); col++){
	    const Int4 nArgs=Hpt->nArg(col);
	    char **args=Hpt->Argv(col);
	    // advance to the first "-P=" argument, if any.
	    Int4 a=0;
	    while(a < nArgs && strncmp(args[a],"-P=",3) != 0) a++;
	    if(a < nArgs){
		this->HptPttrn[col]=AllocString(args[a]+3);
		nFound++;
	    }
	}
	return nFound;
}

Int4	rpm_typ::EmitSimSeqs(FILE *fp)
// For every input alignment, generate as many simulated sequences as the
// alignment contains and print them to stdout.  Sequence n is relabelled
// "<cma name> {<phylum(kingdom)>}seq<n>", where phylum and kingdom are taken
// from the n-th real sequence of that alignment.
//   fp: currently ignored; output always goes to stdout.
// Always returns 0.
{
	// All gap lengths are zero (the original left Gap_Len[3] uninitialised).
	Int4 rpts=1,Gap_Len[4]={0,0,0,0};
	char new_info[200]; 
	for(Int4 f=1; f <= Number; f++){
	   const Int4 nsq=NumSeqsCMSA(IN_CMA[f]);
	   e_type *Seq=SimulatedSeqsCMSA(IN_CMA[f],nsq,rpts,Gap_Len);
	   for(Int4 n=1; n <= nsq; n++) {
		e_type	E=SeqSetE(n,TrueDataCMSA(IN_CMA[f]));
		char    *phylum=PhylumSeq(E);
		char    K=KingdomSeq(E);
		// bounded write: long names are truncated rather than
		// overrunning new_info.
		snprintf(new_info,sizeof(new_info),"%s {<%s(%c)>}seq%d",
				NameCMSA(IN_CMA[f]),phylum,K,n);
		ChangeInfoSeq(new_info,Seq[n]);
		PutSeq(stdout,Seq[n],AB);
		NilSeq(Seq[n]);
	   } free(Seq);
	} return 0;
}

Int4	rpm_typ::CntSimSeqs(FILE *fp)
// For each input alignment, group its sequences by sequence identifier
// (expected forms: "Set<int>", "cd<int>", "Random" or "Reject") and print
//     <cma name>(<#seqs>) =  Set<id>(<count>) ...
// to stdout.  An alignment whose sequences all carry the same identifier
// counts as "identical"; the final lines report how many such alignments
// there are and the corresponding percentage.
//   fp: currently ignored; output always goes to stdout.
// Always returns 0.
{
	enum { MaxSets=500 };		// capacity of fID[]/fCnt[] (index 0 unused).
	Int4	j,x,MaxSize=100000; // 100,000 is max id for CDD.
	Int4	NumSets=0,fID[MaxSets],fCnt[MaxSets];	// For reverse comparison...
	Int4	N,NumIdent=0;
	set_typ Set=MakeSet(MaxSize); 
	ss_type	data;
	cma_typ	cma=0;
	ClearSet(Set);
	e_type Ei,Ej;
	char str2[108];
	for(Int4 f=1; f <= Number; f++){
	   // ^ assume f == 1 is Root & f == Number is reject.
	   Int4 n2,i,ID=0;
	   cma=IN_CMA[f];
	   data = TrueDataCMSA(cma); N = NSeqsSeqSet(data);
	   BooLean	*counted; NEW(counted,N+2,BooLean);
	   fprintf(stdout,"%s(%d) = ",NameCMSA(cma),N); 
	   for(j=1; j<= N; j++) {
		if(counted[j]) continue; 	// already tallied with an earlier sequence.
		Ej=SeqSetE(j,data); StrSeqID(str,100,Ej);
		if(sscanf(str,"Set%d",&ID) != 1){
		   if(sscanf(str,"cd%d",&ID) != 1){
		     if(strcmp(str,"Random") != 0 && strcmp(str,"Reject") != 0){
			fprintf(stderr,"Name = %s",str);
			print_error("Input names invalid");
		     } else ID=0;
		   }
		}
		if(ID > 0){
		   if(!MemberSet(ID,Set)){  // first 'f' found.
			AddSet(ID,Set);
			if(f > 1 && f < Number){
			      // The original incremented NumSets without a bound
			      // and could write past the end of fID[]/fCnt[].
			      if(NumSets >= MaxSets-1)
				  print_error("CntSimSeqs(): too many distinct sets");
			      NumSets++; fCnt[NumSets]=1;
			      fID[NumSets]=ID; 
			}
		   } else if(f > 1 && f < Number){	// found in previous f set.
			for(x = 1; x <= NumSets; x++){
			   	if(fID[x] == ID){ fCnt[x]++; break; }
			}
		   }
		}
		// count (and mark) every sequence sharing this identifier.
		for(n2=0,i=1; i<= N; i++) {
		   if(counted[i]) continue;
		   Ei=SeqSetE(i,data); StrSeqID(str2,100,Ei);
		   if(strcmp(str2,str) == 0){
			counted[i]=TRUE; n2++;
			if(n2==1){
				if(ID==0) fprintf(stdout," Random"); 
				else fprintf(stdout," Set%d",ID); 
				fflush(stdout);
			}
		   }
		}
		if(n2 == N) NumIdent++;		// whole alignment has one identifier.
		fprintf(stdout,"(%d)",n2); fflush(stdout);
	   } free(counted); fprintf(stdout,"\n"); fflush(stdout);
	}
	double d=100.0*((double)NumIdent/(double)Number);
	fprintf(stdout,"\n identical = %d out of %d sets = %.1f%c\n",NumIdent,Number,d,'%'); 
	fprintf(stdout," PERCENT %.1f\n\n",d);
#if 0
	PutSet(stdout,Set); fprintf(stdout,"\n"); 
	for(x=1; x <= NumSets; x++){
	  	fprintf(stdout,"Set%d = %d nodes.\n",fID[x],fCnt[x]); 
	} fprintf(stdout,"\n");
#endif
	NilSet(Set);
	return 0;
}

Int4	rpm_typ::GetPttrnInfo(char *Arg)
// Parse a comma-separated pattern such as "DE45,K72" into the member arrays.
// For the n-th item (n starts at 1):
//     residue_str[n] = the letters,        Residue[n]   = the first letter,
//     Position[n]    = the number given,   Position0[n] = the alignment column
// where the column is RealToFakeCMSA(1,Position[n],qcma) when a query cma is
// loaded and Position[n] itself otherwise.
// Errors (too many items, malformed item, column outside 1..alignment length,
// two items on the same column) are reported through print_error().
// Returns the number of items parsed.
{
	const Int4 aln_len=LengthCMSA(1,IN_CMA[1]);
	char	*Used; NEW(Used,aln_len +5, char);	// Used[k] != 0: column k taken.
	Int4	k,NumResidues=0;
	long	pos=0;		// matches the "%ld" conversion below.
        do {
             NumResidues++;  // increment...
             if(NumResidues >= this->MaxPttrnPositions){
		 fprintf(stderr,"NumResidues = %d >= %d pattern positions\n",
			NumResidues,this->MaxPttrnPositions);
                 print_error("Too many input patterns");
             } else if(sscanf(Arg,"%[a-zA-Z]%ld",str,&pos) == 2){
		// The original scanned "%ld" straight into an Int4; read into
		// a long and convert explicitly instead.
		strcpy(residue_str[NumResidues],str);
		Position[NumResidues]=(Int4) pos;
	     } else {
		 fprintf(stderr,"Arg=%s\n",Arg);
                 print_error(USAGE_START);
             } Residue[NumResidues] = residue_str[NumResidues][0];

	     if(qcma) k = RealToFakeCMSA(1, Position[NumResidues], qcma);
             else k = Position[NumResidues];
	     if(k < 1 || k > aln_len){
		print_error("pattern input error 1");
	     } 
	     // The duplicate check is done on the validated column k.  The
	     // original indexed Used[] with the raw input position before any
	     // range check, which could read and write outside the array.
	     if(Used[k]){
		fprintf(stderr,"Fatal: more than one pattern at position %d\n",
			Position[NumResidues]);
		print_error("Input error: only one pattern at each position allowed");
	     } else Used[k]=1;
	     Position0[NumResidues] = k;

	     // advance to the item after the next comma (or to the end).
             while(Arg[0] != ',' && Arg[0] != 0) Arg++;
             if(Arg[0] == ',') Arg++;
        } while(Arg[0]);
	free(Used);
	return NumResidues;
}

Int4	rpm_typ::GetSeqInfo(cma_typ cma, FILE *mfp, Int4 NumResidues,
		UInt4 *NumMatch, UInt4 *Match, UInt4 &TotalMatch,
		UInt4 *NumDeleted, UInt4 *Deleted, UInt4 &TotalDeleted,
		UInt4 *NumMisMatch, UInt4 *MisMatch, UInt4 &TotalMisMatch)
// Tally, over all sequences in cma, how the residue at each pattern column
// (Position0[1..NumResidues]) compares with the residue set Residues[j]:
//     Match[j] / MisMatch[j] / Deleted[j]  : per-position counts
//                                            (residue code 0 counts as deleted);
//     NumMatch[h], NumMisMatch[m], NumDeleted[d] : number of sequences with
//                                            exactly h / m / d such positions;
//     TotalMatch, TotalMisMatch, TotalDeleted : running sums over sequences.
// The counters are incremented, not reset.
// In mode 'M' the sequences with mismatches + deletions <= MaxMisMatches are
// passed to PutSelectCMSA() on mfp; when mfp is null a file is opened for the
// call and closed again before returning.
// Returns the number of sequences within the mismatch limit in mode 'M',
// and 0 in every other mode.
{
	BooLean	*skip=0;	// skip[sq] == TRUE: exceeds the mismatch limit.
	if(mode == 'M'){
		NEW(skip,NumSeqsCMSA(cma)+3,BooLean);	// set to FALSE.
	}
	Int4 sq,n=0;
	for(sq=1; sq <= NumSeqsCMSA(cma); sq++){
		Int4 hits=0,miss=0,del=0;
                for(Int4 j=1; j <= NumResidues; j++){
		    Int4 r1 = ResidueCMSA(1,sq,Position0[j],cma);
		    if(MemSset(r1,Residues[j])){ Match[j]++; hits++; }
		    else if(r1 != 0){ MisMatch[j]++; miss++; }
		    else { Deleted[j]++; del++; }
		}
		NumMatch[hits]++;
		NumDeleted[del]++;
		TotalDeleted += del;
		TotalMatch += hits;	
		NumMisMatch[miss]++;
		TotalMisMatch += miss;	
		if(skip && (miss + del) > MaxMisMatches){ skip[sq]=TRUE; } 
        }
	if(skip){
	     for(n=0,sq=1; sq <= NumSeqsCMSA(cma); sq++){
		 if(skip[sq] == FALSE) n++; 
	     }
	     if(n > 0){
		FILE *out=mfp;
		if(out==0){
		   if(NumResidues == 2){
			// File name prefix "<set1><col1>_<set2><col2>".  The
			// original used a 20-byte buffer with sprintf, which
			// longer residue sets could overrun.
			char Str[128];
			char *str1=GetPatternFromSST(Residues[1],AB);
			char *str2=GetPatternFromSST(Residues[2],AB);
			snprintf(Str,sizeof(Str),"%s%d_%s%d",str1,Position0[1],
				str2,Position0[2]);
			free(str1); free(str2);
		        out = open_file(Str,argv[1],"w");
		   } else out = open_file("Match_",argv[1],"w");
		}
		PutSelectCMSA(out,skip,cma); 
		// mfp is passed by value, so the caller can never close a file
		// opened here; close it now so the handle is not leaked.
		// NOTE(review): the file is opened with "w" on every call, so
		// with several input alignments only the last one survives.
		if(out != mfp) fclose(out);
	     } free(skip);
	} return n;
}

int	rpm_typ::RunPttrnMtch(char *Ptn, FILE *ofp,FILE *txtfp)
{ 
	Int4	arg,i,j,k,s,blk=0,*len,t,n,N,NumResidues;
	cma_typ	cma=0;
	dh_type dH=0;
	FILE	*fp;

// fprintf(stderr,"pattern='%s'\n",Ptn); exit(1);
	if(query_cma[0] != 0){
	  sprintf(str,"%s.cma",query_cma);
	  qcma=ReadCMSA2(str,AB);
	  if(nBlksCMSA(qcma) != 1) print_error("query cma has multiple blocks.");
	  if(!qcma) print_error("cma file read error");
	  if(LengthCMSA(1,IN_CMA[1]) != LengthCMSA(1,qcma)){
		print_error("-Q option input error.");
	  }
	}

	//************************** auxilliary modes **************************
	if(mode == 'S') return this->EmitSimSeqs();
	else if(mode=='C') return this->CntSimSeqs();
	if(PrintCMA) return this->PrintOneCMA();
	else if(PrintSets) return this->PrintAllSets();

	//************************** Read pattern **************************
	NumResidues=this->GetPttrnInfo(Ptn);
	//************************** interpret pattern **************************
	char    output_name[300],checkin[200],temp_name[300];
        for(i = 1; i <= NumResidues; i++){
#if 0
           sprintf(temp_name,"%s",output_name);
           if(i < 100){ // use only first patterns in file name.
                sprintf(output_name,"%s_%s%d",temp_name,residue_str[i],Position[i]);
           }
#endif
           if(islower(Residue[i])){
                InvertResSet[i]=TRUE;
                Residue[i]=toupper(Residue[i]);
           } else InvertResSet[i]=FALSE;
           Int4 m = strlen(residue_str[i]);
           if(InvertResSet[i]){
             sst_typ NotThese=0;
             for(j=0; j<m; j++){
                char aa=residue_str[i][j];
                sst_typ tmp_set=SsetLet(AlphaCode(aa,AB));
                NotThese= UnionSset(NotThese,tmp_set);
             }
             Residues[i] = 0;   // Empty set...
             for(j=0; j <= nAlpha(AB); j++){
                if(!MemSset(j,NotThese)){
                    sst_typ tmp_set=SsetLet(j);
                    Residues[i] = UnionSset(Residues[i],tmp_set);
                }
             }
           } else {
             Residues[i] = 0;   // Empty set...
             for(j=0; j<m; j++){
                char aa=residue_str[i][j];
                sst_typ tmp_set=SsetLet(AlphaCode(aa,AB));
                Residues[i] = UnionSset(Residues[i],tmp_set);
             }
           }
#if 0
	   Int4 r1 = ResidueCMSA(1,1,Position0[i],qcma);
	   if(!MemSset(r1,Residues[i])){
		fprintf(stderr,"%c%d in seq 1 of query set fails to match input pattern %s%d\n",
                        AlphaChar(r1,AB),Position0[i],residue_str[i],Position[i]);
                print_error("input error 2");
	   }
#endif
        }

	//************************** allocate arrays,etc. **************************
	UInt4	**MisMatch,**Match,**NumMatch,**NumMisMatch,*TotalMatch,*TotalMisMatch;
	UInt4	**Deleted,**NumDeleted,*TotalDeleted;
	Int4	*SORTED;
	char	**NAME=0;
	NEWP(MisMatch,Number +2, UInt4); NEWP(Match,Number +2, UInt4);
	NEWP(Deleted,Number +2, UInt4); NEWP(NumDeleted,Number +2, UInt4);
	NEWP(NumMatch,Number +2, UInt4); NEW(TotalMatch,Number +2, UInt4);
	NEWP(NumMisMatch,Number +2, UInt4); NEW(TotalMisMatch,Number +2, UInt4);
	NEW(TotalDeleted,Number +2, UInt4);
	NEW(SORTED,Number +2, Int4);
	NEWP(NAME,Number +2, char);
        for(i=1; i <= Number; i++){
	  NEW(MisMatch[i],NumResidues+2, UInt4);
	  NEW(Match[i],NumResidues+2, UInt4);
	  NEW(NumMatch[i],NumResidues+2, UInt4);
	  NEW(NumMisMatch[i],NumResidues+2, UInt4);
	  NEW(Deleted[i],NumResidues+2, UInt4);
	  NEW(NumDeleted[i],NumResidues+2, UInt4);
	}

	//************************** compute pattern matches **************************
	// if output file then create this here.
#if 0
	// Hpt->Put(stderr);
	for(i=1; i <= Hpt->NumBPPS(); i++)  
	// for(i=1; i < Number; i++)    // don't use Reject Set.
	{
	   fprintf(stderr,"%d:",i);
	   if(1 || Hpt->nGrpsFG(i) > 1){
	      for(j=1; j <= Hpt->nGrpsFG(i); j++){
		k=Hpt->GrpsFG(i,j); // AddSet(k,set);
	        fprintf(stderr," %d",k);
	      } fprintf(stderr,"\n");
	   }
	} 
assert(IN_CMA[0]==0);
// exit(1);
	for(i=1; i < Number; i++){    // don't use Reject Set.
	   if(Hpt->nGrpsBG(i) > 1){
	   }
	}
#endif
	FILE	*mfp=0;
        for(i=1; i <= Number; i++){
	    this->GetSeqInfo(IN_CMA[i],mfp,NumResidues,
		NumMatch[i],Match[i],TotalMatch[i],
		NumDeleted[i],Deleted[i],TotalDeleted[i], 
		NumMisMatch[i], MisMatch[i],TotalMisMatch[i]);
	}
	if(mfp) fclose(mfp);
	//************************** sort profiles by scores **************************
	h_type	HG=Histogram("Average # Matches", 0,NumResidues+1,0.5);
	dH=dheap(Number+5,4);
        for(i=1; i <= Number; i++){	// over number of families...
	    // Int4 nsq=TotalMatch[i] + TotalMisMatch[i];
	    Int4 nsq=NumSeqsCMSA(IN_CMA[i]);
	    keytyp key=(keytyp) TotalMatch[i]/ (keytyp) nsq;	// average matches per sequence.
	    if(SortByScore) insrtHeap(i,-key,dH); else insrtHeap(i,i,dH);
	    IncdHist((double)key,HG);
	    // fprintf(stderr,"%3d %10s: %.3f\n",i,NameCMSA(IN_CMA[i]),key);
	}
	for(i=0;(j=delminHeap(dH)) != 0; ){ i++; SORTED[i]=j; }
	assert(i == Number);
	Nildheap(dH);

	double d,mean=MeanHist(HG);
	double	min=MinimumHist(HG);
	// Int4 Cutoff=(Int4) ceil(min + 1.0);
	// double cutoff=(double) Cutoff;
#if 0
	Int4 Cutoff=(Int4) ceil(mean);
	double cutoff=mean;
#else
	Int4 Cutoff=(Int4) ceil(mean);
	double cutoff=1.0;
#endif
	// fprintf(stderr,"cutoff=%f; Cutoff=%d\n",cutoff,Cutoff);
	Int4 HitsHist=0;
#if 0
        for(i=1; i <= Number; i++){
	    Int4 nsq=NumSeqsCMSA(IN_CMA[i]);
	    double key=(double) TotalMatch[i]/ (double) nsq;	// average matches per sequence.
	    if(key <= cutoff){
	    	// IncdHist((double) NumResidues - (double) key,HG2);
	    	IncdHist((double) key,HG2);
		HitsHist++;
	    }
	}
#endif

#if 0
	//************************** output matches **************************
	printf("       MATCHES: ");
	for(j = NumResidues; j >= 0; j--) printf("%4d ",j); 
	printf(" Total_sq AveMatch\n");
        for(k=1; k <= Number; k++){
	   i=SORTED[k];
	   Int4 nsq=NumSeqsCMSA(IN_CMA[i]);

	   if(nsq < MinNumSeq) continue;	// afn: 11/13/09.

	   printf("%3d %10s: ",i,NameCMSA(IN_CMA[i]));
           for(j = NumResidues; j >= 0; j--){
	     hits=NumMatch[i][j];	// from all matched to none matched
	     // nsq = hits + NumMisMatch[i][j];
	     double d=100.0*(double)hits/(double)nsq;
	     if(hits==0) printf("   . ");
	     else printf("%4d ",(Int4)floor(d+0.5));
	   }
	   printf(" %8d (%.2f)\n",nsq,(double)TotalMatch[i]/(double) nsq);
	}
	printf("       MATCHES: ");
	for(j = NumResidues; j >= 0; j--) printf("%4d ",j); 
	printf(" Total_sq AveMatch\n");
	printf("\n"); printf("\f"); printf("\n");
#endif
	// PutHist(stdout,60,HG); 
	NilHist(HG);

	//************************** sort profiles by scores **************************
	dH=dheap(Number+5,4);
        for(i=1; i <= Number; i++){	// over number of families...
	    double ave_percent=0.0;
            for(j=1; j <= NumResidues; j++){
	     Int4 hits=Match[i][j];
	     // Int4 nsq=hits + MisMatch[i][j];
	     Int4 nsq=NumSeqsCMSA(IN_CMA[i]);
	     if(nsq > 0){
	       ave_percent += 100.0*(double)hits/(double)nsq;
	     } // else ignore...
	    }
	    keytyp key=(keytyp) ave_percent/(keytyp)NumResidues;
	    if(SortByScore) insrtHeap(i,-key,dH); else insrtHeap(i,i,dH);
	}
	for(i=0;(j=delminHeap(dH)) != 0; ){ i++; SORTED[i]=j; }
	assert(i == Number);
	Nildheap(dH);
	// printf("\n"); printf("\f"); printf("\n");

	//************************** output pattern **************************
	if(ofp){
	  fprintf(ofp,"       PATTERN: ");
          for(j=1; j <= NumResidues; j++) {
             sprintf(str,"%s%d",residue_str[j],Position[j]);
	     fprintf(ofp,"%6s ",str);
	  } fprintf(ofp," Total_sq AvePercent\n");
	}
	//************************** output matches per position **************************
FILE *hmfp=0;
static Int4 calls=0;
Int4 AtomNum=1;
double	space=2.6; // space=4.0;
    if(HeatMap){
	if(HM){ calls++; sprintf(tmp_str,"%s_%d",argv[1],HM); }
	else { calls++; sprintf(tmp_str,"%s_%d",argv[1],calls); }
        hmfp=open_file(tmp_str,".pdb","w"); // Heat map using pymol.
        // hmfp=open_file(argv[1],".pdb","w"); // Heat map using rasmol.
	fprintf(hmfp,"HEADER    PercentScale\n");
        for(d=0.0,j=0; j <= 40; j++){
	     fprintf(hmfp,
		      "HETATM%5d  C   XXX A   1    %8.3f%8.3f%8.3f%6.2f%6.2f      1XXX\n",
                                AtomNum,(double)(j+2)*space,4.0*space,0.0,1.0,d); 
	     d = d + 2.5; AtomNum++;
	} fprintf(hmfp,"END\n");
     }
#if 1
	if(txtfp){
	   for(j=1; j <= NumResidues; j++){
             fprintf(txtfp,"%s%d\t",residue_str[j],Position[j]);
	   } fprintf(txtfp,"node\n");
	}
#endif
	HG=Histogram("Average Percent Matches",0,101,5.0);
        for(n=1; n <= Number; n++){
	   BooLean okay=TRUE,bad=TRUE;
	   i=SORTED[n];

	   Int4 nsq=NumSeqsCMSA(IN_CMA[i]);
	   if(nsq < MinNumSeq) continue;	// afn: 11/13/09.

	   NAME[n]=NameCMSA(IN_CMA[i]);
	   sprintf(tmp_str,"%s",NameCMSA(IN_CMA[i]));
	   tmp_str[10]=0;
	   // printf("%3d %10s: ",i,NameCMSA(IN_CMA[i]));
	   if(ofp) fprintf(ofp,"%3d %10s: ",i,tmp_str);
	   double ave_percent=0.0;
if(hmfp) fprintf(hmfp,"HEADER    %s\n",NameCMSA(IN_CMA[i]));
           for(j=1; j <= NumResidues; j++)
           // for(j= NumResidues; j > 0; j--)
	   {
	     Int4 hits;
	     if(residue_str[j][0] == 'X') hits=Deleted[i][j];
	     else hits=Match[i][j];
#if 0
	     nsq=Match[i][j] + MisMatch[i][j];
#else
	     nsq=Match[i][j] + MisMatch[i][j] + Deleted[i][j];
#endif
	     if(hits==0){
		okay=FALSE;
		if(ofp) fprintf(ofp,"     . "); d=0.0;
		d=0.0;
		// continue;
	     } else {
	        d=100.0*(double)hits/(double)nsq;
		if(d < 50.0) okay=FALSE;
		if(d >= 25.0) bad=FALSE;
		ave_percent += d;
	        // k=(Int4) floor(d+0.5); printf("%6d ",k);
	        if(ofp) fprintf(ofp,"%6.1lf ",d);
	     }
	     assert(d <= 100.0);
	     if(hmfp && d > 0.1) fprintf(hmfp,
		 "HETATM%5d  C   XXX B   1    %8.3f%8.3f%8.3f%6.2f%6.2f      1XXX\n",
                                AtomNum,(double)j*space,(double)-n*space,0.0,1.0,d);
#if 1
	     if(txtfp) fprintf(txtfp,"%.2f\t",d);
#endif
	     AtomNum++;
	   }
	   if(hmfp) fprintf(hmfp,"END\n");
	   if(txtfp) fprintf(txtfp,"%s\n",NameCMSA(IN_CMA[i]));

	   double Nsq = (double)(TotalMatch[i] + TotalMisMatch[i])/(double) NumResidues;
	   double d=ave_percent/(double)NumResidues;
	   IncdHist(d,HG);
	   if(ofp){
	     if(okay) fprintf(ofp," %8.1f (%.1f)+\n",Nsq,d);
	     else if(bad) fprintf(ofp," %8.1f (%.1f)-\n",Nsq,d);
	     else fprintf(ofp," %8.1f (%.1f)\n",Nsq,d);
	   }
	} 
	if(hmfp) fclose(hmfp); 

	//************************** output pattern ************************** 
	if(ofp){	
	  fprintf(ofp,"       PATTERN: ");
          for(j=1; j <= NumResidues; j++)
          // for(j=NumResidues; j > 0; j--)
	  {
             sprintf(str,"%s%d",residue_str[j],Position[j]);
	     fprintf(ofp,"%6s ",str);
	  } fprintf(ofp," Total_sq AvePercent\n\n");
	  // printf("\f"); printf("\n");
	}
#if 1
	//************************** output pattern **************************
	if(ofp && Number==1 && NumResidues == 2){ // = number of families...
	   fprintf(ofp," PTTRN: Match Mismatch\n",i);
           for(j=1; j <= NumResidues; j++){
             fprintf(ofp,"   %s%d: %d  %d\n",
			residue_str[j],Position[j],Match[1][j],MisMatch[1][j]);
	   } printf("\n\n");
	   fprintf(ofp,"exact %d %d %d %d\n\n",
		Match[1][1],MisMatch[1][1],Match[1][2],MisMatch[1][2]);
	}
#endif
	//************************** output Histograms **************************
	// PutHist(stdout,60,HG); 
	NilHist(HG);

	//************************** output sorted list of profile names **************************
        // for(n=1; n < Number; n++){ printf("%s,",NAME[n]); } printf("%s\n\n",NAME[n]);

        for(i=1; i <= Number; i++){
		free(MisMatch[i]); free(Match[i]); free(NumMatch[i]);
		free(NumMisMatch[i]); free(Deleted[i]); free(NumDeleted[i]);
	}
	free(MisMatch); free(Match); free(NumMatch); free(TotalMatch); free(NAME);
	free(NumMisMatch); free(TotalMisMatch); free(SORTED);
	free(Deleted); free(NumDeleted); free(TotalDeleted);
	if(qcma) TotalNilCMSA(qcma);
	return 0;
}

#include <dirent.h>

static bool DirectoryExists( const char* pzPath )
// Returns true when pzPath is non-null and opendir() can open it;
// the directory handle is closed again before returning.
{
    if (!pzPath) return false;
    DIR *handle = opendir(pzPath);
    if (!handle) return false;
    (void) closedir(handle);
    return true;
}

Int4	rpm_typ::TurnIntoPyMOL(set_typ setX,char *prefix)
// Calls a pymol script to turn the pdb files into heat map pse files.
//   setX   : if non-null, only patterns whose index is a member are processed
//            and the output files are named "*_xc<n>.pse"; otherwise all
//            patterns are processed and the files are named "*_hm<n>.pse".
//   prefix : if non-null, replaces argv[1] as the prefix of the .pse files.
// Steps:
//   1. write <argv[1]>_<n>.txt: PyMOL label commands for the colour scale and
//      for each "<LETTERS><int>" item of pattern n;
//   2. write <argv[1]>_script.py, which loads each <argv[1]>_<n>.pdb, applies
//      the label file and saves the .pse session;
//   3. run "pymol -cqr" on the script; on success remove the temporary files
//      and move the .pse and heat-map .txt files into <argv[1]>_heatmaps.
// Returns the exit status of the pymol command (0 on success).
// NOTE(review): argv[1] and prefix are pasted unquoted into shell commands.
{
	Int4	i,j,s,status=0;
	double	x;
	FILE	*pfp;

	//============= one label file per pattern ================
	for(i=1; i <= NumPttrns; i++){
	    if(setX && !MemberSet(i,setX)) continue;
	    sprintf(str,"%s_%d",argv[1],i);
	    pfp=open_file(str,".txt","w");
	    // labels for the colour scale (atoms 1,11,21,31,41 of the scale row).
	    fprintf(pfp,"label id 1,\"0%c\"\n",'%');
	    fprintf(pfp,"set label_position, [-0.41,3,0],id 1\n");
	    fprintf(pfp,"label id 11,\"25%c\"\n",'%');
	    fprintf(pfp,"set label_position, [0.38,3,0],id 11\n");
	    fprintf(pfp,"label id 21,\"50%c\"\n",'%');
	    fprintf(pfp,"set label_position, [0.38,3,0],id 21\n");
	    fprintf(pfp,"label id 31,\"75%c\"\n",'%');
	    fprintf(pfp,"set label_position, [0.38,3,0],id 31\n");
	    fprintf(pfp,"label id 41,\"100%c\"\n",'%');
	    fprintf(pfp,"set label_position, [0.38,3,0],id 41\n");

	    char *S = HptPttrn[i];
	    // GetHptPttrns() leaves the entry null when a column has no "-P="
	    // argument; report that instead of dereferencing a null pointer.
	    if(S == 0) print_error("TurnIntoPyMOL() input err 0");
	    // one label per pattern item, starting at atom id 42; the vertical
	    // label offset cycles through 2.5, 5, 7.5, 10.
	    for(x=2.5,j=42; S[0]; j++,x+=2.5){
		if(x > 10) x=2.5;
		if(sscanf(S,"%[A-Z]%d,",tmp_str,&s) == 2){
		    fprintf(pfp,"label id %d,\"%s%d\"\n",j,tmp_str,s);
		    fprintf(pfp,"set label_position, [0,%.1lf,0],id %d\n",x,j);
		    while(S[0] != ','){ S++; if(S[0]==0) break; }
		    while(S[0] == ',') S++;  
		} else if(sscanf(S,"%[A-Z]%d",tmp_str,&s) == 2){
		    fprintf(pfp,"label id %d,\"%s%d\"\n",j,tmp_str,s);
		    fprintf(pfp,"set label_position, [0,%.1lf,0],id %d\n",x,j);
		    break;
		} else print_error("TurnIntoPyMOL() input err 1");
	    } fprintf(pfp,"\n");
	    fclose(pfp);
	} 

	//============= the PyMOL driver script ================
	pfp=open_file(argv[1],"_script.py","w");
	fprintf(pfp,"\nfrom pymol import cmd\n\n");
	fprintf(pfp,"for i in range(1, %d+1):\n",NumPttrns);
	for(i=1; i <= NumPttrns; i++){
	   if(setX && !MemberSet(i,setX)){
		fprintf(pfp,"     if i==%d:\n          continue\n",i);
	   }
	}
	fprintf(pfp,"     cmd.reinitialize()\n");
	fprintf(pfp,"     cmd.load('%s_%cd.pdb' %c i)\n",argv[1],'%','%');
	fprintf(pfp,"     cmd.show(\"spheres\",\"all\")\n");
	fprintf(pfp,
	    "     cmd.spectrum(\"b\",\"blue_green_yellow_orange_red\",\"all\")\n");
	fprintf(pfp,"     cmd.do('@%s_%cd.txt' %c i)\n",argv[1],'%','%');
	fprintf(pfp,"     cmd.sync()\n");
	fprintf(pfp,"     cmd.center(\"all\")\n");
	fprintf(pfp,"     cmd.zoom(\"all\")\n");
	fprintf(pfp,"     cmd.set(\"label_size\",\"-2\",\"all\")\n");
	fprintf(pfp,"     cmd.set(\"specular\",\"off\")\n");
 	if(setX){
	  if(prefix) fprintf(pfp,"     cmd.save('%s_xc%cd.pse' %c i)\n\n",prefix,'%','%');
	  else fprintf(pfp,"     cmd.save('%s_xc%cd.pse' %c i)\n\n",argv[1],'%','%');
	} else {
	  if(prefix) fprintf(pfp,"     cmd.save('%s_hm%cd.pse' %c i)\n\n",prefix,'%','%');
	  else fprintf(pfp,"     cmd.save('%s_hm%cd.pse' %c i)\n\n",argv[1],'%','%');
	}
	fclose(pfp);

	//============= run PyMOL ================
	// system() runs the command with /bin/sh; the csh-style ">& /dev/null"
	// used before is rejected by strictly POSIX shells, so use the portable
	// form to discard both stdout and stderr.
	sprintf(str,"pymol -cqr %s_script.py > /dev/null 2>&1\n",argv[1]);
	Int4 rtn=system(str);
	if(rtn ==0){
	  // remove the intermediate files.
	  for(i=1; i <= NumPttrns; i++){
	   sprintf(str,"%s_%d.pdb",argv[1],i); remove(str);
	   sprintf(str,"%s_%d.txt",argv[1],i); remove(str);
	  } sprintf(str,"%s_script.py",argv[1]); remove(str);

	  // (re)create the output directory; an existing one is kept only for
	  // the cross-conserved (setX) call, which is made afterwards.
	  sprintf(str,"%s_heatmaps",argv[1]);
	  if(DirectoryExists(str)){
	    if(setX == 0){	// create cross conserved afterwards
	     sprintf(str,"\\rm -rf  %s_heatmaps\n",argv[1]);
	     status=system(str);
	     if(status != 0) print_error("failed to remove existing heatmap directory");
	     sprintf(str,"mkdir %s_heatmaps\n",argv[1]);
	     status=system(str);
	     if(status != 0) print_error("failed to create heatmap directory");
	    }
	  } else {
	     sprintf(str,"mkdir %s_heatmaps\n",argv[1]);
	     status=system(str);
	     if(status != 0) print_error("failed to create heatmap directory");
	  }

	  // move the .pse files ...
	  if(setX){
	    if(prefix) sprintf(str,"\\mv -f %s_xc*.pse %s_heatmaps\n",prefix,argv[1]);
	    else sprintf(str,"\\mv -f %s_xc*.pse %s_heatmaps\n",argv[1],argv[1]);
	  } else {
	    if(prefix) sprintf(str,"\\mv -f %s_hm*.pse %s_heatmaps\n",prefix,argv[1]);
	    else sprintf(str,"\\mv -f %s_hm*.pse %s_heatmaps\n",argv[1],argv[1]);
	  }
	  status=system(str);
	  if(status != 0) print_error("failed to fill heatmap directory");

	  // ... and the matching .txt tables.
	  if(setX){
	    if(prefix) sprintf(str,"\\mv -f %s_xc*.txt %s_heatmaps\n",prefix,argv[1]);
	    else sprintf(str,"\\mv -f %s_xc*.txt %s_heatmaps\n",argv[1],argv[1]);
	  } else {
	    if(prefix) sprintf(str,"\\mv -f %s_hm*.txt %s_heatmaps\n",prefix,argv[1]);
	    else sprintf(str,"\\mv -f %s_hm*.txt %s_heatmaps\n",argv[1],argv[1]);
	  }
	  status=system(str);
	  if(status != 0) print_error("failed to fill heatmap directory");
	} else fprintf(stderr,"Failed to create heat map *.pse files\n");
	return rtn;
}

Int4	rpm_typ::PutHeatMaps(char *prefix)
// Produce one heat map per Hpt pattern.
//   1. Verify that the hpt and the cma files agree (same number of sets,
//      same names in the same order) and that all cma lengths are equal.
//   2. For each pattern p, run RunPttrnMtch() with HeatMap forced on, writing
//      the tab-separated table to <argv[1]>_hm<p>.txt.
//   3. Convert the results with TurnIntoPyMOL(0,prefix), then call RunMtchXCS().
// Returns the value returned by TurnIntoPyMOL().
{
	cma_typ	*aln=IN_CMA;

	//============= Check input for consistency ================
	if(Number != Hpt->NumSets()) print_error("hpt and cma files inconsistent");
	const Int4 refLen=LengthCMSA(1,aln[1]);
	for(Int4 s=1; s <= Number; s++){
	   if(refLen != LengthCMSA(1,aln[s]))
		print_error("cma file lengths are inconsistent");
	   if(strcmp(NameCMSA(aln[s]),Hpt->SetName(s)) != 0) 
		print_error("hpt and cma files are inconsistent");
	}

	//============= One match table + pdb file per pattern ================
	const BooLean savedHeatMap=HeatMap;	// restored after the loop.
	HeatMap=TRUE;
	for(Int4 p=1; p <= NumPttrns; p++){
		sprintf(tmp_str,"%s_hm%d",argv[1],p);
		FILE *txtfp=open_file(tmp_str,".txt","w");
		this->RunPttrnMtch(HptPttrn[p],NULL,txtfp); 
		fclose(txtfp);
	}
	HeatMap=savedHeatMap;

	//============= Convert to PyMOL sessions ================
	const Int4 rtn=this->TurnIntoPyMOL(0,prefix);
	this->RunMtchXCS();
	return rtn;
}

Int4     rpm_typ::ParsePttrns(char *Str,set_typ set)
/** WARNING: index starts at 0 not 1 as for ParseIntegers() **/
// NOTE(review): nothing in this routine is indexed from 0; the warning above looks
// inherited from another parser -- confirm or remove.
//
// Parse an argument of the form "-P=<pattern>[,<pattern>...]", where each <pattern>
// is a run of uppercase letters immediately followed by an integer (e.g. "-P=YF12,K45").
//   Str : the argument string to examine.
//   set : cleared on entry; the integer of every parsed pattern is added to it.
// Returns the number of patterns parsed; 0 when Str does not start with "-P=".
// A malformed pattern is reported through print_error().
{
	Int4	n=0;		// starts at 0 so that a non "-P=" argument yields a defined result.
	int	pos;		// plain int so that it matches sscanf's %d exactly.
	char	*s,ptrn[26];

	ClearSet(set);
	if(strncmp(Str,"-P=",3) != 0) return n;
	s=Str+3;		// first character after "-P=".
	while(1){
	   // The field width keeps the letter run inside ptrn[] (25 chars + '\0'); a longer
	   // run makes %d fail on the next letter and is therefore reported as an error.
	   // A single format handles both "XX12," and a final "XX12": sscanf()'s return
	   // value counts assignments only, so a trailing ',' in the format adds nothing.
	   if(sscanf(s,"%25[A-Z]%d",ptrn,&pos) == 2){ n++; AddSet(pos,set); }
	   else print_error("ParsePttrns() input error");
	   // Move to just past the next ','; reaching the terminator ends the parse.
	   while(s[0] != ',' && s[0] != 0) s++;
	   if(s[0] == 0) return n;
	   s++;
	}
}

Int4	rpm_typ::OutputCrossConserved( )
// Look for alignment columns, outside the hpt pattern positions, at which a residue
// set is conserved in several (but not nearly all) of the input alignments, and run
// RunPttrnMtch() on each such "<residues><column>" pattern.
//
// Steps:
//   1. check that the hpt and the cma alignments agree (count, length, names);
//   2. echo the hyperpartition matrix to stderr;
//   3. collect, per hpt column, the positions named by its first "-P=" argument;
//   4. for every alignment column i and residue set k, record which alignments have
//      >= 80% of their sequences matching set k at column i;
//   5. for every column not used by any hpt pattern, report and run each residue set
//      that is matched by at least 2 alignments but by fewer than 90% of them.
// Progress and diagnostics go to stderr.  Always returns 0.
// NOTE(review): none of the sets/arrays allocated here are released before returning.
{
	Int4	i,j,k,x,n,sq,N,nPttrns,Len;
	cma_typ *cma=IN_CMA;

	// Residue sets to test, 1-based and 0-terminated.
	// "Medium" stringency (based on chemical-geometrical mimicry; e.g., Q can look
	// like part of H if in the right conformation).
	// see: rst_typ *rst=new rst_typ('M');
        const char *Pttrn[] = { 0,
	   "C","G","GA","A","AS","S","ST","SN","T","N","ND","NH","NQ","NQH","NDE",
           "D","DE","DEQ","E","EQ","EDQ","Q","QE","QK","QR","QH",
	   "QKR","K","KR","R","RQ","RK","H","HY",
           "W","WY","WYF","Y","YF","YHW","YHF","YHWF","F","FL",
           "V","VI","VL","VIL","VILM","I","IL",
           "L","LM","M","P",0,0,0};

	//============= Check input for consistency ================
	for(k=1,nPttrns=0; Pttrn[k] != 0; k++) nPttrns++;
	if(Number != Hpt->NumSets()) print_error("hpt and cma files inconsistent");
	Len=LengthCMSA(1,cma[1]);
	for(j=1; j <= Number; j++){
	   if(Len != LengthCMSA(1,cma[j])){
		print_error("cma file lengths are inconsistent");
	   }
	   if(strcmp(NameCMSA(cma[j]),Hpt->SetName(j)) != 0) 
		print_error("hpt and cma files are inconsistent");
	   fprintf(stderr,"%d. '%s' == '%s'\n",
		j,NameCMSA(cma[j]),Hpt->SetName(j));
	}

	//============= Echo the hyperpartition (one row per set) ================
	for(Int4 row=1; row <= Hpt->NumSets(); row++){
	   for(Int4 col=1; col <= Hpt->NumBPPS(); col++){
	     char state=Hpt->RtnHyperPartition(row,col);
	     fprintf(stderr,"%c",state);
	   } fprintf(stderr,"\n");
	} fprintf(stderr,"\n\n");

	//============= Get pattern info from Hpt Args ================
	set_typ	*ptrn; NEW(ptrn,Hpt->NumBPPS()+3,set_typ);
	set_typ SetO= MakeSet(Len+3); ClearSet(SetO);
	for(i=1; i <= Len; i++) AddSet(i,SetO);		// SetO starts as all columns 1..Len.
	for(Int4 col=1; col <= Hpt->NumBPPS(); col++){
	    ptrn[col]=MakeSet(Len+3); 
	    Int4 nArgs=Hpt->nArg(col);
	    char **args=Hpt->Argv(col);
	    // Only the first argument that yields patterns is used for this column.
	    for(i=0; i < nArgs; i++){
	      n=ParsePttrns(args[i],ptrn[col]);
	      if(n > 0){
		fprintf(stderr,"n=%d\n",n);
		fprintf(stderr,"Pttrn=%s\n",args[i]);
		PutSet(stderr,ptrn[col]);
		break;
	      }
	    }
	} fprintf(stderr,"\n");
	fprintf(stderr,"NumCols=%d; NumCMAs=%d\n",Hpt->NumBPPS(),Number);

	//============= Get Union & Intersection of pattern positions ================
	// USet = positions used by any hpt column; XSet = positions used by all of them.
	// NOTE(review): assumes UnionSet()/IntersectSet3() update their first argument
	// in place -- confirm against the set library.
	set_typ USet = CopySet(ptrn[1]); 
	set_typ XSet = CopySet(ptrn[1]); 
	for(Int4 col=2; col <= Hpt->NumBPPS(); col++){
	    UnionSet(USet,ptrn[col]);
	    IntersectSet3(XSet,ptrn[col]);
	}
	PutSet(stderr,USet);
	IntersectNotSet(SetO,USet);	// presumably leaves the columns not in USet.
	PutSet(stderr,SetO);
	PutSet(stderr,XSet);

	//============= Allocate sets for pattern matches ================
	// set[i][k] = alignments (1..Number) in which residue set k is conserved at column i.
	set_typ **set; 
	NEWP(set,Len+3,set_typ);
	for(i=1; i <= Len; i++){
	    NEW(set[i],nPttrns+3,set_typ);
	    for(k=1; k <= nPttrns; k++){
		set[i][k]=MakeSet(Number + 3); ClearSet(set[i][k]);
	    }
	}

	//============= Create sets of nodes for pattern matches ================
	// The residues of a column are tallied once per alignment, so each residue is
	// read once instead of once per letter of every residue set.
	const Int4	blk=1;
	unsigned char	res,code;
	char		let;
	for(i=1; i <= Len; i++){
	   fprintf(stderr,"site %d out of %d \n",i,Len);
	   for(j=1; j <= Number; j++){
	      N=NumSeqsCMSA(cma[j]);
	      Int4 count[256]={0};		// count[r] = # sequences with residue code r.
	      for(sq=1; sq <= N; sq++){
		 res=ResidueCMSA(blk,sq,i,cma[j]);
		 count[res]++;
	      }
	      for(k=1; k <= nPttrns; k++){
		 Int4 hits=0;
		 for(x=0; (let=Pttrn[k][x]); x++){
		    code=AlphaCode(let,AB);
		    hits+=count[code];
		 }
		 // N == 0 gives NaN here, which fails the test (no match recorded).
		 if((double)hits/(double)N >= 0.80) AddSet(j,set[i][k]);
	      }
	   }
	}

	//============= output results for pattern matches ================
	for(i=1; i <= Len; i++){
	    if(MemberSet(i,USet)) continue;	// skip columns already covered by hpt patterns.
	    for(k=1; k <= nPttrns; k++){
		n=CardSet(set[i][k]);
		const double frac=(double)n/(double)Number;
		// Shared by at least two alignments, yet not (nearly) universal.
		if(n >= 2 && frac < 0.90){
		   fprintf(stderr,"%s%d %d\n",Pttrn[k],i,n);
		   char Strn[100];
		   sprintf(Strn,"%s%d",Pttrn[k],i);
		   RunPttrnMtch(Strn); 
		}
	    }
	}
	return 0;
}

