/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "cmc_typ.h"
#include "blosum62.h"
#include "rst_typ.h"
#include "swt_typ.h"

#if 0	//**********************************************************************
Syntax: <gi>(begin..end) <category>[;<category>] [(begin..end) <category>[;<category>]]

Example input:
362794	(25..135) Set35=-1.34; Set3=3.4
9117264	(5..234) Set35=2.34 (8..230) Set15=2.01

Space characters may be present but are unnecessary and can be easily parsed out.

> sed 's/[\t ]//g' example
362794(25..135)Set35=-1.34;Set3=3.4
9117264(5..234)Set35=2.34(8..230)Set15=2.01

We can unambiguously parse out any fields we like with a command like the following:

> sed 's/[\t ]//g' example | grep '[);]Set3=' | sed 's/(.*[);]Set3=/ /'
362794 3.4

#endif	//**********************************************************************
void    cmc_typ::PutSeqContrib(FILE *fp,FILE *hfp,FILE *sfp,FILE *pfp)
// Report how much each sequence contributes to the total LPR.
//   fp  = contribution file: one line per sequence consisting of the sequence
//         id, " (start:end)" and one or more " <name>=<value>" fields (see the
//         syntax notes preceding this function).
//   hfp = histogram output, one histogram per non-failed, non-empty set;
//         stdout is used when hfp is null.
//   sfp = sma file pointer; when non-null, for every set whose type is not '?'
//         the highest contributing sequences (contribution >= 0 and >= the
//         histogram mean, at most one per phylum, at most max_num_phyla) are
//         written with PutInSetCMSA().
//   pfp = receives, per set, the sequences with a positive contribution for
//         which PdbSeq() is true.
//         NOTE(review): pfp is passed to PutInSetCMSA() without a null check;
//         confirm that callers always supply a valid stream.
// A sequence's contribution is CalcTotalLPR() with the sequence in its set
// minus CalcTotalLPR() after removing it from the set and assigning it to
// partition 'o' in every category; the sequence is put back afterwards.
{
     Int4	*NumSq,n,g,sq,num_sq = NumSeqsCMSA(TrueMainCMA);
     char        **HP=HyperPartition,str[300];
     double	*sqLLR,*Total;
     NEW(sqLLR,num_sq+3,double); NEW(NumSq,Hpt->NumSets()+3,Int4);
     NEW(Total,Hpt->NumSets()+3,double);
     if(hfp==0) hfp=stdout;

#if 1	// output 'interesting' sequences for each set.
     Int4   NumPhyla=0,max_num_phyla=50;
     Int4   *phyla=GetPhylaSeqSet(0,&NumPhyla,TrueDataCMSA(TrueMainCMA));
     char   *NameSaved=AllocString(NameCMSA(TrueMainCMA));
     h_type *HG=0; NEW(HG,Hpt->NumSets() +2,h_type);
     for(g=1; g < Hpt->NumSets(); g++){	// don't look at last (random) set!!
	if(IsFailedSet[g]) continue;
	if(CardSet(GrpSet[g]) == 0){ continue; } // Eventually print consensus seqs for these?
	    // May occur for internal nodes.
	// Bounded write: set names are not limited in length by anything visible here.
	snprintf(str,sizeof(str),"sequence contributions to '%s' LLR",Hpt->ElmntSetName(g));
	HG[g]=Histogram(str,0,100,1.0);
	dh_type	dH=0;
	set_typ sSet=0,pdbSet;
	if(sfp && Hpt->TypeOfSet(g) != '?'){
		dH=dheap(num_sq+2,4);
		sSet=MakeSet(num_sq+1); ClearSet(sSet);
	}
	pdbSet=MakeSet(num_sq+1); ClearSet(pdbSet);
	for(sq=1; sq <= num_sq; sq++){
	   if(MemberSet(sq,GrpSet[g])){
		NumSq[g]++;
		double lpr0 = CalcTotalLPR(0,FALSE);
		// Temporarily take sq out of the analysis...
		DeleteSet(sq,GrpSet[g]);
		for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition('o',sq); }
		double lpr = CalcTotalLPR(0,FALSE);
		sqLLR[sq]=lpr0-lpr; Total[g]+=sqLLR[sq];
		IncdHist(sqLLR[sq], HG[g]);
		if(sqLLR[sq] > 0 && PdbSeq(TrueSeqCMSA(sq,TrueMainCMA))){ AddSet(sq,pdbSet); }
		// Keys are negated so that the min-heap yields the largest contribution first.
		if(dH){ insrtHeap(sq,-(keytyp)sqLLR[sq],dH); }
		// ...and put it back where it was.
		AddSet(sq,GrpSet[g]);
		for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition(HP[g][n],sq); }
	   }
	}
	double mean=MeanHist(HG[g]);
	if(sfp && dH){	// then output a sma file.
	   BooLean *Printed=0; NEW(Printed,NumPhyla+3,BooLean);
	   for(Int4 I=0; I < max_num_phyla && !emptyHeap(dH); ){
	      double d=-minkeyHeap(dH);
	      if(d < 0 || d < mean) break;
	      // Pop outside of assert(): with NDEBUG defined the assert argument is
	      // not evaluated, which would leave the heap untouched and loop forever.
	      sq=delminHeap(dH);
	      assert(sq != 0);
	      if(!Printed[phyla[sq]]) {
		AddSet(sq,sSet);
		// fprintf(stderr,"Seq Num: %d\n",sq);
		Printed[phyla[sq]]=TRUE; I++;
	      }
	   } free(Printed);
	   ReNameCMSA(Hpt->ElmntSetName(g),TrueMainCMA);
	   PutInSetCMSA(sfp,sSet,TrueMainCMA);
	}
	if(dH) Nildheap(dH);
	if(sSet) NilSet(sSet);
	ReNameCMSA(Hpt->ElmntSetName(g),TrueMainCMA);
	if(CardSet(pdbSet) > 0) PutInSetCMSA(pfp,pdbSet,TrueMainCMA);
	NilSet(pdbSet);
     }
     ReNameCMSA(NameSaved,TrueMainCMA); free(NameSaved); free(phyla);

     free(NumSq); free(Total);
     for(sq=1; sq <= num_sq; sq++){
	e_type E=TrueSeqCMSA(sq,TrueMainCMA);
	Int4 s=FakeToRealCMA(sq,1,TrueMainCMA);
	Int4 e=FakeToRealCMA(sq,LengthCMSA(1,TrueMainCMA),TrueMainCMA);
	PutSeqID(fp,E); fprintf(fp," (%d:%d)",s,e);
	// The last set is included here: its members are reported as "Rejected".
	for(g=1; g <= Hpt->NumSets(); g++){
	  if(GrpSet[g] && MemberSet(sq,GrpSet[g])){
	    if(IsTreeHpt==FALSE){ 	// Old, single category output.
		if(IsFailedSet[g]){ fprintf(fp," in_failed_node=0.0"); continue; }
		if(g == Hpt->NumSets()){
		  fprintf(fp," Rejected=0.0");
		} else {
		  char *NameSet=Hpt->ElmntSetName(g);
		  fprintf(fp," %s=%.3f",NameSet,sqLLR[sq]);
		} break; // each sq is a member of only one set...
	    } else { 			// New, multiple category output.
	     assert(Hpt->NumSets() == Hpt->NumBPPS()+1);
	     if(IsFailedSet[g])fprintf(fp," in_failed_node=0.0");
	     if(g == Hpt->NumSets())fprintf(fp," Rejected=0.0");
	     else {
		double	*Map0; NEW(Map0,Hpt->NumBPPS() +3, double);
		// Only the per-category Map[] values are needed here, not the totals.
		CalcTotalLPR(0,FALSE);		// Map[] with sq present.
		for(n=1; n<= Hpt->NumBPPS(); n++){ Map0[n]=Map[n]; }
		DeleteSet(sq,GrpSet[g]);
		for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition('o',sq); }
		CalcTotalLPR(0,FALSE);		// Map[] with sq omitted.
		for(n=1; n<= Hpt->NumBPPS(); n++){
		   if(HP[g][n] == '+'){		// set g is a child of or is set n.
			double d=Map0[n]-Map[n];
			char *NameSet=Hpt->ElmntSetName(n);	// assume that n corresponds to g.
			fprintf(fp," %s=%.3f",NameSet,d);
			if(g > n) fprintf(fp,";");
		   }
		}
		AddSet(sq,GrpSet[g]);
		for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition(HP[g][n],sq); }
		free(Map0);
	     } break; // each sq is a member of only one set...
	    }
	  }
	} fprintf(fp,"\n");
      }
#else	// OLD version
     for(sq=1; sq <= num_sq; sq++){
        for(g=1; g < Hpt->NumSets(); g++){	// don't look at last (random) set!!
    	   if(MemberSet(sq,GrpSet[g])){
	   	if(IsFailedSet[g]) continue;
		NumSq[g]++;
		double lpr0 = CalcTotalLPR(0,FALSE);
		DeleteSet(sq,GrpSet[g]); 
		for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition('o',sq); }
		double lpr = CalcTotalLPR(0,FALSE);
		sqLLR[sq]=lpr0-lpr; Total[g]+=sqLLR[sq];
		AddSet(sq,GrpSet[g]); 
		for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition(HP[g][n],sq); }
	   } 
    	}
     }
     if(hfp){
	NEW(HG,Hpt->NumSets() +2,h_type);
	for(g=1; g < Hpt->NumSets(); g++){
	   if(IsFailedSet[g]) continue;
	   if(NumSq[g]==0) continue;  // May occur for internal nodes.
	   sprintf(str,"sequence contributions to '%s' LLR",Hpt->ElmntSetName(g));
	   double mean=Total[g]/(double)NumSq[g];
	   Int4 s= (Int4) floor(mean - 10); if(s < -4) s = -4;
	   Int4 e= (Int4) ceil(mean + 12);
	   HG[g]=Histogram(str,s,e,0.5);
	}
     } free(NumSq); free(Total);
     for(sq=1; sq <= num_sq; sq++){
	e_type E=TrueSeqCMSA(sq,TrueMainCMA);
	Int4 s=FakeToRealCMA(sq,1,TrueMainCMA);
	Int4 e=FakeToRealCMA(sq,LengthCMSA(1,TrueMainCMA),TrueMainCMA);
	Int4 os=OffSetSeq(E);
	PutSeqID(fp,E); fprintf(fp," (%d:%d)",s,e);
	Int4 Mmbrshps=0;
        for(g=1; g <= Hpt->NumSets(); g++){	// don't look at last (random) set!!
    	   if(GrpSet[g] && MemberSet(sq,GrpSet[g])){
	   	if(IsFailedSet[g]){ fprintf(fp," in_failed_node=0.0"); continue; }
		if(g == Hpt->NumSets()){
		  fprintf(fp," Rejected=0.0");
		} else {
		  char *NameSet=Hpt->ElmntSetName(g);
		  fprintf(fp," %s=%.3f",NameSet,sqLLR[sq]);
		  if(HG){ IncdHist(sqLLR[sq], HG[g]); }
		} break;
	   }
    	} fprintf(fp,"\n");
     }
#endif
     // Print and release the per-set histograms.
     if(HG){ 
	for(g=1; g < Hpt->NumSets(); g++){
		if(HG[g]){ PutHist(hfp,60,HG[g]); NilHist(HG[g]); }
	} free(HG);
     } free(sqLLR);
}

void	cmc_typ::PutMapContributions(FILE *fp)
// put the contributions to the Map for each set and intermediate node.
{
     assert(IsTreeHpt);	// make sure this is a tree...
     Int4	n,g,sq,num_sq = NumSeqsCMSA(MainCMA);
     char	x,**HP=HyperPartition;

     for(g=1; g < Hpt->NumSets(); g++){	
	assert(g <= Hpt->NumBPPS());
	if(IsFailedSet[g]) continue;
	Int4 card=CardSet(GrpSet[g]);
	fprintf(fp,"%d.%s(%d):",g,Hpt->ElmntSetName(g),card);
        //for(n=1; n < g; n++){ 
        for(n=1; n <= g; n++){ 
	   if(IsFailedBPPS[n]) continue;  
	   if(HP[g][n] == '+'){
		// assert(Hpt->TypeOfSet(n) == '?'); // this should correspond to an internal node.
		if(Hpt->TypeOfSet(n) != '?') continue; // this should correspond to an internal node.
	        double lpr=CalcTotalLPR(0,FALSE);
		if(n==1) x='-';	// Put in background partition.
		else x='o';	// Omit from the analysis.
		// Map[n];
		Hpt->Change('+',x,g,n); 
		for(sq=1; sq <= num_sq; sq++){ if(MemberSet(sq,GrpSet[g])){ che[n]->SetPartition(x,sq); } }
		ReSetRelations( );
		double lpr0=CalcTotalLPR(0,FALSE);
		Hpt->Change(x,'+',g,n); 
		for(sq=1; sq <= num_sq; sq++){ if(MemberSet(sq,GrpSet[g])){ che[n]->SetPartition('+',sq); } }
		ReSetRelations( );
		// fprintf(fp," (%d)%g -%g = %.2g",n,lpr,lpr0,lpr-lpr0);
		fprintf(fp," (%d) %.2f",n,lpr-lpr0);
	   }
	} fprintf(fp,"\n");
     } fprintf(fp,"\n");
}

void	cmc_typ::PutHyperPartition(FILE *fp)
// finding out why some GoldStd sequences are getting pushed out.
{
     Int4	n,g;
     char        **HP=HyperPartition;
#if 1
     if(fp==0) fp=outfp; 
     if(fp==0) fp=stdout;
#endif

     if(Hpt->NumBPPS() <= 25){ 
        fprintf(fp,"\n "); for(n=1; n<= Hpt->NumBPPS(); n++){ fprintf(fp,"="); }
        fprintf(fp," HyperPartition: ");
        for(n=1; n<= Hpt->NumBPPS(); n++){ fprintf(fp,"="); } fprintf(fp,"\n");
#if 0
        fprintf(fp," ");
        for(n=1; n<= Hpt->NumBPPS(); n++){ fprintf(fp,"%2d ",n); } fprintf(fp,"\n");
#endif
     } else {
	fprintf(fp,"\nHyperPartition:\n");
	for(n=1; n<= Hpt->NumBPPS(); n++){
	   if(n % 10 == 0) fprintf(fp,"|"); else if(n % 5 == 0) fprintf(fp,"+"); 
	   else fprintf(fp,"-"); 
     	} fprintf(fp,"\n");
     }
     for(g=1; g<= Hpt->NumSets(); g++){	
        // fprintf(fp,"%3d: ",g);
	Int4 NumZero=0;
	// should eventually set all failed columns to 'o'.
        for(n=1; n<= Hpt->NumBPPS(); n++){
	   if(Hpt->NumBPPS() <= 25){ fprintf(fp," "); }
	   if(IsFailedSet[g] || IsFailedBPPS[n]){ NumZero++; fprintf(fp," "); }
	   else if(HP[g][n] != 0 && HP[g][n] != 'o'){ fprintf(fp,"%c",HP[g][n]); }
	   // else { NumZero++; fprintf(fp,"o"); } // afn: 8/23/2022.
	   else { NumZero++; fprintf(fp," "); }
	   if(Hpt->NumBPPS() <= 25){ fprintf(fp," "); }
	} 
	Int4 card=CardSet(GrpSet[g]);
	char c=' '; if(IsFailedSet[g]) c='x'; else c=' '; 
	if(strcmp("Random",Hpt->ElmntSetName(g)) == 0){
		fprintf(fp," Rejected (%d)\n",card-NumRandom);
	} else if(Hpt->TypeOfSet(g) == '?'){
		fprintf(fp," %d.%s {%d} %c\n",g,Hpt->ElmntSetName(g),card,c);
	} else fprintf(fp," %d.%s (%d) %c\n",g,Hpt->ElmntSetName(g),card,c);
	// else fprintf(fp," %s (%d)\n",Hpt->ElmntSetName(g),CardSet(GrpSet[g]));
	if(!IsFailedSet[g] && NumZero == Hpt->NumBPPS())
		print_error("Fatal: seq grp absent from all partitions");
     }
     if(Hpt->NumBPPS() <= 25){
	fprintf(fp," === Contrast alignment statistics: ===\n"); 
     } else {
	// fprintf(fp,"     ");
	for(n=1; n<= Hpt->NumBPPS(); n++){
	  if(n % 10 == 0) fprintf(fp,"|"); else if(n % 5 == 0) fprintf(fp,"+"); 
	  else fprintf(fp,"-"); 
	} // fprintf(fp,"\n\n Column LPRs:\n");
	fprintf(fp,"\n\n ==== Contrast alignment statistics: ===\n"); 
     }
     double lpr,d=CalcTotalLPR(0,FALSE);
     Int4 numFailedCols=0;
     fprintf(fp,"col.   FG_sqs   BG_sqs      LPR    nps   npws  #cols");
     if(!IsTreeHpt) fprintf(fp,"  name\n"); else fprintf(fp,"\n");
     for(n=1; n<= Hpt->NumBPPS(); n++){
#if 0	// DEBUG...
	double *d=che[n]->SubMap( );
	if(d[0] < 0.0){ 	// might not be due to rst_typ set size!!!
		che[n]->PutSubLPRs(stderr);
		fprintf(stderr,"WARNING: the set size computed for #%d rho is too small.\n",n);
		fprintf(stderr,"some pattern residue set(s) is/are larger than expected.\n");
		fprintf(stderr,"see GetRhoCategoricalPriors( ) in cmc_init.cc file.\n");
		//assert(d[0] >= 0.0);
	} // need to see why this is happening...
	// only seems to happen when reading a cmc checkpoint file.
#endif
	if(IsTreeHpt){
	   assert(n < Hpt->NumSets());
	   fprintf(fp,"%3d: ",n); 
	   lpr=che[n]->PutInfoShort(fp); 
	   // fprintf(fp,"%s\n",Hpt->ElmntSetName(n)); 
	} else { 
		fprintf(fp,"%3d: ",n); 
		lpr=che[n]->PutInfoShort(fp,FALSE); 
		fprintf(fp," %s\n",Hpt->GrpName(n));
	}
	if(lpr <= 0.0 || che[n]->NumColumns( ) < 1) numFailedCols++;
     } 
	fprintf(fp," ====== Total LPR = %.1lf (%.1f K) (%d/%d failed) ======\n\n",
			d,temperature,numFailedCols,Hpt->NumBPPS());
}

Int4	cmc_typ::RtnNumFailed()
// Return the number of BPPS categories that count as failed: those whose
// CalcLLR() value is not positive or that have fewer than one column.
{
     Int4	Failed=0;

     for(Int4 n=1; n<= Hpt->NumBPPS(); n++){
	if(IsTreeHpt){ assert(n < Hpt->NumSets()); }
	// The same call is made for tree and non-tree hyperpartitions.
	const double	llr=che[n]->CalcLLR( );
	if(llr <= 0.0 || che[n]->NumColumns( ) < 1){ Failed++; }
     }
     return Failed;
}

double  cmc_typ::Put(BooLean put_rtf)
// Write the main output files and return the summed Map[n] over all categories
// that were output (not failed, flagged by Hpt->OutputThis(), Map[n] > 0).
//   put_rtf = when true, PutRTF(PutIntermediateFiles) is called first.
// Files created (names are infile plus the given extension):
//   .lpr       pattern information for each category output,
//   _ptn.lpr   CDTree pattern file (tree hyperpartitions only),
//   .info      per-category information,
//   .pttrns    best patterns,
//   _new.mma   one alignment per non-failed, non-empty set,
//   _set<g>.seq the member sequences of set g (only with PutIntermediateFiles).
{
     if(put_rtf) PutRTF(PutIntermediateFiles); 
     Int4	n,n_adj,N_adj,g;
     FILE	*ofp=0;
     double	MinKeyFrq=0.5;
     Int4	min_nats=5;
     char	tmp_str[200];
     cma_typ	mcma=0;
     double	*fract_ignored,TotalMap=0.0;
     NEW(fract_ignored,25,double);

     FILE *ptninfo_fp=open_file(infile,".lpr","w");
     FILE *ptn_lpr_fp=0;
     if(IsTreeHpt) ptn_lpr_fp=open_file(infile,"_ptn.lpr","w");
     FILE *info_fp=open_file(infile,".info","w");
     for(N_adj=0,n=Hpt->NumBPPS(); n > 0; n--){
	if(IsFailedBPPS[n]) continue;
	if(!Hpt->OutputThis(n)) continue;
	if(che[n]){
	  // fprintf(stderr,"Map[%d] = %g\n",n,Map[n]);
	  CalcTotalLPR(0,FALSE); // Calculates all Map[n]
	  // only don't put out if IsFailedBPPS is true.
	  if(Map[n] > 0){
		// Store the category's BPPS parameters with its main set.
		bpps_typ *bpps=che[n]->BPPS();
		mcma = che[n]->RtnMainSet();
		double alpha,A0,B0;
		alpha = bpps->Parameters(A0,B0);
		SetBPPS_CMA(alpha,(Int4)A0,(Int4)B0,set_mode[n],mcma);

		TotalMap += Map[n];
		if(PutIntermediateFiles){
			che[n]->ContribSeqMAP(n,SFBG[n],fract_ignored,min_nats,MinKeyFrq);
			// che[n]->PutAll(n,SFBG[n],min_nats,MinKeyFrq);
			che[n]->PutFgBgSeqs(n);
		}
		che[n]->PutInfo(info_fp,n); N_adj++;
		fprintf(ptninfo_fp,":::::::::: BPPS category %d: ::::::::::\n",n);
		che[n]->PutPttrnFile(ptninfo_fp,n);
		// new pattern lpr file (*_ptrn.lpr) for CDTree.
		if(IsTreeHpt){
		  che[n]->PutCDTreePttrnFile(ptn_lpr_fp,Hpt->ElmntSetName(n));
		}
	  }
	}
     }
     free(fract_ignored);
     fclose(info_fp); fclose(ptninfo_fp); 

     // NOTE(review): this loop selects on Map[n] > 0 only, whereas N_adj above
     // was counted with additional conditions (failed, OutputThis, che[n]);
     // confirm that the two always agree.
     FILE *pttrn_fp=open_file(infile,".pttrns","w");
     for(n=Hpt->NumBPPS(),n_adj=N_adj; n > 0; n--){
	if(Map[n] > 0){
		fprintf(pttrn_fp,"%d: ",n_adj); che[n]->PutBestPatterns(pttrn_fp,FALSE);
		n_adj--;
	}
     } fclose(pttrn_fp);
     if(ptn_lpr_fp) fclose(ptn_lpr_fp);

    Int4 num_sq = NumSeqsCMSA(MainCMA);
    ofp=open_file(infile,"_new.mma","w");
    for(g=1; g <= Hpt->NumSets(); g++){	
      // if(IsFailedSet[g]){ assert(CardSet(GrpSet[g]) == 0); continue; }
      if(IsFailedSet[g]){ continue; }
      if(PutIntermediateFiles){
       FILE *sfp=0;	// opened lazily, so that empty sets create no file.
       snprintf(tmp_str,sizeof(tmp_str),"_set%d.seq",g);
       for(Int4 sq=1; sq <= num_sq; sq++){
        if(MemberSet(sq,GrpSet[g])){
          if(sfp==0) sfp=open_file(infile,tmp_str,"w");
	  PutSeq(sfp,TrueSeqCMSA(sq,MainCMA),AB);
         }
       }
       // sfp is still null for an empty set; fclose() must not be given a null stream.
       if(sfp) fclose(sfp);
      }
      if(CardSet(GrpSet[g]) > 0){	// output optimized minimal set if non-empty.
         ReNameCMSA(Hpt->ElmntSetName(g),MainCMA);
	 if(strcmp(Hpt->ElmntSetName(g),"Random") == 0){
	    set_typ NonRandom=MakeSet(SetN(Labeled));
	    IntersectNotSet(GrpSet[g],Labeled,NonRandom);  // NonRandom = Set[g] && !Labeled.
            // UnLabelPutInSetCMSA(ofp,NonRandom,MainCMA);
	    if(CardSet(NonRandom) > 0){
                PutInSetCMSA(ofp,NonRandom,MainCMA);	// keep labels as unputted
	    } NilSet(NonRandom);
	 } else {
	    // Order the members by decreasing contribution to the total LPR.
	    Int4	i,J;
	    Int4	*list; NEW(list, num_sq+3,Int4);
	    BooLean	*skip; NEW(skip, num_sq+3,BooLean);
	    char	**HP=HyperPartition;
	    dh_type	dH=dheap(num_sq+2,4);
// assert(this->ConsistencyCheck());
	    for(J=1; J <= num_sq; J++){
		if(MemberSet(J,GrpSet[g])){ 
		  skip[J]=FALSE;	
		  double lpr0 = CalcTotalLPR(0,FALSE); DeleteSet(J,GrpSet[g]);
                  for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition('o',J); }
                  double lpr = CalcTotalLPR(0,FALSE); keytyp Score=lpr0-lpr;
		  // WARNING: use CalcTotalLPR(0,FALSE); to avoid Saving bogus Best!!!!
		  insrtHeap(J,-Score,dH); AddSet(J,GrpSet[g]);
                  for(n=1; n<= Hpt->NumBPPS(); n++){ che[n]->SetPartition(HP[g][n],J); }
		} else { skip[J]=TRUE; }
	    }
// assert(this->ConsistencyCheck());
	    // Members first (best first), then all non-members.  The heap is popped
	    // outside of assert() so that the loop still terminates with NDEBUG.
	    for(i=0; !emptyHeap(dH); ){
		J=delminHeap(dH); assert(J != 0);
		i++; list[i]=J;
	    }
	    for(J=1; J <= num_sq; J++){ if(skip[J]){ i++; assert(i <= num_sq); list[i]=J; } }
	    assert(i == num_sq); Nildheap(dH);
	    PutSelectOneCMSA(ofp,skip,list,MainCMA);
	    free(skip); free(list);
	 }
      }
    } fclose(ofp);

#if 0	// skip this...
    sprintf(tmp_str,"_new.dma");
    ofp=open_file(infile,tmp_str,"w");
    for(Int4 i=1; i <= NumDisplayCMA; i++){
	// char *name=Hpt->ElmntSetName(i);
	// Hpt->ReNameFamilies(i,name);	// rename input cma for *.hpt file; fails if i disallowed.
	PutCMSA(ofp,DisplayCMA[i]); 	// then put display files
    }
    fclose(ofp);
#endif

#if 0
     if(PutIntermediateFiles){
       sprintf(tmp_str,"_new.hpt");
       ofp=open_file(infile,tmp_str,"w"); Hpt->Put(ofp); fclose(ofp);
     }
#endif
     return TotalMap;
}

static sst_typ	*Str2SST(char *sstStr,cma_typ qcma)
// Convert the pattern string sstStr into an array of residue sets indexed by
// position in the first sequence of qcma (array length: that sequence's length
// plus 2).  Positions that are not named in the string are left as allocated.
// Every position must lie within the sequence and its residue set must contain
// the sequence's residue at that position; otherwise diagnostics are written
// to stderr and print_error() is called.
// Use this for the MainSet pattern (only upon output to file).
{
	sst_typ	*parsed=0,*result;
	a_type	ab=AlphabetCMSA(qcma);
	Int4	k,site,res,NumParsed,*pos;

	e_type	Query=SeqSetE(1,DataCMSA(qcma));  // Fake sequence...

	NEW(result,LenSeq(Query)+2,sst_typ); 
	NumParsed=ParseResidueSets(sstStr,pos,parsed,ab,"che_typ input error");
	for(k=1; k <= NumParsed; k++){
		// The parsed position is used directly as the index into the sequence.
		site = pos[k];
		if(site <= 0){
		   fprintf(stderr,"sstStr=%s\n",sstStr);
		   fprintf(stderr,"Position %d(%d):\n",pos[k],site);
		   print_error("cmc_put: Str2SST input pattern option error");
		}
		if(site > LenSeq(Query)){ print_error("che_typ input option error"); }
		res=ResSeq(site,Query);
		if(MemSset(res,parsed[k])){ result[site]=parsed[k]; continue; }

		// The pattern does not match the query at this position: dump the details.
		fputs("**************** FATAL ERROR! **************\n",stderr);
		fprintf(stderr,"%c%d(%d)\n",AlphaChar(res,ab),pos[k],site);
		fprintf(stderr,"xsst[%d]: \"",pos[k]);
		PutSST(stderr,parsed[k],ab);
		fputs("\"\n",stderr);
		PutSeq(stderr,Query,ab);
		gsq_typ *gsq=gsqCMSA(1,qcma);
		gsq->Put(stderr,ab);
		gsq->Put(stderr,60,ab);
		fprintf(stderr,"%d(%d): %c\n",pos[k],site,AlphaChar(res,ab));
		fprintf(stderr,"Query offset = %d\n",OffSetSeq(Query));
		fprintf(stderr,"%s\n",sstStr);
		print_error("in cmc_typ.cc: Str2SST() sst input error");
	}
	free(parsed);
	free(pos);
	return result;
}

static void CheckCardinality(Int4 n, set_typ set, Int4 num, cma_typ cma)
// Verify that set has exactly as many members as cma has sequences; on a
// mismatch report both counts (labelled with n and num) on stderr and assert.
{
        if(CardSet(set) == NumSeqsCMSA(cma)) return;	// consistent; nothing to report.

        fprintf(stderr,"CardSet(fg_set[%d]=%d; NumSeqsCMSA(rtn_cma[%d]=%d\n",
                        n,CardSet(set),num,NumSeqsCMSA(cma));
        assert(CardSet(set) == NumSeqsCMSA(cma));
}

void	cmc_typ::PutRTF(BooLean	SaveChnFiles)
//********************* output n-tier contrast hierarchical alignments. ********************
// For every display set q that is not failed, has type '!' and is foreground
// ('+') in at least one usable category, a chn input stream is assembled
// (display alignment, then per tier the foreground alignment, for the first
// tier the display sequences one by one, and the background alignment), read
// back with MultiReadCMSA() and passed to a chn_typ object, which appends the
// hierarchical alignment to <infile>_sets.rtf.
//   SaveChnFiles = when true the stream is kept as <infile>_set<q>.chn;
//                  otherwise a temporary file is used.
// A category is usable when it is not failed and has at least one column.
{
     const Int4	MaxArgs=30;	// capacity of ArgV[] below.
     Int4	*DisplayHits,NumTiers,g,q,i,min_nats=5,n;
     double	MinKeyFrq=0.5;
     FILE	*ofp=0,*rtf_fp=0;
     char	tmp_str[200];

     RestoreBest();
     if(cfp) fprintf(cfp,"%d %.1f %.0f %d\n",Iteration,TotalLPR,
				temperature,TotalColumns( ));

     //=================== 1. count the usable categories for each display set. =============
     NEW(DisplayHits,NumDisplayCMA+3,Int4);
     for(n=1; n <= Hpt->NumBPPS(); n++){
	if(IsFailedBPPS[n]) continue;
	if(che[n]->NumColumns( ) < 1) continue;
	// ^fixes core dump with "78: (454 0  seqs)(0)(0 cols)".
	for(g=1; g<=Hpt->nGrpsFG(n); g++){
	   q = Hpt->GrpsFG(n,g); assert(q>0 && q<=NumDisplayCMA);
	   // g is only the position within the foreground list of category n;
	   // the failed flag has to be looked up for the set itself.
	   if(IsFailedSet[q]) continue;
	   DisplayHits[q]++; 
	}
     }
     cma_typ mcma=0;
     for(q=1; q <= NumDisplayCMA; q++){
       if(IsFailedSet[q]) continue;
       if(Hpt->TypeOfSet(q) != '!') continue;
       // ^Skip over sets not designated for output.
#if 0	// DEBUG...
	if(q !=4) continue;
#endif
       BooLean	UsedSomewhere=FALSE;
       for(n=Hpt->NumBPPS(); n >= 1; n--){
	  if(IsFailedBPPS[n]) continue;
	  if(che[n]->NumColumns( ) < 1) continue;
	  // ^Fixes core dump with "78: (454 0  seqs)(0)(0 cols)".
	  if(Hpt->OutputThis(n) && HyperPartition[q][n] == '+') UsedSomewhere=TRUE; 
       } if(!UsedSomewhere) continue;

       //=================== 2. get sequence sets for fg and bg... =============
       // if(DisplayHits[q] > 1)	
       // ^This seemed to be working!?  Check into this later...
       //  above assumes all subgroups modeled in the highest Misc category;
       //  but not if only 1 BPPS done.
       if(DisplayHits[q] <= 0) continue;

	NumTiers=0;
	if(SaveChnFiles){
		snprintf(tmp_str,sizeof(tmp_str),"_set%d.chn",q);
		ofp=open_file(infile,tmp_str,"w"); 
	} else {
		ofp = tmpfile();
		if(ofp==0) print_error("cmc_typ::PutRTF(): unable to create a temporary file");
	}
        set_typ	*fg_set,*bg_set; 
	NEW(fg_set, Hpt->NumBPPS()+3,set_typ);
	NEW(bg_set, Hpt->NumBPPS()+3,set_typ);
	hsw_typ HSW=0;
#if 0
	if(TrueHSW) HSW=chn[0]->RtnHSW(1);
	else // Pass Henikoff weights on to other analyses
#endif
	if(checkpoint){ HSW=checkpoint->hsw; }
	else if(passed_in_hsw){ HSW=passed_in_hsw; }
	else { HSW=chn[1]->RtnHSW(1); }
	// ^Pass Henikoff weights on to other analyses
	hsw_typ *hsw; NEW(hsw, Hpt->NumBPPS()*2 + 3, hsw_typ);
	Int4	NumAnal=0;
#if 0
        sprintf(tmp_str,"_set%d.pttrn",q);
	FILE *pttrn_fp=open_file(infile,tmp_str,"w");
#else
	FILE *pttrn_fp=0;
#endif
	BooLean HptIsTree=FALSE;
	Int4 *Parent=0;
        if(Hpt->IsTree(Parent)) HptIsTree=TRUE;
        if(Parent) free(Parent);

	//=================== 3. write the alignments for each tier. =============
	for(n=Hpt->NumBPPS(); n >= 1; n--){
	  if(IsFailedBPPS[n]) continue;
	  if(!Hpt->OutputThis(n)) continue;
          if(!MemberSet(q,SetFG[n])) continue;
	  if(che[n]->NumColumns( ) < 1) continue;
	  // ^Fixes core dump with "78: (454 0  seqs)(0)(0 cols)".
          if(pttrn_fp){ fprintf(pttrn_fp,"%d: ",n);
	  che[n]->PutBestPatterns(pttrn_fp,FALSE);}
	  mcma = che[n]->RtnMainSet();

	  BooLean IsFirstTier = (NumTiers==0) ? TRUE : FALSE;
	  NumTiers++;
	  if(IsFirstTier) PutCMSA(ofp,DisplayCMA[q]);
	  // che[n]->PutMainSet(ofp,min_nats,MinKeyFrq);

	  // Temporarily rename the main set; the old name is kept in a heap copy
	  // because its length is not bounded by anything visible here.
	  char *SavedName=AllocString(NameCMSA(mcma));
	  if(HptIsTree) ReNameCMSA(Hpt->ElmntSetName(n),mcma);
	  else ReNameCMSA(Hpt->GrpName(n),mcma);
	  bpps_typ *bpps=che[n]->BPPS();
	  double alpha,A0,B0;
	  alpha = bpps->Parameters(A0,B0);
	  SetBPPS_CMA(alpha,(Int4)A0,(Int4)B0,set_mode[n],mcma);
#if 0		// Debug...
	  if(GlobalN){
		fprintf(stderr,"%d: GlobalN =%d; Contrast=%d\n",
			n,GlobalN,che[n]->Contrast);
		assert(GlobalN == che[n]->Contrast);
	  }
#endif
	  che[n]->PutFG(ofp,min_nats,MinKeyFrq);
	  fg_set[n]=che[n]->RtnFG_Set( );
	  NumAnal++; 	//********** rtf stuff **********
	  if(IsFirstTier){
	    // Output each display sequence as an alignment of its own.
	    BooLean	*skip;
	    NEW(skip,NumSeqsCMSA(DisplayCMA[q])+3,BooLean);
	    for(i=1; i<=NumSeqsCMSA(DisplayCMA[q]); i++) skip[i]=TRUE;
	    for(i=1; i<=NumSeqsCMSA(DisplayCMA[q]); i++){
		skip[i]=FALSE;
		PutSelectCMSA(ofp,skip,DisplayCMA[q]);
		skip[i]=TRUE;
	    } free(skip); 
	  }
	  che[n]->PutBG(ofp,min_nats,MinKeyFrq); 
	  bg_set[n]=che[n]->RtnBG_Set( );
	  NumAnal++; 	//********** rtf stuff **********
	  ReNameCMSA(SavedName,mcma); free(SavedName);
	}
        if(pttrn_fp){ fclose(pttrn_fp); pttrn_fp=0; }
	if(NumTiers > 1 && sst_str[0] != 0){
	    // ^Put main set if pattern is specified.
	    cma_typ qcma = che[1]->RtnQuerySet();
	    // NOTE(review): msst is not released here; confirm whether
	    // PutInSetCMSA() takes ownership of it.
	    sst_typ *msst = Str2SST(sst_str[0],qcma);
	    set_typ Set=MakeSet(NumSeqsCMSA(MainCMA)+1); 
	    FillSet(Set); DeleteSet(0,Set);
	    // ^WARNING: Don't count zero element.

	    char *SavedName=AllocString(NameCMSA(MainCMA));
	    // if(HptIsTree) ReNameCMSA(Hpt->ElmntSetName(0),mcma);
	    // else ReNameCMSA(Hpt->GrpName(0),MainCMA);
	    ReNameCMSA(Hpt->GrpName(0),MainCMA);
	    PutInSetCMSA(ofp,Set,msst,MainCMA);
	    ReNameCMSA(SavedName,MainCMA); free(SavedName);
	    NilSet(Set);
	}

	//=================== 4. read the alignments back in. =============
	if(SaveChnFiles){
		fclose(ofp);
		snprintf(tmp_str,sizeof(tmp_str),"_set%d.chn",q);
		ofp=open_file(infile,tmp_str,"r"); 
	} else rewind(ofp); 
	{
	 char    *ArgV[MaxArgs];
	 Int4     ArgC=0,NumCMA,num=0,NumHSW=0;
    	 // sprintf(tmp_str,"%s_set%d",infile,q);
	 ArgV[ArgC]=AllocString("chn_see"); ArgC++;
	 ArgV[ArgC]=AllocString(Hpt->ElmntSetName(q)); ArgC++;
	 ArgV[ArgC]=AllocString("-F6"); ArgC++;
	 ArgV[ArgC]=AllocString("-S=P"); ArgC++;
	 ArgV[ArgC]=AllocString("-concise"); ArgC++;
	 ArgV[ArgC]=AllocString("-sets=L"); ArgC++;
#if 0	// for printing out a subregion of the alignment...
	 ArgV[ArgC]=AllocString("-B=100:160"); ArgC++;
#endif
#if 0	 // should simply use the number of indicated columns ('*') by default.
    	 if(NthContrast[] > 0){
		sprintf(tmp_str,"-N=%d",NthContrast[]);
		ArgV[ArgC]=AllocString(tmp_str); ArgC++; 
	 } else ArgC=7;
#endif
	 //*********** passed in arguments...
	 // NOTE(review): these pointers are later passed to free() together with
	 // the AllocString() copies; confirm that SetArgV() hands over ownership.
	 for(Int4 a=0; a < Hpt->RtnArgC(q); a++){
	     // One slot is reserved for the -show/-hide option added below.
	     if(ArgC >= MaxArgs-1) print_error("cmc_typ::PutRTF(): too many hpt arguments");
	     ArgV[ArgC] = Hpt->SetArgV(q,a); ArgC++; 
	 }
	 char **Status=0; 
	 cma_typ *rtn_cma=MultiReadCMSA(ofp,&NumCMA,&Status,AB);
	 // check for null alignments.
         for(Int4 y=1; y <= NumCMA; y++) assert(NumSeqsCMSA(rtn_cma[y]) > 0);
	 double ***ResEvals; NEWPP(ResEvals,NumAnal +3, double);
	 // ^Some of these input cma files have zero sequences; fix this...!!!
	 fclose(ofp); ofp=0;
	 {	// scope for chn_typ tmpchn.
	   //=================== 5. sequence weights & residue evalues for each analysis. =============
	   // rtn_cma[] mirrors the order in which the alignments were written above.
	   for(NumHSW=num=0,n=Hpt->NumBPPS(); n >= 1; n--){
	        if(IsFailedBPPS[n]) continue;
	 	if(!Hpt->OutputThis(n)) continue;
         	if(!MemberSet(q,SetFG[n])) continue;
	  	if(che[n]->NumColumns( ) < 1) continue;
		// ^Fixes core dump with "78: (454 0  seqs)(0)(0 cols)".
	  	mcma = che[n]->RtnMainSet();

		BooLean IsFirstTier = (num == 0) ? TRUE : FALSE;
		if(IsFirstTier) num=2;	// rtn_cma[1] is the display set.
		assert(num <= NumCMA);
		CheckCardinality(n,fg_set[n],num,rtn_cma[num]);
		NumHSW++; hsw[NumHSW]=GetSubHSW(HSW,fg_set[n],rtn_cma[num],mcma);
		num++; ResEvals[NumHSW] = this->GetResEvals(n);
		// ^Foreground only...
		// Skip over the single-sequence alignments of the display set.
		if(IsFirstTier) num += NumSeqsCMSA(rtn_cma[1]);
		assert(num <= NumCMA);
		CheckCardinality(n,bg_set[n],num,rtn_cma[num]);
		NumHSW++; hsw[NumHSW]=GetSubHSW(HSW,bg_set[n],rtn_cma[num],mcma);
		// The background shares the foreground's residue evalues.
		num++; ResEvals[NumHSW] = ResEvals[NumHSW-1];
	   } assert(NumHSW == NumAnal);

	   if(ShowIndels){ ArgV[ArgC]=AllocString("-show"); ArgC++; }
	   else { ArgV[ArgC]=AllocString("-hide=3"); ArgC++; }
#if 0	// uses the hsw routine above
	   // THIS WILL EXPECT CMA'S TO HAVE THE SAME ADDRESS!!!
	   chn_typ tmpchn(ArgC,ArgV,NumCMA,rtn_cma,hsw,Status,200);
#elif 1	// bpps version.
	   chn_typ tmpchn(ArgC,ArgV,NumCMA,rtn_cma,hsw,Status,200,ResEvals);
	   // Only the odd (foreground) entries own their arrays; see above.
	   for(n=1; n <= NumHSW; n=n+2){
		for(Int4 s=1; s <= RtnLengthMainCMSA( ); s++) free(ResEvals[n][s]);
		free(ResEvals[n]);
	   } free(ResEvals); 
	
#else	// original version.
	   chn_typ tmpchn(ArgC,ArgV,NumCMA,rtn_cma,0,Status,200);
#endif
	   assert(!tmpchn.OwnsCMAs());
	   if(snprintf(tmp_str,sizeof(tmp_str),"%s_set%d",infile,q) >= (int) sizeof(tmp_str)){
		print_error("cmc_typ::PutRTF(): input file name is too long");
	   }
	   if(PrintEachRTF) tmpchn.PutHierarchicalAln(tmp_str);
	   if(rtf_fp==0){
		rtf_fp=open_file(infile,"_sets.rtf","w"); 
		tmpchn.PutHierarchicalAlnHead(rtf_fp);
	   } else {
		tmpchn.PutPageBreak(rtf_fp);	// print page break between alignments.
		tmpchn.PrintFileName(rtf_fp);	// print filename at top of page.
	   }
#if 1
	   tmpchn.PutHierarchAligns(rtf_fp);
#elif 1
	   tmpchn.PutHierarchicalAlns(rtf_fp);
#else	// Put alignment using actual bpps lpr values; 
	   double *SubMap=che[n]->SubMap( ); // make sure best is initialized.
	   bpps_typ *best_pps=che[n]->BPPS();	// or return a copy??
	   // sst_typ *best_sst=best_pps->RtnSST( );
	   sst_typ *best_sst=best_pps->RtnCopySST( );

	   // che_typ::RtnOptPattern();
	   sst_typ *opt_sst=pps->RtnOptPattern(CntBG, CntFG, LegalSST,seq,MaxCols,lpr);
	   call from within che_typ
	   // ^^ can modify so as to return an array of subLPRs.
	   tmpchn.PutHierarchicalAlns(double *submap, sst_typ *opt_sst, rtf_fp);
	   tmpchn.PutHierarchicalAlns(bpps_typ *best_pps, rtf_fp);
#endif
	 }	// chn_typ destructor called here...
	 for(n=1; n <= NumCMA; n++) if(rtn_cma[n]) TotalNilCMSA(rtn_cma[n]);
	 free(rtn_cma);
	 for(n=1; n <= NumHSW; n++) NilHSW(hsw[n]);
	 free(hsw);
	 for(n=0; n < ArgC; n++){ free(ArgV[n]); ArgV[n]=0; } ArgC=0;
	}
        free(fg_set); free(bg_set);
     }  // end of for(q=1; q <= NumDisplayCMA; q++) loop...
     free(DisplayHits);
     if(rtf_fp){  che[1]->PutTailRTF(rtf_fp); fclose(rtf_fp); }
}

BooLean	cmc_typ::PutHptPttrns(FILE *fp)
// Write the hyperpartition followed by a "Settings:" section that gives, for
// every BPPS category, its arguments and its best patterns as a -P= option.
// Nothing is written and FALSE is returned when there are fewer than 3 sets;
// otherwise TRUE is returned.
{
	// Don't print if only root node.
	if(Hpt->NumSets() < 3) return FALSE;

	// put settings; retain internal; don't putArg. 
	Hpt->Put(fp,FALSE,FALSE,FALSE);
	fprintf(fp,"\nSettings:\n");
	for(Int4 grp=1; grp <= Hpt->NumBPPS(); grp++){
	   if(Hpt->nArg(grp) == 0){
		// No arguments were given: use a generic group name.
		fprintf(fp,"%d.Group%d -P=",grp,grp);
	   } else {
		fprintf(fp,"%d.%s",grp,Hpt->GrpName(grp));
		char **Args=Hpt->Argv(grp);
		for(Int4 a=0; a < Hpt->nArg(grp); a++){
		   // Drop any previous pattern option ("-P...") and any "- " argument.
		   if(strncmp(Args[a],"-P",2) == 0 || strncmp(Args[a],"- ",2) == 0) continue;
		   fprintf(fp," %s",Args[a]);
		}
		fprintf(fp," -P=");
	   }
	   // Use column not actual sequence position.
	   // PutBestPatterns() ends with rtn char.
	   che[grp]->PutBestPatterns(fp,FALSE);
	}
	fprintf(fp,"\n\n");
	return TRUE;
}

