/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "scc_typ.h"

set_typ	scc_typ::RmWeakMatches(FILE *ofp, Int4 II, double NumStdev)
// Remove sequences from Set[II] that only weakly match the corresponding pattern SST[II].
// For every sequence in Set[II] the number of columns (1..Length) whose residue belongs
// to the pattern set at that column is counted; sequences whose count lies more than
// NumStdev standard deviations below the mean count are removed.
//   ofp:      if non-null, the histogram of match counts and a one-line summary are printed.
//   II:       index of the set/pattern pair to examine (indexes Set[], SST[] and SetID[]).
//   NumStdev: cutoff in standard deviations below the mean; must be within [1.0,9.0].
// Returns a copy of Set[II] (made with CopySet) from which the weak matches were deleted;
// Set[II] itself is not modified.  (Note: a set is returned, not a fraction.)
{
	assert(NumStdev >= 1.0  && NumStdev <= 9.0);
	sst_typ *Pattern=SST[II];
	set_typ sqset=CopySet(Set[II]);
	Int4	j,sq,hits,N=NumSeqsCMSA(cma);
	h_type	HG=Histogram("# Matching seqs", 0,Length+1,1.0);
	UInt2	*mat; NEW(mat,N+3,UInt2);	// mat[sq] = # pattern-matching columns in sequence sq.

	//************************** compute pattern matches **************************
	for(sq=1; sq <= N; sq++){
		if(!MemberSet(sq,sqset)) continue;
		for(hits=0,j=1; j <= Length; j++){
			Int4 r1 = ResidueCMSA(1,sq,j,cma);
			if(MemSset(r1,Pattern[j])) hits++;
		}
		IncdHist((double)hits,HG);
		mat[sq]=hits;
	}
	if(ofp) PutHist(ofp,60,HG); 

	//************************** remove the weak matches **************************
	double	mean=MeanHist(HG);
	double	stdev=sqrt(VarianceHist(HG));
	double	cutoff = mean - (NumStdev*stdev); 
	Int4	NumRemoved=0;
	for(sq=1; sq <= N; sq++){
		if(!MemberSet(sq,sqset)) continue;
		if((double) mat[sq] < cutoff){ NumRemoved++; DeleteSet(sq,sqset); }
	}
	NilHist(HG); free(mat);
	if(ofp) fprintf(ofp,"  Set%d: %d sequences removed\n",SetID[II],NumRemoved);
	return sqset;
}

BooLean	scc_typ::OptimizePatternsPartitions(FILE *fp, wdg_typ &Tree)
// Optimize the foreground, background and patterns for Tree... 
// Step 1: every non-root node's Set[] entry is replaced by the union of the sets in its
//         subtree; Set[Root] becomes the set of all sequences 1..NumSeqsCMSA(cma).
// Step 2: repeatedly (re)compute, for each node, the optimal pattern and LPR of the node's
//         set versus its parent's set; the edge with the lowest LPR below MinLPRforEdge is
//         removed via RmBadEdge() and the loop repeats until no such edge remains.
//   fp:   optional log stream; may be null, in which case nothing is printed.
//   Tree: passed by reference because it is destroyed and replaced whenever an edge is removed.
// Updates SST[], CSQ[] and Set[] as side effects.
// Returns TRUE if at least one edge was removed, FALSE otherwise.
{
   BooLean IsChanged=FALSE;
   // Set[Root]=MakeSet(SetSize); FillSet(Set[Root]);
   Set[Root]=0;
   set_typ Leaves=RtnLeafSet(Tree);
   // NOTE(review): ListOfTreeNodes is computed once from the initial tree and is never
   // released here; nodes that later drop out of the tree are skipped through the p==0
   // test below.  Confirm whether RtnSubTreeNodes() hands ownership of the list to the caller.
   Int4 i,NumNodesInList,*ListOfTreeNodes=RtnSubTreeNodes(Root,NumNodesInList,Tree);
//****************** Create Sets = Union of subtree nodes **************************
   for(i=1; i <= NumNodesInList; i++){
	Int4 subroot=ListOfTreeNodes[i]; 
	assert(subroot <= Root);
	if(subroot == Root){		// root = universal set; need set array longer by one for this. 
		assert(Set[Root]==0); Set[Root]=MakeSet(SetSize);
		FillSet(Set[Root]); DeleteSet(0,Set[Root]);
		for(Int4 sq=NumSeqsCMSA(cma)+1; sq < SetSize; sq++) DeleteSet(sq,Set[Root]);
	} else {
   		set_typ TmpSet=RtnSubTreeSeqSet(subroot,Set,Tree);  // TmpSet = the union of subtree sets.
		NilSet(Set[subroot]); Set[subroot] = TmpSet;
	}
   }

  double *NodeLPR; NEW(NodeLPR,WdgraphN(Tree)+3,double);
  // Initially every node counts as 'rewired' so that all patterns get computed on the first pass.
  set_typ Rewired=MakeSet(WdgraphN(Tree)+1); FillSet(Rewired);
  Int4 worst_edge;
  do {
    worst_edge=0;
    double worst_lpr=MinLPRforEdge,lpr;
    for(i=1; i <= NumNodesInList; i++){
	Int4 node =ListOfTreeNodes[i]; 
	assert(node <= Root);
	if(node == Root){		// root = universal set; need set array longer by one for this. 
	   if(SST[Root]) free(SST[Root]);
	   NEW(SST[Root],Length + 3, sst_typ); // don't add any pattern sets for Root...
	} else {
	   Int4 p=ParentNode(node,Tree);
	   unsigned char *csq;
	   if(p==0) continue; // this node has been removed;
	   char typ;	// the LPR is computed with different paramters for Misc vs Leaf nodes.
	   if(MemberSet(node,Leaves)) typ = 'L'; else typ='M';
	   if(MemberSet(node,Rewired) || MemberSet(p,Rewired)){ // then find optimum pattern for preliminary tree...
	   	// SetI vs SetJ optimum PttrnI (how does node subtree differ from rest of parent subtree).
		// This procedure subtracts Set[node] from Set[p]...no need to do so directly...
	   	sst_typ *xsst= GetOptPttrnLPR(0,Set[node],Set[p],FALSE,lpr,20,csq,typ);
		NodeLPR[node]=lpr;
		// The node takes over the new pattern and consensus; the old ones are released.
	        if(SST[node]) free(SST[node]);
		SST[node]=xsst;
		if(CSQ[node]) free(CSQ[node]);
		CSQ[node]=csq; 
		if(fp){
	            fprintf(fp,"Set%d(%d)%c vs Set%d(%d) IS(%d); Pattern %d: lpr = %.3f\n", node, 
				CardSet(Set[node]),typ,p,CardInterSetINotJ(Set[p],Set[node]),
				CardInterSet(Set[node],Set[p]),PatternLength(xsst),lpr);
		    PutPatternFromSST(fp,xsst); fprintf(fp,"\n");
		}
#if 1	   // DEBUGGING...
	   } else {	// check to see whether NodeLPR is okay.
	   	sst_typ *xsst= GetOptPttrnLPR(0,Set[node],Set[p],FALSE,lpr,20,csq,typ);
		// Only the LPR is wanted here: release both returned buffers
		// (the pattern was previously leaked on every pass).
		free(csq); free(xsst);
		double d= fabs(lpr-NodeLPR[node])/lpr;	// fractional difference in the LPRs.
		// NOTE(review): the "Pattern %d" field below is fed 'node', whereas the message
		// above prints PatternLength(xsst); confirm which value was intended.
		if(fp && d > 0.05){
	           fprintf(fp,"*Set%d(%d) vs Set%d(%d) IS(%d); Pattern %d: lpr = %.1f(%.1f)\n", node,
			CardSet(Set[node]),p,CardSet(Set[p]),CardInterSet(Set[node],Set[p]),node,lpr,NodeLPR[node]);
		} 
		NodeLPR[node]=lpr;
#endif
	   }
	   // assert(lpr > 0);  // this should be positive; need to add something here....
	   if(lpr < MinLPRforEdge){
		if(lpr < worst_lpr){	// remember the lowest-scoring edge seen on this pass.
		   worst_edge=FindEdge(p,node,Tree);	 // arc from parent (tail) to child (head)
		   assert(worst_edge > 0); worst_lpr=lpr;  
		}
	   }
	}
    }
    //************************* remove the worst edge and reconnect... *************************
    if(worst_edge > 0){
      IsChanged=TRUE;
      Int4 Parent;
      // fp is optional everywhere else in this routine, so guard the log output here too.
      if(fp) fprintf(fp,"\n Edge between %d and %d removed;",
		TailWdgraph(worst_edge,Tree),HeadWdgraph(worst_edge,Tree));
      wdg_typ NewTree=RmBadEdge(worst_edge,Parent,Tree); NilWdgraph(Tree); Tree=NewTree;
      NilSet(Leaves); Leaves=RtnLeafSet(Tree);
      NilSet(Rewired); Rewired=GetRewiredSet(Parent, Tree);
      if(fp) fprintf(fp," %d nodes rewired\n\n", CardSet(Rewired));
    }
  } while(worst_edge != 0);
  NilSet(Leaves); free(NodeLPR); NilSet(Rewired);
  return IsChanged;
}

BooLean	scc_typ::RemoveBadNodes(FILE *fp,wdg_typ &Tree, Int4 LeafTrimCutoff)
// Prune the tree in two phases, each repeated until the edge count (mWdgraph) stops changing:
//   phase 1: TrimLeaves() with LeafTrimCutoff (leaf nodes with too few sequences);
//   phase 2: RmSingleChildNodes().
// After each call the previous tree is destroyed and Tree is set to the new one, which is
// also stored in OptTree; on return Tree and OptTree refer to the same graph.
//   fp: optional log stream; when non-null every change is reported together with the
//       tree before and after the change.
// WARNING (from the original author): at this point some leaf nodes may have zero members.
// Returns TRUE if either phase altered the tree.
{
   assert(LeafTrimCutoff >0);

   BooLean Changed=FALSE;
   Int4	pass,before,after;

   // ---- phase 1: trim leaves ----
   pass=1; after=mWdgraph(Tree);
   for(;;){
	OptTree=TrimLeaves(Root,LeafTrimCutoff,Tree);
	before=after;
	after=mWdgraph(OptTree);
	const bool shrunk=(before != after);
	if(shrunk){
	    Changed=TRUE;
	    if(fp){
		fprintf(fp," %d: Trimmed from %d to %d leaves.\n", pass++,before,after);
		PrintNewickTree(fp,Root,'x',Tree);
		PrintNewickTree(fp,Root,'x',OptTree);
	    }
	}
	NilWdgraph(Tree);	// the old tree is discarded whether or not anything changed.
	Tree=OptTree;
	if(!shrunk) break;
   }

   // ---- phase 2: remove single child nodes ----
   pass=1; after=mWdgraph(Tree);
   for(;;){
	OptTree=RmSingleChildNodes(Tree,LeafTrimCutoff);
	before=after;
	after=mWdgraph(OptTree);
	const bool shrunk=(before != after);
	if(shrunk){
	    Changed=TRUE;
	    if(fp){
		fprintf(fp," %d: Removed single child nodes from %d to %d\n", pass++,before,after);
		PrintNewickTree(fp,Root,'x',Tree);
		PrintNewickTree(fp,Root,'x',OptTree);
	    }
	}
	NilWdgraph(Tree);
	Tree=OptTree;
	if(!shrunk) break;
   }
   return Changed;
}

BooLean	scc_typ::RemoveMisfitNodes(FILE *fp,wdg_typ &Tree, Int4 LeafTrimCutoff)
// Alternate between RmBadInternalNode() and RewireMisfitNodes() until a full round leaves
// the tree untouched by both.  After each call the previous tree is destroyed and Tree is
// set to the new one, which is also kept in OptTree.
//   fp: optional log stream; when non-null each change is reported along with the tree
//       before and after the change.
// Returns TRUE if any call reported a change.
{
   BooLean IsChanged=FALSE,IsRewired,IsRemoved;

   assert(LeafTrimCutoff >0);
#if 0
   Int4	oldSize,newSize=mWdgraph(Tree),iter=1;
   do {			// Remove internal nodes with too few members (< LeafTrimCutoff).
   	OptTree=RmBadInternalNode(Root,LeafTrimCutoff,Tree); 
	oldSize=newSize; newSize=mWdgraph(OptTree);
        if(oldSize != newSize){	IsChanged=TRUE;
	    if(fp){
		fprintf(fp," %d: Trimmed from %d to %d internal nodes.\n", iter++,oldSize,newSize);
            	PrintNewickTree(fp,Root,'x',Tree); PrintNewickTree(fp,Root,'x',OptTree);
	    }
	} NilWdgraph(Tree); Tree=OptTree;
   } while(oldSize != newSize);
   // Remove poorly matching nodes based on LPR.
   iter=1;
   do {			
	IsRewired=FALSE;
   	OptTree=RewireMisfitNodes(Root,LeafTrimCutoff,Tree,IsRewired); 
        if(IsRewired){ IsChanged=TRUE;
	    if(fp){
		fprintf(fp," %d: rewired nodes.\n", iter++);
            	PrintNewickTree(fp,Root,'x',Tree); PrintNewickTree(fp,Root,'x',OptTree);
	    }
	} NilWdgraph(Tree); Tree=OptTree;
   } while(IsRewired);
#else	// go back and forth between two routines...
   Int4	pass=1;		// running number shared by both kinds of log message.
   for(;;){
	// Step 1: remove internal nodes with too few members (< LeafTrimCutoff).
	OptTree=RmBadInternalNode(Root,LeafTrimCutoff,Tree,IsRemoved);
	if(IsRemoved){
	    IsChanged=TRUE;
	    if(fp){
		fprintf(fp," %d: Trimmed an internal node.\n", pass++);
		PrintNewickTree(fp,Root,'x',Tree);
		PrintNewickTree(fp,Root,'x',OptTree);
	    }
	}
	NilWdgraph(Tree);
	Tree=OptTree;

	// Step 2: remove poorly matching nodes based on LPR.
	OptTree=RewireMisfitNodes(Root,LeafTrimCutoff,Tree,IsRewired);
	if(IsRewired){
	    IsChanged=TRUE;
	    if(fp){
		fprintf(fp," %d: rewired nodes.\n", pass++);
		PrintNewickTree(fp,Root,'x',Tree);
		PrintNewickTree(fp,Root,'x',OptTree);
	    }
	}
	NilWdgraph(Tree);
	Tree=OptTree;

	if(!IsRewired && !IsRemoved) break;	// neither step changed anything: done.
   }
#endif
   return IsChanged;
}

BooLean	scc_typ::RefineTree(FILE *fp,wdg_typ &Tree, Int4 LeafTrimCutoff)
// Run one refinement round on Tree: optimize patterns/partitions, make the node sets
// disjoint, remove bad nodes and finally remove misfit nodes (in that order).
// Tree must be passed by reference because it gets destroyed and replaced along the way.
// The leaf set and the tree are first dumped to stderr.
// Returns TRUE if any of the four steps reported a change.
{
   // Diagnostic output of the incoming tree.
   set_typ LeafNodes=RtnLeafSet(Tree);
   fprintf(stderr,"\n\nLeaves only:\n");
   PutSet(stderr,LeafNodes);
   NilSet(LeafNodes);
   PutWdgraph(stderr,Tree); 

   // Every step is always executed; the flag merely accumulates their results.
   BooLean Changed=FALSE;
   if(OptimizePatternsPartitions(fp,Tree))		Changed=TRUE;
   if(MakeNodeSetsDisjoint(Tree))			Changed=TRUE;
   if(RemoveBadNodes(fp,Tree,LeafTrimCutoff))		Changed=TRUE;
   if(RemoveMisfitNodes(fp,Tree, LeafTrimCutoff))	Changed=TRUE;
   // OptTree == Tree at this point.
   return Changed;
}

// assert(CardInterSet(Set[subroot],RandomSet) == 0);

set_typ	*scc_typ::CreateTree(FILE *fp,Int4 &NumOptSets,Int4 LeafTrimCutoff)
// convert the input sets into a hyperpartition (FD-table) based on their hierarchical relationships.
// Outline:
//   1. build a directed graph over the sets (RtnDiGraph) and take its shortest path tree;
//   2. call RefineTree() until it reports no further change;
//   3. for every node of OptTree store a copy of its set plus the optimal pattern and
//      consensus versus its parent's subtree (OptimizedSet/OptimizedSST/OptimizedCSQ);
//   4. fill RtnOptSets/RtnOptSetID in the order returned by RtnSubTreeNodes() and
//      set up UCSet/UCSST/UCSetID/NumClust for the non-root nodes.
//   fp:             log stream; it is written to unconditionally, so it must not be null.
//   NumOptSets:     (output) number of entries in the returned array.
//   LeafTrimCutoff: forwarded to RefineTree().
// Returns RtnOptSets, indexed 1..NumOptSets.  Set[Root] is released before returning.
{
   // NOTE(review): this local 'Root' hides the 'Root' that the other methods in this file
   // use without declaring (presumably a class member); confirm that gth->RtnRoot()
   // always agrees with it.
   Int4	i,Root;
#if 0	// Clean up sets...
   double NumStdev=2.5;
   for(i=1; i <= NumSets; i++){
     if(Set[i]){
	 set_typ TmpSet=RmWeakMatches(fp,i,NumStdev);
	 double d=(double) CardSet(TmpSet)/(double)CardSet(Set[i]);
	 if(d >= 0.80){ NilSet(Set[i]); Set[i] = TmpSet; } else NilSet(TmpSet);
     }
   }
#endif

   gth_typ *gth=RtnDiGraph(fp); 	// sets Root for Grph.
   Root=gth->RtnRoot();
   gth->RtnShortestPathTree(fp);
   Root=gth->RtnRoot();
   wdg_typ Tree=gth->RtnTree();	// sets internally Tree=0;
   // NOTE(review): EdgeLPR is taken from gth but never used or released afterwards;
   // confirm who owns it and how it was allocated before freeing it here.
   double *EdgeLPR=gth->RtnEdgeLPR();	// sets internally EdgeLPR=0;
   delete gth;

   // Refine until a complete round leaves the tree unchanged.
   BooLean KeepGoing; i=1;
   do { fprintf(fp,"\n=========== iter %d =============\n\n",i); i++;
	KeepGoing=RefineTree(fp,Tree, LeafTrimCutoff); } while(KeepGoing); 
   if(mWdgraph(Tree) <= 1) print_error("Failed to find a significant hierarchy");
   else fprintf(stderr,"Found a hiearchy with %d edges\n", mWdgraph(Tree));
   PutWdgraph(stderr,Tree);

   // return Optimized Sequence sets to be passed onto the cmcBPPS procedure.
   // eventually move this section to calling routine...?
   Int4 size;
   OptimizedSetToInSet=RtnSubTreeNodes(Root,size,OptTree);
   NumOptimizedSets=size;
   NEW(InSetToOptimizedSet,MaxSetID+4,Int4);	// map indices back to Inset...Root=MaxSetID+1.
   NEW(OptimizedSet,size+5,set_typ); NEWP(OptimizedSST,size+5,sst_typ);
   NEWP(OptimizedCSQ,size +5,unsigned char);
   fprintf(fp,"\nOptSets: ");
   for(Int4 j=1; j<= NumOptimizedSets; j++){ fprintf(fp,"%d ",OptimizedSetToInSet[j]); }
   fprintf(fp,"\n");
   for(i=1; i <= NumOptimizedSets; i++){
	Int4 subroot=OptimizedSetToInSet[i]; InSetToOptimizedSet[subroot]=i;	// 
	assert(subroot <= Root);
	if(subroot == Root){		// root = universal set; need set array longer by one for this. 
		OptimizedSet[i]=CopySet(Set[Root]);
	} else {
		OptimizedSet[i]=CopySet(Set[subroot]);
		set_typ TmpSet=RtnSubTreeSeqSet(subroot,Set,OptTree); // rtn union subtree sets.
		Int4    p=ParentNode(subroot,OptTree);
		set_typ PSet=RtnSubTreeSeqSet(p,Set,OptTree); 
  		Int4 crd=CardSet(TmpSet);
		char Type='G';
		if(crd == CardSet(Set[subroot])) Type='S'; else Type='M'; // Misc node?
		double lpr,Lpr;
		unsigned char *csq;
	   	// sst_typ *xsst=GetOptPttrnLPR(stderr,TmpSet,Set[p],FALSE,lpr,20,csq);
	   	// sst_typ *xsst=GetOptPttrnLPR(stderr,TmpSet,PSet,FALSE,lpr,20,csq);
	   	sst_typ *xsst=GetOptPttrnLPR(0,TmpSet,PSet,FALSE,lpr,20,csq,Type);
		if(1){ // debug
			fprintf(stderr,"FG_Set%d(%d) vs BG_Set%d(%d) IS(%d); Pattern %d: lpr = %.3f\n",
				subroot, crd,p,CardSet(PSet)-crd,CardInterSet(TmpSet,PSet),
				PatternLength(xsst),lpr);
			fprintf(stderr,"======================================");
			fprintf(stderr,"===========================================\n");
			// PutPatternFromSST(stderr,xsst); fprintf(stderr,"\n");
			// PrintSeq(stderr,csq); fprintf(stderr,"\n");
		}
		// The node's pattern and consensus are kept in the Optimized* arrays.
		OptimizedSST[i]=xsst;
		OptimizedCSQ[i]=csq;
	  	if(lpr < MinLPRforEdge) fprintf(stderr,"Bad Edge: %d (lpr=%.2f)\n",subroot,lpr);
#if 1		// See if Parent pattern is present in subset...
		if(p != Root){
		   Int4    gp=ParentNode(p,OptTree);	// find grandparent...
		   set_typ GPSet=RtnSubTreeSeqSet(gp,Set,OptTree); 
		   // The parent's pattern and consensus are needed only for this check, so they
		   // get their own variables and are released below (previously they overwrote
		   // xsst/csq and were leaked).
		   unsigned char *pcsq=0;
	   	   sst_typ *psst=GetOptPttrnLPR(0,PSet,GPSet,FALSE,Lpr,20,pcsq,'M');
		   double WtCntsParentFG,dummy; WtCardFG_BG_Sets(WtCntsParentFG,dummy);
		   
		   IntersectNotSet(GPSet,PSet); // GP := GP intersect not P.
		   lpr=CalcSetvsPttrnLPR(0,TmpSet,GPSet,psst,FALSE,'M');  // does child node match parent pattern?
		   double WtCntsFG; WtCardFG_BG_Sets(WtCntsFG,dummy);
		   double min_lpr = Lpr*0.80*((double) WtCntsFG/(double) WtCntsParentFG);
		   if(lpr < MinLPRforEdge || lpr < min_lpr){ 
			fprintf(stderr,"Bad child node: %d (lpr=%.2f)\n",subroot,lpr);
		   }
		   NilSet(GPSet);
		   free(psst); if(pcsq) free(pcsq);
		}
#endif
		NilSet(TmpSet); NilSet(PSet); 
	}
   } 

// Order sets to be consistent with OptTree DFS for hpt.
   {
	// NOTE(review): 'list' is not released after use; confirm whether RtnSubTreeNodes()
	// hands ownership of the array to the caller.
	Int4    x,s,nlist,*list=RtnSubTreeNodes(Root, nlist, OptTree);
	NEW(RtnOptSets,nlist+3,set_typ);
	NEW(RtnOptSetID,nlist+3,Int4);
	for(x=1; x <= nlist; x++){
	    s=InSetToOptimizedSet[list[x]];
	    RtnOptSets[x]=OptimizedSet[s];
	    RtnOptSetID[x]=OptimizedSetToInSet[s];
	} NumOptSets=nlist; NumRtnOptSet=nlist;
   }
   fprintf(fp,"=================== Tree ===================\n");
   PutWdgraph(fp,OptTree);
#if 1	// set UCSet, NumClust, UCSST, UCSetID to be consistent with OptimizedSets.
   {
     Int4 s,x;
     assert(NumOptimizedSets == NumOptSets); assert(UCSet==0);
     NumClust=NumRtnOptSet -1;		// every node except the root.
     NEWP(UCSST,NumRtnOptSet+3,sst_typ);
     NEW(UCSetID,NumRtnOptSet+3,Int4);
     NEW(UCSet,NumRtnOptSet+ 3,set_typ);
     for(s=1,x=0; s <= NumRtnOptSet; s++){
	if(OptimizedSetToInSet[s] != Root){
	   x++; UCSet[x]=RtnSubTreeSeqSet(RtnOptSetID[s], Set,OptTree);
	   UCSST[x]=OptimizedSST[s]; 	// shared with OptimizedSST, not copied.
	   UCSetID[x]=x; // RtnOptSetID[s];
	}
     } assert(NumClust == x);
   }
#endif
   NilSet(Set[Root]); Set[Root]=0;
   // sprintf(str,"Set%d",Root); ReNameCMSA(str,cma); PutConsensusCMSA(smafp,cma);
   // free(OptSet2InSet);
   return RtnOptSets;
}

void	scc_typ::PrintSMA(FILE *smafp,Int4 num_phyla)
{
   assert(OptTree != 0);
   Int4 i;
   set_typ *OptimizedSet2;
   NEW(OptimizedSet2,NumSets +3,set_typ);		// This array needed below.
   for(i=1; i <= NumSets; i++){	
	Int4 j=InSetToOptimizedSet[i];
	if(OptimizedSet[j] != 0) OptimizedSet2[i]=OptimizedSet[j];
   } OptimizedSet2[Root]=OptimizedSet[InSetToOptimizedSet[Root]];
//************* Reset OptimizedSet, InSetToOptimizedSet, OptimizedSetToInSet & NumOptSets. ***********
   
#if 0	// Create consensus sequences consistent with the tree...
	Starting from leaves, make sure that consequed for path to root is consistent with patterns.
#endif
   //****************** PrintSMA(smafp) ***********************
   char str[100];
   set_typ Leaves=RtnLeafSet(OptTree);
   for(i=1; i <= NumOptimizedSets; i++){
     FILE *tfp=0; 
     // if(OptmizedSet[i]==0) continue;
     // if(NumEdgesIn(i,OptTree)==0 && NumEdgesOut(i,OptTree)==0) continue; // not in tree.
     Int4 node=OptimizedSetToInSet[i];
     if(MemberSet(node,Leaves)){	// for these 
	assert(CardSet(OptimizedSet[i]) > 0);
	tfp=tmpfile(); PutInSetCMSA(tfp,OptimizedSet[i],cma); rewind(tfp);
	cma_typ subcma=ReadCMSA(tfp,AB); fclose(tfp);
	BooLean IgnoreGaps=TRUE;
	tfp=tmpfile(); PutBestRepsCMSA(tfp,num_phyla,IgnoreGaps,subcma); rewind(tfp);
	TotalNilCMSA(subcma); subcma=ReadCMSA(tfp,AB); fclose(tfp);

	tfp=tmpfile(); 
	sprintf(str,"Set%d",node); ReNameCMSA(str,subcma);
	if(OptimizedCSQ[i]) PrintOptimizedCSQ(tfp,i,node); else PutConsensusCMSA(tfp,subcma);
	PutCMSA(tfp,subcma); // adding these is causing the cmcBPPS to recompute an incompatible csq.
	rewind(tfp); TotalNilCMSA(subcma); 
	Int4 number;
	cma_typ *IN_CMA=MultiReadCMSA(tfp,&number,AB); fclose(tfp);

	PutMergedCMSA(smafp,number,IN_CMA); 
	for(Int4 f=1; f <= number; f++) TotalNilCMSA(IN_CMA[f]); free(IN_CMA);
     } else {			// Then is a Misc node.
	set_typ SetU=RtnSubTreeSeqSet(OptimizedSetToInSet[i],OptimizedSet2,OptTree);
	assert(CardSet(SetU) > 0);
#if 0	// DEBUG: can delete... now checking for this straight away within scc_init.cc.
	fprintf(stderr,"Card SetU = %d; RandomSet = %d; Size cma = %d\n",
		CardSet(SetU),CardSet(RandomSet),NumSeqsCMSA(cma));
	assert(CardInterSet(SetU,RandomSet) == 0);
	assert(CardSet(SetU) <= NumSeqsCMSA(cma));
#endif
	tfp=tmpfile(); PutInSetCMSA(tfp,SetU,cma); rewind(tfp);
#if 0	// DEBUG: can delete...
	FILE *efp=open_file("junk",".cma","w");
	PutInSetCMSA(efp,SetU,cma); fclose(efp);
#endif
	cma_typ subcma=ReadCMSA(tfp,AB); fclose(tfp); NilSet(SetU);
	sprintf(str,"Set%d",node); ReNameCMSA(str,subcma);
	if(OptimizedCSQ[i] && node != Root) PrintOptimizedCSQ(smafp,i,node);
	else PutConsensusCMSA(smafp,subcma);  
	TotalNilCMSA(subcma);
     }
   } free(OptimizedSet2);
}

void	scc_typ::PutVerboseReport(FILE *fp)
//*************************** Find intersections between subsets. *******************************
#if 0	// Call as:
	fp = open_file(argv1,".Anal","w");
	scc.PutVerboseReport(fp); fclose(fp);
#endif
//*************************** Find intersections between subsets. *******************************
{
	// h_type HG=Histogram("pattern similarity scores",0,400,2.5);
	h_type HG=Histogram("set intersections",0,1,0.05);
	// grf_typ grf(MaxSetID+1);
	set_typ SetI,SetJ;
	for(Int4 i=1; i <= NumSets; i++){		// skip 1st set = MainSet...
	     SetI=Set[i];
	     for(Int4 j=i+1; j <= NumSets; j++){	// skip 1st set = MainSet...
		SetJ=Set[j];
		Int4 red_in,black_in,red_out,black_out;
		long double onetail,twotail;
		red_out=CardInterSet(SetI,SetJ); // cardinality of SetI & SetJ
		if(red_out > 0){
		   black_out=CardInterSetINotJ(SetI,SetJ);	// cardinality of SetI & NotSetJ:
		   red_in=CardInterSetNotIJ(SetI,SetJ);		// cardinality of NotSetI & SetJ:
		   Int4 Total=SetN(SetI)-1;
		   black_in = Total - (red_in  + black_out + red_out) - NumRandom;
		   twotail=FisherExactTest(0, red_in,black_in,red_out,black_out,onetail);
		   // double score = PatternIntersection(0,Length,LPR[i],LPR[j], SST[i], SST[j],AB);
		   // WARNING: assumes ith column corresponds to ith row in hpt.
		   // if(score > 0.0) IncdHist(score,HG);
		   double d1=(double) red_out/(double) CardSet(SetI);
		   double d2=(double) red_out/(double) CardSet(SetJ);
		   double d = MAXIMUM(double,d1,d2);
		   if(d > 0.0) IncdHist(d,HG);
		   // if(twotail < 0.01 || score  >= 10.0)
		   // if(d >= 0.33){ grf.AddEdge(SetID[i],SetID[j]); }
		   if(fp){
		     if(d >= 0.1) {
		      Int4 Iter = SetID[i]/10,ii=(SetID[i]%10);
		      Int4 Jter = SetID[j]/10,jj=(SetID[j]%10);
		      // i = row; i -1 = Set in that row...
		      fprintf(fp,"Set%d_%d (%d) && Set%d_%d (%d) = %d; %.2Lg (%.2Lg)\n",
			Iter,ii,CardSet(SetI),Jter,jj,CardSet(SetJ),red_out,onetail,twotail);
		      fprintf(fp,"exact %d %d %d %d (total=%d)\n",
			red_out,black_out,red_in,black_in,SetN(SetI)-1 -NumRandom);
		      // Input i,j = BPPS column for pattern in hpt; not Set3_9 designation.
   		      // SimilarPatterns(fp,5.0,i,j);
   		      SimilarPatterns(fp,0.0,i,j);
		      // fprintf(fp,"  new score = %.1f\n\n",score);
		     }
		   }
		}
	    }
	}
	if(fp){ PutHist(fp,60,HG); // grf.Put(fp); fprintf(fp,"\n");
	} NilHist(HG);
}

