/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "scc_typ.h"

set_typ	scc_typ::ConsensusSet(set_typ *inSet,set_typ ISet, set_typ USet, Int4 num)
// Find the consensus set for the array of input sets inSet[1..num].
//   inSet : the input sets; element 0 is not read.
//   ISet  : passed in by the caller as the intersection of the input sets.
//   USet  : passed in by the caller as the union of the input sets.
//   num   : number of input sets.
// Returns a newly created set (a copy; neither ISet nor USet is modified):
//   - for num == 2 a copy of USet;
//   - otherwise ISet plus every sequence of (USet & not ISet) that is a member
//     of at least half of the input sets.
{
	if(num==2) return CopySet(USet);

	set_typ CSet=CopySet(ISet);
	set_typ TmpSet=MakeSet(SetN(ISet)); ClearSet(TmpSet);
	IntersectNotSet(USet, ISet, TmpSet); // modifies TmpSet to equal USet & not ISet.
	for(Int4 sq=1; sq<=NumSeqsCMSA(cma); sq++){
		if(!MemberSet(sq,TmpSet)) continue;
		Int4 hits=0;
		for(Int4 i=1; i <= num; i++){ if(MemberSet(sq,inSet[i])) hits++; }
		// if >= 50%, then add.  The test is written as 2*hits >= num because the
		// former (num/hits) <= 2 used truncating integer division, which also let
		// through anything above one third (e.g. 2 of 5), and divided by zero
		// whenever hits was 0.
		if(hits > 0 && 2*hits >= num) AddSet(sq,CSet);
		// if(2*hits > num) AddSet(sq,CSet);  // if in majority, then add.
	}
	NilSet(TmpSet); return CSet;
}

double	scc_typ::GetBasicIntersectInfo(Int4 i, Int4 j,double &ratio, Int4 cardI,
				Int4 &cardJ, Int4 &cardIJ)
// Report basic overlap statistics for Set[i] versus Set[j].
//   i, j   : set indices; each must lie in 1..NumSets or print_error() is called.
//   cardI  : cardinality of Set[i], supplied by the caller (input only).
//   ratio  : set to cardJ/cardI (1.0 when i == j).
//   cardJ  : set to the cardinality of Set[j].
//   cardIJ : set to the cardinality of the intersection of Set[i] and Set[j].
// Returns the larger of cardIJ/cardI and cardIJ/cardJ (1.0 when i == j).
{
	if(!(i > 0 && i <= NumSets && j > 0 && j <= NumSets)){
		print_error("GetBasicIntersectInfo() input error");
	}
	if(i==j){	// a set compared with itself overlaps completely.
		cardIJ=cardI;
		cardJ=cardI;
		ratio=1.0;
		return 1.0;
	}
	cardIJ=CardInterSet(Set[i],Set[j]);
	cardJ=CardSet(Set[j]);
	ratio=(double) cardJ/(double) cardI;
	double fracOfI=(double) cardIJ/(double) (cardI);
	double fracOfJ=(double) cardIJ/(double) (cardJ);
	return MAXIMUM(double,fracOfI,fracOfJ);
}

void	scc_typ::DeleteOptimizedSet(Int4 node)
// Remove optimized set 'node' (1..NumOptimizedSets) from the parallel arrays
// OptimizedSet[], OptimizedSST[] and OptimizedSetToInSet[].  The set and its
// pattern array are released, the last entry is moved into the vacated slot
// (unless 'node' already is the last entry), the last slot is zeroed, and the
// reverse map InSetToOptimizedSet[] is updated for both affected input sets.
{
	assert(node > 0 && node <= NumOptimizedSets);
	const Int4 last=NumOptimizedSets;
	const Int4 removedIn=OptimizedSetToInSet[node];	// input set that loses its optimized set.
	const Int4 movedIn=OptimizedSetToInSet[last];	// input set whose entry is relocated.

	NilSet(OptimizedSet[node]);
	free(OptimizedSST[node]);
	if(node != last){
		OptimizedSet[node]=OptimizedSet[last];
		OptimizedSST[node]=OptimizedSST[last];
		OptimizedSetToInSet[node]=movedIn;
		InSetToOptimizedSet[movedIn]=node;	// movedIn's new position in the array.
	}
	OptimizedSet[last]=0;
	OptimizedSST[last]=0;
	OptimizedSetToInSet[last]=0;
	InSetToOptimizedSet[removedIn]=0;
	NumOptimizedSets--;
}

Int4	scc_typ::FindSuperSets(FILE *fp, double ratio_cutoff,Int4 MinDisjointSetSize,double MinLPR)
// Group strongly overlapping input sets into disjoint candidate groups and turn
// each group into a superset when its optimized pattern LPR is high enough.
//   fp                 : stream that receives the progress report.
//   ratio_cutoff       : only referenced by the disabled (#elif 0 / #if 0) code below;
//                        the enabled overlap test uses fixed thresholds.
//   MinDisjointSetSize : groups with fewer member sets than this are skipped.
//   MinLPR             : minimum LPR required of a superset and of each of its subsets.
// On return UCSet[], UCSST[] and UCSetID[] hold the NumSets input sets (entries
// 1..NumSets alias Set[] and SST[]) followed by the accepted supersets, and
// NumClust is the index of the last entry.
// Returns the number of supersets appended.
{

	// 1. Find significantly overlapping sets as candidates for members of a superset.
	// wdg_typ ClustGrph=MkWdgraph(MaxSetID, MaxSetID*MaxSetID);
	Int4	i,j,k,seti,setj;
	ds_type sets=DSets(NumSets);
#if 0
	double prb_cutoff=0.001/(double) (NumSets*(NumSets-1)/2);
	prb_cutoff=prb_cutoff*1e-40;
	fprintf(fp,"prb_cutoff=%g\n",prb_cutoff);
#endif
	// h_type HG=Histogram("CumHypGeomProb()",0,400,10.0);
	for(i=1; i < NumSets; i++){
	  if(Set[i]){
	    seti=findDSets(i,sets);
	    Int4 cardI=CardSet(Set[i]);
	    for(j=i+1; j <= NumSets; j++){
	     if(Set[j]){
		Int4 cardIJ=CardInterSet(Set[i],Set[j]),cardJ=CardSet(Set[j]);
#if 1
		// Link i and j when they share at least 5 sequences and the intersection
		// covers at least two thirds of the smaller set.
		if(cardIJ < 5) continue;
		Int4 smallest=MINIMUM(Int4,cardI,cardJ);	
		double d=(double) cardIJ/(double)smallest;
		if(d < 0.6666) continue;
#elif 0
		double d=(double) cardIJ/(double)cardI;
		if(d < ratio_cutoff) continue;	// ratio is 20% by default.
		d=(double) cardIJ/(double)cardJ;
		if(d < ratio_cutoff) continue;
#else
		Int4 smallest=MINIMUM(Int4,cardI,cardJ);	
		double d=(double) cardIJ/(double)smallest;
		// if(d < ratio_cutoff)
		if(d < 0.05) continue;
		else if(d < 0.40)	// is smallest < 40% of intersection?
		{ 
			Int4 largest=MAXIMUM(Int4,cardI,cardJ);	
			d=(double) cardIJ/(double)largest;
		// double cumprb=CumHypGeomProb(Int4 N1,Int4 N2, Int4 n,Int4 x)
			if(d < 0.05) continue;	// ratio is 20% by default.
		}
#endif
		setj=findDSets(j,sets);
		seti=linkDSets(seti,setj,sets);  
#if 0
fprintf(fp,"Sets %d(%d) && %d(%d) = %d: %.3f > %.3f\n",
	i,cardI,j,cardJ,cardIJ,d,ratio_cutoff);
#endif
     	     }
	    }
	  }
	}
#if 0
	PutHist(fp,60,HG); NilHist(HG);
#endif
	// PutDSets(stderr,sets);
	// PutWdgraph(stderr,ClustGrph);

	// Obtain disjoint sets.
	// NOTE(review): dset and Cardinality come from AssignDSets() and are not released
	// in this function -- confirm whether NilDSets() owns them.
	Int4 *Cardinality,num_dsets,*dset;
	dset=AssignDSets(sets, &Cardinality, &num_dsets);	// number the disjoint (candidate) sets sequentially.

	set_typ USet=MakeSet(SetSize);
	Int4 index,ArrayLength = NumSets + num_dsets +1;	// room for one potential superset per disjoint set.

	NEW(UCSet,ArrayLength+3,set_typ);	// input sets followed by the superset unions.
	NEWP(UCSST,ArrayLength+3,sst_typ);	// pattern for each entry of UCSet.
	NEW(UCSetID,ArrayLength+3,Int4);	// identifier for each entry of UCSet.
	for(i=0; i < NumSets; ){ i++; UCSet[i]=Set[i]; UCSST[i]=SST[i]; UCSetID[i]=i; } index=i;

fprintf(fp,"Searching %d disjoint sets\n",num_dsets); 
	// 2. Find legitimate supersets by checking pattern-partition LPRs...
	for(Int4 ds=1; ds <= num_dsets; ds++){
	  BooLean seek=TRUE;
	  while(seek){	   // Go through each disjoint set to confirm whether these should be combined into a superset.
	   if(Cardinality[ds] < MinDisjointSetSize) break;	// number of items (sets) in this disjoint set.

	   fprintf(fp,"SuperSet %d(%d): ",ds,Cardinality[ds]);	// these will be merged into a superset.
	   Int4 *set_id; NEW(set_id,Cardinality[ds]+3,Int4);
	   Int4 *card_set; NEW(card_set,Cardinality[ds]+3,Int4);
	   set_typ *inset; NEW(inset,Cardinality[ds]+3,set_typ);
	   ClearSet(USet);
	   for(k=0,j=1; j<=NumSets; j++){
	     if(dset[j] == ds){
		k++; inset[k]=Set[j]; set_id[k]=j; assert(k <= Cardinality[ds]); fprintf(fp,"%d ",j);
		card_set[k]=CardSet(inset[k]); UnionSet(USet,Set[j]);	// 1st Set = 1st Set U 2nd Set 
	     }
	   } fprintf(fp,"\n"); assert(Cardinality[ds] == k);
	   Int4 CardUSet=CardSet(USet);
	   set_typ SetBG=CopySet(USet); FillSet(SetBG); 
	   IntersectNotSet(SetBG, USet); // SetBG = not USet (includes random seqs).
	   IntersectNotSet(SetBG, RandomSet); // Remove Random sequences.

	   double Lpr,lpr;
	   sst_typ *xsst=GetOptPttrnLPR(0,USet,SetBG,FALSE,Lpr,20);
	   double WtCntsSfFG,dummy; WtCardFG_BG_Sets(WtCntsSfFG,dummy);
	   seek=FALSE;
	   if(Lpr < MinLPR){
	       // then print out additional information...
	       for(Int4 s=1; s <= Length; s++){ // print out the pattern and Lpr.
		  if(xsst[s]){ char *tmp=GetPatternFromSST(xsst[s]); fprintf(fp,"%s%d ",tmp,s); free(tmp); }
	       } fprintf(fp,"\nSuperSet %d: lpr = %.3f (%d sqs)(failed)\n\n",ds,Lpr,CardUSet);
		free(xsst); xsst=0; 	// skip this one entirely...
	   } else {
	     for(Int4 s=1; s <= Length; s++){ // print out the pattern and Lpr.
		  if(xsst[s]){ char *tmp=GetPatternFromSST(xsst[s]); fprintf(fp,"%s%d ",tmp,s); free(tmp); }
	     } fprintf(fp,"\nSuperSet %d: lpr = %.3f (%d sqs)\n",ds,Lpr,CardUSet);
	     char x; ClearSet(USet);	// USet is rebuilt from the subsets that pass.
	     for(j=1; j<= Cardinality[ds]; j++){
	        lpr=CalcSetvsPttrnLPR(0,inset[j],SetBG,xsst,FALSE); // change 0 to fp to print out.
	        // lpr=CalcSetvsPttrnLPR(0,inset[j],SetBG,xsst,FALSE,'M'); // change 0 to fp to print out.
		// ^ FALSE --> use SetBG - inset[j];
	        double WtCntsFG; WtCardFG_BG_Sets(WtCntsFG,dummy);
		// ^^WARNING: assumes that CalcSetvsPttrnLPR( ) was called to initialize CntBG & CntFG.
		double min_lpr = Lpr*0.80*((double) WtCntsFG/(double) WtCntsSfFG); 
	        if(lpr < MinLPR || lpr < min_lpr){ x = '*'; seek=TRUE; }
	        else { x = ' '; UnionSet(USet,inset[j]); }
	        fprintf(fp,"  SubSet %d.%d: lpr = %.3f (%d sqs)%c (cut: %.3f)\n",
			ds,set_id[j],lpr,card_set[j],x,min_lpr);
	        if(x == '*') card_set[j] = -1;	// mark as failed for the pruning step below.
	     } fprintf(fp,"\n");
	     if(seek) {  // remove failed set(s) from dsets. 
		// Cardinality[ds] is decremented while pruning, so iterate over a snapshot
		// of the member count; using the live value as the loop bound skipped
		// failed subsets near the end of the list.
		const Int4 num_members=Cardinality[ds];
		for(j=1; j<= num_members; j++) if(card_set[j] < 0){ dset[set_id[j]]=0; Cardinality[ds]--; }
		free(xsst); xsst=0; 
	     } else { index++; UCSST[index]=xsst; UCSet[index]=CopySet(USet); UCSetID[index]=index; }
	   } NilSet(SetBG); free(set_id); free(inset); free(card_set);
	  }	// end of while(seek) loop.

	} fprintf(fp,"\n %d disjoint sets found\n\n",num_dsets); fflush(fp);
	NumClust=index; NilSet(USet); NilDSets(sets);
	return index-NumSets;
}

void	scc_typ::PutSetCluster(FILE *fp,Int4 Size,Int4 *set_id,set_typ *inset,set_typ cns_set)
{
	Int4	setsize=SetN(inset[1]);
	set_typ ISet=MakeSet(setsize); FillSet(ISet); 
// IntersectNotSet(ISet, RandomSet); // Remove Random sequences.
	set_typ USet=MakeSet(setsize); ClearSet(USet);
        set_typ *UofRestSet;
	Int4	Smallest=INT4_MAX;
        NEW(UofRestSet,Size+3,set_typ); // Store the Union of all but j.
        for(Int4 jj=1; jj <= Size; jj++){
                UofRestSet[jj]=MakeSet(setsize); ClearSet(UofRestSet[jj]);
		UnionSet(USet,inset[jj]);
		IntersectSet3(ISet,inset[jj]);	// 1stSet = 1stSet && 2ndSet 
		Int4	x=CardSet(inset[jj]);
		Smallest=MINIMUM(Int4,x,Smallest);
        }
	set_typ conset=0;
	if(cns_set == 0){ conset=ConsensusSet(inset,ISet,USet,Size); }
	else conset=cns_set;
        for(Int4 jj=1; jj <= Size; jj++){
              for(Int4 j=1; j <= Size; j++){
		   if(jj != j) UnionSet(UofRestSet[jj],inset[j]);	
	      }
	}
	Int4 SizeIntersect=CardSet(ISet);
	double f=(double) SizeIntersect/(double) Smallest;
	fprintf(fp,"\nSet pairwise intersections (U=%d; I=%d; C=%d,%.0f%c):\n_",
			CardSet(USet),SizeIntersect,CardSet(conset),100*f,'%');
	if(cns_set == 0) NilSet(conset);
	for(Int4 j=1; j <= Size; j++){ fprintf(fp,"_%3d _",set_id[j]); }
	fprintf(fp," set\n");
	for(Int4 j=1; j <= Size; j++){
	      Int4    k1=set_id[j]; 
	      // fprintf(fp," %4d|",k1);
	      for(Int4 jj=1; jj <= Size; jj++){
		if(j==jj) fprintf(fp,"%5d ",CardSet(inset[j]));
		else if(j > jj){
		    Int4 MinSize=MINIMUM(Int4,CardSet(inset[j]),CardSet(inset[jj]));
		    Int4 IntersectSize=CardInterSet(inset[j],inset[jj]);
	   	    double f=(double) IntersectSize/(double) MinSize;
		    Int4   ifr=(Int4) floor((10*f)+0.5);
		    assert(ifr <= 10);
		    if(IntersectSize == MinSize) fprintf(fp," 1.0  ");
		    else if(ifr == 10) fprintf(fp,"  .9+ ");
		    else fprintf(fp,"  .%d  ",ifr);
		    // fprintf(fp,"      ");
		} else {
	         Int4    k2=set_id[jj]; 
		 fprintf(fp,"%5d ",CardInterSet(inset[j],inset[jj]));
		 // fprintf(fp," %d(%d)",k2,CardInterSet(inset[j],inset[jj]));
		 // if(jj < Size) fprintf(fp,"; "); else fprintf(fp,"\n");
		}
	      } 
	      //fprintf(fp,"\n");
	      fprintf(fp," |%d\n",k1);
	} //fprintf(fp,"\n");
	// new Union of the remaining sets with each set.
	for(Int4 j=1; j <= Size; j++){ fprintf(fp,"------"); } fprintf(fp," |\n");
	for(Int4 j=1; j <= Size; j++){
		fprintf(fp,"%5d ",CardInterSet(UofRestSet[j],inset[j])); 
	} fprintf(fp," | & U other sets.\n"); 
	for(Int4 j=1; j <= Size; j++){
		Int4 ciu=CardInterSet(UofRestSet[j],inset[j]);
		Int4 cs=CardSet(inset[j]);
	   	double f=(double) ciu/(double) cs;
		Int4   ifr=(Int4) floor((100*f)+0.5);
		assert(ifr <= 100);
		if(ciu == cs) fprintf(fp,"  100 ");
		else if(ifr == 100) fprintf(fp,"  99+ ");
		else fprintf(fp,"  %2d  ",ifr);
		NilSet(UofRestSet[j]);
	} fprintf(fp," | %c\n\n",'%'); free(UofRestSet);
	NilSet(ISet); NilSet(USet);
}

void	scc_typ::ClusterSets(double ratio_cutoff)
{
	wdg_typ ClustGrph=MkWdgraph(MaxSetID, MaxSetID*MaxSetID);
	Int4 i,j,seti,setj;
	ds_type sets=DSets(NumSets);
	for(i=1; i < NumSets; i++){
	  if(Set[i]){
	    // PutSet(stderr,Set[i]);
	    seti=findDSets(i,sets);
	    Int4 cardI=CardSet(Set[i]);
	    for(j=i+1; j <= NumSets; j++){
	     if(Set[j]){
		Int4 cardIJ=CardInterSet(Set[i],Set[j]),cardJ=CardSet(Set[j]);
		Int4 smallest=MINIMUM(Int4,cardI,cardJ);
		// double d=(double) cardIJ/(double)smallest;
		double d=(double) cardIJ/(double)cardI;
		if(d < ratio_cutoff) continue;
		d=(double) cardIJ/(double)cardJ;
		if(d < ratio_cutoff) continue;
		Int4 wt=(Int4) ceil(d*1000);
		if(cardI > cardJ) JoinWdgraph(j,i,-wt,ClustGrph);
		else JoinWdgraph(i,j,-wt,ClustGrph);
		setj=findDSets(j,sets);
		seti=linkDSets(seti,setj,sets);  
#if 0
fprintf(stderr,"Sets %d(%d) && %d(%d) = %d: %.3f > %.3f\n",
	i,cardI,j,cardJ,cardIJ,d,MaxDistinctRatio);
#endif
     	     }
	    }
	  }
	}
	// PutDSets(stderr,sets);
	// PutWdgraph(stderr,ClustGrph);
	Int4 *Cardinality,num_dsets,*dset,smallest,largest;
	dset=AssignDSets(sets, &Cardinality, &num_dsets);
	Int4 *Smallest; NEW(Smallest,num_dsets+3,Int4);
	Int4 *Largest; NEW(Largest,num_dsets+3,Int4);
	set_typ USet=MakeSet(SetSize);
	for(i=1; i<=num_dsets; i++){
	   // if(Cardinality[i] < 2) continue;
	   if(Cardinality[i] < 5) continue;
	   fprintf(stderr,"set %d(%d): ",i,Cardinality[i]);
	   Int4 *set_id; NEW(set_id,Cardinality[i]+3,Int4);
	   set_typ *inset; NEW(inset,Cardinality[i]+3,set_typ);
	   smallest=INT4_MAX;
	   largest=0;
	   Int4 k=1;
	   ClearSet(USet);
	   for(j=1; j<=NumSets; j++){
	     if(dset[j] == i){
		inset[k]=Set[j]; set_id[k]=j; k++;
		Int4 cardI=CardSet(Set[i]);
		if(cardI < smallest){ smallest=cardI; Smallest[i]=i; }
		if(cardI > largest){ largest=cardI; Largest[i]=i; }
		fprintf(stderr,"%d ",j);
		UnionSet(USet,Set[j]);	// 1stSet = 1stSet U Set[II][ii] 
	     }
	   } fprintf(stderr,"\n\n");
#if 1
	   char str[20]; sprintf(str,"%d.cma",i);
	   FILE *cfp = open_file("junkSet",str,"w");
	   PutInSetCMSA(cfp,USet,cma); fclose(cfp);
	   
	   FILE *fp=stderr;
	   set_typ SetBG=CopySet(USet); FillSet(SetBG); 
	   IntersectNotSet(SetBG, USet);
// IntersectNotSet(SetBG, RandomSet); // Probably want RandomSet in this background.
	   double lpr;
	   sst_typ *xsst=GetOptPttrnLPR(0,USet,SetBG,FALSE,lpr,20);
	     fprintf(fp,"SuperSST: lpr = %.3f (%d sqs)\n",lpr,CardSet(USet));
	     fprintf(fp," ");
	     for(Int4 s=1; s <= Length; s++){
		  if(xsst[s]){
			char *tmp=GetPatternFromSST(xsst[s]); fprintf(fp,"%s%d ",tmp,s); free(tmp); 
		  }
	     } fprintf(fp," \n"); free(xsst);
	   NilSet(SetBG);
#endif
	   PutSetCluster(stderr,Cardinality[i],set_id,inset,0);
	   free(set_id); free(inset);
	} fprintf(stderr,"\n %d disjoint sets found\n\n",num_dsets);
	NilSet(USet);
#if 0
  for(i=1; i <= num_dsets; i++){
	Int4	*path,*dist;
	NEW(path,MaxSetID+5,Int4);
	NEW(dist,MaxSetID+5,Int4);
        TopoScanWdigraph(ClustGrph, s, path, dist);
        // fprintf(stderr,"\nstart ");
        for(j=path[e]; i!=s; i=path[i]){ fprintf(stderr,"-> %d",i);}
        // *debug** { fprintf(stderr,"-> %d",r); }
        // fprintf(stderr," (dist = %d)\n",-dist[e]);
  }
#endif
	NilDSets(sets);
	NilWdgraph(ClustGrph);
	exit(1);
}

double	scc_typ::CalcSetSimilarity(FILE *ofp, cma_typ cma, set_typ sqset1, set_typ sqset2, sst_typ *Pttrn,
					Int4 NumPttrnPos, double &Z,double &RE)
{
#if 0
	double	*data1, *data2,d,prob;
	UInt4	n1=CardSet(sqset1),n2=CardSet(sqset2);
	data1=NumPartlyMatchingSeqCMSA(ofp,cma,sqset1,Pttrn,NumPttrnPos,MaxMisMatches);
	data2=NumPartlyMatchingSeqCMSA(ofp,cma,sqset2,Pttrn,NumPttrnPos,MaxMisMatches);
	kstwo(data1, n1, data2, n2, &d, &prob);
	// return prob;
#endif
	h_type	HG1,HG2;
	HG1=NumPartlyMatchingSeqCMSA(ofp,cma,sqset1,Pttrn,NumPttrnPos);
	HG2=NumPartlyMatchingSeqCMSA(ofp,cma,sqset2,Pttrn,NumPttrnPos);
	double mean1=MeanHist(HG1),mean2=MeanHist(HG2);
	double sd1=sqrt(VarianceHist(HG1)),sd2=sqrt(VarianceHist(HG2));
	RE=ComputeRelativeEntropy(HG1,HG2);
	double JSD= ComputeJSD(HG1,HG2);
	NilHist(HG1); NilHist(HG2);
	double Z1=fabs((mean1-mean2)/sd1);  
	double Z2=fabs((mean1-mean2)/sd2);  
	Z = (Z1 + Z2)/2.0;
	return 100.0*JSD;
}

set_typ	scc_typ::SetsInCliques(Int4 maxsetid, Int4 NC, vst_typ **clique)
// Return a newly created set (sized maxsetid+4) holding every vertex that occurs
// in at least one of the cliques clique[1..NC].
{
	set_typ	Members=MakeSet(maxsetid+4);
	ClearSet(Members);
	for(Int4 c=1; c <= NC; c++){
		vst_typ *clq=clique[c];
		for(Int4 v=0; v < clq->Size(); v++){
			Int4 id=clq->Vertex(v);
			AddSet(id,Members);
		}
	}
	return Members;
}

BooLean	scc_typ::MergeSimilarSets(FILE *fp, FILE *ptrn_fp, BooLean WithClustering)
//*************************** Find intersections between subsets. *******************************
// Build a graph of similar sets, find its cliques and merge the sets of each
// clique into one consensus set with an optimized pattern.
//   fp             : stream receiving the report (some diagnostics also go to stderr).
//   ptrn_fp        : not used by the code below (0 is passed to MkGraphOfSimilarSets()).
//   WithClustering : TRUE selects Bron_Kerbosch_cluster(), FALSE selects Bron_Kerbosch().
// On return UCSet[], UCSST[] and UCSetID[] hold one entry per clique followed by
// one entry per set that is in no clique (those UCSet entries alias Set[]),
// ICSST[] holds the pattern intersection of each clique, and NumClust is the
// total number of entries.
// Returns FALSE when no clique was found, otherwise TRUE.
{

	// grf_typ *grfSS=MkGraphOfSetOverlaps(stderr, NumSets, Set, SST, SetID);
	// grfSS->PutWeighted(stderr,0);
	// delete grfSS; exit(1);

	//********************** Obtain a graph of related sets. ***************************
	// grf_typ	*grf=MkGraphOfSimilarSets(fp,ptrn_fp);
	grf_typ	*grf=MkGraphOfSimilarSets(fp,0);
	//********************** Find cliques of related subsets. ***************************
	Int4	MinClique=2;
        double pcut=0.001;
        set_typ *NodeSet=0; // Not used...
        vst_typ **clique=0;
        if(WithClustering){
           clique=grf->Bron_Kerbosch_cluster(MinClique,100,&NumClust,pcut,NodeSet,MaxSetID+1);
        } else {
           clique=grf->Bron_Kerbosch(MinClique,100,&NumClust,pcut,NodeSet,MaxSetID+1);
        }
	BooLean	Rtn;
	if(NumClust==0) Rtn=FALSE; else Rtn=TRUE;
	  //***************** Find intersections between sets *****************
	set_typ	CliqueSets=SetsInCliques(MaxSetID, NumClust, clique);
	Int4 NumNotInClqs=NumSets - CardSet(CliqueSets);
	set_typ ISet=MakeSet(SetSize);
	set_typ UGSet=MakeSet(MaxSetID+4); ClearSet(UGSet);	// union of the set ids over all cliques.
	set_typ **CSet=0;
	// WARNING: Need to leave array long enough to accomodate the root node!!!!!!!!
	Int4 ArrayLength = 2*NumClust + NumNotInClqs+1;	// extra NumClust for supersets.
	NEWP(CSet,NumClust+3,set_typ);	// store sets for each clique.
	NEW(UCSet,ArrayLength + 3,set_typ);	// store union of clique set for each.
	NEWP(UCSST,ArrayLength+3,sst_typ);	// store pattern union for clique.
	NEWP(ICSST,ArrayLength+3,sst_typ);	// store pattern intersection for clique.
	NEW(UCSetID,ArrayLength+3,Int4);	// store union of clique set for each.
	set_typ USet=MakeSet(SetSize);
	fprintf(fp,"************************* Cliques (%d) *************************\n",NumClust);
	for(Int4 i=1; i <= NumClust; i++){
	   fprintf(fp,"===========================================================\n");
	   fprintf(fp,"%d(%d):",i,clique[i]->Size());
	   FillSet(ISet); ClearSet(USet);
// IntersectNotSet(ISet, RandomSet); // Probably not necessary to remove RandomSet as will be done below.
	   // NEW(UCSST[i],Length+3,set_typ);	// store sets for each clique.
	   sst_typ *SST_I[1003];	// patterns of the clique members; capacity guarded by the assert below.
	   assert(clique[i]->Size() < 1000);
	   NEW(CSet[i],clique[i]->Size()+3,set_typ);	// store sets for each clique.
	   set_typ GSet=MakeSet(MaxSetID+4); ClearSet(GSet);
	   UCSetID[i]=i;	// set to number in cluster for now.
	   for(Int4 j=0; j < clique[i]->Size(); j++){
		Int4    k1=clique[i]->Vertex(j); 
		// if(j==0) UCSetID[i]=k1;	// set to first one in clique for now.
		AddSet(k1,GSet);
		Int4 index=SetID2index[k1];
		Int4	II=k1/10,ii=(k1%10)+2;
		Int4	set_size=CardSet(Set[index]);
		fprintf(fp," Set%d_%d (%d)",II,ii-2,set_size);
		IntersectSet3(ISet,Set[index]);	// 1stSet = 1stSet && Set[II][ii] 
		UnionSet(USet,Set[index]);	// 1stSet = 1stSet U Set[II][ii] 
		CSet[i][j+1]=Set[index];	// store set. 1..Clique->Size (borrowed, not owned).
	        SST_I[j+1]=SST[index];
		// MergeTheseSSTs(SST[i],SST[j]);	// Merge the pattern sets.
	   }
#if 1
	   UCSet[i]=ConsensusSet(CSet[i],ISet,USet,clique[i]->Size());	// for ith clique.
#else
	   UCSet[i]=CopySet(USet);	
#endif
	   fprintf(fp,"\n");
	   grf->Put(fp,GSet);
	   UnionSet(UGSet,GSet); 	// find out 
	   NilSet(GSet); // fprintf(fp,"\n");

	   Int4 iscore;
	   ICSST[i]=PatternIntersection(fp, clique[i]->Size(),SST_I,iscore);
	   if(iscore > 0) fprintf(fp," (intersect: %d)\n",iscore); else fprintf(fp,"\n");
	   // fprintf(fp,"\n      _");

#if 1	// debug...
	   for(Int4 j=1; j <= clique[i]->Size(); j++){
		iscore=0; fprintf(fp,"%d: ",j);
		for(Int4 s=1; s <= Length; s++){
		  if(SST_I[j][s]){
	  	    char *tmp=GetPatternFromSST(SST_I[j][s]);
		    fprintf(fp,"%s%d ",tmp,s); free(tmp); iscore++;
		  } 
		} fprintf(fp," (%d)\n",iscore);
	   }
#endif

#if 0
	   UCSST[i]= ConsensusPattern(fp,clique[i]->Size(),SST_I,iscore);
#elif 1
{
	   FILE *fpr=stderr;
	   double lpr;
	   set_typ SetBG=CopySet(USet); FillSet(SetBG); IntersectNotSet(SetBG, UCSet[i]);
// IntersectNotSet(SetBG, RandomSet); // Not sure if RandomSet should be in background.
	   UCSST[i]=GetOptPttrnLPR(0,UCSet[i],SetBG,FALSE,lpr,20);
	   fprintf(fpr,"Optimum Pattern (lpr=%.1f; %d vs %d seqs): ",lpr,CardSet(UCSet[i]),CardSet(SetBG)); 
	   PutPatternFromSST(fpr,UCSST[i]);

	   lpr=CalcSetvsPttrnLPR(0,UCSet[i],SetBG,UCSST[i],FALSE);	// SetJ vs Not SetI should not match PttrnI!
	   fprintf(fpr,"Optimum Pattern2(lpr=%.1f; len=%d): ",lpr,LengthPattern(UCSST[i])); 
	   PutPatternFromSST(fpr,UCSST[i]);

	   sst_typ *xsst = ConsensusPattern(0,clique[i]->Size(),SST_I,iscore);
	   lpr=CalcSetvsPttrnLPR(0,UCSet[i],SetBG,xsst,FALSE);	// SetJ vs Not SetI should not match PttrnI!
	   fprintf(fpr,"Consensus Pattern (lpr=%.1f; len=%d): ",lpr,LengthPattern(xsst)); 
	   PutPatternFromSST(fpr,xsst); fprintf(fpr,"\n");
	   free(xsst); NilSet(SetBG);
	   if(iscore > 0) fprintf(fp," (consensus pattern length: %d)\n",iscore); // else fprintf(fp,"\n");
}
#else
	   UCSST[i]= UnionizePatterns(fp,clique[i]->Size(),SST_I,iscore);
	   if(iscore > 0) fprintf(fp," (union: %d)\n",iscore); // else fprintf(fp,"\n");
#endif

	   Int4 *set_id; NEW(set_id,clique[i]->Size()+3,Int4);
	   for(Int4 j=1; j <= clique[i]->Size(); j++){ set_id[j]=clique[i]->Vertex(j-1); }
	   PutSetCluster(fp,clique[i]->Size(),set_id,CSet[i],UCSet[i]); free(set_id);
	   // clique[i]->Put(fp); 
	}
	// Add additional sets to the array.
	Int4 NewNumSets=0;
    {	Int4 k,i,j;
	fprintf(fp,"Unclustered sets: ");
	for(k=1,i=NumClust,j=0; k <= NumSets; k++){
	   if(!MemberSet(SetID[k],CliqueSets)){	// if Set is not in Cliques
	     if(Set[k]){
		fprintf(fp,"%d ", k);
		i++; if(UCSST[i] != 0){ free(UCSST[i]); UCSST[i]=0; }
		double lpr; UCSST[i]=GetOptPttrnLPR(0,Set[k],Set[k],TRUE,lpr,20);
		UCSetID[i]=i;		// set next id to the position in the array...can trace back...
		UCSet[i]=Set[k]; 		// append to end of UCSets...
		j++; assert(j <= NumNotInClqs);	// keep track of unmerged sets...
	     }
	   }
	} fprintf(fp,"\n");
	NewNumSets=i;
	// assert(j == NumNotInClqs);	// CliqueSets based on SetIDs & need not satisfy this.
    }	
	// Release per-clique storage while NumClust still holds the clique count.
	// CSet[i] only borrows pointers into Set[], so the arrays are freed, not the sets.
	for(Int4 i=1; i <= NumClust; i++){ delete clique[i]; free(CSet[i]); }
	free(clique); free(CSet);
	NilSet(CliqueSets);
	NilSet(UGSet);
	NilSet(USet);
	NilSet(ISet);
	NumClust = NewNumSets;
	delete grf;
	return Rtn;
}

scc_typ	*CreateSuperSetsDriver(FILE *fp, double cutoff,double MinLPR, scc_typ *in_scc)
// Repeatedly call CreateSuperSets() until a round adds no further sets.
// After each productive round the current scc_typ object is deleted and replaced
// by a new one built from its UC sets, patterns and ids; the next round starts at
// the first set added by the previous one.
// NOTE: in_scc itself is deleted as soon as the first round adds sets, so callers
// must continue with the returned pointer only.
// Returns the final scc_typ object (in_scc when nothing was ever added).
{
	scc_typ	*current=in_scc;
	Int4	numAfter=current->RtnNumSets();
	swt_typ *SWT=current->RtnSWT();
	cma_typ cma=current->RtnCMA();
	Int4	firstSet=1;

	for(;;){
		Int4 numBefore=numAfter;
		Int4 created=current->CreateSuperSets(fp,cutoff,firstSet,MinLPR);
		fprintf(fp,"  %d supersets created\n",created); 
		numAfter=current->RtnNewNumSets();
		fprintf(fp,"  Number of sets %d --> %d\n",numBefore,numAfter); fflush(fp);
		if(numAfter <= numBefore) break;	// nothing new: done.

		firstSet=numBefore+1;  // Start with added supersets on next round...
		set_typ *NewSet=current->RtnUCSets(numAfter); 
		sst_typ **NewSST=current->RtnUCSSTs();
		Int4 *set_ids=current->RtnUCSetIDs();
		delete current;
		current=new scc_typ(numAfter,NewSet,NewSST,cma,set_ids,SWT);
	}
	return current;
}

Int4	scc_typ::CreateSuperSets(FILE *fp, double cutoff,Int4 StartSet,double MinLPR)
// Create new supersets by examining change in LPR when distinct sets are combined.
//   fp       : stream receiving the report.
//   cutoff   : a pair is skipped when its intersection exceeds this fraction of either
//              set (one is then regarded as a likely subset of the other).
//   StartSet : first set to use as the seed (i) of a superset; when it is 1 the partner
//              sets j run over 1..NumSets-1, otherwise over 1..StartSet-1.
//   MinLPR   : minimum LPR required of a superset and of each contributing subset.
// For each seed set the superset is grown greedily: every partner j that passes the
// tests is merged into the current superset before the next partner is examined.
// On return UCSet[], UCSST[] and UCSetID[] hold the NumSets input sets (aliases of
// Set[] and SST[]) followed by the new supersets, and NumClust is the last index used.
// Returns the number of supersets created.
{
	set_typ USet=MakeSet(SetSize);
	set_typ SetBG=MakeSet(SetSize);
	Int4	Index,ArrayLength = NumSets * 5 +1;	// 5 times extra added for potential supersets.
	Int4	i,j,cardIJ,EndSet;

	NEW(UCSet,ArrayLength+3,set_typ);	// input sets followed by the supersets.
	NEWP(UCSST,ArrayLength+3,sst_typ);	// pattern for each entry of UCSet.
	NEW(UCSetID,ArrayLength+3,Int4);	// identifier for each entry of UCSet.
	for(i=0; i < NumSets; ){ i++; UCSet[i]=Set[i]; UCSST[i]=SST[i]; UCSetID[i]=i; } Index=i;

	Int4	*SetCard; NEW(SetCard,ArrayLength+3,Int4);	// cardinality of each UCSet entry.
	double	*SetLPR; NEW(SetLPR,ArrayLength+3,double);	// optimized LPR of each UCSet entry.

	// 1. Record the cardinality and stand-alone LPR of every existing set.
	double	d,Lpr,lpr,WtCntsSfFG,dummy; 
	// h_type HG=Histogram("CumHypGeomProb()",0,400,10.0);
	for(i=1; i <= Index; i++){
		if(UCSet[i] == 0) continue;	// empty slot: nothing to measure (and nothing to pass on).
		SetCard[i]=CardSet(UCSet[i]); 
		sst_typ *xsst=GetOptPttrnLPR(0,UCSet[i],UCSet[i],TRUE,Lpr,20);
		SetLPR[i]=Lpr; free(xsst);
	}
	// 2. Try to grow a superset from each seed set.
	// for(i=StartSet; i <= Index; i++){
	if(StartSet==1) EndSet=NumSets; else EndSet=StartSet;
	set_typ SuperSet=0;
	Int4	CardSuperSet=0;
	double	SuperSetLPR=0;
	for(i=StartSet; i <= NumSets; i++){
	  if(UCSet[i]){
	    // SuperSet == UCSet[i] means that no superset has been created for this seed yet.
	    SuperSet=UCSet[i]; CardSuperSet=SetCard[i]; SuperSetLPR=SetLPR[i];
	    for(j=1; j < EndSet; j++){
	     if(i==j) continue;
	     if(UCSet[j]){
		cardIJ=CardInterSet(SuperSet,UCSet[j]);		// Get the intersection of the two sets.
		d=(double) cardIJ/(double)CardSuperSet;
		if(d > cutoff) continue;	// I is a likely subset of J...So no use combining...
		d=(double) cardIJ/(double)SetCard[j];
		if(d > cutoff) continue;	// J is a likely subset of I...So no use combining...
		UnionSet3(SuperSet,UCSet[j],USet); 	// USet := SuperSet(I) U SetJ.
	        FillSet(SetBG); IntersectNotSet(SetBG,USet); // SetBG = not USet (includes random seqs).
	        IntersectNotSet(SetBG,RandomSet); // Remove Random sequences.

	   	Int4 CardUSet=CardSet(USet);
		sst_typ *xsst=GetOptPttrnLPR(0,USet,SetBG,FALSE,Lpr,20);
		if(Lpr < MinLPR || Lpr < SuperSetLPR || Lpr < SetLPR[j]){  // LPR insignificant --> continue.
#if 0
			PrintPattern(fp,xsst); 
			fprintf(fp,"\nSuperSet %d U %d: lpr = %.3f (%d sqs)(failed)\n\n",i,j,Lpr,CardUSet);
#endif
			free(xsst); xsst=0; // skip this one entirely...
		} else {
		  char x; BooLean okay=TRUE;
		  WtCardFG_BG_Sets(WtCntsSfFG,dummy);	// computes FG for the above Lpr.
		  // WARNING: assumes that GetOptPttrnLPR( ) was called to initialize CntBG & CntFG
		  fprintf(fp,"\nSuperSet %d U %d: lpr = %.3f (%d sqs)(cardIJ=%d)\n ",i,j,Lpr,CardUSet,cardIJ);
		  PrintPattern(fp,xsst); 

		  // Check set i for contribution to superset Lpr.
		  lpr=CalcSetvsPttrnLPR(0,SuperSet,SetBG,xsst,FALSE); // Same as for Lpr.
		  // lpr=CalcSetvsPttrnLPR(0,SuperSet,USet,xsst,TRUE); // change 0 to fp to print out.
		  // ^ The above removes I from background set USet; it also computes CntFG CntBG for below.
		  double WtCntsFG; WtCardFG_BG_Sets(WtCntsFG,dummy);
		  double min_lpr = Lpr*0.80*((double) WtCntsFG/(double) WtCntsSfFG); 
		  if(lpr < MinLPR || lpr < min_lpr){ x = '*'; okay=FALSE; } else { x = ' '; }
		  fprintf(fp,"  SubSet %d: lpr = %.3f (%d sqs)%c (cut: %.3f)\n",UCSetID[i],lpr,CardSuperSet,x,min_lpr);

		  if(okay){ // then check set j for contribution to superset Lpr.
		    lpr=CalcSetvsPttrnLPR(0,UCSet[j],SetBG,xsst,FALSE); // Same as for Lpr.
		    // lpr=CalcSetvsPttrnLPR(0,UCSet[j],USet,xsst,TRUE); // change 0 to fp to print out.
		    WtCardFG_BG_Sets(WtCntsFG,dummy);
		    min_lpr = Lpr*0.80*((double) WtCntsFG/(double) WtCntsSfFG); 
		    if(lpr < MinLPR || lpr < min_lpr){ x = '*'; okay=FALSE; } else { x = ' '; }
		    fprintf(fp,"  SubSet %d: lpr = %.3f (%d sqs)%c (cut: %.3f)\n\n",UCSetID[j],lpr,SetCard[j],x,min_lpr);
		  }

		  if(okay) {	// Found a beginning Superset...
			if(SuperSet==UCSet[i]){	// Create the SuperSet.
				Index++; UCSST[Index]=xsst; UCSetID[Index]=Index; 
				UCSet[Index]=CopySet(USet); SetLPR[Index]=Lpr; SetCard[Index]=CardUSet;
				SuperSet=UCSet[Index]; SuperSetLPR=Lpr; CardSuperSet=SetCard[Index];
			} else {	// Update the SuperSet...
				CopySet(SuperSet,USet);	// copies set USet to SuperSet 
				SetLPR[Index]=Lpr; SetCard[Index]=CardUSet;
				SuperSetLPR=Lpr; CardSuperSet=CardUSet;
				free(UCSST[Index]); UCSST[Index]=xsst; 
			}
		  } else { free(xsst); xsst=0; } 
		}
	  	// if(Index >= ArrayLength) break; // don't need; will only increment Index once per cycle...
     	     }
	    }
	    if(Index >= ArrayLength) break;
	  }
	}
	// PutHist(fp,60,HG); NilHist(HG);
        fprintf(fp,"\n %d supersets created\n\n",Index - NumSets); fflush(fp);
	NumClust=Index; NilSet(USet); NilSet(SetBG);
	free(SetCard); free(SetLPR);	// scratch tables are local to this call.
	return Index-NumSets;
}


