/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "pmcBPPS.h"
#include "blosum62.h" 

// Compile-time switch read by Process_amcBPPS(): when non-zero the optimized tree is
// fetched from scc_typ (RtnOptTree) and the tree-based cmc_typ::SampleHpt(ofp,Root,OptTree)
// refinement is compiled in; otherwise only cmc->SampleHpt(ofp) is called.
#define OPTIMIZE_THE_HPT  1

// Usage/help text passed to print_error() by Process_amcBPPS() whenever the command
// line is missing the prefix or contains an unrecognized or out-of-range option.
// NOTE(review): the -S range shown below (1..1000) differs from the check performed in
// Process_amcBPPS(), which accepts values > 1.0 and < 100000.0 -- confirm which is intended.
#define USAGE_START     "USAGE: pmcBPPS prefix [options]\n\
   options:\n\
   Note: Will look for files prefix.stb and prefix.ptb (generated by omcBPPS) and prefix.mma\n\
    (*.stb & *.ptb files can be concatenated to increase the number of input sets & corresponding patterns)\n\
     -C          - perform Bron_Kerbosch with further Clustering option\n\
     -C=<real>   - Set Clustering Ratio\n\
     -D=<real>   - Set maximum ratio of intersection to smaller set to consider sets as distinct\n\
     -i=<real>   - Set minimum smaller set fractional intersection to merge with larger set (range: 0..1)\n\
     -M=<real>   - Set minimum ratio of smaller to larger set to merge them (range: 0..1)\n\
     -P=<int>    - Print out intersect and union files for clusters of size >= <int>\n\
     -phyla=<int> - Designate the number of phyla to use for seed alignments (default: 4)\n\
     -R=<int>    - Set the number of random sequences\n\
     -seed=<int> - random seed\n\
     -sets       - Save the sets for comparing hierarchies (evalBPPS program)\n\
     -stb        - Use *.stb and *.ptb input files instead of *.amc file\n\
     -S=<real>   - Set minimum superset-to-subset ratio for tree construction (default: 4.0; 1..1000)\n\
     -trim=<int>  - Trim leaves with less than <int> sequences from hpt tree (default: 30)\n\
     -v          - verbose\n\
     -x          - dummy\n\n"

// Disabled copy of MkMainFileCMSA(), kept for reference only; Process_amcBPPS() still calls
// a function of this name, which per the note below now lives in cmsa.h.
// As written here it: (1) writes num_random random sequences (blosum62freq, length of
// block 1 of cma) to a temporary file and reads them back as an alignment; (2) merges cma
// with that random alignment through a second temporary file; (3) returns the merged
// alignment and hands the random-only alignment back through rcma (caller must destroy it).
// NOTE(review): AB is neither a parameter nor a local here -- presumably a global alphabet
// was in scope when this version was live.
#if 0	// moved to cmsa.h
cma_typ MkMainFileCMSA(cma_typ cma, Int4 num_random,cma_typ &rcma)
{
        cma_typ TmpCMA[3]; TmpCMA[1]=cma;
        FILE *tfp = tmpfile();
        PutRandomCMA(tfp,blosum62freq,LengthCMSA(1,cma),num_random,AB);
        rewind(tfp); TmpCMA[2]=ReadCMSA(tfp,AB); fclose(tfp);
        tfp = tmpfile(); PutMergedCMSA(tfp,2,TmpCMA); rewind(tfp);
        cma_typ mcma=ReadCMSA(tfp,AB); fclose(tfp);
        // TotalNilCMSA(TmpCMA[2]); // destroy the temporary Random sequence alignment.
        rcma=TmpCMA[2];
        return mcma;
}
#endif

// Rebuild the argument vector that Process_amcBPPS() hands to cmc_typ.
//   argv1 - file-name prefix from the pmcBPPS command line; "<argv1>_Opt" becomes Argv[1].
//   Argc  - in:  number of entries currently stored in Argv (each non-null one is freed);
//           out: number of entries written by this call.
//   Argv  - caller-owned array that receives strings created with AllocString(); the
//           caller releases them with free() (as this function does on re-entry).
// The prefix is formatted with a bounded snprintf(), so an over-long prefix is truncated
// to fit the local buffer instead of overrunning it.
void	MakeArgumentsPmcBPPS(char *argv1, Int4 &Argc, char *Argv[])
{
	char    str[1000];

	// Release the previous vector; clear each slot so no dangling pointer is left behind
	// in positions that are not refilled below.
	for(Int4 arg=0; arg < Argc; arg++){
		if(Argv[arg]){ free(Argv[arg]); Argv[arg]=0; }
	}

	// 5. Run a final cmcBPPS analysis to obtain a high quality model. 
	Argc=0; 
	Argv[Argc++]=AllocString("pmcBPPS");	// name of program; cmc_typ needs to know this...
	snprintf(str,sizeof(str),"%s_Opt",argv1);
	Argv[Argc++]=AllocString(str);
	Argv[Argc++]=AllocString("-N=20");
	// "-wchkpt" is deliberately omitted: it was causing an uninitialized memory read error.
	// "-col=30:50" is deliberately avoided: PROBLEMS WITH '30' !!!
	Argv[Argc++]=AllocString("-col=5:50");
	// Need to make sure that BPPS parameters are consistent between scc_typ and mcBPPS program.
	Argv[Argc++]=AllocString("-A=48:2");
	Argv[Argc++]=AllocString("-Am=40:10");
#if 1	// new settings...equivalent to 2*map...
	Argv[Argc++]=AllocString("-Ri=0.03");
	Argv[Argc++]=AllocString("-Rim=0.4");
	Argv[Argc++]=AllocString("-rho=0.003");
#else	// old settings
	Argv[Argc++]=AllocString("-Ri=0.001");
	Argv[Argc++]=AllocString("-Rim=0.2");
	Argv[Argc++]=AllocString("-rho=0.00001");
#endif
	// (an earlier variant used -ppb=100000)
	Argv[Argc++]=AllocString("-ppb=100");
	Argv[Argc++]=AllocString("-R=0");	// Don't reseed random generator.
}

// Print a one-line report on stderr for every non-null set in set[1..nsets] and return
// how many of those sets share no element with RandomSet.
//   iter      - iteration number, used only in the banner line.
//   nsets     - highest set index to examine.
//   set       - 1-based array of sets; null entries are skipped.
//   RandomSet - set intersected with each entry.
//   Ptrn      - pattern record; Ptrn->N goes in the banner and Ptrn->LPR[s][0] in each line.
//   AB        - alphabet, only needed by the (currently disabled) PutPtrn() call.
Int4	CheckSets(Int4 iter,Int4 nsets,set_typ *set,set_typ RandomSet,ptrn_typ Ptrn,a_type AB)
{
   Int4	num_clean=0;	// sets with an empty intersection with RandomSet

   fprintf(stderr,"================== iter %d (%d sets; %d pttrns) ===================\n",
	iter,nsets,Ptrn->N);

   Int4	s=1;
   while(s <= nsets){
	if(set[s] != 0){
	   Int4	overlap=CardInterSet(set[s],RandomSet);
	   if(overlap == 0){ num_clean += 1; }
	   double lpr=Ptrn->LPR[s][0];
	   fprintf(stderr,"Card Set[%d] = %d [%d]. (%.2f)", s,CardSet(set[s]),overlap,lpr);
	   // Pattern output is switched off by the constant 0; only the newline is emitted.
	   if(0 && s <= Ptrn->N){ PutPtrn(stderr,Ptrn,s,AB); }
	   else { fprintf(stderr,"\n"); }
	   // assert(CardInterSet(set[s],RandomSet) == 0);
	}
	s++;
   }
   return num_clean;
}

// Process_amcBPPS - driver for the pmcBPPS program.
//
//   argc/argv - command line; argv[1] is the file-name prefix and argv[2..] are the options
//               described in USAGE_START.  Any malformed option is reported through
//               print_error(USAGE_START) (presumably fatal -- confirm in print_error()).
//   returns   - 0 on completion.
//
// Steps, as visible in this function:
//   1. Parse the options, record the command line in <prefix>.cmd and seed the generator.
//   2. Read <prefix>.mma and read (or create and write) <prefix>.hsw.
//   3. Build an scc_typ from the prefix (default) or from <prefix>.stb/.ptb (-stb).
//   4. Call MergeSimilarSets() until the number of sets stops changing, then find/create
//      supersets, merge once more and build a tree; this writes <prefix>_Opt.Anal, .spr,
//      .grph, .ph, .nwt, .sma and .hpt (plus _OptSet<n>.cma files with -P).
//   5. Build a cmc_typ on the alignment extended with random sequences, optionally refine
//      the hyperpartition against the tree (OPTIMIZE_THE_HPT), sample until convergence and
//      write the result files.
//
// Compared with the previous version: the seed is scanned/printed with %u (it is a UInt4),
// the elapsed time is kept in a time_t and printed with %ld, file names built from the
// user-supplied prefix use bounded snprintf(), and unused locals were dropped.
int	Process_amcBPPS(Int4 argc,char *argv[])
{
	Int4	arg,Iter;
	Int4	Argc,MinSize=0;
	UInt4   seed=7061950;	// sentinel: "no -seed given"; replaced by time(NULL) below.
	char	str[1000];
	double	MinIntersect=0.0,MinSetSizeRatio=0.0;
	double	SuperSetMinRatio=0.0,MaxDistinctRatio=0.0,ClusterRatio=-1.0;
	Int4	i,j,LeafTrimCutoff=30;
	a_type	AB=MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
	BooLean	WithClustering=FALSE,PrintUI_files=FALSE,verbose=FALSE,SaveSets=FALSE;
	Int4	*NumSets=0,NumRandom=0;
	Int4	num_phyla=4;
	BooLean	UseAMC=TRUE;
	char	*Argv[1000];

	time_t	time1=time(NULL);	// start time for the elapsed-time report at the end.
	for(arg=0; arg < 1000; arg++) Argv[arg]=0;

	if(argc < 2) print_error(USAGE_START);

	snprintf(str,sizeof(str),"%s_1",argv[1]);
	Argc=0; 
	Argv[0]=AllocString("pmcBPPS"); Argc++;	// name of program.
	Argv[1]=AllocString(str); Argc++;	// copy main file.

	// ---- option parsing: every option starts with '-' and is dispatched on its 2nd char.
	for(arg = 2; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(USAGE_START);
	   switch(argv[arg][1]) {
	     case 'C':	// "-C" alone turns clustering on; "-C=<real>" sets the ratio.
		if(argv[arg][2] != 0){
		  if(sscanf(argv[arg],"-C=%lf",&ClusterRatio)==1){
                   if(ClusterRatio <= 0.0 || ClusterRatio >= 1.0) print_error(USAGE_START);
		  } else print_error(USAGE_START);
		} else WithClustering=TRUE;
		break;
	     case 'D':
		if(sscanf(argv[arg],"-D=%lf",&MaxDistinctRatio)==1){
                   if(MaxDistinctRatio <= 0.0 || MaxDistinctRatio > 0.25) print_error(USAGE_START);
                } else print_error(USAGE_START);
		break;
	     case 'M':
		if(sscanf(argv[arg],"-M=%lf",&MinSetSizeRatio)==1){
                   if(MinSetSizeRatio <= 0.0 || MinSetSizeRatio > 1.0) print_error(USAGE_START);
                } else print_error(USAGE_START);
		break;
	     case 'i':
		if(sscanf(argv[arg],"-i=%lf",&MinIntersect)==1){
                   if(MinIntersect <= 0.0 || MinIntersect >= 1.0) print_error(USAGE_START);
                } else print_error(USAGE_START);
		break;
	     case 'P':
		if(sscanf(argv[arg],"-P=%d",&MinSize)==1){
			if(MinSize < 0) print_error(USAGE_START);
			PrintUI_files=TRUE;
		} else print_error(USAGE_START);
		break;
	     case 'p':
		if(sscanf(argv[arg],"-phyla=%d",&num_phyla)==1){
			if(num_phyla < 1) print_error(USAGE_START);
		} else print_error(USAGE_START);
		break;
	     case 'R':
		if(sscanf(argv[arg],"-R=%d",&NumRandom)==1){
			if(NumRandom <= 0) print_error(USAGE_START);
		} else print_error(USAGE_START);
		break;
	     case 'S':
		if(sscanf(argv[arg],"-S=%lf",&SuperSetMinRatio)==1){
                   if(SuperSetMinRatio <= 1.0 || SuperSetMinRatio >= 100000.0) print_error(USAGE_START);
                } else print_error(USAGE_START);
		break;
	     case 's':	// -stb, -sets or -seed=<int>
		if(strcmp("-stb",argv[arg]) == 0){ UseAMC=FALSE; }
		else if(strcmp("-sets",argv[arg]) == 0) SaveSets=TRUE;
		else if(sscanf(argv[arg],"-seed=%u",&seed)!=1) print_error(USAGE_START);
		break;
	     case 't':
		if(sscanf(argv[arg],"-trim=%d",&LeafTrimCutoff)==1){
			if(LeafTrimCutoff <= 0) print_error(USAGE_START);
		} else print_error(USAGE_START);
		break;
	     case 'v':
		if(argv[arg][2] != 0) print_error(USAGE_START);
		else verbose=TRUE;
		break;
	     case 'x': break;
	     default: print_error(USAGE_START); break;
	   }
	}

	// ---- record the command line (including the seed actually used) in <prefix>.cmd.
	FILE *fp=open_file(argv[1],".cmd","w");
	for(arg = 0; arg < argc; arg++) fprintf(fp,"%s ",argv[arg]); 
	if(seed == 7061950){ seed = (UInt4) time(NULL); fprintf(fp,"-seed=%u ",seed); }
	fprintf(fp,"\n"); fclose(fp);
        sRandom(seed);
	Int4 Length;
	char	*argv1=argv[1];

	FILE *cfp=open_file(argv1,".mma","r");
	cma_typ TrueMainCMA=ReadCMSA(cfp,AB);
	fclose(cfp); cfp=0;
	// Default number of random sequences: one more than a third of the real sequences.
	if(NumRandom==0) NumRandom=1+(NumSeqsCMSA(TrueMainCMA)/3); // sam
	// also compute MainCMA (with Random sequences) here...?
	// just need to concatenate old binary files to combine searches.

	Int4 *set_ids=0,NewNumSets=0,LastNumSets;
	set_typ	*NewSet,*OptSet;
	sst_typ	**NewSST;
	Int4	NumOptSets=0,last_iter=100;
	wdg_typ	OptTree=0;
	Int4	Root=0;
	scc_typ *scc=0;

	// ---- sequence weights: reuse <prefix>.hsw when it exists, otherwise create and save it.
	// own_hsw records whether this function (TRUE) or swt (FALSE) is treated as the owner
	// of hsw when cleaning up at the end.
	BooLean own_hsw=TRUE;
	hsw_typ hsw=0; swt_typ *swt=0;
	snprintf(str,sizeof(str),"%s.hsw",argv1);
	if((fp=fopen(str,"r")) == NULL){	// create file...
		swt = new swt_typ(TrueMainCMA);
		hsw=swt->RtnHSW( ); own_hsw=FALSE;
		fp = open_file(argv1,".hsw","w");
		FWriteHSW(fp,hsw); fclose(fp);
	} else { hsw=FReadHSW(fp,AB,TrueMainCMA); fclose(fp); swt = new swt_typ(hsw); }

        if(ClusterRatio <= 0.0) ClusterRatio=0.20; 
	if(UseAMC){	// use this only eventually.
	  scc= new scc_typ(argv1,TrueMainCMA,swt);
	} else {
//******************************** delete the below eventually ****************************
	  // -stb path: read up to 1000 concatenated set/pattern tables from <prefix>.stb/.ptb.
	  Int4	NumIters=1003;
	  set_typ	**Set; 	NEWP(Set,NumIters+3,set_typ);
	  NEW(NumSets,NumIters+3,Int4);
	  sst_typ	***SST; NEWPP(SST,NumIters+3,sst_typ);
	  ptrn_typ *Ptrn; NEW(Ptrn,NumIters+3,ptrn_typ);

	  FILE *stbfp=open_file(argv1,".stb","r");
	  FILE *ptbfp=open_file(argv1,".ptb","r");
	  set_typ RandomSet=0;
	  for(Iter = 1; Iter <= NumIters; Iter++){
		if(Iter > 1000) print_error("Too many input files for processing");
		// read sets from a binary file. 
		Set[Iter]=ReadSets(stbfp,NumSets[Iter]); // returns Set[Iter]=0 at end of file.
		if(Iter == 1 && NumSets[Iter] < 1){
		   fprintf(stderr,"\n\tInput file '%s.stb' contains no sets!\n",argv1);
		   print_error("\t***** Fatal error in pmcBPPS procedure *****\n\n");
		}
		if(RandomSet==0){
			// Built once, on the first pass: holds the indices above the number of
			// real sequences, i.e. the positions occupied by the random sequences.
			RandomSet=MakeSet(SetN(Set[Iter][1])); ClearSet(RandomSet);
			for(i=NumSeqsCMSA(TrueMainCMA)+1; i < SetN(RandomSet); i++) AddSet(i,RandomSet);
		}
		if(Set[Iter]==0){ NumIters = Iter-1; break; }
		else if(Iter==1){
			// The set size must equal #real + #random sequences (+1).
			Int4 n1=NumSeqsCMSA(TrueMainCMA);
			Int4 n2=SetN(Set[Iter][1])-1;
			if((n2 - n1) != NumRandom){
				fprintf(stderr,"%d seqs + %d random != %d card_sets\n",
					n1,NumRandom,n2);
				fprintf(stderr,"Need %d random\n",n2-n1);
				print_error("pmcBPPS: mma & stb input files are inconsistent");
			}
		}
		Ptrn[Iter]=ReadPtrn(ptbfp); 
		Int4 z=CheckSets(Iter,NumSets[Iter],Set[Iter],RandomSet,Ptrn[Iter],AB);
#if 1	// DEBUG...
		if(z < NumSets[Iter]){
		    // Fewer "clean" sets than sets read: drop the trailing sets above index z
		    // and compact away patterns whose LPR[.][0] is zero.
		    Int4 y;
		    ptrn_typ P=Ptrn[Iter];
		    for(y=NumSets[Iter]; y > z; y--){
			if(Set[Iter][y]) NilSet(Set[Iter][y]);
			Set[Iter][y]=0; NumSets[Iter]--;
		    }
		    // NOTE(review): after a shift the element moved into slot y is not
		    // re-examined, and slot P->N itself is never tested -- confirm that
		    // zero-LPR patterns cannot be adjacent or last.
		    for(y=1; y < P->N; y++){
			// PutPtrn(stderr,P,y,AB);
			if(P->LPR[y][0] == 0.0){
			    if(P->SST[y] != 0) { free(P->SST[y]); P->SST[y] = 0; }
			    for(z=y; z < P->N; z++){
			    	P->SST[z] = P->SST[z+1]; P->SST[z+1] = 0;
			    	P->LPR[z] = P->LPR[z+1]; P->LPR[z+1] = 0; 
			    } P->N--;
			}
		    }
		    fprintf(stderr,"  reduced number of sets and patterns to %d\n",P->N);
		    CheckSets(Iter,NumSets[Iter],Set[Iter],RandomSet,Ptrn[Iter],AB);
		}
#endif
		// fprintf(stderr,"Ptrn->Len=%d; Ptrn->N=%d\n",Ptrn[Iter]->Len,Ptrn[Iter]->N);
		SST[Iter]=Ptrn[Iter]->SST;
		// All pattern tables must describe alignments of the same length.
		if(Iter == 1) Length=Ptrn[Iter]->Len;
		else assert(Length==Ptrn[Iter]->Len);

	  } fclose(stbfp); fclose(ptbfp); 
	  NilSet(RandomSet); fprintf(stderr,"=====================================================\n\n");

	  scc= new scc_typ(NumIters,NumSets,Set,SST,TrueMainCMA,swt);
	  // for(Iter = 1; Iter <= NumIters; Iter++) NilPtrn(Ptrn[Iter]); free(Ptrn);
//******************************** delete the above eventually ****************************
	}

	// ---- iterative merging of similar sets; ends with tree construction.
	fp = open_file(argv1,"_Opt.Anal","w");
	FILE *pfp = open_file(argv1,"_Opt.spr","w");
   for(i=1; i <= last_iter; i++){
	fprintf(fp,"************************* Set MergeSimilarSets (%d) *************************\n",i);
	if(i > 1){
	    // The previous scc was deleted at the end of the last pass; rebuild it from
	    // the sets it returned.
	    fprintf(fp,"========= iter %d: %d sets =========\n",i,NewNumSets);
	    scc= new scc_typ(NewNumSets,NewSet,NewSST,TrueMainCMA,set_ids,swt); 
	} else fprintf(fp,"========= iter 0: %d sets =========\n",scc->RtnNumSets());
	// Only override scc defaults for the thresholds given on the command line.
	if(MinIntersect > 0) scc->SetMinIntersect(MinIntersect);
	if(MinSetSizeRatio > 0) scc->SetMinSetSizeRatio(MinSetSizeRatio);
	if(MaxDistinctRatio > 0) scc->SetMaxDistinctRatio(MaxDistinctRatio);
	if(SuperSetMinRatio > 0) scc->SetSuperSetMinRatio(SuperSetMinRatio);
	fflush(fp);
	if(i == 0){	// can probably skip this. (never taken: the loop starts at i=1)
	  // j =scc->FindSuperSets(pfp,ClusterRatio,2,100.0);
	  j =scc->FindSuperSets(pfp,ClusterRatio,2,50.0);
	  fprintf(pfp,"=========  %d supersets found =========\n",j); fflush(pfp); // exit(1);
	  NewSet = scc->RtnUCSets(NewNumSets);
	  NewSST = scc->RtnUCSSTs();	// scc no longer owns UCSST! Need to free NewSST.
	  set_ids = scc->RtnUCSetIDs();	// scc no longer owns set_ids! Need to free set_ids.
	  LastNumSets=NewNumSets;
	  delete scc;
	  scc= new scc_typ(NewNumSets,NewSet,NewSST,TrueMainCMA,set_ids,swt);
	}
	if(i < last_iter){
	  if(verbose) scc->PutVerboseReport(fp); 	// informative only...
	  scc->MergeSimilarSets(fp,pfp,WithClustering);
	  NewSet = scc->RtnUCSets(NewNumSets);	
	  NewSST = scc->RtnUCSSTs();	// scc no longer owns UCSST! Free NewSST.
	  set_ids = scc->RtnUCSetIDs(); // scc no longer owns set_ids! Free set_ids.
	  // Set count unchanged since the previous pass: make the next pass the final one.
	  // (LastNumSets is first assigned on pass 1, before the i > 2 test can read it.)
	  if(i > 2 && NewNumSets == LastNumSets){ last_iter=i+1; }
	  LastNumSets=NewNumSets;
	  delete scc;
	} else if(i >= last_iter){	// converged...
		// find the supersets...
		// fprintf(pfp,"========= iter %d: %d sets =========\n",i,NewNumSets);
		// scc->FindSuperSets(pfp,ClusterRatio,5,100.0);
		// j =scc->FindSuperSets(pfp,ClusterRatio,2,100.0);
		j =scc->FindSuperSets(pfp,ClusterRatio,2,50.0);
		fprintf(pfp,"  %d supersets found\n",j); fflush(pfp); // exit(1);
		NewSet = scc->RtnUCSets(NewNumSets); NewSST = scc->RtnUCSSTs();
		set_ids = scc->RtnUCSetIDs(); 
		delete scc;

		// Create Additional Super Sets...
		scc= new scc_typ(NewNumSets,NewSet,NewSST,TrueMainCMA,set_ids,swt);
		if(!scc->WillFreeInput()){
			// intentionally empty (placeholder kept from the original code).
		}
		scc=CreateSuperSetsDriver(pfp,0.50,150.0,scc);
		NewSet = scc->RtnUCSets(NewNumSets); NewSST = scc->RtnUCSSTs();
		set_ids = scc->RtnUCSetIDs(); 
		delete scc;

		// one more merge after generating supersets.
		scc= new scc_typ(NewNumSets,NewSet,NewSST,TrueMainCMA,set_ids,swt);
	  	scc->MergeSimilarSets(fp,pfp,WithClustering);
		fclose(pfp); // exit(1);	// .spr output file closed.
		NewSet = scc->RtnUCSets(NewNumSets); NewSST = scc->RtnUCSSTs();
		set_ids = scc->RtnUCSetIDs(); 
		delete scc;

		scc= new scc_typ(NewNumSets,NewSet,NewSST,TrueMainCMA,set_ids,swt);
		scc->PutVerboseReport(fp);
		fclose(fp);	// .Anal output file closed.
		FILE *gfp = open_file(argv1,"_Opt.grph","w");
		// 3. return optimized sets corresponding to sma file below.
		OptSet=scc->CreateTree(gfp,NumOptSets,LeafTrimCutoff); fclose(gfp); 

		// (idea, not implemented: create a complete graph of every node vs every other node?)
		// WARNING: CreateTree modifies the Sets in NewSet[i] so they can be used by cmc_typ below.
		FILE *phfp = open_file(argv1,"_Opt.ph","w");
		FILE *nwfp = open_file(argv1,"_Opt.nwt","w");
		scc->PrintNewickTree(phfp,nwfp); fclose(nwfp); fclose(phfp); 
		// 2. Create a new *.sma file based on processing analysis.
		//    Want to pick the most typical foreground sequences from distinct phyla.
		FILE *smafp = open_file(argv1,"_Opt.sma","w");
   		scc->PrintSMA(smafp,num_phyla); fclose(smafp); 

		// Build a small argv for tree2hpt(), which reads the tree just written.
		int argcnt=0;
		char *argval[9];
		argval[0]=AllocString("tree2hpt"); argcnt++;
		snprintf(str,sizeof(str),"%s_Opt.ph",argv1); 	// input file name
		argval[1]=AllocString(str); argcnt++;
		argval[2]=AllocString("-seed=0"); argcnt++; // don't reseed random generator.
		FILE *hpt_fp=open_file(argv1,"_Opt.hpt","w");
		// 4. Create a new Hyperpartition based on processing analysis.
		tree2hpt(hpt_fp,argcnt,argval,NumRandom);
		// scc->PutOptSST(hpt_fp); 	// leave this out; not working correctly...
		fprintf(hpt_fp,"\nSettings:\n"); // add Patterns to hpt file...
		for(Int4 j=1; j <= NumOptSets; j++){ scc->PutOptPattern(hpt_fp,j); }
		fprintf(hpt_fp,"\n\n");
		fclose(hpt_fp); for(Int4 x=0; x < argcnt; x++) free(argval[x]);

		if(PrintUI_files){
		  // -P=<int>: write one alignment file per optimized set of at least MinSize.
		  for(Int4 j=1; j <= NumOptSets; j++){
		    if(CardSet(OptSet[j]) >= MinSize){
			Int4 s=scc->MapOptSetToInSet(j);
			snprintf(str,sizeof(str),"_OptSet%d.cma",s); 
			FILE *cfp = open_file(argv1,str,"w");
			snprintf(str,sizeof(str),"Set%d",s); ReNameCMSA(str,TrueMainCMA);
			PutInSetCMSA(cfp,OptSet[j],TrueMainCMA); fclose(cfp);
		    }
		  }
		} 
#if OPTIMIZE_THE_HPT
		OptTree=scc->RtnOptTree(Root);
#endif
		delete scc; break; 
	} 
   }

	// ---- final analysis with cmc_typ.
	MakeArgumentsPmcBPPS(argv1,Argc,Argv);
	FILE *xfp=open_file(argv1,"_Opt.cmd","w"); 
	for(arg=0; arg < Argc; arg++) fprintf(xfp,"%s ",Argv[arg]);
	fprintf(xfp,"-nocsq\n");
	fclose(xfp);
   {
        BooLean Converged=FALSE;
	// in_mcma = real alignment plus NumRandom random sequences; HSW = matching weights.
	cma_typ rcma,in_mcma=MkMainFileCMSA(TrueMainCMA,NumRandom,rcma);
	hsw_typ HSW=AddRandomHSW(hsw,TrueMainCMA,rcma,in_mcma);
        TotalNilCMSA(rcma);	// destroy the temporary Random sequence alignment.

	// create new sets...Size of OptSets already includes NumRandom.
	// The extra (last) set holds the random sequences, i.e. indices N+1..N+NumRandom.
	Int4	NumMOptSets=NumOptSets+1, N=NumSeqsCMSA(TrueMainCMA),st,M;
	set_typ *MOptSet;  NEW(MOptSet,NumMOptSets +3, set_typ);
	for(st=1; st <= NumOptSets; st++) MOptSet[st]= OptSet[st];
	MOptSet[st]= MakeSet(SetN(OptSet[1])); M = N + NumRandom;
	for(Int4 sq=N+1; sq <= M; sq++) AddSet(sq,MOptSet[st]);

#if OPTIMIZE_THE_HPT
	// Keep private copies of the sets so they can be re-ordered for the second cmc_typ.
	set_typ *TOptSet;  NEW(TOptSet,NumMOptSets +3, set_typ);
	for(st=1; st <= NumMOptSets; st++) TOptSet[st]= CopySet(MOptSet[st]);
#endif
	cmc_typ *cmc= new cmc_typ(TrueMainCMA, in_mcma, HSW, NumMOptSets,MOptSet,Argc, Argv);
	// cmcBPPS TestD -A=90:10 -Am=80:20 -N=20 -col=10:30
        fflush(stdout);
	FILE *hfp=open_file(argv1,"_Opt0.hpt","w");	// writes over previous hpt.
	cmc->PutHpt(hfp);  fclose(hfp); // saves hpt settings.
#if 0
	fprintf(stderr,"\ttime(pmcBPPS): %ld seconds (%0.2f minutes)\n",
		(long)(time(NULL)-time1),(float)(time(NULL)-time1)/60.0);
return 0;	// debug...
#endif
	FILE *ofp=open_file(argv1,"_hpt_sample.out","w");
	cmc->PutHyperPartition(ofp);
#if OPTIMIZE_THE_HPT
	PrintNewickTreeWDG(stderr,Root,OptTree); // DEBUG: make sure it checks out.
	// if(cmc->SampleHpt(ofp,Root,OptTree)) cmc->PutHyperPartition(ofp);
	cmc->SampleHpt(ofp,Root,OptTree);
	cmc->PutHyperPartition(ofp);
	// Try Tree strategy...Much more elegant...
	hfp=open_file(argv1,"_Raw.hpt","w");	// save for comparison.
	cmc->PutHpt(hfp);  fclose(hfp); // saves hpt settings.

	// Get new and old hpt (FD-tables).
        FILE *tfp =open_file(argv1,"_Opt.ph","w"); // writes over previous phylogeny.
	PrintNewickTreeWDG(tfp,Root,OptTree); fclose(tfp);

	int argcnt=0;
	char *argval[9];
	argval[0]=AllocString("tree2hpt"); argcnt++;
	snprintf(str,sizeof(str),"%s_Opt.ph",argv1); 	// input file name
	argval[1]=AllocString(str); argcnt++;
	argval[2]=AllocString("-seed=0"); argcnt++;
	// hptN: hyperpartition derived from the (possibly changed) tree, via a temp file.
        tfp = tmpfile(); tree2hpt(tfp,argcnt,argval,NumRandom); rewind(tfp);
	hpt_typ *hptN= new hpt_typ(tfp); fclose(tfp);

	// hptO: hyperpartition currently held by cmc (not deleted here; cmc is deleted below).
	hpt_typ *hptO=cmc->GetHpt();
	// at same time as output settings set up new OptSet array as input to new cmc_typ.
	if(hptO->NumSets() != hptN->NumSets() || hptN->NumSets() != NumMOptSets){
		fprintf(stderr,"hptO->NumSets =%d; hptN->NumSets = %d\nTree: ",hptO->NumSets(),hptN->NumSets());
		PrintNewickTreeWDG(stderr,Root,OptTree);
		fprintf(stderr,"hptO: "); hptO->Put(stderr);
		fprintf(stderr,"hptN: "); hptN->Put(stderr);
		assert(hptO->NumSets() == hptN->NumSets());
	}
	set_typ *NOptSet;  NEW(NOptSet,NumMOptSets +3, set_typ);
	NOptSet[NumMOptSets] = TOptSet[NumMOptSets]; 	// for Random set.

	hfp=open_file(argv1,"_Opt.hpt","w");	// writes over original Hpt.
	// cmc->PutSortedHpt(hfp);  fclose(hfp); // THIS WASN'T WORKING.
	fprintf(hfp,"\n");
	tree2hpt(hfp,argcnt,argval,NumRandom); tree2hpt(stderr,argcnt,argval,NumRandom);
	fprintf(hfp,"\nSettings:\n"); fprintf(stderr,"\nSettings:\n"); // add Patterns to hpt file...
	for(Int4 j=1; j <= hptN->NumBPPS(); j++){
		Int4 J=hptN->SameSet(j,hptO);  // find the set in hptO corresponding to set j in hptN.
		assert(J > 0 && J <= hptN->NumBPPS());	// i.e., J is within range.
		hptO->PutSettings(hfp,j,J); hptO->PutSettings(stderr,j,J);
		assert(TOptSet[J] != 0);	// each old set may be claimed only once.
		NOptSet[j]=TOptSet[J]; // make a new set that corresponds to old set
		TOptSet[J]=0;
	} fprintf(hfp,"\n\n"); fprintf(stderr,"\n\n");
	fclose(hfp); free(TOptSet);
	for(Int4 x=0; x < argcnt; x++) free(argval[x]);

	delete hptN;
	delete cmc;

	MakeArgumentsPmcBPPS(argv1,Argc,Argv);
	// Argv[0][0]='c'; // change from pmcBPPS to cmcBPPS; avoids changing Hpt.
#if 0	// start over from scratch to avoid local traps.
	cmc = new cmc_typ(TrueMainCMA,in_mcma, HSW, Argc, Argv);
#else
	Argv[Argc] = AllocString("-fixed=80"); Argc++;
	cmc= new cmc_typ(TrueMainCMA, in_mcma, HSW, NumMOptSets,NOptSet,Argc, Argv);
#endif
        cmc->PutHyperPartition( );
#else
	cmc->SampleHpt(ofp);
#endif
	if(OptTree) NilWdgraph(OptTree);

	// ---- sample until cmc reports convergence.
        Int4   iter=0;
        do {
            iter++;
#if 0
	    FILE *xfp=open_file(argv1,"_Opt.xlpr","w"); cmc->PutAllSubLPRs(xfp); fclose(xfp); // exit(1);
#endif
            Converged=cmc->Sample( );
#if 1
	    if(iter==1){ 
	       cmc->RemoveSimilarSets( );	// need: *.hpt, same *.sma, 
               cmc->PutHyperPartition( );
	    }
#endif
	    // cmc->SampleHpt(ofp);
            double lpr=cmc->CalcTotalLPR();
            if(lpr <= 0.0) print_error("Failed to find significant set assignments for this Hyperpartition");
        } while(!Converged);
	cmc->RestoreBest();
#if 0	// 
	cmc->PutMapContributions(stdout); fflush(stdout);
#endif
        cmc->Put();     // creates <infile>_grp.chn
#if 1   // create new cmc_typ to output optimal sequences (pdb seqs in particular)...
	// Temporarily swap Argv[1] for "<Argv[1]>_sarp"; the original string is restored below.
        char *tmp_arg=Argv[1];
	snprintf(str,sizeof(str),"%s_sarp",Argv[1]); Argv[1]=AllocString(str);

        cmc_typ *cmc2 = cmc->OptimizeDisplaySet(HSW,Argc,Argv);
        if(cmc2){
        	// assert(cmc->TheSame(cmc2));
                cmc2->PutRTF(FALSE); // cmc2->Put();
                delete cmc2;
        } free(Argv[1]); Argv[1]=tmp_arg;
#endif
	hfp=open_file(argv1,"_Opt_new.hpt","w");  // writes a SARP readable 'corrected' hpt.
	cmc->PutSARPHpt(hfp);  fclose(hfp); // without failed nodes.

        fprintf(stderr,"done printing results\n");
        cmc->PutHyperPartition( );
	if(cmc->WriteCheckPoint) cmc->WriteCheckpoint(0);

	xfp=open_file(argv1,"_Opt.cntrb","w");
	cmc->PutMapContributions(xfp); fclose(xfp);

	if(SaveSets){
	  // NOTE(review): 'sets' is a copy returned by cmc and is not released here --
	  // confirm who is expected to free it.
	  set_typ *sets=cmc->CopyOfSeqSets(); 
	  Int4 num_seq_sets=cmc->RtnNumElmntSetCMA( );
	  xfp=open_file(argv1,"_Opt.sets","w");
	  WriteSets(xfp,num_seq_sets,sets); fclose(xfp);
	}
        if(TRUE){ // replace with a cmc->xxx()?
           xfp=open_file(argv1,"_sq.lpr","w");
           hfp=open_file(argv1,"_sq.hst","w");
           FILE *sfp=open_file(argv1,"_bst.mma","w");
           FILE *pdbfp=open_file(argv1,"_pdb.mma","w");
           cmc->PutSeqContrib(xfp,hfp,sfp,pdbfp);
           fclose(xfp); fclose(hfp); fclose(sfp); fclose(pdbfp);
        }
        delete cmc; NilHSW(HSW); TotalNilCMSA(in_mcma);
	fclose(ofp);
   }
	argv[1]=argv1;
	delete swt; if(own_hsw) NilHSW(hsw);
	TotalNilCMSA(TrueMainCMA);
	// Elapsed wall-clock time; cast explicitly so the argument matches %ld.
	long	elapsed=(long)(time(NULL)-time1);
	fprintf(stderr,"\ttime(pmcBPPS): %ld seconds (%0.2f minutes)\n",
		elapsed,(float)elapsed/60.0);

	return 0;
}

