#include "prb_typ.h"

// Judging by the name, an upper limit on the number of input sequences.
// NOTE(review): not referenced in the part of the file visible here --
// confirm where (and whether) it is enforced.
const Int4	MAX_IN_SEQS=20000000;

// Usage/help text printed (via print_error) when the probe command line is
// malformed.  The ranges and defaults quoted here are kept in step with the
// limits that prb_typ::Init() actually enforces:
//   -a 0-19, -b 1-50, -E/-e 0-5000, -h default 5 (mhpsz), -P 1-9999999,
//   -p default 20 (4*mhpsz), -R 0-100, -S 20-5000, -T 0-5000.
// The -t entry has no numeric argument, so it carries no range.
static char PROBE_USAGE[]= "\nUsage: probe fastafile database [options] \n\
  options (range:default):\n\
\t-A           - start by searching database with input file fastafile.msa \n\
\t-a<int>      - algebraic operations (0-19:1)\n\
\t               '0' = fast & dirty mode (no refinement)\n\
\t               '1' = fast mode (split & fuse once)\n\
\t               '2' = refine mode (do refinement steps)\n\
\t               '3' = rigorous mode (careful, time-consuming refinement)\n\
\t               '4' = experimental (time-consuming refinement)\n\
\t               '10'-'14' = experimental gapped version of probe.\n\
\t               '5'-'9' = not yet implemented.\n\
\t-B<char>     - residue frequencies to use for background model\n\
\t                m = use multiple alignment background counts.\n\
\t                s = use standard background counts.\n\
\t                d = use database background counts.\n\
\t                (default = use standard background counts).\n\
\t-b<int>      - initial mean number of blocks (1-50:5)\n\
\t-C<float>    - minimum 'field map' Cutoff for individual blocks (0.0)\n\
\t-c<int>      - initial mean number of columns per block (3-200:15)\n\
\t-col=<int>:<int>  - minimum and maximum number of starting columns\n\
\t-D           - don't delete blocks before adding to cma population\n\
\t-d<float>    - maximum E-value for repeat domain detection\n\
\t-E<float>    - scan single-block E-value cutoff (0-5000:1)\n\
\t-e<float>    - scan E-value cutoff (0-5000:0.01)\n\
\t-f<int1:int2>- fix number of blocks to within the range int1-int2.\n\
\t-G<file>     - use guide sequence given in <file>\n\
\t-G=<int>,<int>- perform goscan gapped search with open & extend penalties (suggested: 18,2)\n\
\t-g<int>,<int>- specify gap opening and extension penalties (default: 10000,2000)\n\
\t-H           - create a histogram file (*.gaps) of gap lengths\n\
\t-h<int>      - heapsize for msa population (2-1000:5)\n\
\t-I<int>:<int>- save internal repeats with flanking regions of given lengths\n\
\t-i<int>      - purge score decrement (1-100:5)\n\
\t-L           - Don't mask low complexity regions\n\
\t-l<int>      - set rapid convergence limit (higher = longer to converge)\n\
\t-M<int>:<int>- minimum & maximum number of sequences in alignment\n\
\t-m<char>     - specify scan method (cCdDgGrR:c)\n\
\t               'g' or 'G' = Gribskov's (fragment or don't fragment)\n\
\t               'b' or 'B' = modified Gribskov's with motif background\n\
\t               'm' or 'M' = modified Gribskov's\n\
\t               'd' or 'D' = Dirichlet mixture priors\n\
\t               'r' or 'R' = product multinomial\n\
\t               'h' or 'H' = Henikoff's method\n\
\t-N<int>      - maximum number of repeats for scan step (1-1000:1)\n\
\t-n           - mask potential nonglobular regions using 'seg x 45 3.4 3.75'\n\
\t-n<int>      - minimum number of blocks with good matches in search phase (default: all blks)\n\
\t-O           - breed using input file fastafile.msa\n\
\t-O<int>      - Create an alignment from database search step to start Gibbs sampling\n\
\t                Purge the alignment at a cutoff of <int> percent identity\n\
\t-o<int>      - improve (optimize) by removing segments pulling down map\n\
\t-P<int>      - Patience in seconds for breed time (1-9999999:600)\n\
\t-p<int>      - msa population size (2-10000:20)\n\
\t-R<int>      - maximum number of model refinement cycles (0-100:10)\n\
\t-r<int>      - minimum number of repeats to save a hit (1-1000:1)\n\
\t-S<int>      - purge maximum cutoff score (20-5000:150)\n\
\t-s<int>      - random seed\n\
\t-T<real>     - Use simulated annealing starting with temperature T (0-5000:nil)\n\
\t-t           - Use a template cmsa file (fastafile.cma) to start alignment\n\
\t-trim=<real> - trim the cmsa file at <real> bits of information prior to a search\n\
\t-U           - use the repset option\n\
\t-U+          - use the repset option with priority on labeled sequences\n\
\t-u<char>     - use the scan mode specified by <char>\n\
\t                'g' = global with gap function\n\
\t                'G' = global without gap function (default)\n\
\t                'c' = local core with gap function\n\
\t                'C' = local core without gap function\n\
\t                'o' = local overlap with gaps\n\
\t                'O' = local overlap without gaps\n\
\t-v<int>      - maximum variation in map for convergence in optbreed (default = 1.0)\n\
\t-W           - output a *.see file to plot progress of sampler\n\
\t-W<int>      - Maximum starting motif width (default: 25)\n\
\t-w           - DON'T use sequence weighting in scan step\n\
\t-x           - dummy\n\n";

// \t-minlen=<int>- minimum length of sequences in alignment (used only with -G= option)\n\

// Construct in probe mode: passing a null usage string makes Init() fall back
// to the built-in PROBE_USAGE text and the probe argument layout.
prb_typ::prb_typ(int argc, char *argv[], a_type alphabet)
{
	char	*no_custom_usage = 0;

	Init(no_custom_usage, argc, argv, alphabet);
}

// Construct with a caller-supplied usage string; it is forwarded to Init()
// unchanged together with the command line and alphabet.
prb_typ::prb_typ(char *PRB_USAGE,int argc, char *argv[], a_type alphabet)
{
	Init(PRB_USAGE, argc, argv, alphabet);
}

void	prb_typ::Init(char *INPUT_USAGE,int argc, char *argv[], a_type alphabet)
// Initialize all items for prb_typ alignment and searches.
//
// INPUT_USAGE: NULL selects probe mode (usage text = PROBE_USAGE; argv[1] is
//              the fasta file, argv[2] the database, options start at argv[3]).
//              Non-NULL selects the alternate ("gismo") layout: argv[1] is the
//              input file, options start at argv[2], the default align_mode
//              is 2 and -a values above 3 are rejected.
// argc/argv:   the command line; every option must start with '-'.
// alphabet:    stored in A and used when reading alignments / guide files.
//
// Side effects: verifies that the input file in Argv[2] can be opened, fills
// every option member with its default and then with the user's value, writes
// the command line to "<argv[1]>.cmd" (appending -s<seed> when no seed was
// given), seeds the random number generator and loads the guide if requested.
// print_error() is relied on to terminate the program on bad input.
{
	Int4	i;
	char *PRB_USAGE=0;
	FILE	*fptr;
	// size of the buffer allocated for 'name' below.
	const size_t name_bufsize=200;

	if(INPUT_USAGE == 0){ // for probe...
		PRB_USAGE=PROBE_USAGE;
    		if(argc < 3) print_error(PRB_USAGE);
		Argv[2] = AllocString(argv[2]);
    		if(!(argc == 3 || argv[3][0] == '-')) print_error(PRB_USAGE);
	} else {		// for gismo;
		PRB_USAGE=INPUT_USAGE;
    		if(argc < 2) print_error(PRB_USAGE);
    		if(!(argc == 2 || argv[2][0] == '-')) print_error(PRB_USAGE);
		Argv[2] = AllocString(argv[1]);
	}
	if(argc > 150) print_error(PRB_USAGE);
    	if((fptr = fopen(Argv[2],"r")) == NULL) {
		fprintf(stderr,"Could not open file \"%s\"\n",Argv[2]);
		print_error("File does not exist!");
	} fclose(fptr);

// initialize with default values...
	//**************** new gapped search options *****************
	do_gapped_srch=FALSE;
	MinSeqLen=0;
	trim_info=0.0;
	MinCol=MaxCol=0;
	//**************** end gapped search options *****************

	minseq=5; number=-1; maxbreed=40; avecol=15; min_rpt=1;
	aveblk=5; breedVar=1.0; maxseq=10000; inc=5; maxseqleng = 0;
	maxrun=10; cutoff=150; 
	align_mode = 1; // default alignment method.
        if(INPUT_USAGE) align_mode=2;
	mhpsz=5;maxrpts=1; maxcycle=4*mhpsz;
	left_flank=5;right_flank=5;
        useSA=FALSE; // flag to indicate use of sim. annealing option
	// temperature = 300;  // 1/kT = 1.0 (T = 300 K)
	temperature = 150;  // 1/kT = 1/k*150 K = 2.0
	use_breed = FALSE; UseLabeled=FALSE; 
	method='H';	// 'H' = Henikoff's not fragmented 
	patience=600;	// willing to wait 10 minutes by default
	// patience=1800;	// willing to wait 30 minutes by default
	pseudo=0.5;Ecut=1.0;ecut=0.01; minmap=0.0;
	mask_nonglobular=FALSE;
        UseRepset=FALSE; 
	time1=time(NULL); seed=18364592;	// this seed value doubles as "not set by user"
	go=TRUE;flag=FALSE;noseg=FALSE; A=NULL;
	report_gaps=FALSE; bestmap=-9999999999.;
	mode='G'; DBS_hits=NULL; run=0;
	cardB=0;oldcardB=0;
	weight=TRUE; combine=FALSE;
	guide=NULL; Guide=NULL; limit=5;
	repeats=FALSE; input_msa=FALSE; force=FALSE;fix=FALSE;
	minblk=3;maxblk=10;
	repeatEval=0.01;
	cma_purge=0;	// don't use this by default.
	WG=NULL;
	maH=NULL;
	aafreq='s';
	Argv[1] = AllocString(argv[1]);
	cmsa=NULL; bestmsa=NULL; cmsa_in=NULL;
	data=NULL;
	pernats=1000.0; gapopen=10000; // = log(freq_open=0.05)*pernats = -30.
	gapextend=2000; 		//  = log(freq_extend=0.25)*pernats = -5
	indel_penalty=0.0; use_gseq=TRUE;
	BlkDist=ColDist=NULL;
	MaxBadBlks=0;
	MinGoodBlks=1000;
   improve=FALSE;
   dont_delete=FALSE;
   improve_cut=0.0;
   max_motif_width=25;
   A=alphabet;

// initialize with user input 
    // 'name' is a fixed-size buffer and purge() later rewrites it as
    // "<Argv[1]>0", so refuse file names that would not fit with that extra
    // character instead of overflowing the buffer in strcpy().
    if(strlen(argv[1]) + 2 > name_bufsize) print_error("input file name is too long");
    NEW(name,name_bufsize,char); strcpy(name,argv[1]); 
    nsize=NULL;  counts=NULL;

   Int4 arg_start=3;
   if(INPUT_USAGE) arg_start=2;
   for(i=arg_start; i < argc; i++){
     if(argv[i][0] != '-') print_error(PRB_USAGE);
     switch(argv[i][1]) {
	case 'A': input_msa = TRUE; break;
	case 'a': align_mode=IntOption(argv[i],'a',0,19,PRB_USAGE); 
		  if(align_mode >= 10){
			// values 10-19 select the gapped variant of mode (value - 10).
			align_mode -= 10; use_gseq=TRUE; 
		  }
		  if(INPUT_USAGE && align_mode > 3) print_error(PRB_USAGE);
		  break;
	case 'b': aveblk=IntOption(argv[i],'b',1,50,PRB_USAGE); break;
	case 'B': if(!isalpha(aafreq=argv[i][2])) 
			print_error(PRB_USAGE); break;
	case 'C': minmap=RealOption(argv[i],'C',-5000,5000,PRB_USAGE); break;
	case 'c': 
		// "-col=<min>:<max>" shares its first letter with "-c<int>".
		if(sscanf(argv[i],"-col=%d:%d",&MinCol,&MaxCol) == 2){
			if(MinCol < 1 || MinCol > MaxCol) print_error(PRB_USAGE);
		} else {
			avecol=IntOption(argv[i],'c',3,200,PRB_USAGE); 
		} break;
	case 'D': dont_delete=TRUE; break;
	case 'd': repeatEval=RealOption(argv[i],'d',0,500000,PRB_USAGE); break;
	case 'e': ecut=RealOption(argv[i],'e',0,5000,PRB_USAGE); break;
	case 'E': Ecut=RealOption(argv[i],'E',0,5000,PRB_USAGE); break;
	case 'F': force= TRUE; break;
	case 'f': fix = TRUE; 
		  if(sscanf(argv[i],"-f%d:%d",&minblk,&maxblk) != 2 ||
			minblk > maxblk || minblk < 1)
                        print_error(PRB_USAGE);
		  break;
	case 'H': report_gaps = TRUE; break;
	case 'h': mhpsz=IntOption(argv[i],'h',2,1000,PRB_USAGE); break;
	case 'G': 
		   // "-G=<open>,<extend>" requests a gapped search;
		   // "-G<file>" names a guide-sequence file.
        	   if(sscanf(argv[i],"-G=%d,%d",&Gap_o,&Gap_x) == 2){
			do_gapped_srch=TRUE;
		   } else if(argv[i][2] == 0) {
			print_error(PRB_USAGE);
		   } else {
		      guide=AllocString(argv[i]+2); 
		   } break;
        case 'g': if(sscanf(argv[i],"-g%d,%d",&gapopen,&gapextend) != 2)
                                        print_error(PRB_USAGE);
		  if(gapopen== 0 && gapextend==0)use_gseq=FALSE;
                  if(gapopen < 0 || gapextend < 0) print_error(PRB_USAGE); break;
	case 'i': inc=IntOption(argv[i],'i',1,100,PRB_USAGE); break;
	case 'I': repeats=TRUE; 
		  if(sscanf(argv[i],"-I%d:%d",&left_flank,&right_flank) != 2)
                        print_error(PRB_USAGE);
		  break;
	case 'L': noseg=TRUE; break;
	case 'l': limit=IntOption(argv[i],'l',1,500,PRB_USAGE); break;
	case 'M': if(sscanf(argv[i],"-M%d:%d",&minseq,&maxseq) != 2 
			|| minseq > maxseq || minseq < 2) print_error(PRB_USAGE);
		  break;
	case 'm': 
#if 0
		if(sscanf(argv[i],"-minlen=%d",&MinSeqLen) == 1){ 
			if(MinSeqLen < 2) print_error(PRB_USAGE);
		} else 
#endif
		if(!isalpha(method=argv[i][2])) print_error(PRB_USAGE); 
		break;
	case 'N': maxrpts=IntOption(argv[i],'N',1,1000,PRB_USAGE); break;
	case 'n': {
		 // bare "-n" masks nonglobular regions; "-n<int>" sets MinGoodBlks.
		 if(argv[i][2]==0) mask_nonglobular=TRUE; 
	         else MinGoodBlks=IntOption(argv[i],'n',0,1000,PRB_USAGE);
		} break;
	case 'O': use_breed = TRUE; 
		if(argv[i][2]) cma_purge=IntOption(argv[i],'O',10,100,PRB_USAGE);
		break;
	case 'o': improve = TRUE; 
		  improve_cut = RealOption(argv[i],'o',-10000.,10000.,PRB_USAGE); break;
	case 'p': maxcycle=IntOption(argv[i],'p',2,10000,PRB_USAGE); break;
	case 'P': patience=IntOption(argv[i],'P',1,9999999,PRB_USAGE); break;
	case 'R': maxrun=IntOption(argv[i],'R',0,100,PRB_USAGE); break;
	case 'r': min_rpt=IntOption(argv[i],'r',1,1000,PRB_USAGE); break;
	case 'S': cutoff=IntOption(argv[i],'S',20,5000,PRB_USAGE); break;
	case 's': seed=atoi(argv[i]+2); break;
	case 'T': useSA=TRUE; temperature=RealOption(argv[i],'T',0,5000.,PRB_USAGE); break;
	case 't': 
		// "-trim=<real>" sets trim_info; any other -t loads "<name>.cma"
		// as the template alignment (only the first time it is seen).
		if(sscanf(argv[i],"-trim=%lf",&trim_info) == 1){ 
			if(trim_info <= 0.0) print_error(PRB_USAGE);
		} else {
		   if(cmsa_in != NULL) break;
		   sprintf(str,"%s.cma",name); std::cerr << str; cmsa_in=ReadCMSA2(str,A); 
		} break;
        case 'U': UseRepset=TRUE; 
		if(argv[i][2] == '+') UseLabeled=TRUE; 
		break;
        case 'u': if(!isalpha(mode=argv[i][2])) print_error(PRB_USAGE); 
                  if(argv[i][3] == '+') combine=TRUE;
                  else if(argv[i][3] == '\0') combine=FALSE;
                  else print_error(PRB_USAGE); break;
	case 'v': breedVar=RealOption(argv[i],'v',0.0,10000.0,PRB_USAGE); break;
	case 'w': weight=FALSE; break;
	case 'W': 
		// "-W<int>" sets the maximum motif width; bare "-W" enables the
		// progress watcher behind the *.see output.
		if(argv[i][2] != 0){
			max_motif_width=IntOption(argv[i],'W',8,MAX_LENG_SITES,PRB_USAGE); 
		} else if(WG==NULL) WG=MakeWatchGibbs(20, 1000); 
		break;
	case ' ': break;  // ignore blanked out options.
	default: print_error(PRB_USAGE); break;
     }
   }
   if(minseq > maxseq) print_error(PRB_USAGE);
   if(mhpsz > maxcycle) print_error(PRB_USAGE);
   if(cma_purge > 0 && maxrpts > 1) print_error("-O<int> option not yet implemented for repeats");

   // gapopen*=pernats; gapextend*=pernats; 

   // Record the command line (minus blanked-out "- " options) in <argv[1]>.cmd.
   fptr = open_file(argv[1],".cmd","w");
   for(i = 0; i < argc; i++) {
	// check for an empty argument first so argv[i][1] is never read past
	// the terminating NUL.
	if(argv[i][0] == 0 || argv[i][1] != ' ') fprintf(fptr,"%s ",argv[i]);
   }
   if(seed == 18364592) {  // not provided by user 
	seed = (UInt4) time(NULL);
   	fprintf(fptr,"-s%d\n",seed); 
   } else fprintf(fptr,"\n"); 
   fclose(fptr);
   sRandom(seed);
   if(guide != NULL) {
	Guide = MkGuide(guide, A); PutGuide(stderr,Guide, A); free(guide);
   }
}

void	prb_typ::purge() 
{
    Int4	n;

    if(input_msa) return; 
    sprintf(str,"%s",name); sprintf(name,"%s0",Argv[1]);
    if(UseRepset){  // then first generate representative set...
        // FILE *fp2 = open_file(name,".rep","w");
        FILE *fp2 = open_file(name,"","w");
	n=RepSetCluster(fp2,str,0,!noseg,'S',cutoff,UseLabeled,A);
	// this should call gapped blast with score option...
	fclose(fp2);
        // sprintf(str,"%s.rep",name);
    } else n=PurgeFiles(str,name,cutoff,inc,minseq,maxseq,A);
    if(n < minseq) print_error("purged set has too few sequences");
}

BooLean	prb_typ::Add(Int4 minlength)
// Try inserting a new block (built with 'minlength' by AddBlkCMSA) at every
// position from 0 up to the block count measured on entry.  Each candidate is
// handed to gibbs(); returns TRUE if any candidate was accepted.
{
	BooLean	gained=FALSE;

	fprintf(stderr,"%d blocks; try adding blocks...\n",nBlksCMSA(cmsa));
	const Int4 lastpos=nBlksCMSA(cmsa);	// fixed bound, taken before any change
	Int4	pos=0;
	while(pos <= lastpos){
	   cma_typ candidate=AddBlkCMSA(pos, minlength, cmsa);
	   if(candidate != NULL){
	     fprintf(stderr,"block added between motifs %d & %d?\n",pos,pos+1);
	     if(gibbs(candidate)) gained=TRUE;
	   }
	   pos++;
	}
	return gained;
}

BooLean	prb_typ::Refine()
// While the alignment has more than one block and RefineCMSA() yields a
// replacement, adopt the replacement, rerun the sampler on it and record the
// result.  Returns TRUE if at least one replacement was adopted.
{
	BooLean	changed=FALSE;

	fprintf(stderr,"delete blocks?\n");
	for(;;){
	     if(nBlksCMSA(cmsa) <= 1) break;
	     cma_typ pruned=RefineCMSA(cmsa);
	     if(pruned == NULL) break;
	     fprintf(stderr,"Block deleted\n");
	     NilCMSA(cmsa);
	     cmsa=pruned;
	     // re-sample the shortened alignment (gapped when use_gseq is set).
	     if(use_gseq){ strcpy(options,"-t1 -g "); }
	     else { strcpy(options,"-t1 "); }
	     map = RunGibbs(options,&cmsa);
	     fprintf(stderr," refined map %.1f\n",map);
	     Record( );
	     changed=TRUE;
	}
	return changed;
}

// Convenience overload: block deletion with tweak mode 'N'.
BooLean	prb_typ::Delete()
{
	return Delete('N');
}
BooLean	prb_typ::Delete(char tweakmode)
/********************* TRY DELETING BLOCKS *************************/
// Repeats while more than one block remains and DeleteBlkCMSA() returns a
// shortened alignment: the shortened alignment replaces cmsa, is saved and
// recorded, and is then tweaked through gibbs(NULL,tweakmode).
// Returns TRUE if at least one block was deleted.
{
	BooLean	removed=FALSE;

	fprintf(stderr,"delete blocks?\n");
	for(;;){
	     if(nBlksCMSA(cmsa) <= 1) break;
	     cma_typ shorter=DeleteBlkCMSA(cmsa);
	     if(shorter == NULL) break;
	     fprintf(stderr,"Block deleted. Tweaking alignment...\n");
	     map = RelMapCMSA(shorter);
	     NilCMSA(cmsa);
	     cmsa=shorter;
	     SaveBestCMSA(cmsa);
	     Record( );
	     gibbs(NULL,tweakmode);	// tweak the best alignment.
	     fprintf(stderr,"Done Tweaking alignment\n");
	     removed = TRUE;
	}
	return removed;
}

BooLean	prb_typ::Split(Int4 minlength)
/********** TRY Splitting BLOCKS with >= minlength columns ***********/
// Visits blocks 1..(block count on entry).  A block is a candidate when its
// length, and its length minus 3, are both at least minlength; candidates are
// then sampled with probability p = ((len-3)^2 + 1)/400, e.g.
//   len=8: p = 0.065; len=12: p = 0.20; len=18: p = 0.56; len=22: p = 1.0.
// Each successful split is passed to gibbs(...,'n'); returns TRUE if any
// split was accepted.
{
	BooLean	gained=FALSE;

	fprintf(stderr,"Try splitting blocks...(%d total blks)\n",nBlksCMSA(cmsa));
	const Int4 nblks=nBlksCMSA(cmsa);
	for(Int4 blk=1; blk <= nblks; blk++){
	  if(LengthCMSA(blk,cmsa) < minlength) continue;
	  const double trimmed = LengthCMSA(blk,cmsa) - 3;
	  if(!(trimmed >= minlength)) continue;
	  const double accept = (trimmed*trimmed + 1.0)/400.0;
	  // a random number is drawn only for blocks that passed both length tests.
	  if(!(accept >= SampleUniformProb( ))) continue;
	  cma_typ halves=SplitBlkCMSA(blk, minlength, cmsa);
	  if(halves == NULL) continue;
	  fprintf(stderr,"block %d split in two.\n",blk);
	  if(gibbs(halves,'n')) gained = TRUE;
	}
	return gained;
}

BooLean	prb_typ::Fuse(Int4 maxlengs)
// Try fusing each pair of adjacent blocks (t, t+1).
// A pair is sampled with probability p = 1.1/(g*g + 0.1), where g is the mean
// number of residues between the two blocks per sequence:
//   g <= 1.0 --> always tried; g = 2.0 --> p = 0.25 (approx.); g = 10 --> p = 0.01.
// Fused candidates are passed to gibbs(); returns TRUE if any was accepted.
// NOTE(review): the loop bound is the block count taken on entry, while an
// accepted candidate replaces cmsa inside gibbs() -- confirm that later values
// of the block index stay valid for ResBetweenCMSA()/FuseBlksCMSA().
{
	BooLean	gained=FALSE;

	fprintf(stderr,"Fuse blocks?\n");
	const Int4 nblks=nBlksCMSA(cmsa);
	for(Int4 left=1; left < nblks; left++){
	  const double meangap=(double)ResBetweenCMSA(left,cmsa)/(double)NumSeqsCMSA(cmsa);
	  const double accept = 1.1/(meangap*meangap + 0.1);
	  if(!(accept >= SampleUniformProb( ))) continue;
	  cma_typ merged=FuseBlksCMSA(left, maxlengs, cmsa);
	  if(merged == NULL) continue;
	  fprintf(stderr,"blocks %d & %d fused.\n",left,left+1);
	  if(gibbs(merged)) gained = TRUE;
        }
	return gained;
}

// Breed the alignment population held in the heap maH by pairwise
// recombination.
//
// Each round: alignments that produced a recombinant in the previous round
// are freed, the heap is drained into New[1..nn], the surviving older
// alignments are appended (New[nn+1..n]) and every pair (i,j) with i <= nn,
// i < j <= n is recombined.  A recombinant with positive RelMapCMSA() is
// refined by SimAnnealGibbs(); a refined map above bestmap is recorded, and
// the recombinant is inserted into the heap unless its key is already there.
// Rounds stop when the heap is empty at the top of a round, when
// ConvergedMSAHeap() drops below 2.0, or when more than 'patience' seconds
// passed since the last recorded improvement.  Finally all of New[1..n] is
// put back into the heap.
//
// Returns the member 'map'.
// NOTE(review): the local 'Map' read back from the heap at the end is not the
// value returned -- confirm that returning the member is intended.
double	prb_typ::Breed( )
{
   double	Map,map1,map2,*mapn,*mapo;
   cma_typ	tmp_ma,ma,ma1,ma2,*New,*old;
   Int4		N,n,nn,no,i,j,o,c=1;
   BooLean	*recomb;
   Int4		last_time;

   last_time=time(NULL);	// time of the last recorded improvement
   if(use_gseq) strcpy(options,"-t1 -g -l1 ");
   else strcpy(options,"-t1 -l1 ");
   // work arrays are sized from twice the heap size reported by SizeMSAHeap().
   N = 2*SizeMSAHeap(maH); 
   NEW(New,N+2,cma_typ); NEW(old,N+2,cma_typ);
   NEW(mapn,N+2,double); NEW(mapo,N+2,double); NEW(recomb,N+2,BooLean);
   PurgeMSAHeap(maH); // remove all but one of those items with identical keys
   for(n=0,c=1; !EmptyMSAHeap(maH); c++){
	// carry over last round's alignments that did not recombine.
	for(no=0,o=1; o <= n; o++){  // n==0 --> skipped the first time around 
	   if(recomb[o]){ NilCMSA(New[o]); } // discard parent alignments 
	   else { ++no; old[no] = New[o]; mapo[no]=mapn[o]; }
	}
	// drain the heap: these are the 'new' alignments of this round.
        for(nn=0; (ma=DelMinMSAHeap(&Map, maH)) != NULL; )
		{ ++nn; New[nn]=ma; mapn[nn] = Map; recomb[nn]=FALSE; }
	for(n=nn,o=1; o <= no; o++) // also add previous recombinants to heap
		{ ++n; New[n]=old[o]; mapn[n]=mapo[o]; recomb[n]=FALSE; }
	fprintf(stderr,
	   "******** %d: %d new, %d old, %d total alignments ********\n",
		c,nn,no,n);
	// with no older alignments n == nn, so the last new alignment can only
	// be the second member of a pair.
	if(no==0) nn--;
	for(i=1; i <= nn; i++){  /** try every combination of recombinants **/
	   ma1 = New[i]; map1=mapn[i];
	   for(j=i+1; j <= n; j++){
	     ma2 = New[j]; map2=mapn[j];
	     fprintf(stderr,"x");
	     // alignments over different data sets use GRecombineCMSA().
	     if(DataCMSA(ma1) != DataCMSA(ma2)) ma=GRecombineCMSA(ma1,ma2); 
	     else ma=RecombineCMSA(ma1,ma2);
	     if(ma != NULL){
		 // if get a recombinant then try to refine it. 
		 recomb[i]=recomb[j]=TRUE;
		 fprintf(stderr,"RecombineCMSA( ) map = %g\n",map2=RelMapCMSA(ma));
		 if(map2 > 0.0){
		   Map=SimAnnealGibbs(options,&ma,'S',150);
		   if(Map > bestmap){
	        	fprintf(stderr,"!!!map improved from %.1f to %.1f\n", map2,Map);
			// Record() works on the members cmsa/map, so swap the
			// recombinant in temporarily and restore cmsa afterwards.
			tmp_ma=cmsa; cmsa=ma; map=Map; 
			Record( ); last_time=time(0);
			cmsa=tmp_ma; 
		   } 
		   if(!KeyInMSAHeap(Map, maH)){
			if(InsertMSAHeap(ma, Map, maH) == 0) NilCMSA(ma);
			else { fprintf(stderr,"\thybrid map=%f\n",Map); }
		   } else NilCMSA(ma);
		 } else NilCMSA(ma);
	     } 
   	   }
	}
	fprintf(stderr,"\n");
	if(ConvergedMSAHeap(maH) < 2.0) break; // I haven't tested this at all!!!!
	
	// give up once 'patience' seconds have passed without a recorded improvement.
	if((time(0)-last_time) > patience) break;
	// ConvergedMSAHeap(maH);
   }
   for(o=1; o <= n; o++) {
	/** after breeding move the parent population to the heap **/
	if(InsertMSAHeap(New[o],mapn[o],maH)==0) NilCMSA(New[o]);
   }
   PurgeMSAHeap(maH);
   // get the Map for the best alignment.
   ma=DelMinMSAHeap(&Map,maH); 
   if(InsertMSAHeap(ma,Map,maH)==0) NilCMSA(ma);
   // get the map for the best alignment.
   free(New); free(old); free(mapo); free(mapn); free(recomb);
   return map;
}


BooLean	prb_typ::Record( )
// Report the current alignment (map, block and column counts) on stderr and,
// when the watcher WG is active, rewrite the "<name>.see" file.
// If map beats bestmap the alignment is written out with PutAlnCMSA(), copied
// into bestmsa, and aveblk/avecol plus both binomial distributions are reset
// from it.  Returns TRUE only when a new best was stored.
{
	fprintf(stderr,"@map = %.3f; %d blocks; %d columns\n",
		   map, nBlksCMSA(cmsa), NumColumnsCMSA(cmsa)); 
	if(WG != NULL){
		FILE	*seefp=open_file(name,".see","w");
		AddWatchGibbs(map,nBlksCMSA(cmsa),NumColumnsCMSA(cmsa),WG);
		PutWatchGibbs(seefp, WG);
		fclose(seefp);
	}
	if(!(map > bestmap)) return FALSE;

	fprintf(stderr,"!!!bestmap improved from %.1f to %.1f\n",bestmap,map);
	bestmap=map;
	PutAlnCMSA(name,cmsa,Guide); 
	if(bestmsa !=NULL){ NilCMSA(bestmsa); }
	bestmsa=CopyCMSA(cmsa);
	aveblk = nBlksCMSA(bestmsa);
	avecol = TotalLenCMSA(bestmsa);
	if(fix){	// keep the mean block count inside the user's fixed range
	   if(aveblk < minblk){ aveblk = minblk; }
	   else if(aveblk > maxblk){ aveblk = maxblk; }
	}
	SetBinomial(aveblk,BlkDist); 
	SetBinomial(avecol,ColDist); 
	PutBinomial(stderr, 10000, BlkDist);
	PutBinomial(stderr, 10000, ColDist);
	return TRUE;
}

// File-scope state used by prb_typ::gibbs() when it builds the starting
// configuration for cycle 1: BLK is the number of blocks and COL the columns
// per block.  BLK == 0 means "not chosen yet"; prb_typ::read_input_data()
// resets it to 0 whenever new input is loaded.
static Int4 COL=5; 
static Int4 BLK=0; 

// Convenience overload: run gibbs() on ma2 with tweak mode 'S'.
BooLean	prb_typ::gibbs(cma_typ ma2)
{
	return gibbs(ma2,'S');
}

// Run the Gibbs sampler in one of three situations:
//  1. ma2 != NULL: refine the candidate ma2, try to recombine it with cmsa,
//     and adopt the result as cmsa if its map exceeds the current map
//     (returns TRUE); otherwise ma2 is freed (returns FALSE).
//  2. ma2 == NULL and cmsa != NULL: tweak cmsa in place according to
//     tweakmode ('N' = RunGibbs with -g when use_gseq is set, 'n' = RunGibbs
//     without -g, 'S' = simulated annealing, anything else = annealing only
//     when useSA is set).  Returns TRUE if Record() stored a new best.
//  3. ma2 == NULL and cmsa == NULL: build a new random alignment, with the
//     block/column layout taken from cmsa_in (run 1), from BLK/COL (cycle 1)
//     or from SampleConfig(), and sample it.  Returns FALSE (cmsa freed and
//     NULL) when the resulting map is <= 0, TRUE otherwise.
BooLean	prb_typ::gibbs(cma_typ ma2,char tweakmode)
{
	cma_typ	recombinant;
	double	map2;
	Int4	b,c,j;
	BooLean	improved = FALSE;

	if(ma2 != NULL){
	    if(use_gseq) strcpy(options,"-t1 -g ");
	    else strcpy(options,"-t1 ");
  	    if(useSA) map2=SimAnnealGibbs(options,&ma2,'S',150);
  	    else map2 = RunGibbs(options,&ma2);
	    // if the candidate recombines with the current alignment, continue
	    // with the recombinant instead of the candidate.
	    if((recombinant = RecombineCMSA(ma2, cmsa)) != NULL){
  		if(useSA) map2=SimAnnealGibbs(options,&recombinant,'S',150);
  		else map2 = RelMapCMSA(recombinant);
//	    	else map2 = RunGibbs(options,&recombinant);
		fprintf(stderr,"!!!recombinant map = %.2f\n", map2);
		NilCMSA(ma2); ma2 = recombinant; 
	    }
	    if(map2 > map){
		// NOTE(review): without useSA the map is recomputed by annealing
		// after the comparison above, so the stored value may differ
		// from the one that was compared -- confirm this is intended.
  		if(!useSA) map2=SimAnnealGibbs(options,&ma2,'S',150);
	        fprintf(stderr,"!!!map improved from %.1f to %.1f\n", map,map2);
		NilCMSA(cmsa); cmsa=ma2; map=map2; 
		Record( ); return TRUE;
	    } else { 
		fprintf(stderr,"map drops ap from %.1f to %.1f\n",map,map2);
		NilCMSA(ma2); 
		return FALSE;
	    }
        } else if(cmsa!=NULL){ // simply tweak the cmsa
	  switch(tweakmode){
	   case 'N': // use gapped...
	  	if(use_gseq) sprintf(options,"-t1 -g -l%d ",limit);
	        else sprintf(options,"-t1 -l%d ",limit);
		map = RunGibbs(options,&cmsa); break; 
	   case 'n': // Normal (Fast) mode
	        sprintf(options,"-t1 -l%d ",limit);
		map = RunGibbs(options,&cmsa); break;
	   // NOTE(review): the two cases below do not set 'options' here; they
	   // run with whatever an earlier call left in it.
	   case 'S': // Simulated Annealing mode
  	    map = SimAnnealGibbs(options,&cmsa,'S',temperature); break;
	   default: 
  	    if(useSA) map = SimAnnealGibbs(options,&cmsa,'S',temperature);
  	    else map = RunGibbs(options,&cmsa);
	    break;
	  }
	  fprintf(stderr," refined map %.1f\n",map);
  	  if(Record( )) improved=TRUE; 
#if 0	// experimental gap function...
if(use_gseq){
  fprintf(stderr,"performing gapped SA version of gibbs...\n");
  map2=GappedSimAnnealCMSA(&cmsa, 200.0, 75.0, 5.0);
  if(map2 > map){ 
	fprintf(stderr,"!!!map improved from %.1f to %.1f\n", map,map2);
	map=map2;
  }
  if(Record( )) improved=TRUE; 
}
#endif
	} else {	// return a new cmsa...
  // extra sampler option appended to every option string built below.
  char	other_options[]="-T1";
  // char	other_options[]=" ";
  if(run==1 && cmsa_in != NULL){
	// first run with a template alignment: copy its block layout into num[].
	// sprintf(options,"-t1 -T1 "); // no gaps (-g) for initial alignment...
	if(use_gseq) sprintf(options,"-t1 -g -l%d %s ",limit,other_options); 
	else sprintf(options,"-t1 -l%d %s ",limit,other_options);
	over=1; under=0;
	b=ConfigCMSA2(num,cmsa_in); c=0;
	fprintf(stderr,"%d: %d blocks\n",cycle,b); 
	for(j=1; j<= b; j++) fprintf(stderr," block %d: %d columns\n",j,num[j]); 
  } else {
	
	if(cycle == 1){
	   // first cycle: b = BLK blocks of COL columns each.
	   if(use_gseq) sprintf(options,"-t1 -l%d -g %s ",limit,other_options);
	   else sprintf(options,"-t1 -l%d %s ",limit,other_options);
	   if(BLK==0){ // first time...
		BLK = gss.MinTrueSqLen( )/20;
		COL=avecol;
	   } else { BLK--; COL=avecol; }
	   if(fix){ 
		BLK = MAXIMUM(Int4,BLK,minblk); BLK = MINIMUM(Int4,BLK,maxblk); 
	   }
	   // shrink the layout until BLK*COL fits into gss.MinTrueSqLen().
	   while(BLK*COL > gss.MinTrueSqLen( )){ COL--; if(COL<8){ BLK--; COL=15; } }
	   assert(BLK > 0);
	   b=BLK;
#if 1	//********************** New code ********************
	   if(BLK == 1 && MinCol > 0){
		// single block with a -col= range: sample its width.
	   	c = num[1]= SampleBinomial(MinCol,MaxCol,ColDist);
	   } else {
	   	assert(COL <= maxlenModels);
	   	assert(COL <= MAX_LENG_SITES);
	   	for(c=0,j=1; j<= b; j++) { c+=num[j]=COL; }
	   }
#else	//***************** old code ********************
	   assert(COL <= maxlenModels);
	   assert(COL <= MAX_LENG_SITES);
	   for(c=0,j=1; j<= b; j++) { c+=num[j]=COL; }
#endif
	} else {

	  if(use_gseq) sprintf(options,"-t1 -g -l%d %s ",limit,other_options);
	  else sprintf(options,"-t1 -l%d %s ",limit,other_options);
std::cerr << options;  std::cerr << std::endl;
	  // over/under shrink as the cycle count grows; SampleConfig() uses
	  // them to set the block range when the range is not fixed.
	  if(cycle > 15){ over=1; under=0; }
	  else if(cycle > 5){ over=2; under=1; }
	  else { over=3; under=2; }
#if 0
	  assert(bestmsa != NULL);
	  b=nBlksCMSA(bestmsa);	
	  // NumColumnsCMSA(bestmsa);
	  if(!fix){ minblk = MAXIMUM(Int4,1,b-under); maxblk=b+over; }
	  fprintf(stderr,"minblk = %d; maxblk = %d\n",minblk,maxblk); 
	  tries=0;
	  do {
	       if(tries > 100){  minblk = MAXIMUM(Int4, 1, minblk-1); tries=0; }
	       b = random_integer(maxblk-minblk+1)+minblk;
	  // columns from 6...ave...max???
	       for(c=0,j=1; j<= b; j++) c+=num[j]=random_integer(20) + 6;
	       tries++;
	  } while(c > maxlenModels);
#else
	  // sample a block count and per-block widths, then total the columns.
	  b = SampleConfig( );
	  // for(c=0,j=1; j<= b; j++) c+=num[j];  // not initialized!?
	  for(c=0,j=1; j<= b; j++){
		// if(num[j] > MAX_LENG_SITES) num[j] = MAX_LENG_SITES;
#if 0	  // this is ruining the sampler for repeats (AFN 7-10-09)
		if(num[j] > max_motif_width) num[j] = max_motif_width;
#endif
		c+=num[j]; 
	  }
#endif
	}
  }
	 fprintf(stderr,"%d: %d blocks %d columns\n",cycle,b,c); 
	 for(j=1; j<= b; j++) fprintf(stderr," block %d: %d columns\n",j,num[j]); 
	 cmsa= RandomCMSA(b,num, gss);
	 PutConfigCMSA(stderr,cmsa);
// std::cerr << options; std::cerr << std::endl;
	 // map=RunMinMapGibbs(options, bestmap, &cmsa);  // Quit if not promising...
	 map = RunGibbs(options, &cmsa);
	 PutConfigCMSA(stderr,cmsa);
	 // a non-positive map counts as failure: drop the alignment.
	 if(map <= 0.) { NilCMSA(cmsa); cmsa = NULL; return FALSE; }
  	 Record( ); return TRUE; 
	}
	return improved;
}

Int4	prb_typ::SampleConfig( )
// Sample a block count and per-block column counts for a new random
// alignment, centred on the best alignment so far (bestmsa).
// Unless the block range is fixed, minblk/maxblk are first reset to
// [max(1, best-under), best+over].  The column total is drawn from ColDist
// within the -col= range when one was given, otherwise within a window around
// the best alignment's column count.  With more than one block, every block
// starts at 6 columns and the remainder is dealt out by drawing column tokens
// and block separators in random order.
// Writes the widths to num[1..] and returns the sampled number of blocks.
// NOTE(review): as many separators as blocks are shuffled in, so columns drawn
// after the last separator are counted in num[blocks+1] -- confirm intended.
{
	assert(bestmsa != NULL);
	const Int4 bestblks = nBlksCMSA(bestmsa);
	if(!fix){ minblk = MAXIMUM(Int4,1,bestblks-under); maxblk=bestblks+over; }
	const Int4 nblks=SampleBinomial(minblk,maxblk,BlkDist);

	Int4	lo,hi;
	if(MinCol <= 0){
		const Int4 bestcols = NumColumnsCMSA(bestmsa);
		lo = MAXIMUM(Int4,bestcols-nblks*2,nblks*6);
		hi = gss.MinTrueSqLen( )-nblks*2;
		hi = MINIMUM(Int4, hi,bestcols+nblks*(2+over));
		if(lo > hi) hi=lo;	// keep the range non-empty
	} else {
		assert(MinCol <= MaxCol);
		lo = MinCol;
		hi = MaxCol;
	}
	Int4	ncols = SampleBinomial(lo,hi,ColDist);

	if(!(nblks > 1)){	// single block takes every column
		num[1] = ncols;
		return nblks;
	}

	ncols -= nblks*6;   // use a minimum of 6 columns per block
	for(Int4 blk=1; blk<=nblks; blk++) num[blk]=6;

	// tokens 1..ncols are columns, the remaining nblks tokens are separators.
	const Int4 ntokens = ncols+nblks;
	dh_type	mixer = dheap(ntokens+1,4);
	for(Int4 tok=1; tok<=ntokens; tok++) insrtHeap(tok,((keytyp)Random()),mixer);
	Int4	cur=1;
	for(Int4 drawn=1; drawn<=ntokens; drawn++){
		const Int4 tok=delminHeap(mixer);
		if(tok<=ncols) num[cur]++; else cur++;
	}
	Nildheap(mixer);
	return nblks;
}


// Load the sequence set named by 'name' and prepare the sampler for it.
// Any alignment, best alignment and sequence set left over from a previous
// run are released first, and the file-scope BLK counter is reset.
void prb_typ::read_input_data()
{
   // free up previous alignment at this point... (Stored as a file).
   if(cmsa != NULL){
	NilCMSA(cmsa);
	cmsa= NULL;
   }
   if(bestmsa!=NULL){
	NilCMSA(bestmsa);
	bestmsa=NULL;
   }
   if(data!=NULL){
	NilSeqSet(data);
	data=NULL;
   }
   BLK=0;

   // with noseg (-L) the plain reader is used, otherwise MkXSeqSet().
   if(noseg){ data = MkSeqSet(name,A); }
   else { data = MkXSeqSet(name,A); }
#if 0	// set old C-terminal offset for each seq. to 0
   for(Int4 sq=1; sq <= NSeqsSeqSet(data); sq++){
	e_type	Sq=SeqSetE(sq,data); SetTaxIdSeq(0,Sq); // == set Cterminal offset
   }
#endif
   gss.initialize(data,gapopen,gapextend,pernats,left_flank,right_flank);
   SetIndelPenaltySeqSet(indel_penalty,data);
   make_binomials(data);
}

// Allocate num[] and build the block-count and column-count distributions
// (BlkDist, ColDist) for the sequence set 'sqset'.
//
// maxlenModels is set to MinSeqSeqSet(sqset) and bounds the total model
// length.  When aveblk*avecol does not fit, the block count is lowered until
// the per-block column count floor(Nc/(2*b)) reaches 4.  Aborts via
// print_error() when the set has fewer than minseq sequences or the sequences
// are too short.
//
// NOTE(review): BlkDist is built from aveblk before the short-sequence
// adjustment below -- confirm that ordering is intended.
void prb_typ::make_binomials(ss_type sqset)
{
   maxlenModels = MinSeqSeqSet(sqset);
   if(avecol == 0) avecol = (Int4) ((float) maxlenModels/(float)(aveblk*2))+1;
#if 1	// NEW MaxCol option: AFN 7-10-09
   if(MinCol > 0) avecol = (MinCol + MaxCol)/2;
#endif
   NEW(num,maxlenModels+2,Int4);
   if(minseq > NSeqsSeqSet(sqset)) print_error("input set has too few sequences");
   bestmap=-9999999999.;
   BlkDist = MakeBinomial(30, aveblk,"number of blocks");
   Int4 Nc = maxlenModels;
   if(aveblk*avecol > Nc){ /** adjust for short sequences **/
        // Stop at one block: the previous open-ended loop kept counting down
        // through b == 0 (division by zero) and never terminated when even a
        // single block could not reach 4 columns (Nc < 8).
        Int4 b;
        for(b = aveblk; b >= 1; b--){
                avecol = (Int4) floor((double)Nc/((double)b*2.0));
                if(avecol >= 4) break;
        } 
        aveblk = b;	// 0 when no block count fits; rejected just below
        fprintf(stderr,"aveblk = %d; avecol =%d; Nc = %d\n",aveblk,avecol,Nc);
        if(aveblk < 2) print_error("make_binomials( ): aveblk < 2; seq.too short");
   }
   if(Nc < 10) print_error("make_binomials( ): Nc < 10; seq.too short");
   ColDist=MakeBinomial(Nc, avecol,"number of columns");
}

// Release what make_binomials() set up: both binomial distributions and the
// num[] array.  An active progress watcher (WG) is discarded and replaced by
// a fresh one with the same parameters.
void prb_typ::free_input_data()
{
     NilBinomial(BlkDist);
     NilBinomial(ColDist);
     if(WG != NULL){
	NilWatchGibbs(WG);
	WG=MakeWatchGibbs(20, 1000);
     }
     free(num); 
}

void	prb_typ::create_population()
// BUILD INITIAL POPULATION (MSAHEAP).
{
   Int4			futile_attempts=0;
   UInt4	time0,time0b;

   Int4	t,aln,NumAln;
   cma_typ	*list,lastcma=0;
   double	avelen=0.0,*lpr;

   maH=MkMSAHeap(mhpsz);
   item=0; time0b=time(NULL); 
   if(cmsa != NULL){ NilCMSA(cmsa); cmsa= NULL; }
   for(cycle=1; cycle <= maxcycle; cycle++){
	time0=time(NULL); 
	cmsa = NULL; gibbs(NULL);
	if(cmsa != NULL && map > 0) {  // THEN ADD ALIGNMENT TO HEAP.
	// if(create_align() && map > 0) // ADD ALIGNMENT TO HEAP.
	    // if(cycle==1) Refine( );  // Delete bad blocks and tweak alignment.
	    // if(cycle==1) Delete( );  // Delete and tweak alignment.
#if 0
	    map0=ReAlignBestCMSA(cmsa);
	    if(map0 > map){ map=map0; Record( ); }
	    else InitMAPCMSA(cmsa);
#endif
	    // Delete('N');  // Delete and tweak alignment.
	    if(!dont_delete) Delete('n');  // Delete and tweak alignment.
	    // Delete('n');  // Delete and tweak alignment.
#if 0
	    if(lastcma!=0){
		cmaGR=GRecombineCMSA(lastcma, cmsa);
		if(cmaGR != NULL){
		   if(use_gseq) sprintf(options,"-t1 -g -l%d ",limit);
		   else sprintf(options,"-t1 -l%d ",limit);
        	   map0=RunGibbs(options,&cmaGR);
		   if(map0 > map){
			map=map0; NilCMSA(cmsa); cmsa=cmaGR; Record( ); 
		   } else NilCMSA(cmaGR);
		}
	    }
#endif
	    if(InsertMSAHeap(cmsa,map,maH)==0){ NilCMSA(cmsa); cmsa=0; }
	    ConvergedMSAHeap(maH);
	    if(MinItemMSAheap(maH) != item){ // i.e., new optimum alignment.
		     item = MinItemMSAheap(maH);
		     // SeeMSAHeap(item,maH); 
	    }
	    lastcma=cmsa;
	    futile_attempts=0;
	} else {
		if(cmsa!=NULL) NilCMSA(cmsa); cmsa=NULL; 
		cycle--; futile_attempts++; 
		if(futile_attempts > 20) print_error("failed to find a motif");
		else if(futile_attempts % 10 == 0) { 
		   if(!fix){
			minblk--; maxblk--;
			minblk = MAXIMUM(Int4,minblk,1); 
			if(maxblk < minblk) maxblk = minblk;
		   }
		}
	}
        fprintf(stderr,"\ttime: %d seconds (%0.2f minutes)\n",
                time(NULL)-time0,(float)(time(NULL)-time0)/60.0);
     }
     fprintf(stderr,"\tTotal time: %d seconds (%0.2f minutes)\n",
                time(NULL)-time0b,(float)(time(NULL)-time0b)/60.0);
#if 1 // DO SOME POST PROCESSING HERE...
     NumAln=nMSAHeap(maH);
     NEW(list,NumAln+3,cma_typ);
     NEW(lpr,NumAln+3,double);
     for(aln=1; nMSAHeap(maH) > 0; aln++){ 
	assert((cmsa=DelMinMSAHeap(&map, maH)) != NULL);
fprintf(stderr,"******************** refining alignment %d *********************\n",aln);
	for(t=1; t<=nBlksCMSA(cmsa); t++) avelen+=LengthCMSA(t,cmsa);
	avelen/=(double)nBlksCMSA(cmsa);
#if 1
	if(use_gseq) sprintf(options,"-t1 -g -l%d ",limit);
	else sprintf(options,"-t1 -l%d ",limit);
        // map=SimAnnealGibbs(options,&cmsa,'S',300);
        map=RunGibbs(options,&cmsa);
	Record( );
#else
	if(use_gseq) sprintf(options,"-t1 -g -l%d ",limit);
	else sprintf(options,"-t1 -l%d ",limit);
	Delete();	// map?  did this above already...!!!
	Record( );
#endif
#if 1
        if(align_mode == 2){
	  if(Split(8)){
		Record( );
		fprintf(stderr,"Splitting block improved map\n"); 
	  }
	  if(Delete()){
		Record( );
		fprintf(stderr,"Deleting block improved map\n"); 
	  }
	  // if(avelen <= 15){ Delete(); Fuse(50); Split(8); }
	  // else { Delete(); Split(8); Fuse(50); }
        } else if(align_mode == 3){
	  if(SFR(8,40)){	// slide pieces to the right.
		Record( );
		fprintf(stderr,"Split Fuse to right improved map\n"); 
	  }
	  if(SFL(8,40)){	// slide pieces to the right.
		Record( );
		fprintf(stderr,"Split Fuse to right improved map\n"); 
	  }
        }
#endif
#if 0
	map0=ReAlignBestCMSA(cmsa);
	if(map0 > map){ map=map0; Record( );}
	else InitMAPCMSA(cmsa);
#endif
	list[aln]=cmsa; lpr[aln]=map;
     }
     for(aln=1; aln <= NumAln; aln++){
	assert(InsertMSAHeap(list[aln],lpr[aln],maH)!=0);
     }
     free(list); free(lpr);
#endif
}

void	prb_typ::Recombine()
// GENETIC RECOMBINATION OF ALIGNMENT POPULATION.
{
   Int4		min;
   cma_typ	ma2;
   double	map2;

     if(maH==NULL) print_error("prb_typ::Recombine() input error");
     if(use_gseq) sprintf(options,"-t1 -g "); else sprintf(options,"-t1 ");
     fprintf(stderr,"******** breed ********\n");
     map=Breed( );
     while(nMSAHeap(maH) > 3){ 
	if((ma2 = DelMaxMSAHeap(&map, maH)) != NULL) NilCMSA(ma2); 
     }
     assert(bestmsa != NULL); min = nBlksCMSA(bestmsa); 
     if(!fix){ minblk = MAXIMUM(Int4,min-under,2); maxblk = min+over;}
     fprintf(stderr,"--> End breed ");
     double runtime=difftime(time(NULL),time1);
     fprintf(stderr," time thus far: %0.1f seconds (%0.2f minutes)\n",runtime,runtime/60.0);
     cmsa = DelMinMSAHeap(&map2, maH); NilMSAHeap(maH); maH=NULL;
}

void prb_typ::optimize() 
// FINAL OPTIMIZATION OF BEST ALIGNMENT.
{
   Int4		j;
   cma_typ	ma2,ma3;
   double	map2;

   if(align_mode == 2){
	map = bestmap;
	Split(8); while(Fuse(50)){ if(!Split(8)) break; }
	do {
	    if(SFL(8, 50)) FSR(60); else if(!FSR(60)) break; 
	    if(SFR(8, 50)) FSL(60); else if(!FSL(60)) break; 
	} while(TRUE);
   }
   if(use_gseq) strcpy(options,"-t1 -g "); else strcpy(options,"-t1 ");
   ma2 = CopyCMSA(cmsa);
   fprintf(stderr,"starting temperature = %.2f K\n", 300.);
   for(j=1; j<=10; j++){
     map2=SimAnnealGibbs(options,&ma2,'S',300);
     fprintf(stderr,"map = %.2f\n", map2);
     if((ma3 = RecombineCMSA(cmsa, ma2)) != NULL){
   	map2=SimAnnealGibbs(options,&ma3,'S',150);
        NilCMSA(ma2); ma2=ma3; ma3=NULL;
     } 
     if(map2 > bestmap){ 
	NilCMSA(cmsa); cmsa=ma2; bestmap=map2; PutAlnCMSA(name,cmsa,Guide); 
	ma2=NULL; break;
     } 
   }
   if(ma2 != NULL) NilCMSA(ma2);

#if 0
     map = SimAnnealGibbs(options,&cmsa,'S',150);
     if(map > bestmap){ bestmap=map; PutAlnCMSA(name,cmsa,Guide); }
#endif

}

void prb_typ::align()
// One complete ungapped alignment pass: load the input, build a population
// of alignments, recombine them, optimize the survivor, release the input.
// The run counter is advanced once per call.
{
   read_input_data();

   fprintf(stderr,"blocks columns .. MAP\n");
   run++;

   create_population();
   Recombine();
   optimize();

   free_input_data();
}

void prb_typ::gapped_align()
// Gapped variant of align(): builds the population, then takes the entry
// returned by DelMinMSAHeap() as cmsa without recombining or optimizing.
// The heap is destroyed and the run counter advanced.
{
   read_input_data();

   fprintf(stderr,"blocks columns .. MAP\n");
   run++;

   create_population();

   double	heap_key;	// value stored with the alignment; not used here
   cmsa = DelMinMSAHeap(&heap_key, maH);
   NilMSAHeap(maH);
   maH=NULL;

   free_input_data();
}

e_type	**prb_typ::search(Int4 inso, Int4 insx, Int4 del, BooLean segmask, char *msafile)
// Gapped database scan.
//   inso, insx : forwarded unchanged to GapSeqGOScanScan().
//   del        : not referenced anywhere in the compiled body.
//   segmask    : when FALSE, NoMaskGOScan() is applied to the scan object.
//   msafile    : file name handed to MakeGOScan() to build the profile.
// Re-reads the database statistics from Argv[2] (replacing counts, nsize and
// DBS_hits), builds the background frequency vector selected by 'aafreq',
// scans Argv[2], and writes scan output to "<name>.gscn".
// Returns the list produced by GapSeqGOScanScan(); gapped_srch() treats it as
// a NULL-terminated array (starting at index 1) of NULL-terminated arrays of
// sequences and frees all of it.
// Side effect: MinSeqLen is set to the total length of the partition model.
{
	e_type	**ListE;
	sma_typ MA;
	Int4	j,n,total,*mtfcnts=NULL;
	double	*Freq;

	fprintf(stderr,"creating profile...\n");
	// if(bestmap <= 1. && !input_msa) print_error("failed to find a significant motif");
	// input_msa=FALSE;
	// Drop any database statistics left over from an earlier search.
	if(counts) free(counts); if(nsize) free(nsize);
        if(DBS_hits) NilSet(DBS_hits);
	number = GetFastaInfo(Argv[2], MAX_IN_SEQS, &counts, &nsize, A);
	// Longest database sequence; passed to MakeGOScan() below.
	for(maxseqleng=0,j=1; j<= number; j++) 
		maxseqleng = MAXIMUM(Int4,nsize[j],maxseqleng);
	DBS_hits = MakeSet(number+1);
	// oldcardB = cardB = 0;
	// Total residue count over the whole alphabet (indices 0..nAlpha(A)).
	for(total=0, j=0; j<=nAlpha(A); j++) total += counts[j];
        NEW(Freq,nAlpha(A)+2,double);
        if(aafreq == 'm') {
	  // Background from the counts of an alignment read from Argv[2].
	  // NOTE(review): n is used as a divisor without a zero check.
          NEW(mtfcnts, nAlpha(A)+2, Int4);
          MA=ReadSMA(Argv[2]); CountsSMA(mtfcnts, MA); NilSMA(MA);
          for(n=0, j=0; j<=nAlpha(A); j++) n += mtfcnts[j];
          for(j=0; j<=nAlpha(A); j++) Freq[j]= (double)mtfcnts[j]/(double)n;
          free(mtfcnts);
        } else if(aafreq == 'd') {
	  // Background from the database residue counts.
          for(j=0; j<=nAlpha(A); j++) Freq[j] = (double)counts[j]/(double)total;
        } else { 	// aafreq = 's'
          for(j=0; j<= nAlpha(A); j++) Freq[j] = blosum62freq[j];
        }

	FILE	*fptr = open_file(Argv[2],"","r");
	gsn_typ	F;
	F=MakeGOScan(msafile,A,maxrpts,method,mode,ecut,Ecut,maxseqleng,
		weight,minmap,pseudo,Freq);
#if 1
	// double tmp_d = (double) TotLenPrtnModel(PrtnModelGOScan(F)) * 0.90;
	// gapped_srch() later uses MinSeqLen as the minimum length for output.
	MinSeqLen = TotLenPrtnModel(PrtnModelGOScan(F));
#endif
        if(!segmask) NoMaskGOScan(F);
	if(mask_nonglobular) MaskNonGlobularGOScan(F);
	SetRptEvalGOScan(repeatEval,F);
	fprintf(stderr,"scanning database...\n");
	FILE *ofp=open_file(name,".gscn","w");
#if 1
	ListE = GapSeqGOScanScan(ofp,fptr,inso,insx,left_flank,right_flank,
					min_rpt,number,total,nsize,F);
#else	// code below is a work in progress; want to speed up purge by starting with a cma. afn: 7_17_12
	// This whole branch is compiled out; it redeclares ListE and refers to
	// names (left, right, Test, argv, gapo, ...) that are not declared in
	// this function, so it cannot simply be switched on.
#if 0
Int4    GapSeqGOScanScan(FILE *fptr,Int4 a, Int4 b, Int4 gapo, Int4 gapx,
        Int4 left, Int4 right, Int4 min_rpt, Int4 number,UInt8 total,
        unsigned short *nsize, char ***Operation, e_type **RtnE, e_type **FullE,
        unsigned short **FullR, Int4 **Start, char Mode, gsn_typ F,
        UInt4 minlen, UInt4 maxlen)
        Int4 NumHits=GapSeqGOScanScan(fptr,a,b,gapo,gapx,left,right,min_rpt,number,
                total,nsize,&operation,&ListE,&FullE,&FullR,&start,Test,F,minlen,maxlen);
#endif
	// Taken from GOScanToCMSA() within goscan.cc
	// need a,b,operation,&ListE,&FullE,&FullR,&start,Test,minlen,maxlen.
	Int4		a=18,b=2;
        char            **operation;
        e_type          *ListE,*FullE;
        unsigned short  *FullR;
        Int4            *start;
	cma_typ         cma=0;
	Int4		minlen=0,maxlen=INT4_MAX;
	ptm_typ         PM=PrtnModelGOScan(F);
	Int4            nblks=NumModelsPrtnModel(PM);
	BooLean         *skip;

        Int4 NumHits=GapSeqGOScanScan(fptr,a,b,inso,insx,left,right,min_rpt,number,total,
		nsize,&operation,&ListE,&FullE,&FullR,&start,Test,F,minlen,maxlen);
        if(NumHits > 0){
          cma=MakeCMSA(ListE,NumHits,operation,start,nblks,PrtnModelLengths(PM),
                        gapo,gapx,pernats,left,right,argv[1],PrtnModelA(PM),FullE,FullR);
                        // 10000,2000,1000,left,right,argv[1],PrtnModelA(PM),FullE,FullR);
          ss_type FullSeq = FullSeqCMSA(cma);
          if(!AddFull) RmFullCountsCMSA(cma);
          if(verbose) { PutAlnCMSA(stdout,cma); }
	} // WARNING: NOT DEALLOCATING ALL MEMORY WHEN NumHits == 0! Fix here later...
        free(operation); free(start);
	// free(counts); free(nsize); 
#endif
	// Release everything created locally; ListE is handed to the caller.
	fclose(ofp); NilGOScan(F); fclose(fptr); free(Freq);
	return ListE;
}

void    prb_typ::gapped_srch()
// NEW: AFN 7/8/09.
// output 
{
	Int4 del = 0;
	BooLean segmask=TRUE;
	if(trim_info > 0.0){
          sprintf(str,"%s.trim.msa",name);
	} else {
          sprintf(str,"%s.msa",name);
	}
	e_type	**ListE=search(Gap_o, Gap_x, del, segmask, str);
	FILE *sfp=open_file(name,".grpts","w");
        for(Int4 s=1; ListE[s]; s++){
	   Int4 r,NumRpts=0;
           for(r=1; ListE[s][r]; r++){ }
	   r--;
	   if(r >= min_rpt){
             for(r=1; ListE[s][r]; r++){ 
		e_type tE=ListE[s][r];
		if(LenSeq(tE) >= MinSeqLen) PutSeq(sfp,tE,A); 
		NilSeq(tE); 
	     }
           } free(ListE[s]);
        } free(ListE);
	fclose(sfp);
}

void prb_typ::search( )
// Ungapped database scan for one iteration of the procedure.
// Steps, in order:
//   1. On the first call (number == -1) read database statistics from Argv[2].
//   2. If trim_info > 0, read "<name>.cma", trim it and write the result
//      through PutAlnCMSA() under the base name "<name>.trim"; the profile is
//      then taken from "<name>.trim.msa", otherwise from "<name>.msa".
//   3. Build background frequencies according to 'aafreq' and scan Argv[2].
//   4. If anything was hit, write .scn, .seq and, depending on options and
//      results, .gaps, .fsq and .rpts files.
//   5. Optionally run the gapped repeat search.
//   6. Decide whether to continue: if no new sequences were detected and
//      'force' is off, maxrun is set to run.  When continuing, 'name' is
//      advanced to "<Argv[1]><run>" and the next input set is produced either
//      through the cma-purge path (optbreed) or through PurgeFiles().
// Aborts via print_error() when bestmap <= 1 and no input msa was supplied.
{
	Int4	j,n,total;
	FILE	*sfp,*fptr,*fp2;
	gsn_typ	F;
	snh_typ	sH;
	h_type	HG;
	sma_typ MA;
	Int4	*mtfcnts=NULL;

	fprintf(stderr,"creating profile...\n");
	if(bestmap <= 1. && !input_msa) print_error("failed to find a significant motif");
	input_msa=FALSE;
	if(number == -1){ /*** run GetFastaInfo( ) on first iteration ***/
	   number = GetFastaInfo(Argv[2], MAX_IN_SEQS, &counts, &nsize, A);
	   for(maxseqleng=0,j=1; j<= number; j++) 
		maxseqleng = MAXIMUM(Int4,nsize[j],maxseqleng);
	   DBS_hits = MakeSet(number+1);
	   oldcardB = cardB = 0;
	}
	fptr = open_file(Argv[2],"","r");
	// Read cma file and create trimmed cma and msa files...
	fprintf(stderr,"***************** trim_info = %.3f ****************\n",trim_info);
	if(trim_info > 0.0){
	   sprintf(str,"%s.cma",name);
           cma_typ tmp_cma=ReadCMSA2(str,A);
           Int4 *RmLeft,*RmRight,*TrimLimit;
	   // Per-block work arrays for TrimCMSA(); their contents are not used here.
           NEW(RmLeft,nBlksCMSA(tmp_cma)+3,Int4);
           NEW(RmRight,nBlksCMSA(tmp_cma)+3,Int4);
           NEW(TrimLimit,nBlksCMSA(tmp_cma)+3,Int4);
           cma_typ cma2 = TrimCMSA(trim_info,TrimLimit,RmLeft,RmRight,tmp_cma);
           sprintf(str,"%s.trim",name); PutAlnCMSA(str,cma2,NULL);
           NilCMSA(cma2);
           free(RmLeft); free(RmRight); free(TrimLimit);
           TotalNilCMSA(tmp_cma);
	   // From here on str names the profile file given to MakeGOScan().
           sprintf(str,"%s.trim.msa",name);
        } else {
           sprintf(str,"%s.msa",name);
	} 

	// Total residue count over the whole alphabet (indices 0..nAlpha(A)).
	for(total=0, j=0; j<=nAlpha(A); j++) total += counts[j];
        NEW(freq,nAlpha(A)+2,double);
        if(aafreq == 'm') {
	  // Background from the counts of an alignment read from Argv[2].
	  // NOTE(review): n is used as a divisor without a zero check.
          NEW(mtfcnts, nAlpha(A)+2, Int4);
          MA=ReadSMA(Argv[2]); CountsSMA(mtfcnts, MA); NilSMA(MA);
          for(n=0, j=0; j<=nAlpha(A); j++) n += mtfcnts[j];
          for(j=0; j<=nAlpha(A); j++) freq[j]= (double)mtfcnts[j]/(double)n;
          free(mtfcnts);
        } else if(aafreq == 'd') {
	  // Background from the database residue counts.
          for(j=0; j<=nAlpha(A); j++) freq[j] = (double)counts[j]/(double)total;
        } else { 	// aafreq = 's'
          for(j=0; j<= nAlpha(A); j++) freq[j] = blosum62freq[j];
        }

	F=MakeGOScan(str,A,maxrpts,method,mode,ecut,Ecut,maxseqleng,
		weight,minmap,pseudo,freq);
	if(mask_nonglobular) MaskNonGlobularGOScan(F);
	SetRptEvalGOScan(repeatEval,F);
	fprintf(stderr,"scanning database...\n");
	ptm_typ PM=PrtnModelGOScan(F);
	// Number of blocks allowed to fail = total blocks - MinGoodBlks, floored at 0.
	MaxBadBlks=NumBlksPrtnModel(PM)-MinGoodBlks;
	if(MaxBadBlks < 0) MaxBadBlks=0;
	sH=GOScanScan(fptr,number, total, nsize,1,MaxBadBlks,F);
	fprintf(stderr,"output results...\n");
        if(nScanHeap(sH) > 0){
	     // Scan report: command line echo, histograms and hit information.
	     // NOTE(review): the echoed file is always "<name>.msa", even when
	     // the trimmed profile was used above.
	     fp2 = open_file(name,".scn","w");
	     fprintf(fp2,"goscan %s %s.msa -c -e%f -E%f\n",
		Argv[2],name,ecut,Ecut);

             HG=HistScanHeap(sH);
             PutHist(fp2,60,HG);
             PutInfoScanHeap(fp2,sH,A);
             for(j=1; j<=nblksScanHeap(sH); j++){
                  HG=histScanHeap(j,sH); PutHist(fp2,60,HG);
             } fclose(fp2);
	     if(report_gaps){
	     	fp2 = open_file(name,".gaps","w");
	     	PutGapsScanHeap(fp2, sH); fclose(fp2);
	     }
	     sfp = open_file(name,".seq","w");
             PutSeqScanHeap(sfp, sH, A);
	     // Accumulate the hits into DBS_hits; cardB is the running total.
	     AddSetScanHeap(sH, DBS_hits, TRUE);
	     cardB = CardSet(DBS_hits);
	     fclose(sfp); 
	     if(NumFailSeqScanHeap(sH) > 0){
	     	sfp = open_file(name,".fsq","w");
             	PutFailSeqScanHeap(sfp, sH, A);
	     	fclose(sfp); 
	     }
    	     if(repeats){ 
	        sfp=open_file(name,".rpts","w");
		// PutRptsScanHeap(sfp,left_flank,right_flank, sH, A); // OLD
		if(min_rpt < 2) PutRptsScanHeap(sfp,left_flank,right_flank,sH,A);
		else PutMinRptsScanHeap(sfp,left_flank,right_flank,min_rpt,sH,A);
		fclose(sfp);
	     }
        }
	fclose(fptr); 

#if 1	//*********************** Gapped repeat search *****************************
	if(do_gapped_srch) gapped_srch();
#endif

	fprintf(stdout,"total # of sequences detected: %d\n", cardB);
	// No growth in the hit set ends the iteration unless 'force' is set.
	if(cardB > oldcardB){ oldcardB = cardB; }
	else if(!force) maxrun = run;	// i.e., quit 

	if(run < maxrun){  
	  if(cma_purge > 0){ // Then add step to output scanned sequences as *cma file.
                assert(maxrpts == 1);
	        run++;
	        cma_typ	tmpcma1,tmpcma2;
                cma_typ tmpcma = ScanHeap2CMSA(sH,0,name,A,FALSE);
                sH = NULL;          /** WARNING: ScanHeap2CMSA( ) destroys sH ***/
                // if(tmpcma!= NULL){ WriteMtfCMSA(name,tmpcma,NULL); }

	// Purge cma file 
                Int4 Nset;
	        sprintf(name,"%s%d",Argv[1],run);
                sprintf(str,"%s.purge",name);
                FILE *fp = open_file(str,".cma","w");
                PutRepSetCMSA(fp,cma_purge,&Nset,tmpcma); fclose(fp);

		// Read the purged set back in as the input for optbreed().
                sprintf(str,"%s.purge.cma",name);
                tmpcma1=ReadCMSA2(str,A);

	        if(tmpcma != NULL) TotalNilCMSA(tmpcma);
		// Only NilScanHeap() is guarded by the 'if'; sH is NULL at this
		// point, so the scan object and freq are what actually get freed.
	        if(sH) NilScanHeap(sH); NilGOScan(F); free(freq);
		// optbreed() asserts cmsa==0, so park the current cmsa meanwhile.
		tmpcma=cmsa; cmsa=0; 
	        tmpcma2 = optbreed(tmpcma1);
		if(tmpcma2) TotalNilCMSA(tmpcma2);
		cmsa=tmpcma; // This gets freed in ~prb_typ().
         } else {
	  NilScanHeap(sH); 
	  NilGOScan(F); 
	  free(freq);
	  fprintf(stderr,"purging sequence set...\n");
	  // Pick the file written above that feeds the purge step; this must be
	  // done before 'name' is advanced to the next run.
	  if(do_gapped_srch) sprintf(str,"%s.grpts",name);
	  else if(repeats) sprintf(str,"%s.rpts",name);
	  else sprintf(str,"%s.seq",name);
	  sprintf(name,"%s%d",Argv[1],run);
	  if(UseRepset){  // then first generate representative set...
	        // fp2 = open_file(name,".rep","w");
	        fp2 = open_file(name,"","w");
		// n= RepSetCluster(fp2, str, 0, !noseg, 'b', cutoff, A);
		// n=RepSetCluster(fp2,str,0,!noseg,'b',cutoff,UseLabeled,A);
		n=RepSetCluster(fp2,str,0,!noseg,'S',cutoff,UseLabeled,A);
		// this should call gapped blast with score option...
		fclose(fp2);
	        // sprintf(str,"%s.rep",name);
	  } n =PurgeFiles(str,name,cutoff,inc,minseq,maxseq,A);
	  // Too few sequences left after purging: tell the driver to stop.
	  if(n < minseq) go = FALSE;
	 }
	} else { NilScanHeap(sH); NilGOScan(F); free(freq); }
}

prb_typ::~prb_typ()
// Releases everything the object still holds and reports the elapsed time
// since time1 on stderr.
// The elapsed time is a double (from difftime); it is converted to an
// integer before being printed as whole seconds, because handing a double
// to an integer conversion specifier is undefined behaviour.
{ 

    if(counts != NULL) free(counts); 
    if(nsize != NULL) free(nsize); 
    if(Guide != NULL) NilGuide(Guide);
    if(DBS_hits != NULL) NilSet(DBS_hits);
    free(name); 
    free(Argv[1]); free(Argv[2]); 
    if(WG != NULL) { NilWatchGibbs(WG); WG=NULL; }
    if(cmsa != NULL) NilCMSA(cmsa); 
    if(bestmsa != NULL) NilCMSA(bestmsa); 
    if(cmsa_in != NULL){
	// The gss object has to be fetched before cmsa_in is destroyed.
    	gss_typ *gssX=gssCMSA(cmsa_in); 	// not owned by cmsa_in!!!
    	gssX->~gss_typ();
	NilCMSA(cmsa_in); 
    }
    if(data != NULL) NilSeqSet(data);
    // if(A != NULL) NilAlpha(A);
    double runtime=difftime(time(NULL),time1);
    fprintf(stderr,"\ttime: %ld seconds (%0.2f minutes)\n",(long)runtime,runtime/60.0);
}

//==================== TEST NEW ROUTINES =====================

BooLean	prb_typ::SFR(Int4 minlength, Int4 maxlengs)
//   Split & Fuse operation (to the right): 2 -> 3 -> 2 blocks.
//
// --[____|\\\\]----[____]--   ====> --[____]----[/////|____]--  
//        :---->                                 
//
// For each adjacent pair (t, t+1) whose combined length does not exceed
// minlength+maxlengs: split block t, fuse the second piece with the
// following block, and offer first the split+fused alignment and then the
// split-only alignment to gibbs().
// Returns TRUE if gibbs() reported success for any candidate.
{
   Int4		nblks = nBlksCMSA(cmsa);
   BooLean	gained=FALSE;

   if(nblks > 1){	// requires at least two blocks
     fprintf(stderr,"%d blocks; try sliding halves of blocks right...\n",nblks);
     for(Int4 blk=1; blk < nblks; blk++){
	Int4 pair_len = LengthCMSA(blk,cmsa)+LengthCMSA(blk+1,cmsa);
	if(pair_len <= (minlength+maxlengs)){
	   cma_typ split_ma=SplitBlkCMSA(blk, minlength, cmsa);
	   if(split_ma != NULL){
		cma_typ fused_ma=FuseBlksCMSA(blk+1, maxlengs, split_ma);
		if(fused_ma != NULL && gibbs(fused_ma)) gained = TRUE;
		if(gibbs(split_ma)) gained = TRUE;
	   }
	   // cmsa may have been replaced; refresh the loop bound.
	   nblks=nBlksCMSA(cmsa);
	}
     }
   }
#if 0	// disabled: for completeness do another split operation at the end.
   {
	cma_typ tail_split=SplitBlkCMSA(nblks, minlength, cmsa);
	if(tail_split != NULL && gibbs(tail_split)) gained = TRUE;
   }
#endif
   return gained;
}

BooLean	prb_typ::SFL(Int4 minlength, Int4 maxlengs)
//   Split & Fuse operation (to the left): 2 -> 3 -> 2 blocks.
// Try splitting block t and fusing the first piece with the PREVIOUS block.
//
// --[____]----[\\\\|____]--   ====> --[____|/////]----[____]--  == !slide_right
//             <----:                                 
//
// Blocks are visited from the last down to the second.  A pair is skipped
// when the combined length of blocks t-1 and t (the two blocks involved in
// the operation) exceeds minlength+maxlengs.  The earlier version measured
// blocks t and t+1 instead, which for t == numblks asked for the length of
// a block beyond the last one.
// Returns TRUE if gibbs() reported success for any candidate.
{
   cma_typ	maS,maSF;
   Int4		t,numblks = nBlksCMSA(cmsa);
   BooLean	improved=FALSE;

   if(numblks > 1){
     fprintf(stderr,"Try sliding halves of blocks left...\n");
     for(t=numblks; t > 1; t--){
	if((LengthCMSA(t-1,cmsa)+LengthCMSA(t,cmsa)) > (minlength+maxlengs))
		continue;
	if((maS=SplitBlkCMSA(t,minlength,cmsa)) != NULL){
	   // Fuse the left piece of the split block onto block t-1.
	   if((maSF=FuseBlksCMSA(t-1, maxlengs, maS)) != NULL){
		if(gibbs(maSF)) improved = TRUE;
	   }
	   // Also offer the split-only alignment.
	   if(gibbs(maS)) improved = TRUE;
	} 
     }
   }	// for completeness do another split operation at the end.
#if 0
   if((maS=SplitBlkCMSA(1, minlength, cmsa)) != NULL){
	   if(gibbs(maS)) improved = TRUE;
   }
#endif
   return improved;
}

BooLean	prb_typ::FSR(Int4 maxlength)
// FuseSplit operation:   2 -> 1 -> 2 blocks (net = 0)
// ***************************************************************************
//                                  F(t)           
//   msa   --[____]----[\\\\\\]--   ===>  --[____|\\\\\\]---  maF
//             t          t+1                    t        
//                                                 |  
//                                                 V  S(t)
//                                     
//                                        --[____|\\]--[\\\]-  maFS
//                                               t      t+1     
// ***************************************************************************
// Walks the adjacent pairs left to right.  Pairs whose combined length is
// above maxlength are skipped.  The fused+resplit alignment is offered to
// gibbs() first, then the fused one.  Returns TRUE if any offer succeeded.
{
   Int4		nblks = nBlksCMSA(cmsa);
   BooLean	gained=FALSE;

   if(nblks < 2) return FALSE;
   fprintf(stderr,"Try FSR operation ...\n");
   for(Int4 blk=1; blk < nblks; blk++){
      Int4 left_len = LengthCMSA(blk,cmsa);
      Int4 right_len = LengthCMSA(blk+1,cmsa);
      Int4 joint_len = left_len+right_len;
      if(joint_len > maxlength) continue;

      cma_typ fused=FuseBlksCMSA(blk,joint_len,cmsa);
      if(fused == NULL){
	print_error("prb_typ::FSR( ): This should not happen");
      } else {
	cma_typ resplit=SplitBlkCMSA(blk,left_len,fused);
	if(resplit == NULL) print_error("prb_typ::FSR( ): This should not happen");
	else if(gibbs(resplit)) gained = TRUE;
	if(gibbs(fused)) gained = TRUE;
      }
      // cmsa may have been replaced; refresh the loop bound.
      nblks=nBlksCMSA(cmsa);
   }
   return gained;
}

BooLean	prb_typ::FSL(Int4 maxlength)
// Same fuse-then-split operation as FSR(), but the adjacent pairs are
// visited from right to left (t = numblks-1 down to 1), and the block count
// is read once at entry rather than refreshed inside the loop.
// Returns TRUE if gibbs() reported success for any candidate.
{
   const Int4	nblks = nBlksCMSA(cmsa);
   BooLean	gained=FALSE;

   if(nblks < 2) return FALSE;
   fprintf(stderr,"Try FSL operation ...\n");
   for(Int4 blk=nblks-1; blk >= 1; blk--){
      Int4 left_len = LengthCMSA(blk,cmsa);
      Int4 right_len = LengthCMSA(blk+1,cmsa);
      Int4 joint_len = left_len+right_len;
      if(joint_len > maxlength) continue;

      cma_typ fused=FuseBlksCMSA(blk,joint_len,cmsa);
      if(fused == NULL){
	print_error("prb_typ::FSL( ): This should not happen");
      } else {
	// Re-split at the original boundary, then offer both variants.
	cma_typ resplit=SplitBlkCMSA(blk,left_len,fused);
	if(resplit == NULL) print_error("prb_typ::FSL( ): This should not happen");
	else if(gibbs(resplit)) gained = TRUE;
	if(gibbs(fused)) gained = TRUE;
      }
   }
   return gained;
}

BooLean	prb_typ::SSF(Int4 t, Int4 minlength)
// SplitSplitFuse operation:   2 -> 3 -> 4 -> 3
// ***************************************************************************
//                                     S(t)           
//   msa   --[__|//]----[\\\|____]--   ===>  --[__|//]--[\\\]--[____]--  maS
//             t-1          t                    t-1     t      t+1
//                                                       |  
//                                                       V  S(t-1)
//                                     F(t)
//   maSSF --[__]--[//|\\]--[____]--   <===  --[__]-[//]-[\\\]-[____]--  maSS
//             t-1    t       t+1               t-1  t    t+1    t+2  
// ***************************************************************************
// Only attempted when 2 <= t <= number of blocks and both blocks t-1 and t
// have at least minlength columns.  Only the final (maSSF) alignment is
// offered to gibbs(); the two intermediate alignments are destroyed here.
{
      const Int4 nblks = nBlksCMSA(cmsa);
      if(nblks < 2 || t < 2 || t > nblks) return FALSE;

      fprintf(stderr,"Try SSF operation on block %d...\n",t);
      const Int4 left_len = LengthCMSA(t-1,cmsa);
      const Int4 right_len = LengthCMSA(t,cmsa);
      if(left_len < minlength || right_len < minlength) return FALSE;

      BooLean	gained=FALSE;
      cma_typ	once=SplitBlkCMSA(t,minlength,cmsa);
      if(once == NULL){
	print_error("prb_typ::SSF( ): This should not happen");
      } else {
	cma_typ twice=SplitBlkCMSA(t-1,minlength,once);
	if(twice == NULL){
	   print_error("prb_typ::SSF( ): This should not happen");
	} else {
	   cma_typ refused=FuseBlksCMSA(t,left_len+right_len,twice);
	   if(refused==NULL) print_error("prb_typ::SSF( ): This should not happen");
	   if(gibbs(refused)) gained = TRUE;
	}
	// gibbs(twice);
	NilCMSA(twice);
      }
      // gibbs(once);
      NilCMSA(once);
      return gained;
}

BooLean	prb_typ::SFF(Int4 t, Int4 minlength)
// SplitFuseFuse operation:   3 -> 4 -> 3 -> 2
// ***************************************************************************
//                                      S(t)           
//   msa   --[___]---[\\|//]---[___]--  ===>  --[___]--[\\]-[//]--[___]--  maS
//            t-1       t       t+1              t-1     t   t+1   t+2
//                                                       |  
//                                                       V  F(t-1)
//                                      F(t)
//  msaSFF --[___|\\]-------[//|___]--  <===  --[___|\\]----[//]--[___]--  maSF
//             t-1             t                   t-1       t     t+1     
// ***************************************************************************
// Only attempted for an interior block (2 <= t < number of blocks, at least
// three blocks) that has at least minlength columns.  Only the final (maSFF)
// alignment is offered to gibbs(); the intermediates are destroyed here.
{
   const Int4	nblks=nBlksCMSA(cmsa);
   if(nblks < 3 || t < 2 || t >= nblks) return FALSE;

   fprintf(stderr,"Try SFF operation on block %d ...\n",t);
   const Int4 prev_len = LengthCMSA(t-1,cmsa);
   const Int4 this_len = LengthCMSA(t,cmsa);
   const Int4 next_len = LengthCMSA(t+1,cmsa);
   if(this_len < minlength) return FALSE;

   BooLean	gained=FALSE;
   cma_typ	split_ma = SplitBlkCMSA(t,minlength,cmsa);
   if(split_ma == NULL){
	print_error("prb_typ::SFF( ): This should not happen");
   } else {
	cma_typ fused_once = FuseBlksCMSA(t-1,prev_len+this_len,split_ma);
	if(fused_once == NULL){
	   print_error("prb_typ::SFF( ): This should not happen");
	} else {
	   cma_typ fused_twice=FuseBlksCMSA(t,this_len+next_len,fused_once);
	   if(fused_twice==NULL) print_error("prb_typ::SFF( ): This should not happen");
	   if(gibbs(fused_twice)) gained = TRUE;
	}
	// gibbs(fused_once);
	NilCMSA(fused_once);
   }
   // gibbs(split_ma);
   NilCMSA(split_ma);
   return gained;
}

cma_typ	prb_typ::Improve(cma_typ cma0)
// Reports how much each sequence contributes to the map of an alignment.
//   cma0 : alignment to analyse; when 0 it is read from "<name>.cma".
// The sequences of cma0 are written to the file "<Argv[1]><run>" (which also
// becomes the new 'name'), cma0 is recorded via Record(), and for every
// sequence s the difference RelMapMinusSeqCMSA(s) - map is accumulated in a
// histogram printed on stderr.  Sequences whose difference is <= improve_cut
// are written to stdout.
// Returns the member cmsa, which has been reset to 0 before the return.
{ 
  if(cma0 == 0){
	char	cma_file[300];
	sprintf(cma_file,"%s.cma",name);
	cma0=ReadCMSA2(cma_file,A);
  }
  SetPenaltyCMSA(gapopen,gapextend,cma0);

  // Dump the underlying sequences under the name of the current run.
  sprintf(name,"%s%d",Argv[1],run);  
  FILE *seq_out=open_file(name,"","w");
  PutSeqSetEs(seq_out,TrueDataCMSA(cma0));
  fclose(seq_out);

  // Temporarily install cma0 as the working alignment so Record() sees it.
  assert(cmsa==0);
  cmsa = cma0;
  map = RelMapCMSA(cmsa);
  gss_typ *gss=gssCMSA(cmsa);
  make_binomials(gss->FakeSqSet());
  Record( );	// sets bestmsa to cmsa;
  cmsa=0;

  if(use_gseq){ sprintf(options,"-t1 -g -l%d ",limit); }
  else { sprintf(options,"-t1 -l%d ",limit); }

  h_type contrib = Histogram("contributions to map",-1000,1000,1.0);
  fprintf(stderr,"full MAP = %.2f\n",map);
  const Int4 nseqs = NSeqsSeqSet(DataCMSA(bestmsa));
  for(Int4 sq=1; sq<=nseqs; sq++){
	// Change in map when sequence sq is left out.
	double without_sq = RelMapMinusSeqCMSA(sq,bestmsa);
	double delta = without_sq-map;
	IncdHist(delta,contrib);
	if(delta <= improve_cut) PutSeqSetE(stdout,sq,TrueDataCMSA(bestmsa));
  }
  PutHist(stderr,60,contrib);
  NilHist(contrib);
  return cmsa;
}


cma_typ	prb_typ::optbreed(cma_typ cma0)
// Optimizes an existing alignment by annealing copies of it and breeding
// the resulting population.
//   cma0 : starting alignment; when 0 it is read from "<name>.cma".
//          It is inserted into the population heap.
// The sequences of cma0 are written to the file "<Argv[1]><run>" (which also
// becomes the new 'name').  Up to maxcycle annealed copies of bestmsa are
// added to the heap, stopping early once ConvergedMSAHeap() drops below
// breedVar; the heap is then bred and destroyed.
// Returns the alignment taken from the heap with DelMinMSAHeap(); the caller
// must free it.  The member cmsa is left pointing at that same alignment.
// Requires cmsa == 0 on entry.
{ 
  char		Str[300];
  double	Temperature=200;
  gss_typ	*gssX;

  if(cma0 == 0){ sprintf(Str,"%s.cma",name); cma0=ReadCMSA2(Str,A); }
  SetPenaltyCMSA(gapopen,gapextend,cma0);
  sprintf(name,"%s%d",Argv[1],run);  
  FILE *fptr=open_file(name,"","w");
  PutSeqSetEs(fptr,TrueDataCMSA(cma0));
  fclose(fptr);
  assert(cmsa==0); cmsa = cma0; map = RelMapCMSA(cmsa);
  gssX=gssCMSA(cmsa); make_binomials(gssX->FakeSqSet());
  Record( ); cmsa=0; // sets bestmsa to cmsa;
  if(use_gseq) sprintf(options,"-t1 -g -l%d ",limit);
  else sprintf(options,"-t1 -l%d ",limit);

  // Create the heap outside of assert(): an expression inside assert() is
  // not evaluated at all when the file is compiled with NDEBUG.
  maH=MkMSAHeap(mhpsz);
  assert(maH != 0);
  if(InsertMSAHeap(cma0,bestmap,maH)==0) print_error("error in prb_typ::optbreed");

  // NOTE(review): the value printed here is the local 'Temperature', while
  // the annealing call below uses the member 'temperature' - confirm that
  // the two are meant to differ.
  fprintf(stderr,"starting Temperature = %.2f K (%.2f degrees F)\n",
                Temperature,((9.*Temperature)/5.)-459);
  for(Int4 n=1; n <= maxcycle; n++){
	cmsa = CopyCMSA(bestmsa);
	map=SimAnnealGibbs(options,&cmsa,'S',temperature);
	if(map > bestmap){ Record( ); }
	if(InsertMSAHeap(cmsa,map,maH)==0){ 
		// Not accepted by the heap: destroy the copy and its gss object.
   		gssX=gssCMSA(cmsa);     // gssX not owned by cma.
		NilCMSA(cmsa); cmsa=0;
   		gssX->~gss_typ(); 
	} else cmsa=0;		// the heap now holds the alignment
	if(ConvergedMSAHeap(maH) < breedVar) break;
  }
  map=Breed( ); 
  // Clear maH after destroying the heap, as Recombine() and gapped_align()
  // do, so no stale pointer is left behind.
  cmsa=DelMinMSAHeap(&map, maH); NilMSAHeap(maH); maH=NULL;
  if(map > bestmap){ Record( ); }
  // cmsa = RecombineCMSA(cma0,cmsa);
  free_input_data(); // fixes memory leak: AFN 5/23/01
  return cmsa;	// need to free this up in calling environment.
}

#if 0
// Older implementation of prb_typ::optbreed(), kept for reference only.
// Everything down to the matching #endif is excluded from compilation.
// It generated mhpsz annealed copies of msa0, reshaped each according to
// align_mode with the split/fuse operators, bred the population and then
// recombined the result with msa0.
cma_typ	prb_typ::optbreed(cma_typ msa0)
{ 
	double  optmap,hotmap,Map,oldmap,newmap,Temperature=300,tempmap,map2;
	Int4	r,n,i,*Num;
	cma_typ msa,oldmsa,tempmsa;
	char	Str[300],Str2[300],Str0[300];
	Int4	Aveblk,Avecol,t,numblks,num_aln=mhpsz;

  if(msa0 == NULL){ sprintf(Str,"%s.cma",name); msa0=ReadCMSA2(Str,A); }
  SetPenaltyCMSA(gapopen,gapextend,msa0);
  sprintf(name,"%s%d",Argv[1],run);  
  if(cmsa != msa0) cmsa = CopyCMSA(msa0);
  map = RelMapCMSA(cmsa);
  oldmsa = cmsa;
  optmap = RelMapCMSA(oldmsa);
  Aveblk = nBlksCMSA(oldmsa);
  Avecol = (NumColumnsCMSA(oldmsa)/Aveblk) + 1;

  st_type S=SitesCMSA(oldmsa);
  NEW(Num,nBlksCMSA(oldmsa)+2,Int4);
  for(i=1; i<= nBlksCMSA(oldmsa); i++) Num[i] = SiteLen(i,S);
  if(use_gseq) strcpy(options,"-t1 -g -T1 ");
  else strcpy(options,"-t1 -T1 ");
  free(Num);

  maH=MkMSAHeap(mhpsz); assert(maH != NULL);
  if(InsertMSAHeap(cmsa,map,maH)==NULL)
		print_error("input error in prb_typ::optbreed");
  sprintf(Str0,"%s.best",Argv[1]);
  fprintf(stderr,"starting Temperature = %.2f K (%.2f degrees F)\n",
                Temperature,((9.*Temperature)/5.)-459);
  for(n=1; n <= num_aln; n++){
	msa = CopyCMSA(msa0);
	map = hotmap = 0.0;
	SimAnnealGibbs(options,&msa,'D',150);
	// Swap the members map/cmsa with the locals so the operators below,
	// which act on the members, work on this copy.
	tempmap = map; tempmsa= cmsa; map = hotmap; cmsa= msa;
	switch(align_mode){
	  case 0:	// fast & dirty mode do simulated annealing only.
#if 0
std::cerr << "SAMPLE HOTGIBBS\n\n";
	     map=hotmap=HotGibbs(options,&msa,Temperature);
#endif
std::cerr << "SAMPLE SIM_ANEAL_GIBBS\n\n";
	     SimAnnealGibbs(options,&msa,'D',150);
	     map=hotmap=RelMapCMSA(msa);
	   break;
	  case 1:	// fast mode: only split & fuse once.
		Split(8); Fuse(50); break;
		// Fuse(50); break;
	  case 2: Split(8); Fuse(50); Split(8); break;
	  case 3: Split(8); while(Fuse(50)){ if(!Split(8)) break; } break;
	  case 4:		// 
		SFR(8, 50);	// slide pieces to the right.
		FSL(60);	// Nudge going back to the left.
		do {
		    if(SFL(8, 50)) FSR(60); else break; 
		    // else if(!FSR(60)) break; 
		    if(SFR(8, 50)) FSL(60); else break;
		    // else if(!FSL(60)) break;
		} while(TRUE);
	   break;
	  case 5:
		Split(8); while(Fuse(50)){ if(!Split(8)) break; }
		do {
		    if(SFL(8, 50)) FSR(60); else if(!FSR(60)) break; 
		    if(SFR(8, 50)) FSL(60); else if(!FSL(60)) break; 
		} while(TRUE);
	   break;
	  case 6: 	// = abhfbestB.msa
	    for(t=2,numblks=nBlksCMSA(cmsa); t <= numblks; t++){
		map = 0.0;  // forces SSF to accept altered alignment
	  	if(SSF(t,8)) { map = 0.0; SFF(t,8); }
		Split(8); while(Fuse(50)) if(!Split(8)) break;
		numblks=nBlksCMSA(cmsa); 
	    } break;
	  case 7: case 8: case 9:
	  default: print_error("prb_typ::align() input error"); break;
        } 
	// Swap back: restore the members and keep the reshaped copy in msa.
	hotmap = map; msa = cmsa; map = tempmap; cmsa = tempmsa;
        Map = RelMapCMSA(msa);
	if(Map > optmap){ 
		optmap=Map; PutAlnCMSA(Str0,msa,NULL); 
	}
#if 1
	fprintf(stderr,"%d: hot map = %f; Map = %f\n",n,hotmap,Map);
#endif
	if(InsertMSAHeap(msa,Map,maH)==NULL){ NilCMSA(msa); }
	ConvergedMSAHeap(maH);
  }
  sprintf(Str,"%s.best",NameCMSA(msa0));
  Map=Breed( );

  msa = DelMinMSAHeap(&map2, maH); NilMSAHeap(maH);
  cmsa = RecombineCMSA(msa0, msa);
  if(cmsa != NULL) { Map = RelMapCMSA(cmsa); NilCMSA(msa); msa=NULL; }
  else { cmsa = msa; msa=NULL; }
#if 0	// PROBABLY NEED TO DEALLOCATE gss_typ!!!
   s_typ *gssX=gssCMSA(cmsa_in);     // not owned by cmsa_in.
   gssX->~gss_typ();
#endif
  return cmsa;
}
#endif

BooLean	prb_typ::create_align()
// Builds a fresh alignment: cmsa is cleared and gibbs() is invoked with a
// NULL argument.  Returns TRUE when cmsa is non-NULL afterwards and FALSE
// otherwise.
{
	cmsa = NULL; 
	gibbs(NULL);
	if(cmsa == NULL){
		return FALSE;
	}
#if 1
	return TRUE;
#else	// disabled: mode-dependent reshaping of the new alignment
	BooLean	S,F,D;
	Int4	t;
	double	avelen=0.0;
	for(t=1; t<=nBlksCMSA(cmsa); t++) avelen+=LengthCMSA(t,cmsa);
	avelen/=(double)nBlksCMSA(cmsa);
	switch(align_mode){
	  case 0: break; // fast & dirty mode don't do anything
	  case 1: Delete(); Add(7); Split(8); Fuse(50); break;
	  case 2: Split(8); Delete(); break;
	  case 3: Delete(); Split(8); break;
	  case 4: Split(8); Fuse(50); Split(8); break;
	  case 5: Split(8); while(Fuse(50) || Split(8)); break;
	  case 6: Split(8); while(Fuse(50) || Delete() || Split(8)); 
	   break;
	  case 7: 
	     if(avelen <= 9){
		do{ D=Delete(); F=Fuse(50); S=Split(8); } while(S || F || D);
	     } else if(avelen >= 16){
		do{ S=Split(8); D=Delete(); F=Fuse(50); } while(S || F || D);
	     } else do{ F=Fuse(50); S=Split(8); D=Delete(); } while(S || F || D);
	   break;
	  case 8: case 9:
	  default: print_error("prb_typ::align() input error"); break;
        }
	// gibbs(NULL); // tweak cmsa
	return TRUE;
#endif
}

