/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "cmsa.h"
#include "residues.h"
#include "histogram.h"
#include "gibbs.h"
#include "msaheap.h"
#include "gmb_typ.h"

// USAGE_START: the usage/help text that main() hands to print_error() (and to
// IntOption()/RealOption()) whenever the command line is malformed.
// Two variants exist; the '#if 0' below disables the older cma_recomb text,
// so the GARMA text in the '#else' branch is the one that is compiled in.
// NOTE(review): the active GARMA text and main()'s option switch disagree:
//   - '-w<real>' is advertised here, but main() has no 'w' case, so passing
//     it falls through to the default case and triggers print_error().
//   - main() also accepts -P, -I, -m, -u, -M, -f, -t and -x, none of which
//     are listed in the GARMA text (most appear only in the disabled
//     cma_recomb text).
#if 0	// cma_recomb output
#define	USAGE_START	"USAGE: cma_recomb cmafile1 cmafile2 [options]\n\
 or: cma_recomb prefix_cmafile number_models [options]\n\
   options:\n\
     -P<minprob>- minimum prob for (default H)\n\
     -g<int>,<int> - gap penalty\n\
     -h<int>    - heapsize for multimode (default: 6)\n\
     -t<float>  - trim cma files at info cutoff of <float> prior to recombining\n\
     -M         - Output MAP for each file and exit\n\
     -m<method> - alignment method (default H)\n\
     -u<mode>   - alignment mode (default G)\n\
     -I<x>:<y>  - left & right flank lengths for domain sampling\n\
     -s<int>    - random generator seed\n\
     -x         - dummy\n\
  Note: second format has multiple files prefix_cmafile.1 prefix_cmafile.2 etc.\n\n"
#else 	// ******************* GARMA output *********************
#define	USAGE_START	"USAGE: garma prefix_cmafile number [options]\n\
   options:\n\
     -g<int>,<int> - prior gap opening (alpha_o == beta_o) and \n\
                  extension (alpha_e == beta_e) penalties in fifth nats \n\
                  (default: specified by GISMO input files, typically 25,4)\n\
     -w<real>   - relative weight to place on pseudo versus observed counts\n\
                  (default: 1.0, which denotes equal weight)\n\
     -h<int>    - breed only the best <int> input alignments (default: 6)\n\
     -s<int>    - random generator seed\n\
   Note: if number == 0 then optimizes input cma file only.\n\
   Note: Input file format is: prefix_cmafile.1 ... prefix_cmafile.<size>\n\n"
#endif

/****************************** Main Program ********************************/
/******************************************************************************
 garma_put_elapsed(): write the wall-clock time elapsed since 'start' to stderr
 as whole seconds and as minutes.
 ******************************************************************************/
static void	garma_put_elapsed(time_t start)
{
	// time_t need not be int-sized, so convert explicitly before handing the
	// value to a "%d" conversion (a mismatched argument is undefined behavior).
	int	elapsed=(int)(time(NULL)-start);

	fprintf(stderr,"\ttime: %d seconds (%0.2f minutes)\n",
                        elapsed,(float)elapsed/60.0);
}

/******************************************************************************
 main(): command-line driver.

   argv[1] - file name prefix
   argv[2] - number of input alignments (must start with a digit)
   argv[3..] - options, each starting with '-' (see the switch below)

 Steps:
   1. Parse the options; any malformed argument calls print_error(USAGE_START).
   2. Echo the command line (plus the seed, if one was generated here) to the
      file opened with open_file(argv[1],".cmd","w"), then seed the random
      number generator with sRandom().
   3. If the number is 0: read "<prefix>.cma", run SimAnnealGibbs() twice on it
      and write the result with PutAlnCMSA() under the name "<prefix>_opt".
   4. Otherwise: read "<prefix><i>.cma" for i=1..number, pass each through an
      MSA heap created with MkMSAHeap(mhpsz), optionally trim each retained
      alignment with TrimCMSA() (when -t > 0), and then call GRecombineCMSA()
      on every pair (i,j), i < j, of retained alignments.  Each recombinant is
      annealed and written as "<prefix><i>_<j>" when its score beats map1.

 Returns 0 on normal completion.
 ******************************************************************************/
int	main(Int4 argc,char *argv[])
{ 
	Int4	arg,i,j,a=5,b=2;
	Int4	left_flank=9,right_flank=9;
	Int4	MultiMode=0,mhpsz=6,nBlks=0;
	Int4	*RmLeft,*RmRight,*TrimLimit;
	char	method='H',mode='g';
	char	str[205];
	a_type	A;
	cma_typ	cma0,cma,cma1,cma2,*CMA;
	double	map,map1,map2,info_cut=0.0,minprob=0.0,penalty=0.0;
	BooLean	filein=FALSE,read_map_only=FALSE;
	UInt4	seed=18364592;	// sentinel meaning "no -s option was given"
	time_t	time1=time(NULL);

	if(argc < 3) print_error(USAGE_START);
	// <ctype.h> classifiers require a value representable as unsigned char.
	if(isdigit((unsigned char)argv[2][0])) {
		MultiMode=atoi(argv[2]);
	} else print_error(USAGE_START);

	// NOTE(review): a, b, left_flank, right_flank, method, mode, minprob,
	// filein and read_map_only are set here but not read again in this
	// function; in particular the -g values are never applied to an alignment.
	for(arg = 3; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(USAGE_START);
	   switch(argv[arg][1]) {
	     case 'P':
		minprob=RealOption(argv[arg],'P',-5000,5000,USAGE_START);
		break;
	     case 'g':
		if(sscanf(argv[arg],"-g%d,%d",&a,&b) != 2) print_error(USAGE_START);
		if(a < 0 || b < 0) print_error(USAGE_START);
		break;
	     case 'I':
		if(sscanf(argv[arg],"-I%d:%d",&left_flank,&right_flank) != 2)
			print_error(USAGE_START);
		break;
	     case 'm':
		method=argv[arg][2];
		if(!isalpha((unsigned char)method)) print_error(USAGE_START);
		break;
	     case 'u':
		mode=argv[arg][2];
		if(!isalpha((unsigned char)mode)) print_error(USAGE_START);
		break;
	     case 'M': read_map_only=TRUE; break;
	     case 'f': filein=TRUE; break;
	     case 'h': mhpsz=IntOption(argv[arg],'h',2,100,USAGE_START); break;
	     case 's':	// seed is unsigned, so it is scanned with %u rather than %d.
		if(sscanf(argv[arg],"-s%u",&seed) != 1) print_error(USAGE_START);
		break;
	     case 't':
		if(argv[arg][2]==0){ print_error(USAGE_START); }
		else info_cut=RealOption(argv[arg],'t',0.0,2.0,USAGE_START);
		break;
	     case 'x': break;
	     default: print_error(USAGE_START);
	   }
	}

	// Record the command line so that the run can be repeated.
	FILE *fptr = open_file(argv[1],".cmd","w");
	for(i = 0; i < argc; i++) { if(argv[i][1] != ' ') fprintf(fptr,"%s ",argv[i]); }
	if(seed == 18364592) {  // not provided by user
		seed=(UInt4) time(NULL)/2;
		fprintf(fptr,"-s%u\n",seed);
	} else fprintf(fptr,"\n");
	fclose(fptr);
	sRandom(seed);

	A = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
	cma=NULL;

	// Every file name built below is the prefix plus a short suffix and must
	// fit in str[]; enforce the limit explicitly so that it still holds when
	// assertions are compiled out.
	if(strlen(argv[1]) >= 100)
		print_error("garma: prefix_cmafile name is too long (limit: 99 characters)\n");

	if(MultiMode==0){	// optimize the single input alignment only.
	   snprintf(str,sizeof(str),"%s.cma",argv[1]);
	   cma=ReadCMSA2(str,A); 
	   if(cma != 0){
		map=RelMapCMSA(cma);
		// NOTE(review): 'i' still holds argc from the echo loop above, so
		// the label printed here is the argument count, not a file index.
		fprintf(stderr,"%d: map=%g\n",i,map);
		snprintf(str,sizeof(str),"%s_opt",argv[1]);
		char *options=AllocString("-t1 -g -l1 ");
		map1 = SimAnnealGibbs(options,&cma,'S',100);
		map2 = SimAnnealGibbs(options,&cma,'S',80);
		free(options);
		fprintf(stderr,"map1=%g; map2=%g; map3=%g\n",map1,map2,map);
		PutAlnCMSA(str,cma,NULL);
	   }
	   NilAlpha(A);
	   garma_put_elapsed(time1);
	   return 0;
	}

	// Pass every readable input alignment through the heap; alignments that
	// InsertMSAHeap() refuses are destroyed here.
	mah_typ	maH=MkMSAHeap(mhpsz);
	NEW(CMA,mhpsz+2,cma_typ);
	for(i=1; i <= MultiMode; i++){
		snprintf(str,sizeof(str),"%s%d.cma",argv[1],i);
		cma=ReadCMSA2(str,A); 
		if(cma == 0) continue;
		map=RelMapCMSA(cma);
		fprintf(stderr,"%d: map=%g\n",i,map);
		if(InsertMSAHeap(cma,map,maH)==0){ NilCMSA(cma); cma=0; }
	}

	// Move the retained alignments from the heap into CMA[0..MultiMode-1].
	MultiMode=nMSAHeap(maH);
	for(j=0; j < MultiMode; j++){
		// Keep the call outside assert(): with NDEBUG defined an assert()
		// expression is not evaluated, which would leave CMA[] unfilled.
		cma=DelMinMSAHeap(&map, maH);
		assert(cma != NULL);
		fprintf(stderr,"%d: map=%g; blks = %d\n",j+1,map,nBlksCMSA(cma));
		nBlks=MAXIMUM(Int4,nBlks,nBlksCMSA(cma));
		CMA[j]=cma;
	}
	if(j==0) print_error(USAGE_START); 

	if(info_cut > 0.0){	// replace each alignment by its trimmed version.
	    NEW(RmLeft,nBlks+3,Int4); NEW(RmRight,nBlks+3,Int4);
	    NEW(TrimLimit,nBlks+3,Int4);
	    for(i=0; i < MultiMode; i++){ 
		cma = TrimCMSA(info_cut,TrimLimit,RmLeft,RmRight,CMA[i]);
		NilCMSA(CMA[i]); CMA[i]=cma; 
	    }
	    free(RmLeft); free(RmRight); free(TrimLimit); 
	}

	// Try to recombine every pair (i,j), i < j, of retained alignments.
	for(i=0; i < (MultiMode-1); i++){ 
	  cma1=CMA[i];
	  map1=RelMapCMSA(cma1);
	  for(j=i+1; j < MultiMode; j++){ 
	    cma2=CMA[j];
	    map2=RelMapCMSA(cma2);
	    fprintf(stderr,"map1=%g; map2=%g\n",map1,map2);
	    cma=GRecombineCMSA(cma1,cma2);
	    if(cma == NULL) continue;		// no recombinant for this pair.

	    // Score before annealing; the value is replaced by the annealed
	    // score below, but the calls are kept as in the original code.
	    penalty=JunLiuHMM_PenaltyCMA(stderr,cma);
	    double rmap=UnGappedRelMapCMSA(cma)-penalty;

	    snprintf(str,sizeof(str),"%s%d_%d",argv[1],i,j);
	    char *options=AllocString("-t1 -g -l1 ");
	    rmap = SimAnnealGibbs(options,&cma,'S',80);
	    free(options);
	    fprintf(stderr,"map1=%g; map2=%g; map3=%g\n",map1,map2,rmap);
	    if(rmap > map1) PutAlnCMSA(str,cma,NULL);

	    // Second round: recombine the annealed result with cma2 again.
	    cma0=GRecombineCMSA(cma,cma2);
	    if(cma0!=NULL) {
		JunLiuHMM_PenaltyCMA(stderr,cma0);	// return value is not used.
		// NOTE(review): this anneals and writes 'cma', not the new
		// recombinant 'cma0'; kept as in the original -- confirm intent.
		options=AllocString("-t1 -g -l1 ");
		map2 = SimAnnealGibbs(options,&cma,'S',100);
		free(options);
		if(rmap < map2) PutAlnCMSA(str,cma,NULL);
		NilCMSA(cma0); // NOTE: TrueDataCMSA() not owned by cma.
	    }
	    NilCMSA(cma); // NOTE: TrueDataCMSA() not owned by cma.
	  }
	}
	// NOTE(review): the alignments held in CMA[] are not destroyed here (the
	// original did not destroy them either); only the pointer array is freed.
	free(CMA);
	NilMSAHeap(maH); maH=NULL;
	NilAlpha(A);
	garma_put_elapsed(time1);
	return 0;
}



