/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "cmsa.h"
#include "residues.h"
#include "histogram.h"
#include "gibbs.h"
#include "msaheap.h"

#if 1
double	JunLiuHMM_PenaltyCMA0(FILE *fp, cma_typ cma)
// Test Jun Liu's statistical model for GISMO...
{
	gss_typ *gss=gssCMSA(cma);
	assert(gss->Gapped());
	double penalty=0.0;
	UInt4 Nmm,Nmd,Nmi,Nm;
	UInt4 Nii,Ndd,Nim,Ndm;
	UInt4 Nid,Ndi,Nsd,Nsm;
	Int4 ae,be,ao,bo,n0,n1,n2;

	Nmm=Nmi=Nmd=Nii=Nim=Ndd=Ndm=Nid=Ndi=Nsd=Nsm=0;
	Int4 *pos;
	Int4	*lens = LengthsCMSA(cma);
	NEW(pos,nBlksCMSA(cma)+3,Int4);
	for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
	   	assert(PosSites(sq,pos,SitesCMSA(cma)) == nBlksCMSA(cma));
		gsq_typ *gsq=gss->GetGSQ(sq);
		gsq->FindIndels(nBlksCMSA(cma),pos,lens,Nmm,Nmi,Nmd,Nii,Nim,Ndd,Ndm,Ndi,Nid,Nsd,Nsm);
	} free(pos);

	n0=10000;
	ao=10;	// one insertion in 1000 residues.
	bo=10;	// one deletion every 1000 residues 
	n1=n2=100;
	ae=20;	// L1 = 20/100 --> 5 = average insert length
	be=50;	// L2 = 50/100 --> 2 = average deletion length

	Nm=Nmm+Nmi+Nmd;

	penalty=0.0;
	// numerator...
	penalty += lngamma((double)(Nmi+ao));
	penalty += lngamma((double)(Nmd+bo));
	penalty += lngamma((double)(Nmm+n0-ao-bo));
	penalty += lngamma((double)(n0));
	// denominator...
	penalty -= lngamma((double)(Nm+n0));
	penalty -= lngamma((double)(ao));
	penalty -= lngamma((double)(bo));
	penalty -= lngamma((double)(n0-ao-bo));

	// numerator...
	penalty += lngamma((double)(Nii+ae));
	penalty += lngamma((double)(Nim+n1-ae));
	penalty += lngamma((double)(n1));
	// denominator...
	penalty -= lngamma((double)(Nim+Nii+n1));
	penalty -= lngamma((double)(ae));
	penalty -= lngamma((double)(n1-ae));

	// numerator...
	penalty += lngamma((double)(Ndd+be));
	penalty += lngamma((double)(Ndm+n2-be));
	penalty += lngamma((double)(n2));
	// denominator...
	penalty -= lngamma((double)(Ndd+Ndm+n2));
	penalty -= lngamma((double)(be));
	penalty -= lngamma((double)(n2-be));
	double Bo,Be,Ao,Ae;
	double aveBo,aveBe,aveAo,aveAe;
	if(fp){
	 static Int4 Seed=0;
	 if(Seed==0) {
           Seed=-Random();  // need to initialize with a negative number.
	   if(Seed > 0) Seed = -Seed;
           fprintf(stderr,"INITIALIZING SEED (%d)\n",Seed);
         }
	for(Int4 iter=1; iter < 10; iter++){
	 Bo=betadev(Nmd+Nid+1,Nmm+Nmi+Nii+Nim+1,&Seed);
	 Be=betadev(Ndd,Ndm,&Seed);
	 Ao=(1-Bo)*betadev(Nmi,Nmm,&Seed);
	 Ae=(1-Bo)*betadev(Nii,Nim,&Seed);
	 if(0) fprintf(stdout,"Ao = %g; Ae = %g; Bo = %g; Be = %g\n",Ao,Ae,Bo,Be);
	}
	 aveBo=(double)(Nmd+Nid)/(double)(Nmm+Nmi+Nmd+Nid+Nii+Nim);
	 aveBe=(double)(Ndd)/(double)(Ndm+Ndd);
	 aveAo=(1.0 - aveBo)*((double)(Nmi)/(double)(Nmm+Nmi));
	 aveAe=(1.0 - aveBo)*((double)(Nii)/(double)(Nim+Nii));
	 double pernats = PerNatsCMSA(cma);
	if(0){
	 fprintf(stdout,"aveAo = %g; aveAe = %g; aveBo = %g; aveBe = %g\n",
			pernats*log(aveAo), pernats*log(aveAe), pernats*log(aveBo),
			pernats*log(aveBe));

	 fprintf(stdout,"Nmm = %d; Nmi = %d; Nmd = %d; Nm=%d; Nii = %d; Nim = %d\n",
			Nmm,Nmi,Nmd,Nm,Nii,Nim);
	 fprintf(stdout,"Ndd = %d; Ndm = %d; Ndi = %d; Nid = %d\n",Ndd,Ndm,Ndi,Nid);
	 fprintf(stderr,"penalty=%g; Nmm = %d\n",penalty,Nmm);
	 fprintf(stderr,"current indel_penalty = %g\n",-IndelPenaltySeqSet(DataCMSA(cma)));
	}
	}
	return penalty;
}
#endif

/* USAGE_START is the usage text that main() hands to print_error() and to the
 * option helpers (IntOption/RealOption) whenever the command line is rejected.
 * Two variants exist; the "#if 0" selects the GARMA text and leaves the older
 * cma_recomb text disabled.
 * NOTE(review): main() still accepts -P, -I, -m, -u, -M, -f, -t and -x, which
 * the active GARMA text does not list, and it builds input names as
 * "<prefix><i>.cma" while the text describes "prefix_cmafile.<i>" -- confirm
 * which is intended before changing either side.
 */
#if 0	// cma_recomb output
#define	USAGE_START	"USAGE: cma_recomb cmafile1 cmafile2 [options]\n\
 or: cma_recomb prefix_cmafile number_models [options]\n\
   options:\n\
     -P<minprob>- minimum prob for (default H)\n\
     -g<int>,<int> - gap penalty\n\
     -h<int>    - heapsize for multimode (default: 6)\n\
     -t<float>  - trim cma files at info cutoff of <float> prior to recombining\n\
     -M         - Output MAP for each file and exit\n\
     -m<method> - alignment method (default H)\n\
     -u<mode>   - alignment mode (default G)\n\
     -I<x>:<y>  - left & right flank lengths for domain sampling\n\
     -s<int>    - random generator seed\n\
     -x         - dummy\n\
  Note: second format has multiple files prefix_cmafile.1 prefix_cmafile.2 etc.\n\n"
#else 	// ******************* GARMA output *********************
#define	USAGE_START	"USAGE: garma prefix_cmafile number [options]\n\
   options:\n\
     -g<int>,<int> - prior gap opening (alpha_o == beta_o) and \n\
                  extension (alpha_e == beta_e) penalties in fifth nats \n\
                  (default: specified by GISMO input files, typically 25,4)\n\
     -w<real>   - relative weight to place on pseudo versus observed counts\n\
                  (default: 1.0, which denotes equal weight)\n\
     -h<int>    - breed only the best <int> input alignments (default: 6)\n\
     -s<int>    - random generator seed\n\
   Note: Input file format is: prefix_cmafile.1 ... prefix_cmafile.<size>\n\n"
#endif

/****************************** Main Program ********************************/
int	main(Int4 argc,char *argv[])
/*************************************************************************
 Command-line driver that recombines cma alignments (GRecombineCMSA) and
 refines the result by simulated annealing (SimAnnealGibbs).

 Invocation (at least two positional arguments, then '-' options):
   argv[1] argv[2]  where argv[2] does NOT start with a digit:
	two-file mode; both arguments are passed to ReadCMSA2() and an
	improved recombinant is written under the name "<argv[1]>.recomb".
   argv[1] <number> where the number is > 0:
	multi-file mode; "<argv[1]><i>.cma" is read for i = 1..number, the
	alignments retained by the heap (capacity set by -h) are recombined
	pairwise, and improved recombinants are written under the name
	"<argv[1]><i>_<j>".

 The command line is echoed to a file opened with
 open_file(argv[1],".cmd","w"); when no seed was supplied, the
 time-derived seed is appended to that echo.

 Any rejected argument goes through print_error(), which is relied upon
 not to return.  Returns 0 on normal completion; the -M option leaves via
 exit(1) after printing the two input maps.
 *************************************************************************/
{ 
	Int4	arg,i,j,time1,a=5,b=2;
	ss_type	data;
	Int4	left_flank=9,right_flank=9;
	char	method='H';
	char	mode='g'; 
	a_type	A;
	cma_typ	cma0,cma,cma1,cma2;
	double	map,map1,map2,info_cut=0.0;
	Int4	MultiMode=0;
	double	minprob=0.0; 
	Int4	mhpsz=6;
	BooLean	filein=FALSE,read_map_only=FALSE;
	// This particular value doubles as the "no -s option given" marker.
	UInt4	seed=18364592;

	time1=time(NULL); 
	if(argc < 3) print_error(USAGE_START);
	// <ctype.h> classifiers need an unsigned char value, hence the casts.
	if(isdigit((unsigned char)argv[2][0])) {
		MultiMode=atoi(argv[2]);
		if(MultiMode<=0) print_error(USAGE_START);
	}
	// NOTE: a, b, left_flank, right_flank, method, mode, minprob and filein
	// are parsed and validated but are not read again in this function.
	for(arg = 3; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(USAGE_START);
	   switch(argv[arg][1]) {
	     case 'P': minprob=RealOption(argv[arg],'P',-5000,5000,USAGE_START);
		break;
             case 'g': if(sscanf(argv[arg],"-g%d,%d",&a,&b) != 2)
                                        print_error(USAGE_START); 
                     if(a < 0 || b < 0) print_error(USAGE_START);
                     break;
             case 'I': 
                  if(sscanf(argv[arg],"-I%d:%d",&left_flank,&right_flank) != 2)
                        print_error(USAGE_START);
                  break;
	     case 'm': method=argv[arg][2];
		  if(!isalpha((unsigned char)method)) print_error(USAGE_START);
		  break;
	     case 'u': mode=argv[arg][2];
		  if(!isalpha((unsigned char)mode)) print_error(USAGE_START);
		  break;
	     case 'M': read_map_only=TRUE; break;
	     case 'f': filein=TRUE; break;
	     case 'h': mhpsz=IntOption(argv[arg],'h',2,100,USAGE_START); break;
             case 's': // seed is unsigned, so scan it with %u.
		  if(sscanf(argv[arg],"-s%u",&seed)!=1) print_error(USAGE_START);
		  break;
             case 't':
                if(argv[arg][2]==0){ print_error(USAGE_START); }
		else info_cut=RealOption(argv[arg],'t',0.0,2.0,USAGE_START);
                break;
	     case 'x': break;
	     default: print_error(USAGE_START);
	   }
	}

	// Echo the command line so that the run can be repeated.
	FILE *fptr = open_file(argv[1],".cmd","w");
	for(i = 0; i < argc; i++) { if(argv[i][1] != ' ') fprintf(fptr,"%s ",argv[i]); }
	if(seed == 18364592) {  // not provided by user
        	seed=(UInt4) time(NULL)/2;
        	// seed = (UInt4) time(NULL);
        	fprintf(fptr,"-s%u\n",seed);
   	} else fprintf(fptr,"\n");
	fclose(fptr);
        sRandom(seed);
	A = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
	cma=NULL;
  if(MultiMode==0) {		// two-file mode.
	char    str[300];
	// Refuse names that would overflow str instead of trusting sprintf().
	if(strlen(argv[1]) + sizeof(".recomb") > sizeof(str))
		print_error("input file name is too long");
	sprintf(str,"%s.recomb",argv[1]);
	cma1=ReadCMSA2(argv[1],A); 
        if(cma1==0) print_error(USAGE_START); 
	cma2=ReadCMSA2(argv[2],A); 
        if(cma2==0) print_error(USAGE_START); 
	// Trim cma files prior to recombining...
	if(info_cut > 0.0){
            Int4 *RmLeft,*RmRight,*TrimLimit,nBlks;
	    nBlks=MAXIMUM(Int4,nBlksCMSA(cma1),nBlksCMSA(cma2));
            NEW(RmLeft,nBlks+3,Int4);
            NEW(RmRight,nBlks+3,Int4);
            NEW(TrimLimit,nBlks+3,Int4);
            cma = TrimCMSA(info_cut,TrimLimit,RmLeft,RmRight,cma1);
            NilCMSA(cma1); cma1=cma; 
            cma = TrimCMSA(info_cut,TrimLimit,RmLeft,RmRight,cma2);
            NilCMSA(cma2); cma2=cma; cma=NULL;
	    free(RmLeft); free(RmRight); free(TrimLimit);
	}
	map1=RelMapCMSA(cma1);
	map2=RelMapCMSA(cma2);
	fprintf(stderr,"map1=%g; map2=%g\n",map1,map2);
	if(read_map_only) exit(1);
	cma=GRecombineCMSA(cma1,cma2);
	if(cma!=NULL) {
	  map = SimAnnealGibbs("-t1 -g -l1 ",&cma,'S',80);
	  fprintf(stderr,"map1=%g; map2=%g; map3=%g\n",map1,map2,map);
	  if(map > map1) PutAlnCMSA(str,cma,NULL);
	  cma0=GRecombineCMSA(cma,cma2);
	  if(cma0!=NULL) {
	     // NOTE(review): cma (not the fresh recombinant cma0) is annealed
	     // and written here -- confirm that this is intended.
	     map2 = SimAnnealGibbs("-t1 -g -l1 ",&cma,'S',100);
	     if(map < map2) PutAlnCMSA(str,cma,NULL);
	     NilCMSA(cma0); // NOTE: TrueDataCMSA() not owned by cma.
	  }
	  NilCMSA(cma); // NOTE: TrueDataCMSA() not owned by cma.
	}
	data=TrueDataCMSA(cma1);	// TrueDataCMSA() not owned by cma1.
	NilCMSA(cma1); NilCMSA(cma2);
	NilSeqSet(data); NilAlpha(A);
  } else if(MultiMode > 0) {	// USE FOR GARMA...
	cma_typ *CMA;
	char	str[205];
	// str must hold the prefix plus "<i>.cma" or "<i>_<j>".
	if(strlen(argv[1]) >= 100)
		print_error("input file prefix is too long (limit: 99 characters)");
	mah_typ	maH=MkMSAHeap(mhpsz);
        Int4 *RmLeft,*RmRight,*TrimLimit,nBlks=0;

	// Read every candidate; the heap decides which alignments are kept.
	NEW(CMA,mhpsz+2,cma_typ);
	for(i=1; i <= MultiMode; i++){
		sprintf(str,"%s%d.cma",argv[1],i);
	 	cma=ReadCMSA2(str,A); 
		if(cma != 0){
	  	 map=RelMapCMSA(cma);
	  	 fprintf(stderr,"%d: map=%g\n",i,map);
		 if(InsertMSAHeap(cma,map,maH)==0){ NilCMSA(cma); cma=0; }
		}
	}
	// Drain the heap into CMA[0..MultiMode-1].
	MultiMode=nMSAHeap(maH);
	for(j=0,i=1; i <= MultiMode; i++){
		// The pop must not live inside assert(): under NDEBUG it would
		// vanish and the heap would never be emptied.
        	cma=DelMinMSAHeap(&map, maH);
        	assert(cma != NULL);
	  	fprintf(stderr,"%d: map=%g; blks = %d\n",i,map,nBlksCMSA(cma));
	    	nBlks=MAXIMUM(Int4,nBlks,nBlksCMSA(cma));
		CMA[j]=cma; j++; 
	} if(j==0) print_error(USAGE_START); 

	if(info_cut > 0.0){
            NEW(RmLeft,nBlks+3,Int4); NEW(RmRight,nBlks+3,Int4);
            NEW(TrimLimit,nBlks+3,Int4);
	    for(i=0; i < MultiMode; i++){ 
              cma = TrimCMSA(info_cut,TrimLimit,RmLeft,RmRight,CMA[i]);
              NilCMSA(CMA[i]); CMA[i]=cma; 
	    } free(RmLeft); free(RmRight); free(TrimLimit); 
	}
	// Recombine every unordered pair (i,j), i < j.
	for(i=0; i < (MultiMode-1); i++){ 
	  cma1=CMA[i];
	  map1=RelMapCMSA(cma1);
	  for(j=i+1; j < MultiMode; j++){ 
	    cma2=CMA[j];
	    map2=RelMapCMSA(cma2);
	    fprintf(stderr,"map1=%g; map2=%g\n",map1,map2);
	    cma=GRecombineCMSA(cma1,cma2);
	    if(cma!=NULL) {
		// mapGARMA is not used afterwards; the calls are kept because
		// the penalty routine seeds and draws random numbers.
	        double mapGARMA=JunLiuHMM_PenaltyCMA0(stderr,cma);
	        // SetPenaltyCMSA(o,x,cma);
		sprintf(str,"%s%d_%d",argv[1],i,j);
	  	map = SimAnnealGibbs("-t1 -g -l1 ",&cma,'S',80);
	  	fprintf(stderr,"map1=%g; map2=%g; map3=%g\n",map1,map2,map);
	  	if(map > map1) PutAlnCMSA(str,cma,NULL);
	  	cma0=GRecombineCMSA(cma,cma2);
	  	if(cma0!=NULL) {
		  // NOTE(review): this calls JunLiuHMM_PenaltyCMA (no trailing
		  // 0), which is not defined in this file's visible part, and
		  // the annealing below works on cma rather than cma0 --
		  // confirm that both are intended.
	          mapGARMA=JunLiuHMM_PenaltyCMA(stderr,cma0);
	          // SetPenaltyCMSA(o,x,cma0);
		  // sprintf(str,"%s%d_%dx",argv[1],i,j);
	  	  map2 = SimAnnealGibbs("-t1 -g -l1 ",&cma,'S',100);
	  	  if(map < map2) PutAlnCMSA(str,cma,NULL);
	     	  NilCMSA(cma0); // NOTE: TrueDataCMSA() not owned by cma.
	  	} NilCMSA(cma); // NOTE: TrueDataCMSA() not owned by cma.
	    }
	  }
	}
	// data=TrueDataCMSA(cma1);	// TrueDataCMSA() not owned by cma1.
	// NilCMSA(cma1); NilSeqSet(data); 
	NilMSAHeap(maH); maH=NULL;
	NilAlpha(A);
  } else {
	// NOTE(review): unreachable as written -- MultiMode is either 0 or a
	// positive value by the time control gets here.  Also, str is never
	// filled in before PutAlnCMSA() uses it; fix that before reviving
	// this single-file path.
	char	str[205];
	cma1=ReadCMSA2(argv[1],A); 
	SaveBestCMSA(cma1); 
	map1=RelMapCMSA(cma1);
	cma2=CopyCMSA(cma1);
	// map2 = SimAnnealGibbs("-t1 -g -l1 ",&cma2,'S',300);
	// map2 = RunGibbs("-t1 -g -l1 ",&cma2);
	map2 = HotGibbs("-t1 -g -l1 ",&cma2,200);
	if(map2 > map1) PutAlnCMSA(str,cma2,NULL);

	cma=GRecombineCMSA(cma1,cma2);
	if(cma!=NULL) {
	  map = SimAnnealGibbs("-t1 -g -l1 ",&cma,'S',100);
	  if(map > map2 && map > map1) PutAlnCMSA(str,cma,NULL);
	  // PutCMSA(stderr,cma2);
	  fprintf(stderr,"map1=%g; map2=%g; map3=%g\n",map1,map2,map);
	    // 	WriteCMSA("junk.cma",cma);
	    // PutMSA(stderr,cma); 
		NilCMSA(cma); // NOTE: TrueDataCMSA() not owned by cma.
	}
	data=TrueDataCMSA(cma1);	// TrueDataCMSA() not owned by cma1.
	NilCMSA(cma1); NilCMSA(cma2);
	NilSeqSet(data); NilAlpha(A);
  }
	// time_t is not guaranteed to match %d, so convert explicitly.
	int	elapsed=(int)(time(NULL)-time1);
	fprintf(stderr,"\ttime: %d seconds (%0.2f minutes)\n",
                        elapsed,(float)elapsed/60.0);
	return 0;
}



