/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "gsm_typ.h"
#include "gismo_usage.h"
#include "gmb_typ.h"
#include "cma_gmb.h"
// #include "chn_typ.h"
// #include "gismo_usage.h"
#if 0
Note: access to, and modification of, the floating-point environment are only
meaningful when the directive "pragma STDC FENV_ACCESS" is supported and is set
to ON. Otherwise the implementation is free to assume that the floating-point
control modes are always the default ones and that the floating-point status
flags are never tested or modified.
#endif
#if 0	// Necessary to avoid an Intel Inspector runtime error? (crash?)
// It appears to turn off floating point error exceptions:
#include <fenv.h>
#pragma STDC FENV_ACCESS OFF
#endif

// UnLabelSeq(e_type E); LabelSeq(e_type E); LabeledSeq(E);

/*
 * PrintError -- write the version string to stderr, then pass the usage
 * text on to print_error().
 *
 *   version : text written verbatim to stderr first.
 *   usage   : text forwarded unchanged to print_error().
 */
static void PrintError(const char *version, const char *usage)
{
	fprintf(stderr,"%s",version);
	print_error(usage);
}

int	RunGISMO(int argc, char *argv[],const char *version, const char *usage)
{
	Int4	time1=time(NULL);
	clock_t tckS=clock();
	TurnOffLicenseStatement();
//====================== TEST OUT stdout effect on run times for omp ===================
// Int4 iters=100000000;
Int4 iters=1000000;
	//================ 0. Get global input parameters and file ===================
	UInt4   i,arg,x;
	UInt4   seed=18364592;
	Int4	NumThrds=0;
	Int4	max_in_seq=20000;
        Int4	mhpsz=2,Stages=2,NumCandidates=2;
	if(argc < 2) PrintError(version,usage);
	BooLean	use_openmp=FALSE,output_rtf=FALSE,use_seg=FALSE,SaveBlkMSA=FALSE;
	FILE	*ofp,*ifp,*fptr = open_file(argv[1],".cmd","w");
	for(i = 0; i < argc; i++) { fprintf(fptr,"%s ",argv[i]); } 
        for(arg = 2; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(usage);
           switch(argv[arg][1]) {
	      case 'L':
		  if(argv[arg][2]==0){ use_seg=TRUE; argv[arg][1]=' '; }
		  else print_error(usage);
                  break;
              case 'm':	// new for passing in input seqs.
                  if(sscanf(argv[arg],"-maxseq=%d",&max_in_seq) == 1){
                        if(max_in_seq < 20 || max_in_seq >= INT4_MAX)
				print_error(usage);
                  } else if(strcmp(argv[arg],"-msa") == 0){
			SaveBlkMSA=TRUE;
                  } else print_error(usage);
                  break;
	      case 'r':
		  {
		    if(strcmp("-rtf",argv[arg]) == 0){ output_rtf=TRUE; argv[arg][1]=' '; }
		    else { print_error(usage); }
		  } break;
              case 's':
                  if(sscanf(argv[arg],"-stages=%d",&x) == 1){
			Stages=x; // do nothing...
                  } else if(sscanf(argv[arg],"-seed=%u",&seed) == 1){
                        argv[arg][1]=' ';
                  } else print_error(usage); break;
              case 't':
#ifndef _OPENMP
	print_error("Program compiled without openmp: -thrds option invalid");	
#endif
		  if(sscanf(argv[arg],"-thrds=%d",&NumThrds) == 1){
			if(NumThrds < 2 || NumThrds > 100) print_error(usage);
			use_openmp=TRUE; argv[arg][1]=' '; 
                  } else print_error(usage); break;
#if 1
              case 'H':
                  if(sscanf(argv[arg],"-H=%d",&NumCandidates) == 1){
                        if(NumCandidates < 2 || NumCandidates > 50)
				print_error(usage);
                  } else print_error(usage); break;
              case 'h':
                  if(sscanf(argv[arg],"-h=%d",&mhpsz) == 1){
                        if(mhpsz < 2 || mhpsz > 50) print_error(usage);
                  } else print_error(usage); break;
#endif
	      default: break;	// ignore the rest...
	   }
	}
	if(seed == 18364592){ seed = (UInt4) time(NULL); fprintf(fptr,"-seed=%d",seed);}
	fprintf(fptr,"\n"); fclose(fptr);
	TurnOffLicenseStatement();
	a_type	AB=0;
	// PrintLicenseStatement(); 
	char	str[200];
	cma_typ	cma=0;
	double	bst_lpr,bstWtSq;
	InitializeSets( ); 
#if defined(_OPENMP)
   if(use_openmp){ 	// Run lots of runs in parallel...
	Int4 max_thrd=omp_get_max_threads();	// maximum that can be used
	ifp=open_file(argv[1],"","r");
	if(ifp==0) print_error("Input file not found"); fclose(ifp);
	Int4	j,Nt=0; 
	if(NumThrds > max_thrd) Nt=max_thrd; else Nt=NumThrds;
	cma_typ	*tcma;	NEW(tcma,Nt+3,cma_typ);
	double 	*lpr; NEW(lpr,Nt+3,double);
	double 	*nWtSq; NEW(nWtSq,Nt+3,double);
	// required to avoid data race issues...
	sRand_R(seed,Nt); SetDirichletDist(Nt); init_lnfact();	
	a_type *ab=0; NEW(ab, Nt+3, a_type);
	char ***argV; NEWPP(argV,Nt+2,char);
	for(i=0; i < Nt; i++){
	   ab[i]=MkAlphabet(AMINO_ACIDS,PROT_BLOSUM62,SREL26_BLSM62);
	   NEWP(argV[i], argc+2, char);
	   for(Int4 z=0; z < argc; z++)argV[i][z]=AllocString(argv[z]); 
	} omp_set_num_threads(Nt);
#if 1
	Int4    Nsq,*counts,J;
	unsigned short  *nsize;
        // Read in the sequences as an array to allow full and purged alignments.
        Nsq=GetFastaInfo(argv[1], max_in_seq, &counts, &nsize, ab[0]);
        e_type  *Seq; NEW(Seq,Nsq+3,e_type);
        ifp= open_file(argv[1],"","r");
        for(J=1; J <= Nsq; J++){ 
		Seq[J]=ReadSeq(ifp,J,nsize[J],ab[0]);
		if(use_seg) ProcessSeqPSeg(17, 2.2,2.5,100,Seq[J],ab[0]);
	} free(counts); free(nsize); fclose(ifp);
#endif
	Int4 num_thrd=omp_get_num_threads();	// Nsq used.
	// fprintf(stderr,"max_thrd=%d; num_thrd=%d=%d\n",max_thrd,num_thrd,Nt);
	// pass in fasta seqs & info to avoid multiple reads, data races, etc.
        #pragma omp parallel
	{	// getting collisons it seems...sharing something...?
           int th=omp_get_thread_num(),nt=omp_get_num_threads();

	  #pragma omp critical (omp_gismo_mainA)
      	  {
	   fprintf(stderr,"starting thread %d of %d\n",th+1,nt);
	   // std::cerr << "starting thread " << th << " of " << nt << std::endl;
	  }
// fprintf(stderr,"DEBUG 1\n");	
// why sometimes get error message and then exit for same seed?
	   tcma[th]=run_gismo_plus(argc,argV[th],lpr[th],nWtSq[th],ab[th], usage,Seq,Nsq);

          #pragma omp critical (omp_gismo_mainB)
	  {
	    if(tcma[th]==0){
		lpr[th]=-9999999.0; fprintf(stderr,"thread %d failed\n",th+1); 
	    } else fprintf(stderr,"thread %d done; LPR = %.2f (%d cols)\n",
			th+1,lpr[th],NumColumnsCMSA(tcma[th]));
	  }
        }	// end of parallel section.
	// fprintf(stderr,"# of threads=%d; id = %d\n",omp_get_num_threads(),omp_get_thread_num());
// e_type  *NilSeqSetRtnSeqs(ss_type P);
// try to use only one array without copying sequences over? not sure how will work out.
	for(cma=0,bst_lpr=-9999999.0,j=0; j < Nt; j++){
	    if(lpr[j] > bst_lpr){ 
		if(cma){ TotalNilCMSA(cma); cma=0; }
		if(AB){ NilAlpha(AB); AB=0; }
		bst_lpr=lpr[j]; bstWtSq=nWtSq[j];
		cma=tcma[j]; AB=ab[j]; ab[j]=0;
	    } else {
		if(tcma[j]){ TotalNilCMSA(tcma[j]); }
		if(ab[j]){ NilAlpha(ab[j]); ab[j]=0; }
	    } tcma[j]=0;
	} free(tcma); free(lpr); free(nWtSq);
	sRand_R_Free(); DirichletDistFree();
        for(J=1; J <= Nsq; J++) NilSeq(Seq[J]); free(Seq);
	for(i=0; i < Nt; i++){
	  if(argV[i]){ for(j=0; j < argc; j++) free(argV[i][j]); free(argV[i]); }
	} free(argV); free(ab);
    } else
#endif
    {
	fprintf(stderr,"using serial mode; seed= %d\n",seed);
	AB=MkAlphabet(AMINO_ACIDS,PROT_BLOSUM62,SREL26_BLSM62);
	sRandom(seed);
	cma=run_gismo_plus(argc,argv,bst_lpr,bstWtSq,AB,usage);
    }
	if(cma == 0) print_error("gismo: failed to find a significant alignment");
	clock_t tckE=clock() - tckS;
	Int4 tm=time(NULL)-time1;
	if(NumThrds == 0){
	   sprintf(str,
	     "LPR=%.2lf(%.1lf nats/%.1lf WtSq)%d cols;1 thrd;h=%d;H=%d;stages=%d(%d s;%.2f m)",
	     	bst_lpr/bstWtSq,bst_lpr,bstWtSq,NumColumnsCMSA(cma),mhpsz,NumCandidates,
		Stages,tm,(float)tm/60.0);
	} else {
	     sprintf(str,
	     "LPR=%.2f(%.1lf nats/%.1lf WtSq)%d cols;%d thrds;h=%d;H=%d;stages=%d(%d s;%.2f m)",
		bst_lpr/bstWtSq,bst_lpr,bstWtSq,NumColumnsCMSA(cma),NumThrds,mhpsz,NumCandidates,
		Stages,tm,(float)tm/60.0);
	} RenameCMSA(argv[1],cma);
        cma_typ bst_cma = AddConsensusCMSA(cma,str);
	sprintf(str,"%s_gsm",argv[1]);
	RenameCMSA(str,bst_cma);
#if 1
        // sprintf(str,"%s.cma",argv[1]); WriteCMSA(str,bst_cma);
	ofp = open_file(argv[1],".cma","w"); PutCMSA(ofp,bst_cma); fflush(ofp); fclose(ofp); 
        ofp = open_file(argv[1],".fa","w"); PutFastaCMSA(ofp,bst_cma); fclose(ofp);
#endif
#if 0
	if(output_rtf){
	  Int4	Argc=0; 
	  char 	**ArgV;  
	  NEWP(ArgV, 12, char); ArgV[0]=0; ArgV[Argc]=AllocString(argv[0]); 
	  Argc++; ArgV[Argc]=AllocString(argv[1]); 
	  Argc++; ArgV[Argc]=AllocString("-S");
          Argc++; ArgV[Argc]=AllocString("-F7"); 
	  Argc++; ArgV[Argc]=AllocString("-S=P");
          Argc++; ArgV[Argc]=AllocString("-N=6"); 
          Argc++; ArgV[Argc]=AllocString("-Q"); Argc++; ArgV[Argc]=0;
          chn_typ chn(Argc,ArgV);
	  chn.PutHierarchicalAlignment( );
	  for(x=0; x < Argc; x++) free(ArgV[x]); free(ArgV);
	}
#endif
	fprintf(stderr,"Final_LPR=%.1f (%d cols)\n",bst_lpr,NumColumnsCMSA(bst_cma));
	TotalNilCMSA(cma); TotalNilCMSA(bst_cma);
	NilAlpha(AB); // leave alphabet around until done.
	tckE=clock() - tckS;
	tm=time(NULL)-time1;
	float secs=(float)(tckE)/CLOCKS_PER_SEC;
	fprintf(stderr,"\ttime: %.2e ticks (%d sec; %0.2f min)\n",
		(float)tckE,tm,(float)tm/60.0);
	return 0;
}

