/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "cmc_typ.h"
#include "blosum62.h"
#include "rst_typ.h"
#include "swt_typ.h"

// Usage/help text for the bhps (Bayesian Hyperpartitioning with Pattern Selection)
// command line.  It is passed to print_error() whenever the arguments handled in
// cmc_typ::ReadMainArg() are missing or malformed.  Keep this text in step with the
// option parsing in that function.
#define	MAIN_CMC_USAGE  "USAGE: bhps file_prefix [options]\n\
      (Bayesian Hyperpartitioning with Pattern Selection)\n\
      (put input main alignment cma formatted file in <file_prefix>.mma)\n\
      (put input seed alignment cma formatted files in <file_prefix>.sma)\n\
      (put hyperpartition specification in <file_prefix>.hpt)\n\
   options:\n\
     -A=<real>:<real> - Set default alpha hyperparameters A0:B0\n\
     -Am=<real>:<real> - Set miscellaneous default alpha hyperparameters A0:B0\n\
     -col=<int>:<int>  - Default min and max number of columns\n\
     -convergence  - monitor convergence by outputting LPRs in <file_prefix>.cnv\n\
     -del          - Treat deletions as random background\n\
     -fixed=<int>  - Don't sample sequences with a score >= <int> % of the consensus self score.\n\
                       (default: <int> == 0; defined as lowest seed sequence versus consensus score)\n\
     -heatmap      - Output heatmaps showing pattern conservation among subgroups\n\
     -maxiter=<int> - Iterate no more than <int> cycles (default: 6)\n\
     -noseeds	   - ignore seed patterns in hyperpartition (*.hpt) file\n\
     -addcsq	   - Add a consensus sequence to each seed alignment\n\
     -N=<int>      - Set default contrast setting (N = number of columns to highlight)\n\
     -pdb_list=<str> - file of paths to 3D coordinates for creating PyMOL scripts\n\
                       from a generated VSI file using the vsi2pml program.\n\
     -put          - output intermediate files for hyperpartition categories and sets\n\
     -ppb=<int>    - parts per billion increase in LPR required to keep going (default: 100)\n\
                         Larger values lead to shorter (less optimized) runs (range: 1-1000000)\n\
     -prune=<int>  - Set the sampling round at which pruning of the input tree occurs (default: 2)\n\
     -Pttrn=<int>  - Specify length of automatically generated seed patterns\n\
     -R=<int>      - Random seed\n\
     -rho=<real>   - Set global rho parameter\n\
     -Ri=<real>    - Set global prior probability that a sequence belongs to the foreground\n\
     -Rim=<real>   - Set misc global prior probability that a sequence belongs to the foreground\n\
     -Rounds=<int> - set the number of sampling rounds for each iteration (default: 6)\n\
     -rchk       - read checkpoint file\n\
     -RTF          - output rtf files; requires a *.CHK file\n\
     -rtf          - output individual rtf files, one for each hyperpartition set\n\
     -sets	   - Save the sets for comparing hierarchies (evalBPPS program)\n\
     -show	   - show (i.e., don't hide) indels in contrast alignments\n\
     -shuffle      - shuffle non-null residues within columns (control for CHAIN analysis)\n\
     -strict       - Require strict independence between categories\n\
     -syntax       - show hyperpartition syntax\n\
     -tree	   - sample assuming that the input hpt corresponds to a tree\n\
     -verbose      - output lots of additional information to stderr\n\
     -wchk         - write checkpoint file\n\
  Reference: \n\
    Neuwald, A.F. 2011. Surveying the manifold divergence of an entire protein class \n\
        for statistical clues to underlying biochemical mechanisms.  Statistical Applications\n\
        in Genetics and Molecular Biology 10(1): Article 36. (30 pages)\n\
    Neuwald, A.F. 2023. Identifying cross-conserved residue constraints in protein sequences. \n\
        In preparation.\n\
   \n"

#if 0

#endif


// Report the main bhps usage text by handing MAIN_CMC_USAGE to print_error().
//   fp - accepted as part of the signature but not consulted here; the
//        destination of the text is whatever print_error() chooses.
void    cmc_typ::PrintUsage(FILE *fp)
{
        (void) fp;      // intentionally unused
        print_error(MAIN_CMC_USAGE);
}

// Parse the bhps command line and store the resulting settings in this object.
//
//   argc, argv - the program's argument vector.  argv[1] is the input file prefix
//                (copied into 'infile'); every argv[2..argc-1] must start with '-'.
//
// Effects visible in this function:
//   * Unless argv[0] is exactly "pmcBPPS", the command line is echoed to the file
//     opened with open_file(argv[1],".cmd","w"), with "H " written after argv[0].
//     If the seed is still the built-in sentinel after parsing, a time-derived seed
//     is used and appended to that file as "-R=<seed>".
//   * -convergence opens the ".cnv" (cfp) and ".itr" (ifp) files for writing.
//   * -rchk calls ReadCheckpoint(0) immediately, while parsing.
//   * A successfully parsed -col= argument has argv[arg][1] overwritten with ' ';
//     arguments whose second character is ' ' are skipped.
//   * When argv[0] is exactly "pmcBPPS", IsTreeHpt and IsTreePMC are set to TRUE.
// Malformed or unknown options are reported through print_error(), except as noted
// at case 'w' below.  The code after each print_error() call is written on the
// assumption that print_error() does not return.
void	cmc_typ::ReadMainArg(Int4 argc, char *argv[])
{
	Int4	arg,x;
	Int4	DfltMinNumCol,DfltMaxNumCol;
	// Sentinel meaning "no seed chosen yet".
	// NOTE(review): an explicit -R=7061950 is indistinguishable from no -R option.
	UInt4   seed=7061950;

	StrictIndepend=FALSE;
	SaveSets=FALSE;
	PrintEachRTF=FALSE;
	WriteCheckPoint=FALSE;
	AddCSQ=FALSE;
	checkpoint=0; 	// recover checkpoint...
	cfp=ifp=0;
	if(argc < 2){ print_error(MAIN_CMC_USAGE); }
	program_name=AllocString(argv[0]);

	// A lone option in place of the file prefix is a help request.  Handle it before
	// the .cmd file is opened so that e.g. "bhps -syntax" does not create "-syntax.cmd".
	if(argc == 2 && argv[1][0] == '-'){
	      if(strcmp("-syntax",argv[1]) == 0) print_error(HPT_PUBLIC_USAGE);
	      else print_error(MAIN_CMC_USAGE);
	}

	// Echo the command line to <prefix>.cmd (skipped when running as pmcBPPS).
	FILE *fp=0;
	if(strcmp(program_name,"pmcBPPS") != 0){
	   fp=open_file(argv[1],".cmd","w");
	   for(arg=0; arg < argc; arg++){
		fprintf(fp,"%s ",argv[arg]);
		if(arg==0) fprintf(fp,"H ");
	   }
	}

	infile=AllocString(argv[1]);
	for(arg = 2; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(MAIN_CMC_USAGE);
	   // Dispatch on the first letter; within a case, longer option names are tried
	   // before shorter ones that share the same leading letters.
	   switch(argv[arg][1]) {
	     case 'A':
		if(sscanf(argv[arg],"-Am=%lf:%lf",&MiscGlobalA0,&MiscGlobalB0)==2){
			if(MiscGlobalA0 <= 0.0 || MiscGlobalB0 <= 0.0) print_error(MAIN_CMC_USAGE);
		} else if(sscanf(argv[arg],"-A=%lf:%lf",&GlobalA0,&GlobalB0)==2){
			if(GlobalA0 <= 0.0 || GlobalB0 <= 0.0) print_error(MAIN_CMC_USAGE);
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'a':
		if(strcmp("-addcsq",argv[arg]) == 0) AddCSQ=TRUE;
		else print_error(MAIN_CMC_USAGE);
		break;
	     case 'c':
		if(sscanf(argv[arg],"-col=%d:%d",&DfltMinNumCol,&DfltMaxNumCol)==2){
			if(DfltMinNumCol < 2 || DfltMinNumCol > DfltMaxNumCol){
				fprintf(stderr,"Default Min(%d)/Max(%d) # columns out of range\n",
					DfltMinNumCol,DfltMaxNumCol);
				print_error(CMC_USAGE_START);
			}
			argv[arg][1] = ' ';	// blank out the option (see case ' ')
			DefaultMaxCol=DfltMaxNumCol; DefaultMinCol=DfltMinNumCol;
		} else if(strcmp("-convergence",argv[arg]) == 0){
			cfp=open_file(infile,".cnv","w");
			ifp=open_file(infile,".itr","w");	// simulated annealing temperature
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'd':
		// -del is only validated here; no setting is changed in this function.
		if(strcmp("-del",argv[arg]) != 0) print_error(MAIN_CMC_USAGE);
		break;
	     case 'f':
		if(sscanf(argv[arg],"-fixed=%d",&x)==1){
			if(x < 1 || x > 100) print_error(MAIN_CMC_USAGE);
			else fixed_cutoff=x;
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'm':
		if(sscanf(argv[arg],"-maxiter=%d",&x)==1){
			if(x < 1) print_error(MAIN_CMC_USAGE);
			else MaximumIter=x;
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'N':
		if(sscanf(argv[arg],"-N=%d",&GlobalN)==1){
			if(GlobalN < 2) print_error(MAIN_CMC_USAGE);
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'n':
		if(strcmp("-noseeds",argv[arg]) == 0) NoSeeds=TRUE;
		else print_error(MAIN_CMC_USAGE);
		break;
	     case 'P':
		if(sscanf(argv[arg],"-Pttrn=%d",&SeedPttrnLen)==1){
			if(SeedPttrnLen < 5) print_error("SeedPttrnLen must be >= 5");
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'p':
		if(sscanf(argv[arg],"-prune=%d",&PruneIter) == 1){
			if(PruneIter < 1) print_error("-prune option input error");
		} else if(sscanf(argv[arg],"-ppb=%u",&ppb_increase) == 1){
			if(ppb_increase < 1 || ppb_increase > 1000000) print_error(MAIN_CMC_USAGE);
		} else if(strcmp("-put",argv[arg]) == 0) PutIntermediateFiles=TRUE;
		else print_error(MAIN_CMC_USAGE);
		break;
	     case 'R':
		if(sscanf(argv[arg],"-Rounds=%d",&NumberRounds)==1){
			if(NumberRounds < 1) print_error(MAIN_CMC_USAGE);
		} else if(sscanf(argv[arg],"-Rim=%lf",&MiscGlobalRi)==1){
			if(MiscGlobalRi <= 0.0 || MiscGlobalRi >= 1.0) print_error(MAIN_CMC_USAGE);
		} else if(sscanf(argv[arg],"-Ri=%lf",&GlobalRi)==1){
			if(GlobalRi <= 0.0 || GlobalRi >= 1.0) print_error(MAIN_CMC_USAGE);
		// seed is unsigned (UInt4), so it is scanned with %u rather than %d.
		} else if(sscanf(argv[arg],"-R=%u",&seed)!=1) print_error(MAIN_CMC_USAGE);
		break;
	     case 'r':
		if(sscanf(argv[arg],"-rho=%lf",&Global_rho)==1){
			if(Global_rho <= 0.0 || Global_rho >= 0.5) print_error(MAIN_CMC_USAGE);
		} else if(strcmp("-rchk",argv[arg]) == 0) ReadCheckpoint(0);
		else if(strcmp("-rtf",argv[arg]) == 0) PrintEachRTF=TRUE;
		else print_error(MAIN_CMC_USAGE);
		break;
	     case 's':
		if(strcmp("-sets",argv[arg]) == 0) SaveSets=TRUE;
		else if(strcmp("-strict",argv[arg]) == 0) StrictIndepend=TRUE;
		else if(strcmp("-show",argv[arg]) == 0) ShowIndels=TRUE;
		else if(strcmp("-syntax",argv[arg]) == 0) print_error(HPT_PUBLIC_USAGE);
		else print_error(MAIN_CMC_USAGE);
		break;
	     case 't':
		if(strcmp("-tree",argv[arg]) == 0){
			// then allow elimination of inappropriate subcategories.
			IsTreeHpt=TRUE;
		} else print_error(MAIN_CMC_USAGE);
		break;
	     case 'v':
		if(strcmp("-verbose",argv[arg]) == 0) efp=stderr;
		else print_error(MAIN_CMC_USAGE);
		break;
	     case 'w':
		// NOTE(review): unlike the other cases, an unrecognized -w* option is
		// silently ignored here rather than rejected - confirm this is intended.
		if(strcmp("-wchk",argv[arg]) == 0) WriteCheckPoint=TRUE;
		break;
	     case ' ': break;	// ignore these...
	     default: print_error(MAIN_CMC_USAGE);
		break;
	   }
	}

	// Finish the .cmd line: record the time-derived seed when one had to be chosen,
	// otherwise just terminate the line.  The file is closed in either case.
	if(seed == 7061950){
		seed = (UInt4) time(NULL);
		if(fp) fprintf(fp,"-R=%u\n",seed);
	} else if(fp) fprintf(fp,"\n");
	if(fp) fclose(fp);

	if(seed != 0) sRandom(seed);
#if 1	// afn: 5/10/2022
	RandomSeed=seed;
#endif
	if(strcmp(program_name,"pmcBPPS")==0){ IsTreeHpt=TRUE; IsTreePMC=TRUE; }
	// fprintf(stderr,"random seed = %u\n",seed);
	// fprintf(stderr,"Program Name = \"%s\"\n",program_name); exit(1);
}


