/* mapgaps program... */
%{
#include "cdd2cma.h"
#include "blosum62.h"
/**************************** Global Variables ******************************/
/* File-scope state shared between cdd2cma(), the grammar actions and yyerror(). */
int	yyparse();
int     yylex();
UInt4	PARSELINE=1;		// line number reported by yyerror(); presumably advanced by the lexer
Int4	DEPTH_FROM_ROOT=0;	// set by -depth=<int>; passed to PrintSubTree()/PrintTree()
char	*SubTreeName=0;		// set by -S=<str>; when non-null the subtree with this name is printed
BooLean	PrintAsBinary=FALSE;	// set by -b; only referenced by commented-out code in this file
cma_typ *IN_CMA=0,TemplateCMA=0;	// profiles read from <infile>.mma and template read from <infile>.tpl
Int4	Number;			// number of alignments returned by MultiReadCMSA(); IN_CMA is indexed 1..Number
a_type	AB=0;			// alphabet created with MkAlpha(AMINO_ACIDS,PROT_BLOSUM62)
char	OutName[200];		// output name handed to the tree printers (outfile, or "<infile>_Out")

extern FILE *yyin;		// lexer input; cdd2cma() points it at <infile>.nwt
extern FILE *yyout;		// lexer output; cdd2cma() points it at stderr

BooLean	VERBOSE=TRUE;		// when TRUE, "Input accepted." is written to stderr after a successful parse
FILE *outfp=stdout;		// destination of the printed tree; replaced by the fp argument or by -O=<str>
Int4 NUM_RANDOM=20000;		// copied from cdd2cma()'s NumRandom argument; passed to PrintTree()

extern int yyerror(const char *s);

/********************* Declarations of Action Symbols ***********************/
%}
/* Grammar entry point: a whole Newick tree followed by ';' and end of file. */
%start start
%token <pcval>	STRING			/*  name of node (follows a NODE or ROOT id) */
%token <ival>	NODE			/*  integer id of a leaf (see the 'child' rule) */
%token <ival>	FIRSTNODE			/*  integer; not referenced by any rule below */
%token <ival>	ROOT			/*  integer id of a subtree root; the rules contain no ')' literal, so the lexer presumably folds the closing parenthesis into this token */
%token <cval>	JUNK			/*  single unrecognized character reported by the lex program */
%token <rval>   DISTANCE                    /*  float value; accepted by the rules but not stored in the tree */
%token <ival>	LEVEL_B			/*  integer; not referenced by any rule below */
%token <ival>	LEVEL_M			/*  integer; not referenced by any rule below */
%token <ival>	LEVEL_E			/*  integer; not referenced by any rule below */
%token <ival>	FALSE_NODE		/*  not referenced by any rule below */
%token <dummy>	ROOT_ZERO		/*  not referenced by any rule below */
%token <dummy>	IGNORE			/*  token skipped between ';' and END_OF_FILE (see 'ignore') */

%token <dummy>	NEWLINE			/*  not referenced by any rule below */
%token <dummy>	END_OF_FILE		/* requires the parser to consume the entire file before accepting */
/* Semantic value types; each %token/%type above and below selects one member. */
%union	{
		int	dummy;		/* no (null) attributes */
		char 	*pcval;		/* pointer to character string */
		char 	cval;		/* character */
		double	rval;		/* float value */
		Int4	ival;		/* integer type attribute */
		tbn_typ *tbn;		// binary tree node
		char	Sval[200];	// string (used to accumulate JUNK characters)
		int	Pval[200];	// path for file nodes to root (no token or rule in this file uses it)
	}
%type	<tbn>	root tree child children
%type	<Sval>	junk fatal_error 
%type	<dummy>	left_paren end_file comma ignore
// e.g. input string: (((7,((15,16):11,12,(17,18):13,14):8):3,4,(9,10):5,6):1,2):0;
// each tree at least two children required
%%
/*
 * start: a complete input is one tree, a ';' and the end-of-file marker.
 * On success the tree is written to outfp -- through PrintSubTree() when
 * SubTreeName is set, otherwise through PrintTree() -- and the parse is
 * accepted.  The second alternative covers input made of JUNK characters:
 * the collected text is reported through yyerror() and the parse fails.
 */
start		: tree ';' end_file {
			Int4 N=0;	// receives the printers' return value; only used by the disabled debug output
			// fprintf(stderr,";\n\n");
			// if(PrintAsBinary) $1->PrintBinary(outfp); else
			// N=$1->Print(stderr,0);
			// fprintf(stderr,"; %d nodes\n",N);
			if(SubTreeName) N=$1->PrintSubTree(outfp,SubTreeName,
				DEPTH_FROM_ROOT,TemplateCMA,IN_CMA,OutName);
			else N=$1->PrintTree(outfp,DEPTH_FROM_ROOT,NUM_RANDOM,
				TemplateCMA,IN_CMA,OutName);
			// fprintf(stderr,";\n%d nodes\n",N);
			// Note: only the fprintf is conditional; YYACCEPT always runs.
			if(VERBOSE) fprintf(stderr,"Input accepted.\n"); YYACCEPT; 
		}
		| fatal_error end_file { yyerror($1); YYERROR; }
		;

/*
 * tree: '(' <contents> root.  The contents (a sibling list, a single nested
 * tree, or a single leaf) are attached to the root node with AddLeftChild()
 * and the root node becomes the value of the rule.
 * NOTE(review): the 2nd and 3rd alternatives accept a tree with exactly one
 * child, although the comment in the declarations says at least two children
 * are required -- confirm which is intended.
 */
tree		: left_paren children root  { $3->AddLeftChild($2); $$ = $3; } 
		| left_paren tree root  { $3->AddLeftChild($2); $$ = $3; } 
		| left_paren child root  { $3->AddLeftChild($2); $$ = $3; } 
		;

/*
 * children: a comma-separated list of two or more leaves and/or subtrees.
 * The remainder of the list ($3) is attached to the first element ($1) with
 * AddRightChild(), and the first element is the value of the rule, so
 * siblings form a chain linked through the "right child" slot.
 */
children	: child comma children 	{ $1->AddRightChild($3); $$=$1;  }
		| tree comma children { $1->AddRightChild($3); $$=$1;  }
		| child comma child { $1->AddRightChild($3); $$=$1;  }
		| tree comma child { $1->AddRightChild($3); $$=$1;  }
		| child comma tree { $1->AddRightChild($3); $$=$1;  }
		| tree comma tree { $1->AddRightChild($3); $$=$1;  }
		;

/*
 * child: a leaf -- an integer id optionally followed by a name and/or a
 * distance.  A new tbn_typ is allocated from the id and the name (0 when no
 * name is given).  The DISTANCE value is parsed but not stored; it only
 * appears in the disabled (if(0)) debug output.
 */
child		: NODE { $$ = new tbn_typ($1,0); if(0) fprintf(stderr,"%d",$1); }
		| NODE DISTANCE { $$ = new tbn_typ($1,0); if(0) fprintf(stderr,"%d:%.2f",$1,$2); }
		| NODE STRING { $$ = new tbn_typ($1,$2); if(0) fprintf(stderr,"%d_%s",$1,$2); }
		| NODE STRING DISTANCE { $$ = new tbn_typ($1,$2); if(0) fprintf(stderr,"%d_%s:%.2f",$1,$2,$3); }
		;

/* Punctuation wrappers; their actions contain only disabled (if(0)) debug output. */
comma		: ',' { if(0) fprintf(stderr,","); } ;
left_paren	: '(' { if(0) fprintf(stderr,"("); } ;

/*
 * root: the node that closes a parenthesized group -- an integer id
 * optionally followed by a name and/or a distance.  A new tbn_typ is
 * allocated from the id and the name (0 when no name is given); the DISTANCE
 * value is parsed but not stored.
 */
root		: ROOT { $$ = new tbn_typ($1,0); if(0) fprintf(stderr,")%d",$1); }
		| ROOT DISTANCE { $$ = new tbn_typ($1,0); if(0) fprintf(stderr,")%d:%.2f",$1,$2); }
		| ROOT STRING { $$ = new tbn_typ($1,$2); if(0) fprintf(stderr,")%d_%s",$1,$2); }
		| ROOT STRING DISTANCE { $$ = new tbn_typ($1,$2); if(0) fprintf(stderr,")%d_%s:%.2f",$1,$2,$3); }
		;

/* fatal_error: the accumulated run of JUNK characters, copied into this rule's Sval buffer. */
fatal_error	: junk { strcpy($$,$1); } 
		;

/* end_file: the END_OF_FILE token, optionally preceded by one or more IGNORE tokens. */
end_file	: END_OF_FILE 
		| ignore END_OF_FILE 
		;

/* ignore: one or more IGNORE tokens (left-recursive list, no semantic action). */
ignore		: ignore IGNORE  
		| IGNORE
		;

/*
 * junk: accumulates consecutive JUNK characters into a NUL-terminated string
 * held in the Sval member.  Once more than 90 characters have been collected
 * yyerror() is called with the text gathered so far.  Because yyerror() is
 * written to return, the append is bounded by the size of the Sval buffer so
 * that a long run of junk can never write past its end.
 */
junk		: junk JUNK { 
			size_t len;
			strcpy($$,$1);
			len=strlen($$);
			/* room for the new character plus the terminating NUL? */
			if(len + 1 < sizeof($$)){ $$[len]=$2; $$[len+1]=0; }
			if( strlen($$) > 90) yyerror($$); 
			}
		| JUNK { $$[0]=$1; $$[1]=0; $$[2]=0; }
		;

%%

/*
 * yyerror: error hook used by the parser and by the grammar actions.
 * Writes the message together with the current PARSELINE to stderr, then
 * hands "Input rejected.\n" to print_error().  Returns 1 if print_error()
 * returns.
 */
int	yyerror(const char *s)
{
	const char	*message = s;

	fprintf(stderr,
		"\nfatal error (line %d): '%s'.\n",
		PARSELINE, message);
	print_error("Input rejected.\n");

	return 1;
}

/* Close fptr with fclose(), except when it is stdout or stderr, which are left open. */
void    Close(FILE *fptr)
{
	if(fptr == stderr || fptr == stdout) return;
	fclose(fptr);
}

// e.g. input string: (((7,((15,16):11,12,(17,18):13,14):8):3,4,(9,10):5,6):1,2):0;

/*
 * Usage text handed to print_error() by cdd2cma() when the argument list is
 * too short or an option is malformed.  The literal is continued with
 * backslash-newline, so the leading whitespace of every continuation line is
 * part of the emitted string.
 * NOTE(review): the text names the program "cdd2chn" while the entry point in
 * this file is cdd2cma() -- confirm which name is intended.
 */
#define USAGE_START "Usage: cdd2chn infile [options] \n\
  input: concatenated *cma files in '<infile>.mma'.\n\
         template alignment *cma files in '<infile>.tpl'.\n\
         CDD hierarchy as a Newick tree in '<infile>.nwt'.\n\
  output: concatenated *.cma files annotated consistent with template.\n\
          This output file can be used as input to mkmaps.\n\
       -b          print tree in binary format (default: Newick format)\n\
       -S=<str>    print subtree from the node with name=<str>\n\
       -O=<str>    output filename=<str>\n\
       -depth=<int> print tree as a hpt, but only to depth <int> from root\n\
       -x          dummy\n\n"

#if 0
//**************************************************************************
   Strategy for a tree based search:
//--------------------------------------------------------------------------
0. Create directories for profiles:
  0a. TreeStructure for Directories mirrors tree structure for Template.
  0b. change ~/sbin/MkGAPMAPS to ~/sbin/MkMAPGAPS to convert this into maps format (define?).
  0c. 

1. Read in tree (if file provided as input).
 1a. Define subtrees that need to be converted at each level.
 1b. Every time a parent node is reached, convert the child alignments to match the parent node
       using the SubTreeTemplate.
 1c. Also convert the SubTreeTemplate cma file and add to template...How?

2. Create mgs object and pass in tree for search...

3. For tree, call ConvertViaTemplate( ) recursively...

For each lowest level subtree, call:

conversionViaTemplateCMSA3(cma_typ TemplateCMA, cma_typ *IN_CMA);

Where TemplateCMA is SubtreeTemplateCMA.

Also need to convert SubtreeTemplateCMA to 

//**************************************************************************
#endif

/*
 * cdd2cma -- read a set of profile alignments, their template alignment and
 * a Newick hierarchy, then parse the hierarchy and print the resulting tree.
 *
 *   argc, argv  argv[1] is the base name <infile>; the files <infile>.mma,
 *               <infile>.tpl and <infile>.nwt are opened from it.  argv[2..]
 *               are options (-x, -b, -O=<str>, -S=<str>, -depth=<int>); any
 *               other argument is rejected with the usage message.
 *   NumRandom   value stored in NUM_RANDOM for the tree printer; 0 selects
 *               the default of 20000.
 *   outfile     output name copied into OutName; when null, "<infile>_Out"
 *               is used instead.
 *   fp          stream that receives the printed tree; when null the current
 *               outfp (stdout by default) is used.  -O=<str> overrides it.
 *
 * If <infile>.nwt does not exist, a flat tree listing every profile under a
 * single root named "Root" is written to that file before parsing.
 *
 * Returns 0.  Fatal conditions are reported through print_error().
 */
int	cdd2cma(int argc,char *argv[], Int4 NumRandom,char *outfile, FILE *fp)
{ 
	Int4	i,arg;
	FILE	*tmp_fp;
	char	str[500];

	PrintAsBinary=FALSE;

	if(NumRandom==0) NumRandom=20000;
	NUM_RANDOM=NumRandom;
	if(argc < 2) print_error(USAGE_START);
	if(fp != 0) outfp = fp;
	for(arg = 2; arg < argc; arg++){
	  if(argv[arg][0] == '-'){
	   switch(argv[arg][1]) {
	     case 'x': break;
	     case 'b': PrintAsBinary=TRUE; break;
	     case 'O':
		// <ctype.h> functions require a value representable as unsigned char
		if(argv[arg][2] == '=' && isprint((unsigned char)argv[arg][3])){
			outfp=open_file(argv[arg] + 3,"","w");
		} else print_error(USAGE_START);
		break;
	     case 'S': 
		if(argv[arg][2] == '=' && isprint((unsigned char)argv[arg][3])){
			SubTreeName=argv[arg] + 3;
		} else print_error(USAGE_START);
		break;
	     case 'd': 
		// A 'd' option that does not match "-depth=<int>" is silently ignored.
		if(sscanf(argv[arg],"-depth=%d",&DEPTH_FROM_ROOT)==1){
			if(DEPTH_FROM_ROOT < 1) print_error(USAGE_START);
		} break;
	     default : print_error(USAGE_START);
	   }
	  }else print_error(USAGE_START);
	}
	AB=MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);

	// Read the profile alignments and the template alignment.
	tmp_fp=open_file(argv[1],".mma","r");
	IN_CMA=MultiReadCMSA(tmp_fp,&Number,AB); fclose(tmp_fp);
	tmp_fp=open_file(argv[1],".tpl","r");
	TemplateCMA=ReadCMSA(tmp_fp,AB); fclose(tmp_fp);

	// Create a default (flat) Newick file if none exists yet.
	snprintf(str,sizeof(str),"%s.nwt",argv[1]);
	FILE *tfp=fopen(str,"r");
	if(tfp == NULL){
	   tfp=open_file(argv[1],".nwt","w");
	   fprintf(tfp,"(");
	   for(i=1; i <=Number; i++){
		// Every leaf is written as "<index>_<alignment name>"; the name is
		// a string and therefore must be printed with %s.
		if(i == 1) fprintf(tfp,"%d_%s",i,NameCMSA(IN_CMA[i]));
		else fprintf(tfp,",%d_%s",i,NameCMSA(IN_CMA[i]));
		if(i == Number) fprintf(tfp,")0_Root;\n");
	   } fclose(tfp);
	} else fclose(tfp);

	// The template must hold one sequence per profile plus one more.
	if(NumSeqsCMSA(TemplateCMA) != Number + 1){
	    fprintf(stderr,"TemplateCMA = %d seqs != %d profiles + 1\n",
		NumSeqsCMSA(TemplateCMA),Number);
	    print_error("*.tpl and *.mma files are inconsistent");
	}
	yyin = open_file(argv[1],".nwt","r");

	// Bounded copies: OutName is a fixed 200-byte buffer.
	if(outfile != 0) snprintf(OutName,sizeof(OutName),"%s",outfile);
	else snprintf(OutName,sizeof(OutName),"%s_Out",argv[1]);

	fprintf(stderr,"\n=========== Parsing Newick file %s ============\n",argv[1]);
	yyout=stderr; yyparse(); fclose(yyin);

	// Close only a stream opened here via -O; a caller-supplied fp stays open.
	if(outfp != stdout && outfp != fp) fclose(outfp);
	// Do not leave the global pointing at a closed or caller-owned stream.
	outfp=stdout;

	if(TemplateCMA) TotalNilCMSA(TemplateCMA);
	for(i=1; i<= Number; i++) TotalNilCMSA(IN_CMA[i]);
	free(IN_CMA);
	NilAlpha(AB);
	return 0;
}

