/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "tbn_typ.h"

tbn_typ::tbn_typ(Int4 id, char *Name )
// Create a detached node (no parent, no children) with identifier 'id'.
// When Name is non-null the node stores the result of NewString(Name)
// (presumably a heap copy -- it is released with free() in the destructor);
// otherwise the node is unnamed (name == 0).
{
	ID = id;
	name = (Name != 0) ? NewString(Name) : 0;
	parent = 0;
	leftchild = 0;
	rightchild = 0;
}

tbn_typ::~tbn_typ()
// Destroy this node: recursively deletes the leftchild chain and releases the name.
// NOTE(review): rightchild is NOT deleted here, so right-hand (sibling) nodes
// survive the destruction of this node -- confirm that they are owned and
// released elsewhere, otherwise they leak.
{
	delete leftchild;	// deleting a null pointer is a no-op.
	free(name);		// free(0) is a no-op for unnamed nodes.
}

Int4	tbn_typ::MaxID()
// Return the largest ID found in the subtree rooted at this node (this node included).
// Each child that exists is visited; a node with only one child no longer
// has that child's subtree silently skipped.
{
	Int4	best = ID;
	if(leftchild != 0){
		Int4 x = leftchild->MaxID();
		if(x > best) best = x;
	}
	if(rightchild != 0){
		Int4 y = rightchild->MaxID();
		if(y > best) best = y;
	}
	return best;
}

BooLean	tbn_typ::IsOkay()
// A node is okay when it has either both children or neither;
// exactly one child yields FALSE.
{
	const bool hasLeft = (leftchild != 0);
	const bool hasRight = (rightchild != 0);
	if(hasLeft == hasRight) return TRUE;
	return FALSE;
}

char	**tbn_typ::GetHPT(Int4 &N)
// Allocate a blank hyperpartition table for the tree rooted at this node
// (this node is assumed to be the root).
// On return N holds CountNodes(0); the table has N+3 row slots of which rows 0..N
// are allocated, each with N+3 chars.  The caller owns the returned table.
{
	char **table;
	const Int4 numNodes = CountNodes(0);
	N = numNodes;
	NEWP(table,numNodes+3,char);
	for(Int4 row=0; row <= numNodes; row++){
		NEW(table[row],numNodes+3,char);
	}
	return table;
}

#include "swaln.h"

Int4	tbn_typ::PrintTree(FILE *fp, Int4 depth, Int4 NumRandom,cma_typ tpl_cma,cma_typ *in_cma,char *outname)
/************************************************************
Write the hyperpartition for the tree rooted at this node to <outname>.hpt, make the
input alignments compatible with the template, write <outname>.cma and <outname>.tpl
(only when those files do not already exist) and finally call setup_gapmaps().

In the hyperpartition, each column corresponds to one node in the tree.

The '+' rows in that column correspond to that node's subtree,
which serves as the foreground.

The '-' rows in that column correspond to the rest of the parent node's subtree,
which serves as the background

The remaining (non-participating) nodes in that column are labeled with an 'o'.

(For the root node a set of random sequences serves as the background.) 

Parameters:
  fp        - in the active code only a single newline is written to this stream.
  depth     - passed to OrderAsDFS() as the maximum depth; <= 0 means no limit.
              With a positive depth, OrderLimitedDFS() deletes the subtrees below the limit.
  NumRandom - printed on the final "Reject" line of the hpt file.
  tpl_cma   - template alignment; must contain exactly one sequence per tree node.
  in_cma    - array of alignments indexed from 1 and terminated by a null entry;
              must contain (number of nodes - 1) entries.  Entries may be modified
              or replaced in place.
  outname   - base name of the output files.

Side effects: node IDs are renumbered in DFS order; tpl_cma and in_cma[] are
renamed and assigned levels.

Returns the number of nodes in the (possibly depth-limited) tree.
*************************************************************/
{
	// 1. Count the tree nodes and the input alignments.
	// NOTE(review): the original outline called for a check that each node has a
	// unique name; no such check is performed in this routine.
	Int4 N=CountNodes(0),i,j,rtnN;
	Int4 Number=0;
	for(i=1; in_cma[i]; i++) Number++;

	// 2. Renumber nodes phylogenetically (using dfs) so that the hpt is easy to interpret.
	tbn_typ **nodes = OrderAsDFS(rtnN,depth);
	assert(depth > 0 || rtnN == N);		// without a depth limit every node must be listed.
	N=rtnN;
	for(i=1; i <= N; i++) nodes[i]->ID=i; 

	// 3. Allocate a matrix for the hyperpartition: hpt[column = node][row = node].
	char **hpt;
	NEWP(hpt,N+3,char);
	for(i=0; i <= N; i++) NEW(hpt[i],N+3,char);

	// 4. For each node (column in hpt) starting from the root...
	for(i=1; i <= N; i++){
	   if(nodes[i]->IsRoot()){
		for(j=1; j <= N; j++) hpt[i][j]='+'; 
	   } else {
		// 4.1. Set all nodes on the ith list to 'o' == non-participating subgroups.
		for(j=1; j <= N; j++) hpt[i][j] = 'o';
		// 4.2. Reset all nodes in the parent's subtree to '-' == background nodes.
		//      (Caution: convert from binary to n-ary.)
		tbn_typ *pnode=nodes[i]->FindNaryParent( );
		if(pnode != 0){  		// don't do this for the root which has no parent;
		   assert(!pnode->IsLeafNode( ));	// can't be a leaf if is a parent node...
		   Int4 *list=pnode->GetSubTreeIDs();
		   for(j=1; j <= N; j++){
			if(list[j] == 0) break;		// a zero entry marks the end of the list.
			assert(list[j] > 0 && list[j] <= N);
			hpt[i][list[j]] = '-';
		   }
		   free(list);
		}

		// 4.3. Reset all nodes in the current node's subtree to '+' == foreground nodes.
		assert(nodes[i]->ID > 0 && nodes[i]->ID <= N);
		if(nodes[i]->IsLeafNode( )){ hpt[i][nodes[i]->ID]='+'; }
		else {
		   Int4 *list=nodes[i]->GetSubTreeIDs();
		   for(j=1; j <= N; j++){
			if(list[j] == 0) break;
			assert(list[j] > 0 && list[j] <= N);
			hpt[i][list[j]] = '+';
		   }
		   free(list);
		} 
	   }
	}

	// 5. Print the hpt (one text line per node; line i shows row i of every column).
	FILE *ofp=open_file(outname,".hpt","w");
	fprintf(ofp,"\n");
	fprintf(ofp,"HyperParTition:\n");
	for(i=1; i <= N; i++) fprintf(ofp,"!");
	fprintf(ofp,"\n");
	for(i=1; i <= N; i++){
	      for(j=1; j <= N; j++){ 
		 fprintf(ofp,"%c",hpt[j][i]);
	      } 
	      // leaf nodes are terminated with '!', all other nodes with '?'.
	      if(nodes[i]->IsLeafNode()) fprintf(ofp," %d.%s!\n",i,nodes[i]->name);
	      else fprintf(ofp," %d.%s?\n",i,nodes[i]->name);
	}
	// final line: the reject set is '-' in the first column and 'o' everywhere else.
	fprintf(ofp,"-");
	for(j=2; j <= N; j++) fprintf(ofp,"o");
	fprintf(ofp," %d.Reject=%d.\n\n",N+1,NumRandom);
	fclose(ofp);
#if 0
	for(i=1; i <= N; i++){
	    if(!nodes[i]->IsLeafNode()){ 
	      fprintf(fp,"mkdir ");
	      for(j=1; j < i; j++){ 
	        if(hpt[j][i] == '+') fprintf(fp,"%s/",nodes[j]->name);
	      } fprintf(fp,"%s\n",nodes[i]->name);
	    } 
	    fprintf(fp,"cp $CDD_DIR/%s.cma ",nodes[i]->name);
	    for(j=1; j < i; j++){ 
	        if(hpt[j][i] == '+') fprintf(fp,"%s/",nodes[j]->name);
	    }
	    if(!nodes[i]->IsLeafNode()) fprintf(fp,"%s/",nodes[i]->name);
	    fprintf(fp,"%s.cma\n",nodes[i]->name);
	}
	fprintf(fp,"\n");
#endif
	// NOTE(review): 'str' is not referenced below; the file names are built in 'Str',
	// which is not declared in this function (presumably a member or global declared
	// elsewhere) -- confirm, and confirm it is large enough for outname plus extension.
	char	str[500];
	// tweakcma xxx -level=%d	// set level in tree to %d
	// tweakcma -n=<string>   - rename the input cmafile to <string> and print it out
	// if named the same...
	if(NumSeqsCMSA(tpl_cma) != N){
		fprintf(stderr,"NumSeqsCMSA(tpl_cma) =%d; #Nodes = %d\n",NumSeqsCMSA(tpl_cma),N);
		print_error("tbn_typ::PrintTree() error: # nodes in tree != # seqs in template alignment");
		assert(NumSeqsCMSA(tpl_cma) == N);
	} else if(Number != N-1){
		fprintf(stderr,"Number cma files =%d; #nodes N = %d\n",Number,N);
		print_error("tbn_typ::PrintTree() error: # nodes in hpt should = # cma files in *.mma minus 1.");
		assert(Number == N-1);
	}
	// Add a routine here to set the order of all files in a consistent manner.
	// Dummy();
	// The following (also in mkmaps=setup_gapmaps.cc) makes the two cma files compatible.
	// REDUNDANT: NEED TO CREATE A GENERAL ROUTINE.
	for(Int4 s=1; s <= Number; s++){
          fprintf(stderr,"================================ %d: %s.\n", s,NameCMSA(in_cma[s]));
          e_type tplSq=TrueSeqCMSA(s+1,tpl_cma);	// template sequence s+1 pairs with in_cma[s].
          if(LenSeq(tplSq) > LengthCMSA(1,in_cma[s])) print_error("input error");
          if(LenSeq(tplSq) != LengthCMSA(1,in_cma[s])){ 
	        // then need to change input cma file.
                Int4 Start,col;
                cma_typ cmaX=in_cma[s];
                e_type csqSq=TrueSeqCMSA(1,cmaX);
                char rtn=IsSubSeq(tplSq,csqSq,&Start,FALSE);
#if 0	// DEBUG:
		{
			a_type AB = AlphabetCMSA(cmaX);
			PutSeq(stderr,tplSq,AB);
			PutSeq(stderr,csqSq,AB); 	
			AlnSeqSW(stderr,11,1,tplSq,csqSq,AB);
			exit(1);
		}
#endif
                // rtn = 1 if tplSq is a subseq of csqSq.
                if(rtn != 1){
			a_type AB = AlphabetCMSA(cmaX);
			PutSeq(stderr,csqSq,AB); PutSeq(stderr,tplSq,AB);
			print_error("Template and cma files are incompatible");
		}
                if(Start > 0){  // remove N-terminal columns.
                        for(col=1; col<=Start; col++){
                           if(LengthCMSA(1,cmaX) <= 3) print_error("input error");
                           RmColumnMSA(1,1,cmaX); // block 1, first column removed.
                        }
                }
                if(LenSeq(tplSq) < LengthCMSA(1,cmaX)) { // remove C-terminal columns.
                        Int4 lenrm = LengthCMSA(1,cmaX) - LenSeq(tplSq);
                        for(col=1; col<=lenrm; col++){
                           Int4 lemon = LengthCMSA(1,cmaX);
                           if(lemon <= 3) print_error("input error");
                           RmColumnMSA(1, lemon, cmaX);	// block 1, last column removed.
                        }
                }
#if 0
		print_error("RmOverHangsCMSA() not working; needs to be fixed");
		in_cma[s]= RmOverHangsCMSA(cmaX); TotalNilCMSA(cmaX);
#else	// this may be messing up the alignments.
#if 0	// DEBUG...
		fprintf(stderr,"RmOverHangsCMSA() issue encountered; Ignored...\n");
static int calls=0; calls++;
char	strng[30]; sprintf(strng,"junk%d.cma",calls);
		WriteCMSA(strng,cmaX);
#endif
		// replace the input alignment by the trimmed version and release the old one.
		in_cma[s]= RemoveOverhangsCMSA(cmaX,TRUE); TotalNilCMSA(cmaX);
#endif
                // in_cma[s] = MinimizeFirstSeqCMSA(cmaX); TotalNilCMSA(cmaX);
		// PutCMSA(stdout,in_cma[s]);
          }
	}
	// The following creates the input files for mkmaps...
	
	SetLevelCMSA(0,tpl_cma); ReNameCMSA(nodes[1]->name, tpl_cma);
	for(i=2; i <= N; i++){
	    // the level of node i == the number of earlier nodes whose subtree contains it.
	    Int4 cma_level=0;
	    for(j=1; j < i; j++){	// skips i == j level... 
	        if(hpt[j][i] == '+') cma_level++;
	    }
	    SetLevelCMSA(cma_level,in_cma[i-1]); 
	    ReNameCMSA(nodes[i]->name, in_cma[i-1]);
	    fprintf(stderr,"%d. %s.cma\n",i,nodes[i]->name);
	}
	fprintf(fp,"\n");

	// write <outname>.cma unless a readable file of that name already exists.
	sprintf(Str,"%s.cma",outname); ofp=fopen(Str,"r");
	if(ofp) fclose(ofp);
	else {
	   ofp=open_file(outname,".cma","w");
	   for(Int4 s=1; s <= Number; s++) PutCMSA(ofp,in_cma[s]);
	   fclose(ofp);
	}

	// write <outname>.tpl unless a readable file of that name already exists.
	sprintf(Str,"%s.tpl",outname); ofp=fopen(Str,"r");
	if(ofp) fclose(ofp);
	else {
	   ofp=open_file(outname,".tpl","w");
	   PutCMSA(ofp,tpl_cma); fclose(ofp);
	}
	
#if 1	// call setup_gapmaps();
	// NOTE(review): the AllocString() results are never released here; it is not
	// visible whether setup_gapmaps() keeps the pointers -- confirm before freeing.
	int	Argc=0;
	char    *Argv[1000];
	// Argv[0]=AllocString("mkmaps"); Argc++; // name of program
	Argv[0]=AllocString("mapgaps"); Argc++; // name of program
	Argv[1]=AllocString(outname); Argc++; // name of output file 
	setup_gapmaps(Argc,Argv);
#endif

	// 6. Free the hpt and the DFS node list (the list only; the nodes stay in the tree).
	for(i=0; i <= N; i++) free(hpt[i]);
	free(hpt);
	free(nodes);
	return N;
}

Int4    *tbn_typ::GetSubTreeIDs()
// Return a newly allocated list of node IDs: slot 1 holds this node's ID and, for a
// non-leaf node, the following slots hold the IDs collected by leftchild->SubTreeIDs().
// The list has CountNodes(0)+3 slots; callers in this file stop at the first zero
// entry (presumably NEW zero-fills the list -- confirm) and free() the list.
{
	const Int4 capacity = CountNodes(0);
	Int4 *ids; NEW(ids, capacity+3,Int4);
	Int4 last = 1;
	ids[last] = ID;
	if(!IsLeafNode( )) leftchild->SubTreeIDs(last,capacity,ids);
	return ids;
}

Int4    tbn_typ::SubTreeIDs(Int4 &Index, Int4 N,Int4 *list)
// Append this node's ID to list (at the next slot after Index), then recurse into
// the left child and after that the right child.  Index is advanced in place and
// must never exceed N.  Returns the updated Index.
{
	++Index;
	if(Index > N){ fprintf(stderr,"Index = %d\n",Index); }
	assert(Index <= N);
	list[Index] = ID;
	tbn_typ *kids[2] = { leftchild, rightchild };
	for(int k=0; k < 2; k++){
		if(kids[k] != 0) kids[k]->SubTreeIDs(Index,N,list);
	}
	return Index;
}

tbn_typ *tbn_typ::FindNaryParent( )
// Find the parent node treating the tree as an N-ary tree.
// To find the N-ary parent, follow parent links until a node is reached that is the
// leftchild of its parent; the N-ary parent is that leftchild's parent.
// (A node that is its parent's rightchild is an N-ary sibling of that parent.)
// Returns 0 for the root, or when no leftchild link is found on the way up.
{
	tbn_typ *node = this;
	while(node->parent != 0){
		tbn_typ *up = node->parent;
		if(up->leftchild == node) return up;	// true parent...
		if(up->rightchild != node){		// inconsistent parent/child links.
			assert(!"this should not happen");
			return 0;	// never fall off the end when asserts are compiled out.
		}
		node = up;	// binary parent is an N-ary sibling; keep climbing.
	}
	return 0;		// reached the root.
}


/************************************************************
3. For each node (column in hpt) starting from the root:
   3.0. Allocate a list of length == # nodes in tree.
   3.3. Reset all nodes in the current node's subtree to '+'.    // foreground nodes.
   
4. Print out the matrix, labeling each row with the nodes number and name.
*************************************************************/

tbn_typ	**tbn_typ::OrderAsDFS(Int4 &rtnN, Int4 max_depth)
// Build a list of the nodes in depth-first order, starting with this node in slot 1.
// max_depth <= 0 lists every node (OrderDFS); a positive max_depth uses
// OrderLimitedDFS, which also deletes the subtrees below the limit.
// rtnN receives the number of nodes placed on the list.  The caller owns the list
// (CountNodes(0)+3 slots), not the nodes it points to.
{
	const Int4 total = CountNodes(0);
	tbn_typ **order;
	NEWP(order,total+3,tbn_typ);

	Int4 last = 1;
	order[last] = this;
	if(max_depth > 0) OrderLimitedDFS(last, total, order,0,max_depth);
	else OrderDFS(last, total, order);
	rtnN = last;
	return order;
}

Int4    tbn_typ::OrderLimitedDFS(Int4 &Index, Int4 N, tbn_typ **nodes,
		Int4 depth, Int4 max_depth)
// Depth-limited variant of OrderDFS().  Following a leftchild link increases the
// depth by one; following a rightchild link (a sibling in the N-ary tree) does not.
// When this node is already at max_depth its leftchild is deleted and the link
// cleared.  Index is advanced in place and returned; it must never exceed N.
{
	if(leftchild != 0){
		if(depth < max_depth){		// then look at child nodes
			++Index;
			if(Index > N){ fprintf(stderr,"Index = %d\n",Index); }
			assert(Index <= N);
			nodes[Index] = leftchild;
			leftchild->OrderLimitedDFS(Index,N,nodes,depth+1,max_depth);
		} else {			// too deep: delete subtree...
			delete leftchild;
			leftchild = 0;
		}
	}
	if(rightchild == 0) return Index;
	// these are sibling nodes in N-ary tree.
	++Index;
	if(Index > N){ fprintf(stderr,"Index = %d\n",Index); }
	assert(Index <= N);
	nodes[Index] = rightchild;
	rightchild->OrderLimitedDFS(Index,N,nodes,depth,max_depth);
	return Index;
}

Int4    tbn_typ::OrderDFS(Int4 &Index, Int4 N, tbn_typ **nodes)
// Append the descendants of this node to nodes[] in depth-first order: the left
// child and everything below it first, then the right child and everything below it.
// The node itself is expected to be on the list already.  Index is advanced in
// place and returned; it must never exceed N.
{
	if(leftchild != 0){
		++Index;
		if(Index > N){ fprintf(stderr,"Index = %d\n",Index); }
		assert(Index <= N);
		nodes[Index] = leftchild;
		leftchild->OrderDFS(Index,N,nodes);
	}
	if(rightchild == 0) return Index;
	++Index;
	if(Index > N){ fprintf(stderr,"Index = %d\n",Index); }
	assert(Index <= N);
	nodes[Index] = rightchild;
	rightchild->OrderDFS(Index,N,nodes);
	return Index;
}

Int4	tbn_typ::Print(FILE *fp,Int4 depth)
// Print the tree as an N-ary tree and return the number of nodes printed.
//
// The N-ary tree is stored as a binary tree in the First-Child/Next-Sibling
// encoding (also known as a Doubly-Chained Tree or Filial-Heir chain):
//   - the left link of a node leads to the node's first child;
//   - the right link of a node leads to the node's next sibling, i.e. the next
//     node in order among the children of the same parent.
// In other words, the children of a node form a linked list chained through their
// right links, and the node only holds the head of that list in its left link.
//
// Output format: a node with children is written as "(<children>):<ID>", a node
// without children as "<ID>"; "_<name>" is appended when the node is named, and
// siblings are separated by commas.
{
	Int4	count=1;			// this node.

	if(leftchild != 0){			// internal node: children first, in parentheses.
		fprintf(fp,"(");
		count += leftchild->Print(fp,depth+1);
		fprintf(fp,"):%d",ID);
	} else {				// leaf node.
		fprintf(fp,"%d",ID);
	}
	if(name) fprintf(fp,"_%s",name);

	if(rightchild != 0){			// not the last child of its parent: siblings follow.
		fprintf(fp,",");
		count += rightchild->Print(fp,depth+1);
	}
	return count;
}

Int4	tbn_typ::CountNodes(Int4 depth)
// Count this node plus every node reachable through left and right links.
// depth is only passed along (incremented) to the recursive calls.
{
	Int4	total=1;		// this node.
	if(leftchild != 0) total += leftchild->CountNodes(depth+1);
	if(rightchild != 0) total += rightchild->CountNodes(depth+1);
	return total;
}

BooLean tbn_typ::IsRightChild()
// TRUE when this node has a parent and is that parent's rightchild.
{
	if(parent == 0) return FALSE;
	return (parent->rightchild == this) ? TRUE : FALSE;
}

BooLean tbn_typ::IsLeftChild()
// TRUE when this node has a parent and is that parent's leftchild.
{
	if(parent == 0) return FALSE;
	return (parent->leftchild == this) ? TRUE : FALSE;
}

Int4	tbn_typ::NumRightChildren()
// Length of the chain of rightchild links starting at this node
// (0 when this node has no rightchild).
{
	Int4	count=0;
	for(tbn_typ *next=rightchild; next != 0; next=next->rightchild) count++;
	return count;
}

void	tbn_typ::PrintBinary(FILE *fp)
// Print as a binary tree: "(<left>,<right>):<ID>" for a node with two children,
// "(<child>):<ID>" for a node with one child and "<ID>" for a leaf; "_<name>" is
// appended when the node is named.
{
	const bool hasLeft = (leftchild != 0);
	const bool hasRight = (rightchild != 0);
	if(hasLeft || hasRight){		// an internal node
	    fprintf(fp,"(");
	    if(hasLeft) leftchild->PrintBinary(fp);
	    if(hasLeft && hasRight) fprintf(fp,",");
	    if(hasRight) rightchild->PrintBinary(fp);
	    fprintf(fp,"):");
	}
	fprintf(fp,"%d",ID);
	if(name) fprintf(fp,"_%s",name);
}

tbn_typ *tbn_typ::ReturnNode(char *node_name)
// Find the node with name == node_name.
// The search visits this node first, then the left subtree, then the right subtree,
// and returns the first match, or 0 when no node matches.
// Unnamed nodes (name == 0, which the constructor allows) never match and a null
// node_name matches nothing; neither is passed to strcmp().
{
	if(node_name == 0) return 0;
	if(name != 0 && strcmp(name,node_name) == 0) return this;
	if(leftchild != 0){
		tbn_typ *hit=leftchild->ReturnNode(node_name);
		if(hit != 0) return hit;
	}
	if(rightchild != 0) return rightchild->ReturnNode(node_name);
	return 0;
}


Int4	tbn_typ::PrintSubTree(FILE *fp, char *root_node_name, Int4 depth,cma_typ tpl_cma,cma_typ *in_cma,char *outname)
/************************************************************
Run PrintTree() on the subtree whose root is the node named root_node_name.

The node is looked up with ReturnNode().  While PrintTree() runs, the node's parent
and rightchild links are cleared, so that it is treated as a root without siblings;
both links are restored afterwards.  PrintTree() is called with NumRandom = 20000.

Returns the value returned by PrintTree(), or 0 (after an error message on stderr)
when no node has the requested name.
*************************************************************/
{
	tbn_typ *subroot = ReturnNode(root_node_name);
	if(subroot == 0){
		fprintf(stderr,"Error: Subtree node not found!\n");
		return 0;
	}

	// detach the node from its parent and from its siblings...
	tbn_typ *savedParent = subroot->parent;
	tbn_typ *savedSibling = subroot->rightchild;
	subroot->parent = 0;
	subroot->rightchild = 0;

	const Int4 result = subroot->PrintTree(fp,depth,20000,tpl_cma,in_cma,outname);

	// ...and reattach it.
	subroot->parent = savedParent;
	subroot->rightchild = savedSibling;
	return result;
}



