/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "gth_typ.h"

//************************************* State '?' ***********************************************

// Set up an empty graph and all bookkeeping for 'num_sets' input sets, and
// put the object in state 'I' (initialization).
//   num_sets : number of input sets; must be > 0 (asserted).  Root is set to
//              num_sets+1.
//   min_lpr  : must be > 0.0 (asserted); stored in MinLPRforEdge, the cutoff
//              that AddEdge() and LinkToRoot() compare their lpr against.
void	gth_typ::Init(Int4 num_sets,double min_lpr)
{
	// Check both preconditions before touching any member.
	assert(num_sets > 0);
	assert(min_lpr > 0.0);

	State='I';
	Root=num_sets+1;
	MinLPRforEdge=min_lpr;
	// Root and Root-1 are consecutive, so their product is even and the
	// division is exact.
	MaxNumEdges=(Root*(Root-1))/2;

	Tree=0;		// no tree yet; RtnShortestPathTree() creates it.
	Grph=MkWdgraph(Root,MaxNumEdges);

	// Every set is made with Root+1 elements, i.e. elements 0 .. Root.
	Both=MakeSet(Root+1);
	ClearSet(Both);
	// The remaining classification sets start as copies of the cleared set.
	SuperSet=CopySet(Both);
	SubSet=CopySet(Both);
	Distinct=CopySet(Both);
	Reject=CopySet(Both);
	InGraph=CopySet(Both);

	// Orphan starts filled, with elements 0 and Root removed.
	Orphan=MakeSet(Root+1);
	FillSet(Orphan);
	DeleteSet(0,Orphan);
	DeleteSet(Root,Orphan);

	// These three are created later (ComputeSubOnly / RtnShortestPathTree).
	SuperOnly=0;
	SubOnly=0;
	Leaves=0;

	// Per-edge arrays (written at the index returned by JoinWdgraph) and
	// the per-node size array.
	NEWP(EdgeSST,MaxNumEdges+2, sst_typ);
	NEW(EdgeLPR,MaxNumEdges+2, double);
	NEW(EdgeOverLap,MaxNumEdges+2, Int4);
	NEW(NodeSize,Root+3, Int4);
}

// Release the sets, arrays and graphs owned by this object.
//
// The sst_typ arrays referenced from EdgeSST[] are deliberately NOT freed
// here: some of them are still used by the calling routine.  Only the
// pointer table itself is released.  (The former loop over the edges had an
// empty body for exactly this reason and has been removed.)
void    gth_typ::Free()
{
	// Sets that Init() always creates.
	NilSet(SuperSet);
	NilSet(SubSet);
	NilSet(Both);
	NilSet(Distinct);
	NilSet(Reject);

	// Sets that may still be null, depending on how far the object got.
	if(Leaves) NilSet(Leaves);
	if(Orphan) NilSet(Orphan);
	if(SuperOnly) NilSet(SuperOnly);
	if(SubOnly) NilSet(SubOnly);
	if(InGraph) NilSet(InGraph);

	// Per-edge and per-node arrays.
	free(EdgeSST);		// table only; see note above about its entries.
	if(EdgeLPR) free(EdgeLPR);
	free(EdgeOverLap);
	free(NodeSize);

	// Graphs: the tree exists only after RtnShortestPathTree().
	if(Tree) NilWdgraph(Tree);
	NilWdgraph(Grph);
}


//************************************* State 'I' ***********************************************

// Record node i in the Distinct set.
// Only legal in state 'I', and only for a node that has no edges in Grph in
// either direction (both asserted).  Always returns 0.
Int4    gth_typ::MkDistinct(Int4 i)
{
	assert(State=='I');
	IsNodeOkay(i,Grph);	// NOTE(review): result unused; presumably checks internally -- confirm.
	// The node must not be connected to anything yet.
	assert(NumEdgesOut(i,Grph)==0);
	assert(NumEdgesIn(i,Grph)==0);
	AddSet(i,Distinct);
	return 0;
}

#if 0
Int4    gth_typ::AddEdge(double lprIJvI, double lprJxIvI, lprIxJvJ,sst_typ *sst, Int4 j, Int4 cardJ, Int4 i, Int4 cardI, Int4 OverLap)
	lprIJvI // SetI vs SetJ with Pattern I (should match; not hard to do; this pattern is sent.)
	lprJxIvI // SetJ vs Not SetI with Pattern I (should not match)
	lprIxJvJ // SetI vs Not SetJ with Pattern J (attached to parent node; should match if I a subset of J; most important match).
#endif

// Add the directed edge j --> i to Grph (state 'I' only).
//   lpr      : score for the edge; if below MinLPRforEdge nothing is added.
//   sst      : pointer stored (not copied) in EdgeSST for the new edge.
//   j, cardJ : tail node and its size; j is recorded in SuperSet.
//   i, cardI : head node and its size; i is recorded in SubSet.
//   OverLap  : stored in EdgeOverLap for the new edge.
// Returns the index of the new edge, or 0 when lpr is below the cutoff.
// A given pair may be joined only once, in either direction (asserted).
Int4    gth_typ::AddEdge(double lpr, sst_typ *sst, Int4 j, Int4 cardJ, Int4 i, Int4 cardI, Int4 OverLap)
{
	assert(State=='I');
	if(lpr < MinLPRforEdge) return 0;	// too weak to connect.

	IsNodeOkay(i,Grph);
	IsNodeOkay(j,Grph);
	assert(!IsJoined(j,i,Grph));
	assert(!IsJoined(i,j,Grph));

	// Bookkeeping for the superset end (j) ...
	AddSet(j,SuperSet);
	AddSet(j,InGraph);
	DeleteSet(j,Orphan);
	// ... and for the subset end (i).
	AddSet(i,SubSet);
	AddSet(i,InGraph);
	DeleteSet(i,Orphan);

	// Negative weight, so that meaningful direct paths come out longer.
	const Int4 weight = -static_cast<Int4>(ceil(sqrt(lpr)));
	const Int4 edge = JoinWdgraph(j,i,weight,Grph);	// i (head) <-- j (tail).
	assert(edge > 0 && edge <= mWdgraph(Grph));

	EdgeLPR[edge]=lpr;
	EdgeSST[edge]=sst;
	EdgeOverLap[edge]=OverLap;

	// Record each node's size the first time it is seen; afterwards it
	// must agree with what was recorded.
	if(NodeSize[i] <= 0){ NodeSize[i]=cardI; } else { assert(NodeSize[i]==cardI); }
	if(NodeSize[j] <= 0){ NodeSize[j]=cardJ; } else { assert(NodeSize[j]==cardJ); }
	return edge;
}

//************************************* State 'C' ***********************************************
// Add the directed edge Root --> i to Grph.
// If still in state 'I' this first calls ComputeSubOnly(), which moves the
// object to state 'C'; state 'C' is then required (asserted).
//   lpr : score for the edge; if below MinLPRforEdge nothing is added.
//   sst : pointer stored (not copied) in EdgeSST for the new edge.
//   i   : node to attach; it must not already be joined to Root in either
//         direction (asserted).
// Only nodes that are in Distinct, SuperOnly or Orphan are linked.
// Returns the index of the new edge, or 0 if no edge was added.
Int4    gth_typ::LinkToRoot(double lpr, sst_typ *sst, Int4 i)
{
	if(State=='I') ComputeSubOnly();
	assert(State=='C');
	IsNodeOkay(i,Grph);
	assert(!IsJoined(Root,i,Grph));
	assert(!IsJoined(i,Root,Grph));

	if(!MemberSet(i,Distinct) && !MemberSet(i,SuperOnly) && !MemberSet(i,Orphan)) return 0;
	// if(!MemberSet(i,Distinct) && !MemberSet(i,SuperOnly)) return 0;
	if(lpr < MinLPRforEdge) return 0;
	AddSet(i,InGraph);
	Int4 wt = -(Int4) ceil(sqrt(lpr)); // should make meaningful direct paths longer
	Int4 e=JoinWdgraph(Root,i,wt,Grph); // Root (tail) --> i (head).
	// Same range check as AddEdge(): e indexes the per-edge arrays below.
	assert(e > 0 && e <= mWdgraph(Grph));
	EdgeLPR[e]=lpr; EdgeSST[e]=sst;
	return e;
}

void	gth_typ::PutSetsGraph(FILE *fp)
{
	if(State=='I') ComputeSubOnly();
	assert(State == 'C' || State == 'T' || State == 'O'); // { State='C'; ComputeSubOnly( ); }
	// find orphan nodes to
	fprintf(fp,"=================== Summary ===================\n");
	fprintf(fp,"SuperSet only:\n"); PutSet(fp,SuperOnly);
	fprintf(fp,"\n\nSubSet only:\n"); PutSet(fp,SubOnly);
	fprintf(fp,"\n\nIntermediate Sets:\n"); PutSet(fp,Both);
	fprintf(fp,"\n\nIsolated Sets:\n"); PutSet(fp,Distinct);
	fprintf(fp,"\n\nOrphan Sets:\n"); PutSet(fp,Orphan);

	fprintf(fp,"=================== Graph ===================\n");
	PutWdgraph(fp,Grph);
}

void	gth_typ::ComputeSubOnly( )
// This can only be called one time.
{
	assert(State=='I'); State='C'; 
	IntersectSet1(SuperSet,SubSet,Both);
	SuperOnly=CopySet(SuperSet); IntersectNotSet(SuperOnly,SubSet);
	SubOnly=CopySet(SubSet); IntersectNotSet(SubOnly,SuperSet);
   	InGraph=CopySet(Both);
	UnionSet(InGraph,SuperSet); UnionSet(InGraph,SubSet); UnionSet(InGraph,Distinct);
	IntersectNotSet(Orphan,InGraph);
}

// Build (once) and return the tree made of shortest paths from Root in Grph.
// If the tree already exists it is returned unchanged.  Otherwise the state
// must be 'I' or 'C' (asserted); ComputeSubOnly() is called if still in 'I'.
// For every selected leaf a line describing its path is written to fp.
// On return the state is 'T', Tree holds the result and Leaves holds the
// set returned by RtnLeafSet(Tree).  The tree remains owned by this object.
wdg_typ gth_typ::RtnShortestPathTree(FILE *fp)
{
	if(Tree) return Tree; 
	assert(State=='I' || State=='C'); 
	if(State=='I') ComputeSubOnly();	// shift to compute state.
        Tree=MkWdgraph(Root,MaxNumEdges);
        Int4 i,r,*path,*dist; NEW(path,Root+3,Int4); NEW(dist,Root+3,Int4);
        if(ShortestPathWdgraph(Grph, Root, path, dist)) print_error("cycle in graph");
        for(i=1; i < Root; i++){
	     // NOTE(review): edge weights are negative, so a positive distance
	     // presumably marks a node not reached from Root -- confirm against
	     // ShortestPathWdgraph.
	     if(dist[i] > 0) continue;
	     if(NumEdgesOut(i,Grph) != 0) continue;	// only nodes without outgoing edges.
	     // if(MemberSet(i,Orphan)) continue;	// Leave Orphan's in, as they may be removed later.
	     // if(NumEdgesIn(i,Grph) != 0) continue;
             // if(!MemberSet(i,SubOnly)) continue;  // must be a leaf node.
             // if(!MemberSet(i,SubOnly) && !MemberSet(i,Distinct)) continue;  // set must be a leaf node.
             fprintf(fp," leaf %d: ",i);
	     // Walk back from the leaf via path[], adding each step to Tree
	     // unless an earlier leaf already added it.
             for(r=i; r != Root && r != 0; r = path[r]){
                    fprintf(fp,"%d ->",r);
                    if(!IsJoined(path[r],r,Tree)) JoinWdgraph(path[r],r,1,Tree); // should end when path[r] == Root.
             } fprintf(fp," %d (dist = %d)\n",Root,dist[i]);
        } fprintf(fp,"\n\n");
	// The work arrays are no longer needed; previously they were leaked.
	free(path); free(dist);
	State='T';		// now have Tree and can't go back to earlier states.
        Leaves=RtnLeafSet(Tree); return Tree;
}

//************************* States 'T' or 'O' ***********************************************

// Return a newly made set of the leaf nodes of G: nodes with at least one
// incoming edge and no outgoing edge that also belong to the subtree rooted
// at Root.  Requires state 'T' or 'O' (asserted).
// WARNING (from the original author): for shortest paths arrows are reversed!!!
set_typ gth_typ::RtnLeafSet(wdg_typ G)
{
	assert(State=='T'|| State=='O');

	set_typ leafNodes=MakeSet(Root+1);
	ClearSet(leafNodes);
	for(Int4 node=1; node <= nWdgraph(G); node++){
		if(NumEdgesIn(node,G) <= 0) continue;	// nothing points at it.
		if(NumEdgesOut(node,G) != 0) continue;	// it has children.
		AddSet(node,leafNodes);
	}

	// Keep only candidates that are in the subtree under Root.
	set_typ inTree=RtnSubTreeNodeSet(Root, G);
	IntersectSet3(leafNodes, inTree);	// leafNodes = leafNodes & inTree.
	NilSet(inTree);
	return leafNodes;
}

// Print Tree in Newick format to fp, starting at Root and ending with ";\n".
// 'set' is passed through to TreeDFS(), which uses it to choose the node
// label format (it may be 0).  Requires state 'T' or 'O' (asserted).
void    gth_typ::PrintNewickTree(FILE *fp,set_typ *set)
{
	assert(State=='T' || State=='O');
	TreeDFS(fp, Root, set, Tree);
	fprintf(fp,";\n");
}

void    gth_typ::TreeDFS(FILE *fp, Int4 v, set_typ *set, wdg_typ G)
// start from Root, recursively print tree in Newick format.
{
	Int4    w,e,i,n=NumEdgesOut(v,G);
        if(n > 0){
           fprintf(fp,"(");
           for(i=0,e=FirstOutWdgraph(v,G);e!=0;e=NextOutWdgraph(e,G)){
              i++; w = HeadWdgraph(e,G);      // a child of v.
              TreeDFS(fp,w,set,G);
              if(i < n) fprintf(fp,",");
           } fprintf(fp,")");
         }
         if(set != 0) fprintf(fp,"%d_Seq%d",v,CardSet(set[v]));
         else fprintf(fp,"%d_Set%d",v,v);      // output for HyperPartition.
}

// Return a newly made set holding the nodes of the subtree of T rooted at
// subroot (subroot included).  Requires state 'T' or 'O' (asserted).
// Also writes the list of visited nodes to stderr.
set_typ gth_typ::RtnSubTreeNodeSet(Int4 subroot, wdg_typ T)
{
	assert(State=='T'|| State=='O');

	// Scratch list filled by TreeDFS(); entries 1..count are used.
	Int4	count=0;
	Int4	*visited=0;
	NEW(visited, nWdgraph(T) +3, Int4);

	set_typ result=MakeSet(Root+1);
	ClearSet(result);

	TreeDFS(visited,count,subroot,T);

	Int4	k;
	for(k=1; k <= count; k++){
		AddSet(visited[k],result);
	}

	// Diagnostic trace of the visit order.
	fprintf(stderr,"Set%d subtree: ",subroot);
	for(k=1; k <= count; k++){
		fprintf(stderr,"%d ",visited[k]);
	}
	fprintf(stderr,"\n");

	free(visited);
	return result;
}

// Return a newly made set that is the union of set[n] over every node n in
// the subtree of T rooted at subroot (subroot included).
// Requires state 'T' or 'O' (asserted).
// Also writes the list of visited nodes to stderr.
set_typ gth_typ::RtnSubTreeSeqSet(Int4 subroot, set_typ *set,wdg_typ T)
{
	assert(State=='T'|| State=='O');

	// Scratch list filled by TreeDFS(); entries 1..count are used.
	Int4	count=0;
	Int4	*visited=0;
	NEW(visited, nWdgraph(T) +3, Int4);

	// Start from a cleared copy of set[subroot].
	set_typ merged=CopySet(set[subroot]);
	ClearSet(merged);

	TreeDFS(visited,count,subroot,T);

	Int4	k;
	for(k=1; k <= count; k++){
		UnionSet(merged,set[visited[k]]);
	}

	// Diagnostic trace of the visit order.
	fprintf(stderr,"Set%d subtree: ",subroot);
	for(k=1; k <= count; k++){
		fprintf(stderr,"%d ",visited[k]);
	}
	fprintf(stderr,"\n");

	free(visited);
	return merged;
}

// Return a newly allocated array listing the nodes of the subtree of T
// rooted at subroot, in the order TreeDFS() visits them.
//   size : set to the number of nodes found; they occupy entries 1..size.
// The array is allocated here with NEW and is not released by this routine.
// Requires state 'T' or 'O' (asserted).
Int4    *gth_typ::RtnSubTreeNodes(Int4 subroot, Int4 &size, wdg_typ T)
{
	assert(State=='T'|| State=='O');

	Int4	*nodeList=0;
	NEW(nodeList, nWdgraph(T) +3, Int4);

	// Let the traversal count straight into the caller's variable.
	size=0;
	TreeDFS(nodeList,size,subroot,T);
	return nodeList;
}

// Depth-first traversal of T starting at v that records each visited node.
//   SubTreeNodes : output array; the node is stored at index 'number' after
//                  'number' has been incremented, so the first entry is [1].
//   number       : running count of nodes stored so far (in/out); asserted
//                  never to exceed nWdgraph(T).
// Returns the updated count.
Int4    gth_typ::TreeDFS(Int4 *SubTreeNodes, Int4 &number, Int4 v, wdg_typ T)
{
	const Int4 fanOut=NumEdgesOut(v,T);

	// Record v itself before descending.
	number++;
	assert(number <= nWdgraph(T));
	SubTreeNodes[number]=v;

	if(fanOut <= 0) return number;	// no children.

	for(Int4 edge=FirstOutWdgraph(v,T); edge != 0; edge=NextOutWdgraph(edge,T)){
		// The head of each outgoing edge is a child of v.
		TreeDFS(SubTreeNodes,number,HeadWdgraph(edge,T),T);
	}
	return number;
}




