#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <sequence.h>
#include <stdinc.h>
#include <afnio.h>
#include "alphabet.h"
#include "blosum62.h"
#include "cmsa.h"
#include "hat_typ.h"
#include "hpt_typ.h"

hpt_typ	*ConversionViaTemplateHpt(cma_typ TmplCMA, hpt_typ *Hpt)
/************************************************************************************
 Convert the pattern positions of an Hpt given a template alignment
 (modified from the hat_typ::ConversionViaTemplateCMSA3() routine).

 For every child of the root node (Parent[i] == 1) the pattern positions of all
 nodes in that child's subtree are shifted wherever the child's template sequence
 has insertions, so that the child alignments can be analyzed recursively.
 The converted patterns are stored in the child's subtree (obtained via
 Hpt->SubTree(child)) and each subtree is printed to stderr.

 TmplCMA: template alignment; sequence i+1 is paired with Hpt node i.
 Hpt:     hierarchy; NumSets() must equal the number of template sequences and
          NumBPPS() must equal NumSets()-1.

 Returns: currently always 0 (rhpt is never assigned); the subtrees are only printed.
 Inconsistent input is reported through print_error() or exit(1).
 Diagnostic trace output is written to stderr.
************************************************************************************/
{ 
	Int4	i,j,x,z,*Parent=0,**Pos,*pos,Len,*ncol,II,num_sets,num_seqs,num_bpps;
	e_type	trueE;
	hpt_typ	*rhpt=0;
	sst_typ	*sst=0,**IN_SST;	// pattern sets for each node of the hierarchy...
	char	**argv,*pttrn; 
	a_type	AB=AlphabetCMSA(TmplCMA);

	//************** 1. Make sure that the input files are consistent; allocate memory. ***********
	num_sets=Hpt->NumSets(); num_seqs=NumSeqsCMSA(TmplCMA); num_bpps=Hpt->NumBPPS();
	if(num_sets != num_seqs){
		print_error("ConversionViaTemplateHpt() input error 1");
	}
	if(num_bpps != num_sets-1){
		print_error("ConversionViaTemplateHpt() input error 2");
	}
	// IsTree() fills in Parent, so it must not live inside an assert() (dropped under NDEBUG).
	if(!Hpt->IsTree(Parent)){
		print_error("ConversionViaTemplateHpt() input error: Hpt is not a tree");
	}
	NEWP(IN_SST,num_sets+3,sst_typ);
	NEWP(Pos,num_sets+3,Int4); NEW(ncol,num_sets+3,Int4);
	set_typ *Set; NEW(Set,num_bpps +3, set_typ);
	hpt_typ **oHpt; NEWP(oHpt,num_bpps +3, hpt_typ);

	//*************** 2. Get the residue sets for each of the nodes. *************
	Len=LengthCMSA(1,TmplCMA);
	for(i=1; i <= num_bpps; i++){
	     argv=Hpt->Argv(i);
	     // Parse the first "-P=" argument: sst is the pattern, pos the pattern positions.
	     for(pos=0,j=0; j < Hpt->nArg(i); j++){
		if(strncmp("-P=",argv[j],3) != 0) continue;
		pttrn=AllocString(argv[j]+3);
		ncol[i]=ParseResidueSets(pttrn,pos,sst,AB,"ConversionViaTemplateHpt() error");
		IN_SST[i] = sst; Pos[i]=pos; free(pttrn); 
		// debug...
		fprintf(stderr,"debug: %d.",i);
		for(x=1; x <= ncol[i]; x++){
		    if(sst[x]){ PutSST(stderr,sst[x],AB); fprintf(stderr,"%d,",Pos[i][x]); }
		} fprintf(stderr,"\n");
		break;
	     } assert(IN_SST[i]); 
	     if(Parent[i] == 1){	// child of the root node.
		fprintf(stderr,"%d. %s\n",i,Hpt->SetName(i));
		Set[i]=Hpt->MkSubTreeSet(i);	// sets for each child.
		oHpt[i]=Hpt->SubTree(i);
	     }
	}

      char str[202];
      for(II=1; II < num_sets; II++){	
	Int4	lenII,st,sc,ins;
	if(Set[II]==0) continue; 	// not a child of the root.

	//************* 1. Confirm consistency between the template and input alignments. ***************
	trueE = TrueSeqCMSA(II+1,TmplCMA); StrSeqID(str,200,trueE);
	lenII=LenSeq(trueE); 
	if(strcmp(str,Hpt->SetName(II)) != 0){
	    fprintf(stderr,"\n%d. Input Error: '%s' != '%s'\n",II,str,Hpt->SetName(II));
	}
	if(InsertionCMSA(1,II+1,Len,TmplCMA) > 0){
		fprintf(stderr,"Fatal: insertion within cma file %d\n",II+1); exit(1);
	}
	//********** 2. If deletions on either end of template, find C- and N-terminal adjustments. ***********
	Int4 Nt_adj=0,Ct_adj=0;
	if(IsDeletedCMSA(1,II+1,1,TmplCMA)){	// no deletions allowed on ends.
		fprintf(stderr,"Warning: deletion at first position in cma file %d\n",II+1);
		// move in until reach a residue (never scanning beyond the template).
		for(st=0; st < Len && IsDeletedCMSA(1,II+1,st+1,TmplCMA); st++) ;
		Nt_adj=st;
	}
	if(IsDeletedCMSA(1,II+1,Len,TmplCMA)){
		fprintf(stderr,"Warning: deletion at last position in cma file %d\n",II+1);
		for(st=0; st < Len && IsDeletedCMSA(1,II+1,Len-st,TmplCMA); st++) ;
		Ct_adj=st;
	}

	//*********** 3. Iterate through full lengths of both template and input alignments. ***********
	// sc = site in subgroup consensus sequence;  st = site in template
	// Start from the C-terminal end of both template and cma alignment.
	for(st=Len-Ct_adj,sc=lenII-Ct_adj; st > Nt_adj; )
	{
	   assert(sc > Nt_adj);
	   //******************** 3a. Disallow insertions directly following deletions. ***************
	   if(IsDeletedCMSA(1,II+1,st,TmplCMA) && InsertionCMSA(1,II+1,st,TmplCMA)){
		fprintf(stderr,"FATAL! -> %d = '%s':",II,Hpt->SetName(II));
		print_error("Input alignment contains a deletion next to an insertion");
	   }
	   ins=InsertionCMSA(1,II+1,st,TmplCMA);  // Get # insertions at site in template.
	   //******************** 3b. Insertion in template found. ***************
	   if(ins > 0){			// Insertion in template alignment.
		// Disallow insertions next to deletions. (REDUNDANT CHECK)
		assert(!(IsDeletedCMSA(1,II+1,st,TmplCMA) 
			|| IsDeletedCMSA(1,II+1,st+1,TmplCMA)));
		sc-=ins; 	// decrement site in sequence by # inserted residues...
		// for all nodes in child's subtree shift pattern residues after st forward...
		for(j=1; j <= num_bpps; j++){
		    if(!MemberSet(j,Set[II])) continue;
		    // Pos[j] holds ncol[j] columns (not ncol[II]).
		    for(x=1; x <= ncol[j]; x++){
			if(Pos[j][x] <= st) continue;
			Pos[j][x] += ins;
			fprintf(stderr,"%d. j=%d; x=%d; st=%d; ins=%d: ",II,j,x,st,ins);
			if(IN_SST[j][x]){ PutSST(stderr,IN_SST[j][x],AB); fprintf(stderr,"%d.\n",Pos[j][x]); }
		    } 
		}
		st--; sc--; 
	   	assert(sc >= 0);
	   //******************** 3c. One or more deletions in template found. ***************
	   } else if(IsDeletedCMSA(1,II+1,st,TmplCMA)){
	     // Skip over the run of deleted template positions; sc is left unchanged.
	     do {
		if(st <= 1) break;
		st--;
	     } while(IsDeletedCMSA(1,II+1,st,TmplCMA));
	   //******************** 3d. Input and template match at this position. ***************
	   } else { st--; sc--; }	// Match in template alignment.
	}

	//*********** 4. Store the converted patterns in the child's subtree. ***********
	// The pattern argument string looks like "-P=M11,M14,D15,S17,N25,I28,L31,VI53..."
	for(i=0,j=1; j <= num_bpps; j++){
	    if(!MemberSet(j,Set[II])) continue;
	    i++;	// i = index of node j within the subtree.
	    sst_typ	*nsst; NEW(nsst, lenII +3, sst_typ);
	    for(x=1; x <= ncol[j]; x++){
		z=Pos[j][x];
		fprintf(stderr,"%d. z=%d; j=%d; x=%d.\n",II,z,j,x);
		// NOTE(review): z is not range checked against lenII+3 -- confirm it cannot exceed it.
		nsst[z]=IN_SST[j][x];
	    } 
	    // convert a small set type (sst_typ) array to a pattern argument string.
	    char    *ptn=SST2ArgStrAlpha(nsst,lenII,AB); free(nsst);
	    fprintf(stderr,"%d.%d(%d). %s\n",II,i,j,ptn); 
	    // Size the buffer from its contents rather than trusting a fixed-length array.
	    size_t	need=strlen(Hpt->SetName(j))+strlen(ptn)+32;
	    char	*ArgStr=(char *) malloc(need);
	    if(ArgStr==0) print_error("ConversionViaTemplateHpt(): out of memory");
	    snprintf(ArgStr,need,"%d.%s %s",i,Hpt->SetName(j),ptn); free(ptn);
	    // SetArgStr() makes its own copy of ArgStr; call it outside of assert().
	    if(!oHpt[II]->SetArgStr(i,ArgStr)){
		print_error("ConversionViaTemplateHpt(): SetArgStr() failed");
	    }
	    free(ArgStr);
	}
   } 
   //*********** Release working memory and print the resulting subtrees. ***********
   // NOTE(review): the IN_SST[i] arrays and the oHpt[i] subtrees are not released here;
   // confirm how ParseResidueSets() and SubTree() expect their results to be freed.
   for(i=1; i <= num_bpps; i++){ free(Pos[i]); if(Set[i]) NilSet(Set[i]); }
   free(Pos); free(Set); free(Parent); free(IN_SST); free(ncol);
   for(i=1; i <= num_bpps; i++){ if(oHpt[i]) oHpt[i]->Put(stderr); }
   free(oHpt);
   return rhpt;
}

