/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

/** fa2cma **/
/* Start conditions declared for the scanner.  Of these, only MSF and MFASTA
   are referenced by the rules visible in this file. */
%START DEF SEQ STRT MSF MFASTA
/* Named patterns: A = any letter, U = upper-case letter, L = lower-case letter,
   D = '-', P = '.', R = one or more of D/P/A, S = run of blanks or tabs.
   The visible rules use only U, L, D and P. */
A       [a-zA-Z]
U       [A-Z]
L       [a-z]
D       [-]
P       [.]
R       ({D}|{P}|{A})+
S       [ \t]+
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "sequence.h"
#include "stdinc.h"
#include "afnio.h"
#include "cmsa.h"
#include "cma_gmb.h"
#include "histogram.h"
#include "dms_typ.h"
#include "chn_typ.h"
#include "convert_msa_usage.h"

// Capacity (in chars) of the SeqCMA and SeqDef buffers declared below; main()
// also uses it to size its per-position scratch arrays.
#define YYLMAX  1000000
// Print str followed by a newline to stderr, then terminate with exit status 1
// (both arms of the conditional call exit(1)).
#define print_error(str) (fprintf(stderr,"%s\n",str)? exit(1): exit(1)) 

// num      : number of deflines seen so far by the scanner (printed as the sequence index).
// TotalSeq : number of sequences counted by main()'s pre-scan (main adds one for the consensus).
// cLenAln  : column count (upper-case letters + '-') of the first input sequence, set in main().
// tLenAln  : total character count of the first input sequence, set in main().
// NumCol   : number of columns main() wrote for the consensus line; printed in the header.
unsigned long    num=0,TotalSeq=0,cLenAln=0,tLenAln=0,NumCol=0;
// LenCMA is the write index into SeqCMA; LenTrue and LenFake are the two lengths
// printed as "$num=LenTrue(LenFake):" for each sequence.
unsigned long	 LenCMA=0,LenTrue=0,LenFake=0;
// Per-sequence character counters; all are reset to 0 at every defline.
unsigned long    NumP=0,NumD=0,NumU=0,NumL=0,NumAln=0,NumGap=0,NumRF=0;
// SeqCMA accumulates the cma-format text of the current sequence (main() also
// reuses it as a defline scratch buffer).  SeqDef is not used by the visible code.
char	SeqCMA[YYLMAX];
char	SeqDef[YYLMAX];
// NOTE(review): Begin, End and Length are not referenced by the code visible here.
unsigned long Begin=ULONG_MAX,End=0,Length;
// Sequence object built from the most recent defline; released when the next one arrives.
e_type	DummySeq=0;
// SeqOffSet is taken from OffSetSeq() at each defline; SeqExtend is only reset to 0.
long SeqOffSet=0,SeqExtend=0;
// Kingdom/Phylum of the current sequence as reported by KingdomSeq()/PhylumSeq().
char Kingdom,Phylum[100];
// Destination stream for the generated cma text.
FILE	*ofp=stdout;

Int4	RmQueryGapsCMSA(cma_typ &cma)
// remove columns that are deleted in the first sequence.
//
// cma is passed by reference and may be replaced by a new alignment object
// (the old one is released with NilCMSA) when the end columns are trimmed.
// Returns the number of interior columns that were flagged as deleted in
// sequence 1 and handed to ColumnsToInsertCMSA2().
{
    Int4	sq,hits,sq_hits,s,pos[4],len=LengthCMSA(1,cma),N=NumSeqsCMSA(cma);
    Int4	i,j,total=0,cycle;
    // a_type	A=AlphabetCMSA(cma);
    BooLean	*Delete;
    cma_typ	rcma=0;

#if 0
    if((i=Check4NtermExtendCMSA(cma)) > 0){
	fprintf(stderr,"%d out of %d sequences have N-term extensions\n",i,N);
	print_error("Fatal: input file must not have N-term extensions");
    }
#endif
#if 1	// fixes problem with adding insertions!
    ExtendFakeToRealCMSA(cma);
#endif
#if 0	// 
    RmUnAlignedSeqsCMSA(cma);
#endif
#if 0	// 
    rcma=RmWrinklesCMSA(cma);
    if(rcma){ NilCMSA(cma); cma=rcma; }
#endif
    // Delete[s] flags column s of block 1 as deleted in sequence 1.
    // NOTE(review): the loops below rely on NEW() zero-initialising the array
    // (entries 0, 1 and Len are never written here) -- confirm in stdinc.h.
    NEW(Delete, LengthCMSA(1,cma)+3, BooLean);
    // Don't remove columns on ends!!
    for(s=2 ; s < LengthCMSA(1,cma); s++){ Delete[s]=IsDeletedCMSA(1,1,s,cma); }
    cycle=0;
    Int4 Len=LengthCMSA(1,cma);
    // Walk right-to-left so that converting a run of columns does not shift the
    // indices of the runs that are still to be processed.
    for(s = Len; s > 0; s--){
	if(Delete[s]){
	   cycle++;
	   Int4 end=s; 
	   // Extend the run leftwards; afterwards [s..end] is the flagged run.
	   while(Delete[s]){ total++; s--; }
	   s++;
#if 0	// ConvertColsToInsertsCMSA is not working!!!
	   rcma=0;
	   rcma=ConvertColsToInsertsCMSA(cma,1,s,end);
	   if(rcma){ NilCMSA(cma); cma=rcma; }
	   fprintf(stderr,"\n********************** cycle %d **************************\n",cycle);
	   char str[200];
           sprintf(str,"%s.cycle%d",NameCMSA(cma),cycle);
           // FILE *fp = open_file(str,".cma","w"); PutCMSA(fp,cma); fclose(fp);
#else	// use this instead...
	   // fprintf(stderr,"removing column %d to %d --> ",s,end);
	   // ColumnsToInsertCMSA(cma,s,end);
	   ColumnsToInsertCMSA2(cma,s,end);
	   // fprintf(stderr,"Len=%d.\n",LengthCMSA(1,cma));
#endif
	}
    } free(Delete); rcma=0;
#if 1
    // The end columns were skipped above; if sequence 1 is deleted there, trim
    // the block with TrimBlkCMSA() and adopt the alignment it returns.
    if(IsDeletedCMSA(1,1,1,cma)) rcma=TrimBlkCMSA(cma,1,1,0,2);
    if(rcma){ NilCMSA(cma); cma=rcma; rcma=0; } 
    Len=LengthCMSA(1,cma); 
    if(IsDeletedCMSA(1,1,Len,cma)) rcma=TrimBlkCMSA(cma,1,0,1,2);
    if(rcma){ NilCMSA(cma); cma=rcma; rcma=0; } 
#endif
    return total;
}

FILE    *open_file2(const char *fstring,const char *subfile,const char *cmnd)
// Open the file named <fstring><subfile> with fopen() mode cmnd.
//   fstring : base file name; leading blanks are skipped.
//   subfile : suffix appended to the base name (may be "").
//   cmnd    : fopen() mode string, e.g. "r" or "w".
// Returns the open stream.  On failure an error is reported on stderr and the
// program exits with status 1 (via print_error), so the result is never NULL.
{
        FILE    *fptr;
        char    *path;
        size_t  len;

        while(fstring[0] == ' ') fstring++;
        // Size the buffer from the inputs; the former fixed 100-byte buffer
        // overflowed for long path names.
        len = strlen(fstring) + strlen(subfile) + 1;
        path = (char *) malloc(len);
        if(path == NULL) print_error("open_file2(): out of memory");
        strcpy(path,fstring);
        strcat(path,subfile);
        if((fptr = fopen(path,cmnd)) == NULL) {
                fprintf(stderr,"Could not open file \"%s\"\n",path);
                print_error("File does not exist!\n");
        }
        free(path);
        return(fptr);
}

%}
%%
^>.+$		{ 
			/* A fasta defline.  On the first defline the cma file header is
			 * written; on every later one the sequence accumulated since the
			 * previous defline is flushed to ofp.  A new sequence object is
			 * then built from this defline and all per-sequence counters are
			 * reset. */
			unsigned long i;
			/* MkSeq() is handed a 5-residue placeholder; the object is used
			 * here only for its defline information (PutSeqInfo etc.). */
			unsigned char seq[5]={1,1,1,1,1,};
			long len=5;
			if(num==0){
			   /* The counters are unsigned long, hence %lu (they were
			    * previously printed with %d). */
			   fprintf(ofp,"[0_(1)=fa2cma(%lu)", TotalSeq);
			   fprintf(ofp,"{go=10000,gx=2000,pn=1000.0,lf=0,rf=0}:\n");
			   fprintf(ofp,"(%lu)",NumCol);
			   for(i=1; i<=NumCol; i++) fprintf(ofp,"*");
			   fprintf(ofp,"\n\n");
			} else {
			   /* NOTE(review): MFASTA is the start-condition constant, not a
			    * test of the current state (that would be YY_START == MFASTA),
			    * so this condition does not depend on the scanner mode.
			    * yywrap() does not append this '(' for the final sequence.
			    * Left unchanged -- confirm the intended behaviour. */
			   if(MFASTA){ SeqCMA[LenCMA]='('; LenCMA++; }
			   fprintf(ofp,"$%lu=%lu(%lu):\n",num,LenTrue,LenFake);
			   PutSeqInfo(ofp,DummySeq);
			   SeqCMA[LenCMA]=0;
			   if(NumAln==cLenAln && NumRF==0) fprintf(ofp,"{(%s()}*\n\n",SeqCMA);
			   else fprintf(ofp,"{(%s)}*\n\n",SeqCMA);
			}
			num++;
			SeqOffSet=0; SeqExtend=0;
			e_type  E=MkSeq(yytext, len, seq);
			if(DummySeq) NilSeq(DummySeq);	/* release the previous sequence object */
			DummySeq=E; Kingdom=KingdomSeq(E);
			if(PhylumSeq(E)) strncpy(Phylum,PhylumSeq(E),50);
			else strncpy(Phylum,"unknown",50);
			SeqOffSet=OffSetSeq(E);
			NumP=NumD=NumU=NumL=NumAln=NumGap=NumRF=0;
			LenTrue=LenFake=LenCMA=0;
		}

<MSF>{P}		{ 	NumP++; /* '.' in MSF mode: counted only, nothing is stored in SeqCMA */ }

<MFASTA>{L}		{	// lower case...
			   /* In MFASTA mode a lower-case letter is stored upper-cased and
			    * counted as an aligned position (NumAln++), like {U} below. */
			   char c=yytext[0];
			   if(NumAln==0){			// left flank
				SeqCMA[LenCMA]=')'; LenCMA++;
			   } SeqCMA[LenCMA]=toupper(c); LenCMA++; LenTrue++; LenFake++;
			   NumAln++;
			}

<MSF>{L}		{	// lower case...
			/* In MSF mode a lower-case letter is not an aligned position.
			 * Before the first aligned position and after cLenAln aligned
			 * positions it is stored upper-cased and counted in both lengths;
			 * in between it is stored as-is and counted in LenTrue only. */
			char c=yytext[0];
			if(NumAln==0){			// left flank
			   SeqCMA[LenCMA]=toupper(c); LenCMA++; LenTrue++; LenFake++; 
			}else if(NumAln==cLenAln){	// right flank;
			   /* '(' is written once, before the first right-flank residue. */
			   if(NumRF==0){ SeqCMA[LenCMA]='('; LenCMA++; } NumRF++;
			   SeqCMA[LenCMA]=toupper(c); LenCMA++; LenTrue++; LenFake++;
			} else {			// within alignment.
			   SeqCMA[LenCMA]=c; LenCMA++; LenTrue++;
			} NumL++; NumGap++;
		}

{D}		{	// delete character '-';
			/* ')' is written before the first aligned position.  A '-' counts
			 * as an aligned position and in LenFake, but not in LenTrue. */
			if(NumAln==0){ SeqCMA[LenCMA]=')'; LenCMA++; }
			SeqCMA[LenCMA]='-'; LenCMA++; LenFake++;
#if 0
			if(NumAln==cLenAln){	// start right flank;
			   if(NumRF==0){ SeqCMA[LenCMA]='('; LenCMA++; } 
			}
#endif
			NumD++; NumAln++;
		}

{U}		{	// upper
			/* An upper-case letter is an aligned residue: stored unchanged and
			 * counted in both LenTrue and LenFake. */
			if(NumAln==0){ SeqCMA[LenCMA]=')'; LenCMA++; }
			SeqCMA[LenCMA]=yytext[0]; LenCMA++; LenTrue++; LenFake++;
			NumU++; NumAln++;
		}

<MFASTA>{P}	{ print_error("invalid fasta input");  // util/FA2CMA/faToCMA.l
		  /* '.' is rejected in MFASTA mode. */
		}

[ \t\n]                  ; /* blanks, tabs and newlines are ignored */
%%
int	yywrap()
// End-of-input hook called by the scanner.  Writes the last sequence that is
// still buffered in SeqCMA (if any defline was seen) followed by the closing
// "_0]." trailer of the cma file.  Always returns 1, i.e. there is no further
// input to scan.
{
	fflush(ofp);
	if(DummySeq){
	  // num, LenTrue and LenFake are unsigned long, hence %lu (previously %d).
	  fprintf(ofp,"$%lu=%lu(%lu):\n",num,LenTrue,LenFake);
	  PutSeqInfo(ofp,DummySeq);
	  SeqCMA[LenCMA]=0;
	  if(NumAln==cLenAln && NumRF==0) fprintf(ofp,"{(%s()}*\n",SeqCMA);
	  else fprintf(ofp,"{(%s)}*\n",SeqCMA);
	}
	fprintf(ofp,"\n_0].\n\n");
	return(1);
}


static void    PressSeq(FILE *ifp, FILE *fp)
// Copy a fasta stream from ifp to fp with every sequence compressed onto a
// single line: deflines (lines starting with '>') are copied verbatim, while
// all white space, including line breaks, is removed from sequence data.
// A newline is written before each defline except the first, and once more
// at end of input.
{
        int     c,last='\n';    // int, not char: EOF must stay distinct from byte 0xFF
        char    state='S';      // 'D' = inside a defline, 'S' = inside sequence data
        int     nm=0;           // number of deflines seen so far

        while((c=fgetc(ifp))!=EOF){
                if(last == '\n' && c=='>'){
                        if(nm > 0) fputc('\n',fp);      // terminate the previous sequence
                        nm++; state = 'D';
                } else if(c=='\n'){
                        if(state == 'D') fputc('\n',fp); // end of the defline
                        state = 'S';
                }
                if(state == 'D') fputc(c,fp);
                else if(!isspace(c)) fputc(c,fp);
                last=c;
        }
        fputc('\n',fp);
}

void	ComputeConsensus(UInt4 cLenAln, UInt4 **ResCounts, a_type AB)
// Diagnostic routine: derives a consensus residue and a BILD score for each of
// the cLenAln alignment columns and reports them on stderr.
//   cLenAln   : number of alignment columns (columns are indexed 1..cLenAln).
//   ResCounts : ResCounts[c][i] = number of sequences with letter c in column
//               i, for c in 'A'..'Z'; the 'Z' row is treated as the deletion
//               count.  NOTE: the rows and the array itself are freed here,
//               so the caller must not use ResCounts afterwards.
//   AB        : residue alphabet used to map letters to codes.
// Uses the file-level TotalSeq as the number of sequences.  The consensus is
// only printed, not returned.
{
	char	debug=1; 
	UInt4	i,j,X,r,max=0;
	char	c,dms_mode='F',wt_factor=1;
	Int4 	pernats=1000;
	UInt4	kount[30];
	dms_typ *dms=new dms_typ(wt_factor,pernats,dms_mode,AB);
	double	*BILD=0; NEW(BILD,cLenAln+9,double);
	char	maxC,*Csq; NEW(Csq, cLenAln+9, char);
	double	d,D,RE=0,p,q=0.05;
	for(i=1; i<=cLenAln; i++) Csq[i]='-';
	for(j=0,i=1; i<=cLenAln; i++){
	  // Residue counts of this column, indexed by alphabet code, for BILD.
	  for(r=0; r <= nAlpha(AB); r++) kount[r]=0;
	  for(c='A'; c < 'Z'; c++){
	  	if((r=AlphaCode(c,AB))==0) continue;
		if(ResCounts[c][i] == 0) continue;
	  	fprintf(stderr,"%d.%c(%d) = %d\n",i,c,r,ResCounts[c][i]);
		kount[r] += ResCounts[c][i];
	  } D=dms->bild(kount);
	  if(D > 0) fprintf(stderr,"%d. bild=%.3f\n",i,D);
	  BILD[i]=D;
	  // Most frequent letter and relative entropy against a flat q.
	  for(RE=0,max=0,maxC=0,c='A'; c < 'Z'; c++){
	      X=ResCounts[c][i];
	      p=(double)X/(double)TotalSeq;
	      // if(X > 0) RE += p*log(p/q);
	      if(c != 'X' && X > 0) RE += p*log(p/q);
	      if(X > max){ max=X; maxC=c; }
	  }
	  d=(double)ResCounts['Z'][i]/(double)TotalSeq;
	  // maxC stays 0 when the column holds no residues at all; ResCounts[0]
	  // is not a valid row, so such columns are skipped and keep their '-'.
	  if(maxC == 0) continue;
	  if(debug && maxC != 'Z'){
		j++; fprintf(stderr,"%d(%c=%d): re=%.3f; bild=%.3f\n",
			j,maxC,ResCounts[maxC][i],RE,BILD[i]); 
	  }
	  if(d < 0.75 && maxC != 'X') Csq[i]=maxC;
	}
	for(c='A'; c <='Z'; c++) free(ResCounts[c]);
	free(ResCounts);
	delete dms; 
	//=================================================================== 
	// First and last columns carrying a consensus letter.  The forward scan
	// is bounded so that an all-gap consensus cannot run off the array.
	Int4 x,strt=0,end=0;
	for(i=1; i<=cLenAln; i++){ if(isalpha(Csq[i])){ strt=i; break; } }
	if(strt == 0){			// no consensus column at all.
	   if(debug) fprintf(stderr,"\n num col=%d\n",0);
	   free(BILD); free(Csq);
	   return;
	}
	for(i=cLenAln; !isalpha(Csq[i]); ) i--; end=i;	// stops at strt at the latest.

	double	*RunScore=0;	// score in nats.
	NEW(RunScore,cLenAln+9,double);
	h_type HG = Histogram("Running BILD scores (nats)",-50,200,10);
	// Running average of BILD over a window of up to 7 columns.
	for(i=strt; i<=end; i++){
	   Int4 s=MAXIMUM(Int4,i-3,1),e=MINIMUM(Int4,i+3,cLenAln);
	   for(D=0,x=0,j=s; j <=e; j++){ D += BILD[j]; x++; }
	   RunScore[i] = D/(double)x;
	   if(debug) fprintf(stderr,"%d(%c): %.1f (%.2f)\n",i,Csq[i],RunScore[i],BILD[i]);
	   IncdHist(RunScore[i], HG);
	} 
	// Blank out low-scoring columns at both ends; the range test comes first
	// so RunScore is only read inside [strt..end].
	for(i=strt; i <= end && RunScore[i] < 5.0; i++) Csq[i]='-';
	for(i=end; i >= strt && RunScore[i] < 5.0;  i--) Csq[i]='-';
	// MeanHist(HG); VarianceHist(HG);
	if(debug) PutHist(stderr,60,HG); 
	NilHist(HG);
	free(BILD); free(RunScore);
	Int4 NumCsqAln=0;
	for(i=strt; i<=end; i++){ 
	   if(isalpha(Csq[i])) NumCsqAln++;
	   if(debug) fprintf(stderr,"%c",Csq[i]);
	}
	if(debug) fprintf(stderr,"\n num col=%d\n",NumCsqAln);
	free(Csq);			// was leaked previously.
}

#if 0
// PFAM format:
>G4ZSJ8_PHYSP/197-316
.crm---EAY......LD................KRA.RGVGNl................
.......................qaNW.R..........M..RY.........M..Q.LT
..EKE.............................................IVYFKHDgdk
lek................................

>I3JUQ1_ORENI/266-368
..is--FEGY......LYv.............qeKRP.P---Pf................
.......................gsSW.V..........K..RY.........C..T.FV
..KEQki.........................................lhMVTFDHR...
...................................

// NCBI format:
>lcl|consensus
LKFRAVRVYR---------------------EPTKRVEGTLYITS------DRLILRDKN
DG---------------GLELSIPISD--IVNVNVSPQGP---------------SSRYL
VLVLKD-------------RGEFVGFSFPKE----------------------------E
DAIEISDAL

>gi|8569616|pdb|1EF1|A
NYFSIKNK--------------------------KGSELWLGVDA------LGLNIYEQN
DR--------------LTPKIGFPWSE--IRNISFND--------------------KKF
VIKPIDK------------KAPDFVFYAPRL----------------------------R
INKRILALC

>gi|5107580|pdb|1EVH|A
CQARAAVMVYDDANK------------KWVPAGGSTGFSRVHIYHHTGNNTFRVVGRKIQ
DH---------------QVVINCAIPK--GLKYNQATQ-------------------TFH
QWRDA---------------RQVYGLNFGSK----------------------------E
DANVFASAM

#endif

int     main(int argc, char *argv[])
{
        char    last_c,c,str[300],cma2fa=0;
        long    i,j,x,clen,arg,rlen,tlen,rLenAln,CutOff=50;
	char	format='m'; // 'm' == MSF; mfasta as for PFAM.
	// char	format='f'; // 'f' == mfasta-like used by the NCBI CDD.
	cma_typ	cma=0;
	FILE	*tfp;
	FILE	*outfptr=0;
	char	debug=1; 

	BEGIN MSF;
	ofp=stdout;
        if(argc < 3){
	   fprintf(stderr,"%s",CONVERT_MSA_VERSION);
	   print_error(USAGE_CONVERT_MSA);
	}
	char mode=0;
	if(strcmp(argv[1],"fa2cma") == 0){
	   for(i=3; i < argc; i++){
	      if(argv[i][0] != '-') print_error(USAGE_CONVERT_MSA);
	      switch(argv[i][1]) {
             	case 'C': CutOff=IntOption(argv[i],'C',0,100,USAGE_CONVERT_MSA); break;
             	default : print_error(USAGE_CONVERT_MSA);
	      }
	   } mode='f';
	} else if(strcmp(argv[1],"cma2fa") == 0){
        	if(argc > 3) print_error(USAGE_CONVERT_MSA); else mode='c';
	} else if(strcmp(argv[1],"cma2smpl") == 0){
        	if(argc > 3) print_error(USAGE_CONVERT_MSA); else mode='s';
	} else if(strcmp(argv[1],"cma2rtf") == 0){
	   char	**ArgV; NEWP(ArgV,argc +10,char);
	   int	ArgC=0;
	   ArgV[0]=argv[0]; ArgC++;
	   ArgV[1]=argv[2]; ArgC++;
	   ArgV[2]=AllocString("-S"); ArgC++;
	   for(i=3; i < argc; i++){
	      if(argv[i][0] != '-') print_error(USAGE_CONVERT_MSA);
	      switch(argv[i][1]) {
             	case 'F': 
		  j=IntOption(argv[i],'F',4,24,USAGE_CONVERT_MSA); 
		  sprintf(str,"-F=%d",j);
		  ArgV[ArgC]=AllocString(str); ArgC++;
		  break;
             	case 'h': 
		  if(sscanf(argv[i],"-hide=%d",&j)!=1)print_error(USAGE_CONVERT_MSA);
		  sprintf(str,"-hide=%d",j);
		  ArgV[ArgC]=AllocString(str); ArgC++;
		  break;
             	case 'S': 
		  if(argv[i][2] == '=' && argv[i][4] == 0){
			c=argv[i][3];
			switch(c){
			  case 'p': case 'l': case 'P': case 'L': break;
             		  default : print_error(USAGE_CONVERT_MSA); break;
			}
		  	sprintf(str,"-S=%c",c);
		  	ArgV[ArgC]=AllocString(str); ArgC++;
		  } else print_error(USAGE_CONVERT_MSA); break;
             	case 'N': 
		  if(sscanf(argv[i],"-Nth=%d",&j)!=1)print_error(USAGE_CONVERT_MSA);
		  else if(j < 1) print_error(USAGE_CONVERT_MSA); 
		  else { sprintf(str,"-S=%d",j); ArgV[ArgC]=AllocString(str); ArgC++; }
	 	  break;
             	default : print_error(USAGE_CONVERT_MSA);
              }
	   }
	   for(i=3; i < argc; i++) ArgV[i]=argv[i];
	   
	   chn_typ chn((Int4)(argc),ArgV);
           chn.PutHierarchicalAlignment( );
	   free(ArgV[2]); free(ArgV); 
	   return 0;
	} else print_error(USAGE_CONVERT_MSA);
    TurnOffLicenseStatement();
    a_type AB = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
    if(mode == 'c'){
#if 1
	cma=ReadCMSA2(argv[2],AB);
	if(!cma) print_error("cma file read error");
#else
	if(!cma){
	  tfp=open_file2(argv[2],".cma","r");
	  cma=ReadCMSA(tfp,AB); fclose(tfp);
	  if(!cma) print_error("cma file read error");
	}
#endif
	PutFastaCMSA(stdout,cma);
	// PutFastaAlnCMSA(stdout,cma);
	// chn_see /tmp/junk.merged -S -N=6 -F=7
	if(cma) TotalNilCMSA(cma);
    } else if(mode == 's'){
	cma=ReadCMSA2(argv[2],AB);
	if(!cma) print_error("cma file read error");
	PutSimpleAlnCMSA(stdout,cma);
	if(cma) TotalNilCMSA(cma);
    } else {
	//========== Determine & check mfasta length & number of columns ===============
	tfp=open_file2(argv[2],"","r");
	TotalSeq=0; cLenAln=0; rLenAln=0;
	last_c='\n'; clen=rlen=tlen=0;	// column_len,residue_len,total_len
	Int4	*nDel=0,*del; NEW(nDel,YYLMAX +3,Int4); 
	char	*IsMAT=0; NEW(IsMAT,YYLMAX+3,char);
	while((c=fgetc(tfp)) != EOF){
	   if(c=='>'){
		if(last_c != '\n') print_error("fasta file input error 1");
		if(TotalSeq==1){
		   cLenAln=clen; rLenAln=rlen; tLenAln=tlen; 
		   NEW(del,tLenAln+3,Int4);
		   for(x=1; x <= tLenAln; x++){ 
		      del[x]=nDel[x]; 
		   } free(nDel); nDel=del;
		} else if(TotalSeq > 0){
		  if(clen != cLenAln || tlen != tLenAln){
			fprintf(stderr,"%s\n",SeqCMA);
			fprintf(stderr,"columns=%d != %d; total len=%d != %d; Seq=%d\n",
				clen,cLenAln,tlen,tLenAln,TotalSeq);
			print_error("fasta file input error 2");
		  }
		} TotalSeq++; clen=rlen=tlen=0;
		ungetc(c,tfp); 	// get the defline...
		assert(fgets(SeqCMA,YYLMAX,tfp) != NULL); 
	   } else if(isupper(c)){
		clen++; rlen++; tlen++; 
		if(TotalSeq==1) IsMAT[tlen]=1;
	   } else if(c == '-'){
		clen++; tlen++; nDel[tlen]++; 
		if(TotalSeq==1) IsMAT[tlen]=1;
	   } else if(islower(c)){ rlen++; tlen++;}
	   else if(c == '.'){ tlen++; }
	   else if(!isspace(c)){ print_error("fasta file input error 3"); }
	   last_c=c;
	} if(clen != cLenAln || tlen != tLenAln)print_error("fasta file input error 4"); 
#if 1	
	float inc=floor((double)tLenAln/60.0);
	h_type	HG=Histogram("Percent deletions at mfasta positions",0,105,1.0);
	double *dd; NEW(dd,tLenAln+5,double);
	for(x=1; x <= tLenAln; x++){
	   if(nDel[x] > 0){
		dd[x]=(double)floor(100.0*(double)nDel[x]/(double)TotalSeq);
		IncdHist(dd[x],HG);
	   }
	} PutHist(stderr,60,HG); NilHist(HG);
#endif
	fclose(tfp);
	fprintf(stderr,"#columns =%d; Total length = %d; %d seqs.\n",cLenAln,tLenAln,TotalSeq);
	//=================================================================== 
	//========== compute residue freqs in each column =======
	// 1. obtain data for consensus sequence...
	UInt4	**ResCounts; 
	NEWP(ResCounts,'Z'+3,UInt4);
	for(c='A'; c <='Z'; c++) NEW(ResCounts[c], cLenAln +9, UInt4);
	tfp=open_file2(argv[2],"","r"); rlen=clen=0;
	while((c=fgetc(tfp)) != EOF){
	   if(c=='>'){ clen=rlen=0; ungetc(c,tfp); assert(fgets(SeqCMA,YYLMAX,tfp) != NULL); }
	   else if(isupper(c)){ clen++; ResCounts[c][clen]++; }
	   else if(c == '-'){ clen++; ResCounts['Z'][clen]++; }
	} fclose(tfp);
	//========== compute consensus seq. =======
	char	maxC,*Csq; NEW(Csq, cLenAln+9, char);
	Csq[0]=' ';
	for(j=1; j <=cLenAln; j++){
	  UInt4	maxN=0; Csq[j]='-';
	  for(c='A'; c <='Y'; c++){
	    if(ResCounts[c][j] > maxN){
		Csq[j]=c; maxN=ResCounts[c][j];
	    }
#if 0
	    if(ResCounts[c][j] > 0){
	      fprintf(stderr,"%d: '%c' = %d\n",j,c,ResCounts[c][j]);
	    }
#endif
	  }
	} // fprintf(stderr,"Csq=%s\n",Csq);
	for(c='A'; c <='Z'; c++) free(ResCounts[c]); free(ResCounts);

	//=================================================================== 
#if 1
	Int4	z;
	yyin=open_file2(argv[2],".yyin","w");	// ofp=tmpfile();
	fprintf(yyin,">consensus seq\n"); TotalSeq++;
	double cutoff=(double) CutOff;
	for(NumCol=0,z=0,x=1; x <= tLenAln; x++){
	   if(dd[x] >= cutoff){	// columns heavily deleted...
#if 0
		if(Csq[x] == '-') fprintf(yyin,".");
		else fprintf(yyin,"%c",tolower(Csq[x]));  
#else
		fprintf(yyin,".");
#endif
	   } else { 
		if(IsMAT[x]){ z++; fprintf(yyin,"%c",Csq[z]); NumCol++; }
	   }
	} fprintf(yyin,"\n");
	free(Csq); 
	FILE *fp=open_file2(argv[2],"","r");
	for(x=1; (c=fgetc(fp)) != EOF; x++){
	   if(c == '>'){
	     fprintf(yyin,"%c",c); 
	     while((c=fgetc(fp)) != EOF){
	        fprintf(yyin,"%c",c); 
		if(c == '\n') break; 
	     } x=0;
	   } else if(c=='\n'){ fprintf(yyin,"\n"); x--; }
	   else if(isspace(c)) x--;
	   else if(dd[x] >= cutoff){
		if(c == '-') fprintf(yyin,".");
		else fprintf(yyin,"%c",tolower(c));  
	   } else fprintf(yyin,"%c",c); 
	}
	fprintf(yyin,"\n"); fclose(fp); 
	fclose(yyin); 
#else
	// yyin=tmpfile(); 
	yyin=open_file2(argv[2],".yyin","w");	// ofp=tmpfile();
	fprintf(yyin,">consensus seq\n%s\n",Csq+1); TotalSeq++;
	free(Csq); 
	FILE *fp=open_file2(argv[2],"","r");
	while((c=fgetc(fp)) != EOF) fprintf(yyin,"%c",c); 
	fprintf(yyin,"\n"); fclose(fp); 
	fclose(yyin); // rewind(yyin);
#endif
	yyin=open_file2(argv[2],".yyin","r");	// ofp=tmpfile();
#if 0
	ofp=open_file2(argv[2],".cma","w");	// ofp=tmpfile();
        while(yylex()); fclose(yyin);
	fclose(ofp); 
#else
	ofp=stdout; while(yylex()); fclose(yyin);
#endif
#if 0
	ofp=open_file2(argv[2],".cma","r");	// ofp=tmpfile();
	cma=ReadCMSA(ofp,AB);  fclose(ofp); 
	sprintf(str,"%s.yyin",argv[2]); std::remove(str); 
	sprintf(str,"%s.tmp",argv[2]); std::remove(str); 
	if(!cma) print_error("cma file read error");
	// RmQueryGapsCMSA(cma); 
	if(outfptr) PutCMSA(outfptr,cma); else PutCMSA(stdout,cma); 
	TotalNilCMSA(cma);
#endif
// exit(1);
    } NilAlpha(AB);
}

