/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "sma.h"
#include "cmsa.h"
#include "residues.h"
#include "gpsi_typ.h"
#include "editcma.h"
#include "table.h"
#include "sset.h"
#include "wdigraph.h"
#include "selexCMSA.h"
#include "dheap.h"
#include "mheap.h"
#include "swaln.h"
#include "tax_typ.h"
#include "hat_typ.h"
#include "HMM_typ.h"
#include "rst_typ.h"
#include "swt_typ.h"
#include "set_typ.h"
#include "blosum62.h"
#include "pairaln.h"
#include "cma_gmb.h"
#include "gsm_typ.h"
#include "hmm_typ.h"
#include "cdhit-common.h"
#include "twkcma_usage.h"

#include "trm_typ.h"

static Int4	RunCDHit(FILE *ofp,Int4 cutoff, char *name, cma_typ cma)
// Cluster the sequences of cma with cd-hit at 'cutoff' percent identity and write the
// alignment restricted to the sequences cd-hit retained to "<name>_U<cutoff>.cma".
//   ofp:    not used by this routine.
//   cutoff: percent identity threshold; must lie in 40..100.
//   name:   prefix used for the temporary input/output files and the final cma file.
//   cma:    input alignment.
// Returns the value returned by run_cdhit().
// The temporary files "<name>.<cutoff>.in" and "<name>_U<cutoff>" are removed before
// returning; the "<name>_U<cutoff>.clstr" file is deliberately left in place.
{
	const Int4	N=NumSeqsCMSA(cma);
	a_type	AB=AlphabetCMSA(cma);
	Int4	i;
	int	seq_num;
	char	str[1024],id[100],info[200];

	if(cutoff < 40) print_error("cdhit cutoff is too low");
	if(cutoff > 100) print_error("cdhit cutoff is too high");
	// Leave room for the longest suffix appended to name below.
	if(strlen(name) > sizeof(str) - 40) print_error("RunCDHit(): file name is too long");

	// 1. Write the sequences as cd-hit input, tagging each with "Sq_<index>" so that
	//    the retained sequences can be mapped back to the alignment afterwards.
	snprintf(str,sizeof(str),".%d.in",cutoff);
	FILE	*tfp=open_file(name,str,"w");
	for(i=1; i <= N; i++) {
	   e_type sE=CopySeq(TrueSeqCMSA(i,cma));
	   StrSeqID(id,90,sE); snprintf(info,sizeof(info),"Sq_%d %s",i,id);
	   ChangeInfoSeq(info,sE);
	   PutSeq(tfp,sE,AB); NilSeq(sE);
	} fclose(tfp);

	// 2. Build the cd-hit argument vector.
	Int4	ArgC=0;
	char	**ArgV;
	NEWP(ArgV, 35, char);
	ArgV[ArgC++]=AllocString("cdhit");
	ArgV[ArgC++]=AllocString("-i");
	snprintf(str,sizeof(str),"%s.%d.in",name,cutoff);
	ArgV[ArgC++]=AllocString(str);
	ArgV[ArgC++]=AllocString("-o");
	snprintf(str,sizeof(str),"%s_U%d",name,cutoff);
	ArgV[ArgC++]=AllocString(str);
	ArgV[ArgC++]=AllocString("-c");
	// Express the cutoff as a fraction; "0.%d" would turn 100 into 0.100.
	snprintf(str,sizeof(str),"%.2f",(double)cutoff/100.0);
	ArgV[ArgC++]=AllocString(str);
	ArgV[ArgC++]=AllocString("-M");
	ArgV[ArgC++]=AllocString("0");
	Int4	threshold=0,wrdlen=0;
	if(cutoff < 65){ threshold=5; wrdlen=2; }
	else if(cutoff < 75){ threshold=2; wrdlen=4; }
	if(threshold > 0){	// lower cutoffs need explicit -t and -n settings.
	   ArgV[ArgC++]=AllocString("-t");
	   snprintf(str,sizeof(str),"%d",threshold);
	   ArgV[ArgC++]=AllocString(str);
	   ArgV[ArgC++]=AllocString("-n");
	   snprintf(str,sizeof(str),"%d",wrdlen);
	   ArgV[ArgC++]=AllocString(str);
	}
	int	rtn=run_cdhit(ArgC,ArgV);
	for(i=0; i < ArgC; i++) free(ArgV[i]);
	free(ArgV);

	// 3. Read the sequences cd-hit kept and collect their original indices.
	snprintf(str,sizeof(str),"%s_U%d",name,cutoff);
	ss_type	sqset=MkSeqSet(str,AB);
	set_typ	Set=MakeSet(N+5); ClearSet(Set);
	for(i=1; i <= NSeqsSeqSet(sqset); i++){
	   StrSeqID(id,90,SeqSetE(i,sqset));
	   // Parse outside of assert() so that the index is still read when NDEBUG is defined.
	   if(sscanf(id,"Sq_%d",&seq_num) != 1 || seq_num < 1 || seq_num > N)
		print_error("RunCDHit(): unexpected sequence identifier in cd-hit output");
	   AddSet(seq_num,Set);
	}

	// 4. Output the selected rows of the alignment.
	tfp=open_file(str,".cma","w");
	PutInSetCMSA(tfp,Set,cma);
	NilSet(Set); NilSeqSet(sqset); fclose(tfp);

	// 5. Remove the temporary files.
	snprintf(str,sizeof(str),"%s.%d.in",name,cutoff); std::remove(str);
	snprintf(str,sizeof(str),"%s_U%d",name,cutoff); std::remove(str);
	// snprintf(str,sizeof(str),"%s_U%d.clstr",name,cutoff); std::remove(str);
	return (Int4) rtn;
}

void	PutFastRepSetCMSA(FILE *fp_err, FILE *fp, Int4 percent_ident,Int4 *Nset,cma_typ cma)
{
#if 0
	BooLean	*list=RtnFastRepSetCMSA(fp_err,percent_ident,Nset,cma);
	PutSelectCMSA(fp,list,cma); free(list); 
#else
	BooLean	keep_first=FALSE;
	if(percent_ident < 0){ keep_first=TRUE; percent_ident=-percent_ident; }
	set_typ InSet=MakeSet(NumSeqsCMSA(cma)+4); FillSet(InSet);
	set_typ	Set=RtnFastRepSetCMSA(fp_err, percent_ident,InSet,cma);
	if(keep_first) AddSet(1,Set);
	PutInSetCMSA(fp,Set,cma); NilSet(Set); NilSet(InSet);
#endif
}

cma_typ	RtnCDTreeCMSA(char *filename, cma_typ MainCMA)
// Merge CDTree seed alignment into MainCMA file.
//   filename: prefix of the "<filename>.sma" file holding the seed alignment(s).
//   MainCMA:  the main alignment; its sequences get relabeled below, and it is not freed here.
// Returns a newly read alignment (named after MainCMA) built from the sequences left in
// set[1] (merged sma alignment) and set[2] (MainCMA).
{
	cma_typ cma;
	Int4	N,M,sq,s,i,j,Number,file;
	a_type 	AB=AlphabetCMSA(MainCMA);
	e_type	sE,qE,fE;
	FILE	*fp;
	char	strS[100],strQ[100];

	// 1. Open sma file and merge into one alignment.
	fp=open_file(filename,".sma","r");
	cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,AB); fclose(fp);
	fp=tmpfile(); PutMergedCMSA(fp,Number,IN_CMA);
	for(file=1; file <= Number; file++) TotalNilCMSA(IN_CMA[file]); free(IN_CMA);
	rewind(fp); cma=ReadCMSA(fp,AB); fclose(fp);

	// 2. Remove consensus sequences.
	//    Sequences that FastIdentSeqs() reports as identical to an earlier one are skipped too.
	N=NumSeqsCMSA(cma);
	BooLean	*skip; NEW(skip,N+3,BooLean);
	for(sq=1; sq <= N; sq++){
	   if(StringInSeqInfo("consensus",TrueSeqCMSA(sq,cma))){ skip[sq]=TRUE; continue; }
	   qE = TrueSeqCMSA(sq,cma);
	   for(s=sq+1; s <= N; s++){
		sE = TrueSeqCMSA(s,cma);
		if(FastIdentSeqs(sE,qE)) skip[s]=TRUE;
	   }
	} fp=tmpfile(); PutSelectCMSA(fp,skip,cma); free(skip);
	// Round trip through a temporary file to obtain the filtered alignment.
	TotalNilCMSA(cma); rewind(fp); cma=ReadCMSA(fp,AB); fclose(fp);

	fprintf(stderr,"%d seqs left in merged sma file\n",NumSeqsCMSA(cma));

	// 3. Create two sets for the two input alignments.
	//    set[1] indexes the merged sma sequences, set[2] the MainCMA sequences;
	//    both start out full with element 0 removed.
	set_typ set[5];
	set[1]=MakeSet(NumSeqsCMSA(cma)+1); FillSet(set[1]); DeleteSet(0,set[1]);
	set[2]=MakeSet(NumSeqsCMSA(MainCMA)+1); FillSet(set[2]); DeleteSet(0,set[2]);
	// LabelSeqsCMSA(cma); UnLabelSeqsCMSA(MainCMA);

	// 4. Scan through each sequence in Main alignment looking for sequences 
	//    identical to each sequence in the merged *.sma file.
	BooLean	found,report;
	Int4 	start,os=0,Missing=0,NumRmSMA=0;
	N=NumSeqsCMSA(cma); M=NumSeqsCMSA(MainCMA);
	for(i=1; i <= N; i++){
	    qE = TrueSeqCMSA(i,cma); 
	    if(strchr(SeqKey(qE),' ')==0){	// then add a space...
		sprintf(strQ,"%s ",SeqKey(qE)); ChangeInfoSeq(strQ,qE);
	    } fE=FakeSeqCMSA(i,cma); LabelSeq(fE); LabelSeq(qE); 
	    for(found=FALSE,j=1; j <= M; j++){
		// Each main sequence is unlabeled here and relabeled only if it matches qE.
		sE = TrueSeqCMSA(j,MainCMA); UnLabelSeq(sE); 	
		fE=FakeSeqCMSA(j,MainCMA); UnLabelSeq(fE);
		// 5. For each identity found...  // if(FastIdentSeqs(qE,sE))
		char x=IsSubSeq(qE,sE,&start,FALSE); 
		if(x > 0){   // x == 1 --> qE a subseq of sE; x == 2 --> sE a subseq of qE; x == 3 --> sE == qE.
		   LabelSeq(sE); 	// make sure to label both seqs.
		   fE=FakeSeqCMSA(j,MainCMA); LabelSeq(fE);
		   // Only the first match updates qE; later matches are just dropped from set[2].
		   if(found){ DeleteSet(j,set[2]); continue; } else found=TRUE;
#if 0	// DEBUG...
	if(x==1 || strstr(SeqKey(qE),"P11B_HUMAN") != 0){
		PutSeq(stderr,qE,AB); PutSeq(stderr,sE,AB); 
		report=TRUE; 
	} else report=FALSE; os=0;
#endif
		  switch (x){
		    case 1:  // --> qE a subseq of sE; retain sE.
		     // 6. Set information, phylum & kingdom of sma sequence to that of Main seq.
		     // NOTE(review): the compiled (#else) branch removes j from set[2], i.e. it
		     // drops the Main sequence rather than retaining it -- confirm which is intended.
#if 0
		     ChangeInfoSeq(SeqKey(qE),sE); // keep the rest...
		     // SetOffSetSeq(OffSetSeq(sE)-start,qE);
		fprintf(stderr,"start = %d; x = %d; %s; %c\n",start,x,PhylumSeq(sE),kingdomSeq(sE));
		fprintf(stderr,"   %s; %c; os=%d\n",PhylumSeq(qE),kingdomSeq(qE),os);
		PutSeq(stdout,qE,AB); PutSeq(stdout,sE,AB);
		     DeleteSet(i,set[1]); NumRmSMA++;
#else
		     TaxAssignSeq(PhylumSeq(sE),kingdomSeq(sE),qE);
		     DeleteSet(j,set[2]); 
#endif
		    break;
		    case 2: // --> sE a subseq of qE; retain qE.
		     // 7. Remove the Main seq from the corresponding set (set_typ).
		     TaxAssignSeq(PhylumSeq(sE),kingdomSeq(sE),qE);
		     // Shift qE's offset by the match position; applied only when non-negative.
		     os=OffSetSeq(sE)-start; 
		     if(os >= 0) SetOffSetSeq(os,qE);
		     DeleteSet(j,set[2]); 
		    break;
		    case 3: // --> sE == qE; retain qE.
#if 0
		     StrSeqID(strS,90,sE); StrSeqID(strQ,90,qE);
		     if(strncmp(strS,strQ,90) == 0){	// then simply use sE instead...
		fprintf(stderr,"start = %d; x = %d; %s; %c\n",start,x,PhylumSeq(sE),kingdomSeq(sE));
		fprintf(stderr,"   %s; %c; os=%d\n",PhylumSeq(qE),kingdomSeq(qE),os);
		PutSeq(stdout,qE,AB); PutSeq(stdout,sE,AB);
		       DeleteSet(i,set[1]); NumRmSMA++; 
		     } else {
		       TaxAssignSeq(PhylumSeq(sE),kingdomSeq(sE),qE);
		       SetOffSetSeq(OffSetSeq(sE),qE); 
		       DeleteSet(j,set[2]); 
		     }
#else
		       TaxAssignSeq(PhylumSeq(sE),kingdomSeq(sE),qE);
		       DeleteSet(j,set[2]); 
#endif
		    break;
		    default: print_error("Tweakcma: This should not happen"); break;
		  } 
		   LabelSeq(qE); LabelSeq(sE); 	// make sure to label both seqs.
#if 0
	if(report){
		fprintf(stderr,"start = %d; x = %d; %s; %c\n",start,x,PhylumSeq(sE),kingdomSeq(sE));
		fprintf(stderr,"   %s; %c; os=%d\n",PhylumSeq(qE),kingdomSeq(qE),os);
		AlnSeqSW(stderr,11,1,sE,qE,AB);
		PutSeq(stderr,qE,AB); PutSeq(stderr,sE,AB);
	}
#endif
		  // if(found) break; // look through all!!
	        } // if(!found) PutSeq(stderr,qE,AB);
	    } if(!found) Missing++;
	}
	// NumRmSMA is incremented only inside disabled (#if 0) code, so 0 is reported here.
	fprintf(stderr,"Missing = %d; %d removed from sma file\n",Missing,NumRmSMA);
	cma_typ in_cma[5]; in_cma[1]=cma; in_cma[2]=MainCMA;
#if 0
// fprintf(stderr,"DEBUG5\n");
FILE *cfp=open_file("junkC",".cma","w");
	PutSelectCMSA(cfp,0,0,0,set[1],cma); fclose(cfp);
FILE *mfp=open_file("junkM",".cma","w");
	PutSelectCMSA(cfp,0,0,0,set[2],MainCMA); fclose(mfp);
#endif
	// 8. Create and return the final cma file.
	//    The merged sma alignment is freed; MainCMA is left for the caller.
	fp=tmpfile(); PutMergedCMSA(fp,NameCMSA(MainCMA),2,set,in_cma,0);
	TotalNilCMSA(cma); rewind(fp);  cma=ReadCMSA(fp,AB); fclose(fp);
	NilSet(set[1]); NilSet(set[2]);
	return cma;
}

BooLean IsSeqEST(e_type E)
// Return TRUE if "_EST" occurs within the identifier portion of E's info line,
// that is, before the first space (or anywhere when the line contains no space).
// Returns FALSE for a null sequence or a sequence without an info line.
{
	if(E == 0 || E->info == 0) return FALSE;
	const char *tag=strstr(E->info,"_EST");
	if(tag == 0) return FALSE;
	const char *space=strchr(E->info,' ');
	// Without a space the whole line is the identifier; avoid ordering a null
	// pointer against 'tag', which is not a meaningful comparison.
	if(space == 0) return TRUE;
	return (space > tag) ? TRUE : FALSE;
}

Int4	CountEstSeqs(cma_typ cma)
// Count the sequences in cma for which IsSeqEST() returns TRUE.
{
	const Int4	total=NumSeqsCMSA(cma);
	Int4	num_est=0;
	for(Int4 sq=1; sq <= total; sq++){
		if(IsSeqEST(TrueSeqCMSA(sq,cma))) num_est++;
	}
	return num_est;
}

void    PutGoodCMSA_X(FILE *fp,double cutoff, Int4 purge_trigger_size, Int4 percentID,
						Int4 min_size, Int4 max_size, cma_typ cma)
// PutGoodCMSA_X(fp,Cut,100,70,12,200,IN_CMA[I]);
// Write to fp the best scoring sequences of a single-block alignment.
//   cutoff:             minimum GetGappedProbCMSA() score for a sequence to be kept.
//   purge_trigger_size: EST sequences are dropped only when at least this many non-EST
//                       sequences exist; if more than this many sequences are selected,
//                       the selection is reduced further with RtnRepSetCMSA().
//   percentID:          starting percent identity for that reduction (raised in steps of 9
//                       while the representative set is smaller than min_size).
//   min_size:           alignments with at most this many sequences are output unchanged;
//                       also the number of top sequences kept regardless of cutoff.
//   max_size:           maximum number of sequences selected by score.
// Sequences for which PdbSeq() is true are always retained.
// A one line summary goes to stdout and a score histogram to stderr.
{
	assert(nBlksCMSA(cma) == 1);
        Int4    i,J,N = NumSeqsCMSA(cma);
	e_type	sE;
	if(N <= min_size){		// if too small output as is.
	   fprintf(stdout,"%30s:\t%5d *\n",NameCMSA(cma),N);
	   PutCMSA(fp,cma); return; 
	}
	a_type AB=AlphabetCMSA(cma);
        BooLean *skip; NEW(skip,N+3,BooLean);
	h_type	HG=Histogram("sequence LLR scores",-2000,2000,10.0);
        double prob;
	BooLean	RmESTs=TRUE;
	if(N - CountEstSeqs(cma) < purge_trigger_size) RmESTs=FALSE;

	// Rank the sequences by score; the heap is keyed on -prob so that the minimum
	// key corresponds to the highest score.
	dh_type dH=dheap(N+2,4);
        for(J=1; J <= N; J++){
	     sE=TrueSeqCMSA(J,cma);
	     if(RmESTs && IsSeqEST(sE)){ skip[J]=TRUE; continue; }
             prob = GetGappedProbCMSA(1,J,cma);
	     if(std::isfinite(-prob)){
	         insrtHeap(J,(keytyp) -prob,dH); 
	         IncdHist((double)prob,HG);
	     } else fprintf(stderr,"prob = %g\n",-prob);
	     // Skip everything by default except PDB sequences.
	     if(PdbSeq(sE)) skip[J]=FALSE; else skip[J]=TRUE;
	}
#if 1
	// Keep sequences from the top until the score drops below the cutoff (but at
	// least min_size of them) or max_size have been taken.
	for(i=0; !emptyHeap(dH); ){
		prob =-(double)minkeyHeap(dH);
		J=delminHeap(dH); 
		if(prob >= cutoff || i < min_size){ skip[J]=FALSE; i++; } 
		else break;
		if(i >= max_size) break;  
	} Nildheap(dH);
#else	// Add routine to remove deletions.
	gss_typ *gss=gssCMSA(cma);
	Int4	col,ndel,pos[4],Len=LengthCMSA(1,cma);
	for(i=0; !emptyHeap(dH); ){
	    prob =-(double)minkeyHeap(dH);
	    J=delminHeap(dH); 
	    for(ndel=0,col=2; col <= Len-2; col++) {
	        PosSiteCMSA(1,J,pos,cma);
	        if(IsDeletedCMSA(J,pos[1]+col-1,cma)){ ndel++; break; }
	    }
	    if(ndel > 0){
		fprintf(stderr,"column = %d\n",col);
		gsq_typ *gsq=gss->GetGSQ(J); // gsq->Put(stderr,AB);
		continue;
	    } if(prob >= cutoff || i < min_size){ skip[J]=FALSE; i++; } 
	    else break;
	    if(i >= max_size) break;  
	} Nildheap(dH);
#endif
	if(i <= purge_trigger_size) { 
		PutSelectCMSA(fp,skip,cma); free(skip);
		fprintf(stdout,"%30s:\t%5d --> %5d *\n",NameCMSA(cma),N,i);
	} else {	// if too many sequences still then purge set.
		FILE *tfp=tmpfile(); PutSelectCMSA(tfp,skip,cma); free(skip);
		rewind(tfp); cma_typ tcma=ReadCMSA(tfp,AB); fclose(tfp);
		// Size of the intermediate alignment; recorded before the loops below so
		// that the summary does not report a clobbered loop counter.
		const Int4 num_selected=NumSeqsCMSA(tcma);
		Int4 size_new_set;
		skip=0;
		do {
		   if(skip) free(skip);
		   skip=RtnRepSetCMSA(stderr,percentID,&size_new_set,tcma);
		   percentID=percentID + 9;   // 70 79 88 97...
		} while(size_new_set < min_size && percentID <= 99); 
		for(Int4 s=1; s <= num_selected; s++){
			sE=TrueSeqCMSA(s,tcma); if(PdbSeq(sE)) skip[s]=FALSE;
		}
        	PutSelectCMSA(fp,skip,tcma); free(skip); TotalNilCMSA(tcma);
		fprintf(stdout,"%30s:\t%5d --> %5d --> %5d\n",NameCMSA(cma),N,num_selected,size_new_set);
		//  use -percentID to keep first sequence
	} PutHist(stderr,50,HG); NilHist(HG);
}

FILE *OpenFileToRead(char *argv1)
// Open an alignment file for reading, trying "<argv1>.cma", then "<argv1>.mma" and
// finally argv1 itself via open_file().
// Returns the stream of the first file that could be opened.
{
	static const char *extension[]={".cma",".mma"};
	char	path[500];

	for(int e=0; e < 2; e++){
	   const int len=snprintf(path,sizeof(path),"%s%s",argv1,extension[e]);
	   // A name that does not fit the buffer cannot be tried; move on.
	   if(len < 0 || len >= (int) sizeof(path)) continue;
	   FILE *fp=fopen(path,"r");
	   if(fp != NULL) return fp;
	}
	return open_file(argv1,"","r");
}


Int4	PutUniqueMergedMinColCMSA(char *filename, char *matstr, double MinCol, a_type AB)
// Read the alignment(s) in 'filename' (see OpenFileToRead()), merge them into one
// alignment, keep the sequences with residues in at least the fraction MinCol of the
// columns, remove sequences identical to an earlier one and write the result to the
// file open_file(filename,matstr,"w").
// A histogram of the aligned fractions is printed to stdout.
// Returns the number of sequences that passed the MinCol filter (i.e., before identical
// sequences were removed); when this is 0 no output file is written.
{
	Int4	Number,file;
	FILE	*fp=OpenFileToRead(filename);
	cma_typ	cma,*IN_CMA=MultiReadCMSA(fp,&Number,AB); fclose(fp);

	// -m option
	if(IN_CMA == 0 || Number < 1) print_error("PutUniqueMergedMinColCMSA(): no alignment in input file");
	assert(nBlksCMSA(IN_CMA[1]) == 1);
	if(Number > 1){
	   fp=tmpfile();
	   PutMergedCMSA(fp,Number,IN_CMA); 
	   for(file=1; file <= Number; file++) TotalNilCMSA(IN_CMA[file]);
	   free(IN_CMA);
	   rewind(fp); cma=ReadCMSA(fp,AB); fclose(fp);
	} else { cma=IN_CMA[1]; free(IN_CMA); }

	// -mincol=0.75 option
	h_type	HG=Histogram("fraction aligned",0,1,0.025);
	Int4	i,j,s,J,n,na;
	Int4	N=NumSeqsCMSA(cma);
	const Int4 Len=LengthCMSA(1,cma);
	BooLean	*skip; NEW(skip,N+3,BooLean);
	for(n=0,J=1; J <= N; J++){
		skip[J]=TRUE;
		// Count the columns in which sequence J has a defined residue.
		for(na=0,s=1; s <= Len; s++){
			Int4 r=ResidueCMSA(1,J,s,cma);
			if(r != UndefAlpha(AB)) na++; 
		}
		double fr=(double)na/(double)Len;
		if(fr >= MinCol){ skip[J]=FALSE; n++; }
		IncdHist(fr, HG);
	} 
	PutHist(stdout,60,HG); NilHist(HG); fflush(stdout);
	if(n <= 0){ free(skip); TotalNilCMSA(cma); return 0; }
	// Replace cma by the filtered alignment via a temporary file.
	fp=tmpfile(); PutSelectCMSA(fp,skip,cma); free(skip);
	TotalNilCMSA(cma); rewind(fp); cma=ReadCMSA(fp,AB); fclose(fp);

	// -U option...
	ss_type	data=TrueDataCMSA(cma);
	N=NSeqsSeqSet(data); NEW(skip,N+3,BooLean); 
	for(i=1; i < N; i++) {
	   if(skip[i]) continue;
	   e_type  qE=SeqSetE(i,data);
	   if(i % 1000 == 0) fprintf(stderr,"\r%.1f",100.0*((double)i/(double)N));
	   for(j=i+1; j <= N; j++) {
		if(skip[j]) continue;
		if(IdentSeqs(qE,SeqSetE(j,data))) skip[j]=TRUE;
	   }
	}
	fp = open_file(filename,matstr,"w");
	PutSelectCMSA(fp,skip,cma); free(skip); fclose(fp);
	TotalNilCMSA(cma);
	return n;
}


void	CreateWriteHSW(FILE *fp,cma_typ cma,char *name)
// 	create and write HSW.
// Builds an swt_typ for the single-block alignment cma, writes its HSW to fp with
// FWriteHSW() and writes the average sequence weights to "<name>.wts".
// The weighted number of sequences and the elapsed time are reported on stderr.
{
        assert(nBlksCMSA(cma) ==1); 

	const time_t start_time=time(NULL); 
	swt_typ swt = swt_typ(cma);
	hsw_typ hsw=swt.RtnHSW( );
	fprintf(stderr,"Length = %d; NWtSq = %d\n",hsw->Length,hsw->NWtSq);

	unsigned char **RtnSqWt;
	UInt4   i,*AveSqIWt=swt.GetIntegerWts(&RtnSqWt),Total;
	for(i=1,Total=0; i <= swt.NumWtSeqs(); i++)  Total += AveSqIWt[i];
	fprintf(stderr,"WtNumSeqs = %.3f\n",(double)Total/(double)swt.WtFactor());

	FWriteHSW(fp,hsw);
        FILE *tfp = open_file(name,".wts","w");
	swt.PutAveSqWt(tfp); fclose(tfp);

	// Convert once to int: passing a time_t difference to "%d" is not portable.
	const int elapsed=(int)(time(NULL)-start_time);
	fprintf(stderr, "\ttime hsw write: %d seconds (%0.2f minutes)\n",
                        elapsed,(float)elapsed/60.0);
}

void	PutBySeqID_CMA(FILE *ofp,cma_typ cma, char *seq_ids_file)
// Sort the sequences in the Template cma file to correspond to order of profiles...
// Print out an error message if Template contains 
{
fprintf(stderr,"DEBUG 1\n");
	ss_type data = TrueDataCMSA(cma);  // == True sequences...
        Int4    i,j,I,J,Score,N=NumSeqsCMSA(cma),n;
	char	*ID_E;
	a_type	A=AlphabetCMSA(cma);
	Int4	Number;
	BooLean	*skip,found;
	Int4 	num_names,namelen,NewNumber;
        char	c,*SeqName[100000]; // maximum of 1000 seqids 
        char	tmpname[100000];

fprintf(stderr,"DEBUG 2\n");
        FILE *fp = open_file(seq_ids_file,"","r");
        for(namelen=0,n=0,num_names=0; (c=getc(fp)) != EOF; n++){
            if(n==0) if(!(isalnum(c))) print_error("PutBySeqID_CMA( ) error1");
            if(c==',' || c == '\n'){
              num_names++;
	      if(num_names >= 1000000) print_error("too many seqids");
              tmpname[namelen]=0;
              SeqName[num_names]=NewString(tmpname);
              fprintf(stderr,"Name %d: %s\n",num_names,SeqName[num_names]);
              namelen=0;
            } else if(isalnum(c) || c=='_' || c=='|'){
                        tmpname[namelen]=c; namelen++;
            } else if(c != '\n'){
		tmpname[namelen]=0;
		fprintf(stderr,"c= %c; str=%s\n",c,tmpname);
		print_error("PutBySeqID_CMA() input file error2");
	    }
        } fclose(fp);
fprintf(stderr,"DEBUG 3\n");
 	Number=num_names;
	NEW(skip, N+4,BooLean);
	set_typ Set=MakeSet(Number+1); ClearSet(Set);
	char seq_id[1000];
	for(J=1; J <= N; J++) skip[J]=TRUE;
	for(n=0,J=1; J <= N; J++){
	   e_type E = TrueSeqCMSA(J,cma);	// get template sequence
	   // StrSeqID(char *str, Int4 n, e_type E);
	   // StrSeqID(seq_id,80,E);
	   found=FALSE;
	   for(I = 1; I <= Number; I++){
		if(StringInSeqInfo(SeqName[I],E)){ skip[J]=FALSE; found=TRUE; AddSet(I,Set); }
		// if(strcmp(seq_id,SeqName[I])==0) skip[J]=FALSE;
	   }
	   if(0 && !found) {
		PutSeqID(stderr,E);
		fprintf(stderr,": Sequence %d not found in identifier file.\n",J); 
	   }
	} PutSelectOneCMSA(ofp,skip,cma); fprintf(stderr,"\n\n%d identifiers.\n",Number);
fprintf(stderr,"DEBUG 4\n");
	for(I = 1; I <= Number; I++){
		if(!MemberSet(I,Set)) fprintf(stderr,"%s: identifier not found in cma file\n",SeqName[I]);
	} NilSet(Set);
#if 0
	fp = open_file(seq_ids_file,"missed.cma","w");
	for(J=1; J <= N; J++) if(skip[J]) skip[J]=FALSE; else skip[J]=TRUE;
	PutSelectOneCMSA(fp,skip,cma); fclose(fp);
#endif
	free(skip); 
}

void CMSAToPHYLIP(FILE *ofp, cma_typ cma)
{
	gss_typ		*gss = gssCMSA(cma);
	Int4 		i, j, k, m, p, t, pos[5], nins, cons_len;
	Int4		nseq = NumSeqsCMSA(cma), len = TotalLenCMSA(cma);
	e_type 		E,Etrue,Efake;
	Int4		*ins;
	unsigned char 	*ptr,*ptrf,*ptrt;
	a_type 		A = AlphabetCMSA(cma);
	BooLean		is_a2m=TRUE;

	// print_error("This needs to be checked!!");
	NEW(ins,len+1,Int4);
	E = MkConsensusCMSA(cma);
	cons_len = LenSeq(E); 
	for(i=1;i<=nseq;i++){
		PosSiteCMSA(1,i,pos,cma);
		p = pos[1];
		for(j=1;j<=cons_len;j++){
			nins = InsertionCMSA(i,p+j-1,cma);		
			if(nins > ins[j]) ins[j] = nins;
		}
	}

	char s2[] = " ";

	fprintf(ofp,"    %d   %d\n",nseq,cons_len);
	for(i=1;i<=nseq;i++){
		Efake = gss->FakeSeq(i);
		Etrue = gss->TrueSeq(i);
		ptrf = SeqPtr(Efake);
		ptrt = SeqPtr(Etrue);
//		fprintf(ofp,"%-21.20s",SeqKey(Etrue));		
//	fprintf(ofp,"example ");
//	fprintf(ofp,"%-30s_%d ",strtok(SeqKey(Etrue),s2),i);
		char str[100];
		StrSeqID(str, 8, Efake);
		// PutShortSeqID(ofp,Efake);
		fprintf(ofp,"%-10s",str);
		PosSiteCMSA(1,i,pos,cma);
		p=pos[1];
		for(k=p,j=1;j<=cons_len;j++){
			if(ptrf[k] == 0) { fprintf(ofp,"-"); k++; }
			else fprintf(ofp,"%c",AlphaChar(ptrf[k++],A));
#if 0
			nins = InsertionCMSA(i,p+j-1,cma);
			m = gss->TruePos(i,k-1) + 1;
			for(t=1;t<=nins;t++) {
				fprintf(ofp,"%c",AlphaChar(ptrt[m++],A));
			}
			for(t=nins+1;t<=ins[j];t++) {
				fprintf(ofp,"-");
			}
#endif
		}
		fprintf(ofp,"\n");
	}
	fprintf(ofp,"\n"); fprintf(ofp,"\n");
	free(ins);
}

// MOVE THESE BACK TO cmsa_operations.cc AFTER DEBUGGING...

Int4     ParseIntegers2(char *str, Int4 *values, const char *msg)
// Parse a comma separated list of integers and integer ranges.
// input string: "3,5-7,9,11-17"
// returns:      12 and sets values[1..12]=[3,5,6,7,9,11,12,13,14,15,16,17]
// values[0] is not written by this routine.
// print_error(msg) is called on malformed input; progress is traced on stderr.
{
	Int4	count=1,lo,hi;

fprintf(stderr,"str='%s'\n",str);
	if(!isdigit(str[0])) print_error(msg);
	while(*str != 0){
	   if(*str == ','){ count++; str++; continue; }	// next slot.
	   if(!isdigit(*str)){ print_error(msg); continue; }
fprintf(stderr,"str[%d]='%c'",count,*str);
	   if(sscanf(str,"%d", &lo) != 1){ print_error(msg); continue; }
fprintf(stderr,"=%d (n=%d)\n",lo,count);
	   values[count]=lo;
	   while(isdigit(*str)) str++;
	   if(*str != '-') continue;	// a single value, not a range.

	   // Range "lo-hi": append lo+1..hi.
	   str++;
	   if(!isdigit(*str)) print_error(msg);
	   if(sscanf(str,"%d", &hi) != 1) print_error(msg);
	   if(hi <= lo) print_error(msg);
	   for(Int4 v=lo+1; v <= hi; v++) values[++count]=v;
	   while(isdigit(*str)) str++;
	}
fprintf(stderr,"rtn: n=%d\n",count);
	return count;
}


Int4	PutClusterOfCMSA(char *name, Int4 percent_ident,Int4 min_size,
	BooLean	IncludeFirst,BooLean UseNumPhyla, cma_typ cma)
// print out separate cmas for each cluster from the input cma.
//   name:          prefix of the output files "<name>_<k>.cma".
//   percent_ident: sequences at or above this percent identity (over the aligned columns)
//                  are linked into the same cluster; must be in 1..100.
//   min_size:      minimum size for a cluster to be output; the size is the number of
//                  distinct phyla when UseNumPhyla is set, else the number of sequences.
//   IncludeFirst:  if set, sequence 1 is added to every cluster file.
// "<name>_0.cma" receives, for every cluster, the member with the highest
// GetTotalGappedProbCMSA() score; "<name>_1.cma", "<name>_2.cma", ... receive the clusters.
// NOTE: cma is renamed (RenameCMSA) to "cluster<k>" as a side effect.
// Returns the number of cluster files written (not counting "<name>_0.cma").
{
	ds_type sets;
	Int4	b,i,j,N=NumSeqsCMSA(cma);
	Int4	score,nblk=nBlksCMSA(cma);
	Int4	s,si,sj,pos[3],seti,setj;
	unsigned char	*isq,*jsq;
	Int4	NumPhyla=0,**NumPhylaInSet=0;
	Int4    *phyla=GetPhylaSeqSet(stderr, &NumPhyla, TrueDataCMSA(cma));

	assert(percent_ident > 0 && percent_ident <= 100);
	// 1. Cluster sequences into sets at the percent identity cutoff.
	Int4 total = TotalLenCMSA(cma);
	sets = DSets(N);
	for(i = 1; i < N; i++){
	  isq = SeqPtrCMSA(i,cma);
	  seti=findDSets(i,sets);
	  for(j=i+1; j <= N; j++){
	     setj=findDSets(j,sets);
	     if(seti != setj){
	      jsq = SeqPtrCMSA(j,cma);
	      for(score=0,b=1; b <= nblk; b++){
		PosSiteCMSA(b,i,pos,cma); si=pos[1];
		PosSiteCMSA(b,j,pos,cma); sj=pos[1];
		for(s=1; s <= LengthCMSA(b,cma); s++,si++,sj++){
			if(isq[si] == jsq[sj]) score++;
		}
	      } score = (Int4) floor(((double)score*100.0/(double)total) +0.5);
	      if(score >= percent_ident) seti=linkDSets(seti,setj,sets);
	     }
	  }
	}
	// 2. Record the members and the size of each set; skipseq[k] (indexed by the set's
	//    canonical element k) is FALSE for the sequences that belong to set k.
	BooLean	**skipseq;
	Int4	*size;
	NEWP(skipseq, N+2, BooLean);
	NEW(size, N+2, Int4);
	if(UseNumPhyla) NEWP(NumPhylaInSet,N+3,Int4);	// New: use phyla not number seqs.
        for(i=1; i <= N; i++){
	  seti=findDSets(i,sets);
	  if(skipseq[seti]==0) {
		if(UseNumPhyla) NEW(NumPhylaInSet[seti],NumPhyla+3,Int4);
		NEW(skipseq[seti],N+2, BooLean); 
        	for(j=1; j <= N; j++) skipseq[seti][j] = TRUE;
	  } skipseq[seti][i]=FALSE; 
	  if(UseNumPhyla){
	     Int4 ph=phyla[i];
	     if(NumPhylaInSet[seti][ph] == 0) size[seti]++;	// first time for this phylum
	     NumPhylaInSet[seti][ph]++;	// another sequence from phylum ph in sequence set_i
	  } else size[seti]++;
	} 

	// 3. Within each set pick the sequence with the highest profile score.
	double *prob=0,best_prob;
	Int4	best_sq;
	NEW(prob,N+2,double); NEW(skipseq[0],N+2, BooLean); 
        for(i=1; i <= N; i++){
		skipseq[0][i]=TRUE; 
		prob[i]=GetTotalGappedProbCMSA(i,cma);
	}
        for(i=1; i <= N; i++){
	  if(skipseq[i]==0) continue; // no set i; this sequence in another set.
	  best_prob=-DBL_MAX;
	  for(best_sq=0,j=1; j <= N; j++){
		if(skipseq[i][j] == FALSE){	// indicates sequence j is in set i.
		  if(prob[j] >= best_prob){
			best_prob=prob[j]; best_sq=j;
		  }
		}
	  }
	  assert(skipseq[0][best_sq]==TRUE); // must be true at this point.
	  skipseq[0][best_sq]=FALSE;	// retain best sequence in set i.
	} free(prob);

	// 4. output the cma files;
	char str[100];
	sprintf(str,"_0.cma");
        FILE *fp = open_file(name,str,"w");
	sprintf(str,"cluster0"); RenameCMSA(str,cma);
	PutSelectCMSA(fp,skipseq[0],cma); fclose(fp);
	free(skipseq[0]);
	for(s=0,i=1; i <= N; i++){
	   if(skipseq[i] == 0) continue;
	   if(size[i] >= min_size){
		if(IncludeFirst) skipseq[i][1]=FALSE; // always include the concensus seq.
		s++;
		if(UseNumPhyla){
		   fprintf(stderr,"set %d(%d): # phyla = %d; ",s,i,size[i]);
		} else fprintf(stderr,"set %d(%d): # seq = %d; ",s,i,size[i]);
		sprintf(str,"_%d.cma",s);
           	fp = open_file(name,str,"w");
		sprintf(str,"cluster%d",s); RenameCMSA(str,cma);
	   	Int4 NumPrint=PutSelectCMSA(fp,skipseq[i],cma); 
		fprintf(stderr,"# seq = %d\n",NumPrint);
		fclose(fp);
	   }
	   free(skipseq[i]);	// release the membership array of small sets as well.
	} NilDSets(sets); free(skipseq); free(size);
	if(UseNumPhyla){	// New phylogenetic info.
           for(i=1; i <= N; i++){ if(NumPhylaInSet[i]) free(NumPhylaInSet[i]); }
	   free(NumPhylaInSet);
	}
	if(phyla) free(phyla);
	return s;
}

Int4    *SortByKingdomPhylumCMSA(cma_typ cma)
// Return a newly allocated list giving an ordering of the sequences in cma: list[1] is
// always sequence 1 and list[2..N] holds sequences 2..N in the order they leave a heap
// keyed on the position of their kingdom within "FMEVABX" and on the upper case first
// letter of their phylum.
// The caller is responsible for freeing the list.
{
	char	kingdom,phylum;
	char	Kingdoms[]="FMEVABX";
	ss_type data = TrueDataCMSA(cma);  // == True sequences...
        Int4    *list,i,j,k,N=NumSeqsCMSA(cma);
	keytyp	key;
	Int4	K;
	char	*Phylum;

	dh_type dH=dheap(N+2,4);
	for(j=2; j<=N; j++){
		e_type sE=SeqSetE(j,data);
		kingdom=KingdomSeq(sE); // sets to upper case.
		// K stays 0 for a kingdom that is not listed.
		for(K=0,k=0; Kingdoms[k]; k++){
			assert(k < 255);
			if(Kingdoms[k]==kingdom){
			   K = (Int4) (255-k);
			}
		} 
		Phylum=PhylumSeq(sE); 
		if(Phylum) phylum=toupper(Phylum[0]); 
		else phylum=0;
		key = 10000*(keytyp)K - (keytyp) phylum;
		insrtHeap(j,-(keytyp)key,dH);
	}
        NEW(list, N+3,Int4); list[1]=1;
	for(i=2; !emptyHeap(dH); i++){
		// Pop outside of assert() so that the heap is still drained when NDEBUG is defined.
		j=delminHeap(dH);
                assert(j != 0);
                list[i]=j;
        } Nildheap(dH);
	return list;
}

void	SortBySeqID_CMA(cma_typ cma, char *seq_ids_file)
// Sort the sequences in the Template cma file to correspond to order of profiles...
// The identifier file lists names (alphanumeric characters and '_') separated by commas
// or newlines; the number of names must equal the number of sequences in cma.
// Each sequence is assigned to the first still unassigned name that occurs within its
// sequence identifier, and the reordered alignment is written to stdout.
// print_error() is called if a sequence cannot be assigned to a name.
{
	enum { MaxNames=1000, MaxNameLen=1000 };
        Int4    *list,I,J,N=NumSeqsCMSA(cma),n;
	a_type	A=AlphabetCMSA(cma);
	Int4	Number;
	BooLean	*Found;
	Int4 	num_names=0,namelen=0;
	int	c;		// int (not char) so that EOF is distinguishable from data.
        char	*SeqName[MaxNames]; // names are stored at indices 1..MaxNames-1.
        char	tmpname[MaxNameLen];

        FILE *fp = open_file(seq_ids_file,"","r");
        for(n=0; (c=getc(fp)) != EOF; n++){
            if(n==0) if(!isalnum(c)) print_error("-m option input file error1");
            if(c==',' || c=='\n'){
              num_names++;
	      if(num_names >= MaxNames) print_error("SortBySeqID_CMA() error: too many seqids");
              tmpname[namelen]=0;
              SeqName[num_names]=NewString(tmpname);
              fprintf(stderr,"Name %d: %s\n",num_names,SeqName[num_names]);
              namelen=0;
            } else if(isalnum(c) || c=='_'){
			if(namelen >= MaxNameLen-1) print_error("SortBySeqID_CMA() error: seqid too long");
                        tmpname[namelen]=c; namelen++;
            } else print_error("SortBySeqID_CMA() input file error2");
        } fclose(fp);
	if(namelen > 0){	// last identifier was not followed by a newline or comma.
              num_names++;
	      if(num_names >= MaxNames) print_error("SortBySeqID_CMA() error: too many seqids");
              tmpname[namelen]=0;
              SeqName[num_names]=NewString(tmpname);
              fprintf(stderr,"Name %d: %s\n",num_names,SeqName[num_names]);
	}
 	Number=num_names;
	if(N != Number){
		fprintf(stderr,"N = %d != %d = Number\n",N,Number);
		print_error("SortBySeqID_CMA() error 1: inconsistent numbers");
	}
	NEW(list, N+3,Int4);		// list[I] = sequence assigned to the I'th name.
	NEW(Found, Number+4,BooLean);	// Found[J] = sequence J has been assigned.
	// list[1]=1;	// keep template consensus sequence first.
	char seq_id[1000];
	for(J=1; J <= N; J++){
	   e_type E = TrueSeqCMSA(J,cma);	// get template sequence
	   StrSeqID(seq_id,80,E);
	   for(I = 1; I <= Number; I++){
		if(strstr(seq_id,SeqName[I])!=0 && list[I] == 0){
			list[I]=J;	// found corresponding profile
			Found[J]=TRUE;
			break;
		}
	   }
	   if(I > Number){	// the loop ended without assigning sequence J.
		PutSeq(stderr,E,A);
		fprintf(stderr,"I = %d; Number=%d\n",I,Number);
		print_error("SortBySeqID_CMA() error 3: missing profile");
	   }
	}
	for(J = 1; J <= N; J++){
		if(!Found[J]){
		    e_type E = TrueSeqCMSA(J,cma);
		    fprintf(stderr,"template seq. %d\n",J);
		    PutSeq(stderr,E,A);
		    print_error("SortBySeqID_CMA() error 4: unused profile");
		}
	}
	PutSelectOneCMSA(stdout,0,list,cma);
	free(list); free(Found);
}

char	*FindSeedPattern(FILE *fp,Int4 Number, cma_typ *cma)
// 		char *pttrn=FindSeedPattern(fp,IN_CMA);
// For each column of the single-block alignment cma[1], find the residue set (among the
// legal sets containing the consensus residue) that best distinguishes the weighted
// counts obtained with swt_typ(cma[1],cma[2]) from those obtained with
// swt_typ(cma[1],cma[3]), and print the columns with a positive score to stderr from
// best to worst.
//   fp: not used by this routine.
// No pattern string is built: the routine always returns 0.
// NOTE(review): cma[3] is used although only Number >= 2 is asserted -- confirm that
// callers always supply at least three alignments.
{
	Int4 i,j;
	char *pttrn=0;
	assert(Number >=2);	// checked before cma[2] is touched.
        assert(nBlksCMSA(cma[1]) ==1); assert(nBlksCMSA(cma[2]) ==1);
	assert(LengthCMSA(1,cma[1]) == LengthCMSA(1,cma[2]));
	Int4 Length=LengthCMSA(1,cma[1]);
        a_type  AB=AlphabetCMSA(cma[1]);
	rst_typ rst=rst_typ('L');	// get full residue sets
	sst_typ **sst=rst.LegalResSets( );
	e_type  cE1=MkConsensusCMSA(cma[1]);
	sst_typ *sst_Best;
	NEW(sst_Best, Length +4, sst_typ);
	dh_type dH = dheap(Length+5,4);
	double	*FG,*BG;
	NEW(FG, Length +4, double);
	NEW(BG, Length +4, double);
	swt_typ swt1 = swt_typ(cma[1],cma[2],FALSE);
	swt_typ swt2 = swt_typ(cma[1],cma[3],FALSE);
	UInt4   wtFactor=swt1.WtFactor();
	double  **WtCnts1 = swt1.ObsWtCnts();
	double  **WtCnts2 = swt2.ObsWtCnts();
	for(j=1; j <= Length; j++){
	   unsigned char r,r1=ResSeq(j,cE1);
	   sst_typ Bsst = 0;
	   double score,best_score=-99999.9,best_n=0.0,best_d=0.0;
	   double n,d;
	   for(Int4 s=1; sst[r1][s]; s++){
		sst_typ xsst = sst[r1][s];
	        double	r1_f=0,r1_n=0,r2_f=0,r2_n=0;
		// Split the weighted counts of column j into members and non-members of xsst.
		for(r=1; r <= nAlpha(AB); r++){
		   if(MemSset(r,xsst)){
			r1_f += WtCnts1[j][r];
			r2_f += WtCnts2[j][r];
		   } else {
			r1_n += WtCnts1[j][r];
			r2_n += WtCnts2[j][r];
		   }
		}
		// add pseudocounts
		n=((double)(r1_f+5*wtFactor)/(double)(r1_f+r1_n + 10*wtFactor));
		d=((double)(r2_f+5*wtFactor)/(double)(r2_f+r2_n + 10*wtFactor)); 
		// Relative entropy between the two member frequencies.
		score = n*log((n)/d) + (1-n)*log((1-n)/(1-d));
		if(score > best_score)
			{ best_score=score; Bsst=xsst; best_n=n; best_d=d; }
	   } 
	   if(best_score > 0.0) 
	   {
	      // Keyed on the negated score so that the best column leaves the heap first.
	      sst_Best[j]=Bsst; insrtHeap(j,(keytyp)-best_score,dH);
	      FG[j]=best_n; BG[j]=best_d;
	   }
	}
	for(i=0; !emptyHeap(dH); ){
		double score=-(double)minkeyHeap(dH);
		j=delminHeap(dH);
		i++;
		fprintf(stderr,"%d: sst[%d]: ",i,j);
                PutSST(stderr,sst_Best[j],AB);
                fprintf(stderr,"%d (%.3f)(%.3f/%.3f)\n",j,score,FG[j],BG[j]);
	}
	free(FG); free(BG); free(sst_Best);
	Nildheap(dH); NilSeq(cE1);
	return pttrn;
}

void	PartitionBySeedAlnCMSA(FILE *fp, cma_typ cma, Int4 NumSeedAln, cma_typ *seed_cma)
// Partition the alignment cma based on sequence scores against the seed cma files
// and write one sub-alignment per seed to fp.
//   fp         - output stream; receives NumSeedAln alignments via PutSelectCMSA().
//   cma        - alignment whose sequences 1..NumSeqsCMSA(cma) are partitioned.  It is
//                renamed (RenameCMSA) after each seed in turn, so on return it carries
//                the name of the last seed alignment.
//   NumSeedAln - number of seed alignments; seed_cma[1..NumSeedAln] are used.
//   seed_cma   - 1-based array of seed alignments.
// Each sequence J is assigned to the seed n with the highest
// PseudoAlnScoreTwoCMSA(1,SeedCMA[n],J,cma), where SeedCMA[n] is the alignment returned
// by AddConsensusCMSA(seed_cma[n]); ties go to the lowest n.  Histograms of the
// assignments and of the best scores are printed to stdout.
{
        Int4    n,J,N=NumSeqsCMSA(cma);
	Int2	*Partition;	// Partition[J] == index of the best scoring seed for sequence J.
	cma_typ *SeedCMA;

	NEW(SeedCMA, NumSeedAln +3, cma_typ);
	NEW(Partition, N+3, Int2);
	for(n=1; n <= NumSeedAln; n++){ SeedCMA[n] = AddConsensusCMSA(seed_cma[n]); }

	// Pass 1: find the best scoring seed alignment for every sequence.
	h_type HG=Histogram("partitions", 0,NumSeedAln,1.0);
	h_type sHG=Histogram("best scores", -100,5000,25.0);
	for(J=1; J <= N; J++){
	   Int4 BestScore = -99999999,BestN=0;
	   for(n=1; n <= NumSeedAln; n++){
		Int4 Score=PseudoAlnScoreTwoCMSA(1,SeedCMA[n],J,cma);
		if(Score > BestScore){ BestScore=Score; BestN=n; }
	   }
	   assert(BestN != 0);	// also fires when NumSeedAln < 1.
	   Partition[J]=BestN;
	   IncdHist((double)BestN,HG);
	   IncdHist((double)BestScore,sHG);
	}
        PutHist(stdout,50,HG); NilHist(HG); 
        PutHist(stdout,50,sHG); NilHist(sHG); 

	// The scoring alignments are no longer needed; release the pointer array.
	// NOTE(review): the alignments created by AddConsensusCMSA() are still not
	// destroyed here; the proper destructor for them is not visible in this file.
	free(SeedCMA);

	// Pass 2: output the sequences assigned to each seed as a separate alignment.
	HG=Histogram("partitions", 0,NumSeedAln,1.0);
	for(n=1; n <= NumSeedAln; n++){
	   BooLean *skip=0;
	   NEW(skip,N+3,BooLean); 	// relies on NEW() returning zeroed memory (skip[J]==FALSE).
	   for(J=1; J <= N; J++){
		if(Partition[J] != n) skip[J]=TRUE;
	        else IncdHist((double)n,HG);
	   }
	   RenameCMSA(NameCMSA(seed_cma[n]),cma);
	   PutSelectCMSA(fp,skip,cma);
	   free(skip);
	} free(Partition);
        PutHist(stdout,50,HG); NilHist(HG); 
}

void	SortTemplateCMSA(cma_typ cma, char *name_profiles)
// Sort the sequences in the Template cma file to correspond to the order of the
// profiles in the file <name_profiles>.cma and print the reordered template to stdout.
//   cma           - template alignment; sequence 1 is kept first and sequences 2..N are
//                   matched (FastIdentSeqs) against the first sequence of each profile.
//   name_profiles - file name (without the ".cma" extension) of the profile alignments.
// Calls print_error() if the template does not contain exactly one sequence per profile
// (plus the leading sequence), if two template sequences match the same profile, or if
// a template sequence matches no profile.
// NOTE(review): the code assumes print_error() does not return.
{
        Int4    *list,I,J,N=NumSeqsCMSA(cma);
	e_type	*ListE;
	a_type	A=AlphabetCMSA(cma);
	Int4 Number;
	BooLean	*Found;

        FILE *fp = open_file(name_profiles,".cma","r");
        cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
        fclose(fp);
	if(N != Number+1){
		fprintf(stderr,"N = %d != %d = Number + 1\n",N,Number+1);
		print_error("SortTemplateCMSA() error 1: inconsistent numbers");
	}
	NEW(list, N+3,Int4);		// list[I+1] == template sequence matching profile I.
	NEW(ListE, Number+4,e_type);	// ListE[I] == first sequence of profile I (not owned).
	NEW(Found, N+3,BooLean);	// Found[J] == template sequence J matched a profile.
	for(I = 1; I <= Number; I++){
		ListE[I] = TrueSeqCMSA(1,IN_CMA[I]);	// get first sequence for each profile.
	}
	list[1]=1;	// keep template consensus sequence first.
	for(J=2; J <= N; J++){
	   e_type E = TrueSeqCMSA(J,cma);	// get template sequence
	   for(I = 1; I <= Number; I++){
		if(FastIdentSeqs(E,ListE[I])){
		   if(list[I+1] != 0 || Found[J]){
		    	fprintf(stderr,"profile %d: %s\n",I,NameCMSA(IN_CMA[I]));
		    	PutSeq(stderr,ListE[I],A);
			print_error("SortTemplateCMSA() error 2: multiple identical seqs");
		   } else {
			// I'th + 1 sequence in output is sequence J for input template cma 
			list[I+1]=J;
			Found[J]=TRUE;
			break;
		   }
		}
	   }
	   if(I > Number){	// inner loop ran to completion: no profile matches sequence J.
		PutSeq(stderr,E,A);
		fprintf(stderr,"I = %d; Number=%d\n",I,Number);
		print_error("SortTemplateCMSA() error 3: missing profile");
	   }
	}
	// Defensive re-check; cannot trigger as long as error 3 above is fatal.
	for(J = 2; J <= N; J++){
		if(!Found[J]){
		    e_type E = TrueSeqCMSA(J,cma);
		    fprintf(stderr,"template seq. %d\n",J);
		    PutSeq(stderr,E,A);
		    print_error("SortTemplateCMSA() error 4: unused profile");
		}
	}
	PutSelectOneCMSA(stdout,0,list,cma);
	free(list); free(ListE); free(Found);
	// NOTE(review): IN_CMA and the alignments it holds are still not released; the
	// matching destructor for MultiReadCMSA() output is not visible in this file.
}

void	SortBySimilarityCMSA(FILE *fp,Int4 key_seq, Int4 put_best, cma_typ cma)
// Output (to fp) the aligned sequences sorted by score to sequence key_seq.
//   fp       - output stream passed to PutSelectOneCMSA().
//   key_seq  - index (1..N) of the sequence every other sequence is scored against
//              with PseudoAlnScoreCMSA(); print_error() is called if out of range.
//   put_best - maximum number of top scoring sequences to select.
//   cma      - the alignment.
// The put_best highest scoring sequences are placed first in the output order (best
// first) and are the only ones with skip[] == FALSE; all remaining sequences follow in
// their original order with skip[] == TRUE.  A histogram of the selected scores is
// printed to stderr.
{
        Int4    *list,i,j,I,J,Score,N=NumSeqsCMSA(cma);
	BooLean	*skip=0;
	double	dd,MaxScore=0.0;

	if(key_seq < 1 || key_seq > N) print_error("SortBySimilarityCMSA( ) input error");
	dh_type	dH=dheap(N+2,4);
        NEW(list, N+3,Int4);
	NEW(skip,N+3,BooLean); 
	for(J=1; J <= N; J++){
		skip[J]=TRUE;
		Score=PseudoAlnScoreCMSA(key_seq, J,cma);
		insrtHeap(J,-(keytyp)Score,dH); 	// negate: the min-heap then yields best scores first.
		dd=(double) Score;
		if(dd > MaxScore) MaxScore=dd;
	}
	dd=MaxScore/50.0;
	i=(Int4)ceil(MaxScore);
	j=(Int4)ceil(dd);
	h_type HG=Histogram("scores versus key sequence",0,i,j);
	for(i=I=0; !emptyHeap(dH); I++){
		if(I >= put_best) break;
		dd=-minkeyHeap(dH);
		IncdHist(dd,HG);
		// Pop outside of assert() so that the heap is still drained when the
		// code is compiled with NDEBUG.
		J=delminHeap(dH);
                assert(J != 0); 
		skip[J]=FALSE;
		i++; list[i]=J; 
	} Nildheap(dH); 
	PutHist(stderr,60,HG); NilHist(HG);
	// Append the unselected sequences in their original order.
	for(J=1; J <= N; J++){
		if(skip[J]){  i++; assert(i <= N); list[i]=J; }
	}
	assert(i == N);
	PutSelectOneCMSA(fp,skip,list,cma);
	free(list); free(skip);
}

Int4    *SortByKingdomCMSA(cma_typ cma)
// Return a newly allocated, 1-based list of the sequence indices of cma grouped by
// kingdom.  Sequence 1 always stays first; sequences 2..N follow grouped in the order
// of the kingdom codes 'F','M','E','V','A','B' and finally those whose KingdomSeq()
// is 0 (the 'X' entry).  Progress is traced on stderr.  Asserts that every sequence
// was placed, i.e. that no sequence has a kingdom code outside of this list.
// The caller owns the returned array.
{
	const char order[]="FMEVABX";
	ss_type	seqs = TrueDataCMSA(cma);  // == True sequences...
	Int4	total=NumSeqsCMSA(cma),filled=1,sq,*list;

        NEW(list, total+3,Int4);
	list[1]=1;
	for(int k=0; order[k] != 0; k++){
	   char target=order[k];
	   fprintf(stderr,"Kingdoms[%d]=%c\n",k,target);
	   if(target=='X') target = 0;		// 'X' selects sequences with kingdom code 0.
	   for(sq=2; sq <= total; sq++){
		e_type sE=SeqSetE(sq,seqs);
		char kingdom=KingdomSeq(sE);
		if(kingdom != target) continue;
		filled++; list[filled]=sq;
		fprintf(stderr," kingdom[%d,%d]='%c'\n",sq,filled,kingdom);
	   }
	}
	assert(filled==total);
	return list;
}

Int4	InsDelTransCMSA(Int4 *InDelTrans, cma_typ cma)
// Run IronOutOperation() on the operation string of every sequence in cma, rebuild an
// alignment from the resulting strings with MakeCMSA() and write it to the file
// "junk_test.cma".  The name of cma and the number of fixes are reported on stderr for
// every sequence for which IronOutOperation() returns a positive count.
// NOTE(review): InDelTrans is never written and the return value is always 0, unlike
// InsDelTransCMSA2(); confirm what callers expect before relying on either.
// NOTE(review): the arrays and the alignment built here are never released.
{
    const Int4	NumSq=NumSeqsCMSA(cma);
    gss_typ	*gss=gssCMSA(cma);
    char	**Operation=0;	// Operation[sq] == (ironed out) operation string of sequence sq.
    Int4	*Start;		// Start[sq] == TruePosCMSA(sq,1,cma).
    e_type	*ListE;		// ListE[sq] == TrueSeqCMSA(sq,cma).
    Int4	sq_hits=0;

    NEWP(Operation,NumSq+3,char);
    NEW(Start,NumSq+3,Int4);
    NEW(ListE,NumSq+3,e_type);
    for(Int4 sq=1; sq <= NumSq; sq++){
	char *operation=gss->Operation(sq);
	Int4 num_fix=IronOutOperation(operation);
	if(num_fix > 0){
	      fprintf(stderr,"\n --> '%s'\n num_fix=%d;\n",NameCMSA(cma),num_fix);
	}
	Operation[sq]=operation;
        Start[sq]=TruePosCMSA(sq,1,cma);
	ListE[sq]=TrueSeqCMSA(sq,cma);
    }
    // Make a new cma file, using: 
    // cma_typ MakeCMSA(e_type *ListE,Int4 N,char **Operation,Int4 *Start,cma_typ cma);
    cma_typ cma2=MakeCMSA(ListE,NumSq,Operation,Start,cma);
    FILE *fp = open_file("junk_test",".cma","w");
    PutCMSA(fp,cma2);
    fclose(fp);
    return sq_hits;
}

Int4	InsDelTransCMSA2(Int4 *InDelTrans, cma_typ cma)
// return number of sequences with insertion-to-deletion or del-to-ins transitions
//   InDelTrans - output; receives the total number of transitions reported.
//   cma        - alignment; must consist of a single block (else print_error()).
// Every transition found is reported on stderr together with the sequence identifier.
// NOTE(review): the two "find start sites" loops pass the column index s directly to
// IsDeletedCMSA(), whereas every other call passes pos[1]+s-1; this is presumably the
// bug the runtime warning below refers to.  It is left unchanged here.
{
    if(nBlksCMSA(cma) != 1) print_error("InsDelTransCMSA2() requires a single block");
    Int4	sq,hits,sq_hits,s,pos[4],len=LengthCMSA(1,cma),N=NumSeqsCMSA(cma);

    for(hits=sq_hits=0,sq=1; sq<=N; sq++){
	e_type sE=TrueSeqCMSA(sq,cma);
        BooLean	found=FALSE;	// has the header line for this sequence been printed?
	Int4	del,ins;

	// Look for insertions and deletions in cma file for 
	// query sequence to determine the query stop position!
	// The call is made outside of assert() so that pos[] is still filled in when
	// the code is compiled with NDEBUG.
	const bool has_site = PosSiteCMSA(1, sq, pos, cma) ? true : false;
	assert(has_site); (void)has_site;

// NEED TO TRIM DELETIONS AND INSERTIONS FROM ENDS...
// 1. Find start sites...
	fprintf(stderr,"WARNING: InsDelTransCMSA2() likely has a bug in IsDeletedCMSA(sq,s,cma)!!\n");
	// Bounded by len so that a fully deleted sequence cannot run off the end.
	for(s=1; s <= len && IsDeletedCMSA(sq,s,cma); ) { s++; }
// 2. Find matching regions...
	// first check for deletions followed by insertions...
	for( ; s <= len; s++){
		for(del=0; IsDeletedCMSA(sq,pos[1]+s-1,cma); ){
			del++; 
			if(s >= len){ s++; break; }
			if(InsertionCMSA(sq,pos[1]+s-1,cma)){
				if(!found){
				   fprintf(stderr,"%4d: ", sq);
				   PutSeqID(stderr,sE);
				   fprintf(stderr,"\n       (");
				   sq_hits++; found=TRUE;
				} else fprintf(stderr,"       (");
				ins=InsertionCMSA(sq,pos[1]+s-1,cma);
				fprintf(stderr," site=%d; del = %d; ins = %d)\n",
					pos[1]+s-1,del,ins);
				hits++;
			} s++;
		}
		// at this point !IsDeletedCMSA(sq,s,cma).
		if(s >= len) break;
	}
	// Next check for insertions followed by deletions...
/***************************************************************************************
Example:
 (implied model:     Mmmmmmmmmmmmiidddmmmmmmmmmmmmmmmdiiidmmmmmmmmmmmmmd )
 Real:      plwlbvrpfIEVIGKENICGApg---IVASNHRSHLDPPVL-dee-GGILKHMRAIPLR-rainsg*
 Fake:               IEVIGKENICGA  xxxIVASNHRSHLDPPVLx   xGGILKHMRAIPLRx
 inserts:  9         000000000002  0000000000000000003   000000000000006
 del_bit:  0         000000000000  1110000000000000001   100000000000001
 f2r[f]:   0        10...
 f:        0         1...

 ***************************************************************************************/
	for(s=1; s <= len && IsDeletedCMSA(sq,s,cma); ) { s++; }
	for( ; s < len; s++){
	   if(!IsDeletedCMSA(sq,pos[1]+s-1,cma)){ 	// Apg---
		ins=InsertionCMSA(sq,pos[1]+s-1,cma);	// ins=2
		if(ins && IsDeletedCMSA(sq,pos[1]+s,cma)){ // next site is deleted.
		        Int4 s0=s; 	// column carrying the insertion.
		        for(del=0; IsDeletedCMSA(sq,pos[1]+s,cma); ){ 
				del++; s++; if(s >= len) break;
			}
			if(!found){
				   fprintf(stderr,"%4d: ", sq);
				   PutSeqID(stderr,sE);
				   fprintf(stderr,"\n       (");
				   sq_hits++; found=TRUE;
			} else fprintf(stderr,"       (");
			fprintf(stderr," site=%d; ins = %d; del = %d)\n",
				pos[1]+s0-1,ins,del);
			hits++;
	        }
	   }
	   if(s >= len) break;
	}
	// (Disabled experimental code that replaced the operation string of one
	//  hard-coded sequence used to follow here; it has been removed.)
    }
    *InDelTrans=hits;
    return sq_hits;
}

Int4	Check4NtermExtendCMSA(cma_typ cma)
// Debugging aid: count the sequences in cma for which TruePosCMSA(sq,1,cma) exceeds 1.
// These are treated as sequences with an N-terminal extension, which (according to the
// original note) will cause a core dump in some routines.
// Returns the number of such sequences.
{
    const Int4	NumSq=NumSeqsCMSA(cma);
    Int4	extended=0,sq=1;

    while(sq <= NumSq){
        Int4 first=TruePosCMSA(sq,1,cma);
	if(first > 1) extended++;
	sq++;
    }
    return extended;
}

Int4	RmQueryGapsCMSA(double cutoff, cma_typ &cma)
// remove columns that are deleted in the first sequence.
//   cutoff - currently unused.
//   cma    - in/out; whenever a run of columns is converted, the old alignment is
//            destroyed with NilCMSA() and cma is replaced by the new one.
// Returns the total number of columns found deleted in sequence 1 (the first and last
// columns are never examined).  Runs are processed from right to left so that the
// indices of the columns still to be processed remain valid.
{
    Int4	s,total=0,cycle=0;
    BooLean	*Delete;	// Delete[s] == column s is deleted in the first sequence.

    // A check for N-terminal extensions (Check4NtermExtendCMSA) used to be made here;
    // extending the fake sequences instead fixes the problem with adding insertions!
    ExtendFakeToRealCMSA(cma);

    const Int4 Len=LengthCMSA(1,cma);
    NEW(Delete, Len+3, BooLean);	// relies on NEW() zeroing: Delete[0],[1],[Len] stay FALSE.
    // Don't remove columns on ends!!
    for(s=2 ; s < Len; s++){ Delete[s]=IsDeletedCMSA(1,1,s,cma); }
    for(s = Len; s > 0; s--){
	if(!Delete[s]) continue;
	cycle++;
	Int4 end=s; 			// rightmost column of this run.
	while(Delete[s]){ total++; s--; }
	s++;				// leftmost column of this run.
	// routines from cma_gmb.cc
	// NOTE(review): as the end columns are never flagged, the two TrimBlkCMSA()
	// branches cannot currently be taken; their conditions look as if 'end==Len'
	// and 's==1' were intended.  Left as found.
	cma_typ rcma=0;
	if(s==Len){ rcma=TrimBlkCMSA(cma,1,0,(end-s+1), 2); }
	else if(end==1){ rcma=TrimBlkCMSA(cma,1,(end-s+1),0, 2); }
	else { rcma=ConvertColsToInsertsCMSA(cma,1,s,end); }
	if(rcma){ NilCMSA(cma); cma=rcma; }
	fprintf(stderr,"\n********************** cycle %d **************************\n",cycle);
	// (To dump each intermediate alignment, write cma to "<name>.cycle<cycle>.cma" here.)
    } free(Delete);
    return total;
}

#if 1
// RelResidues[i]: a string whose first letter is the residue for table row i and whose
// remaining letters are the residues grouped with it.  The 21 rows are ordered
// X,C,G,A,S,T,N,D,E,Q,K,R,H,W,Y,F,V,I,L,M,P.
// NOTE(review): this presumably mirrors the residue codes of the amino acid alphabet
// used elsewhere in the program; confirm before indexing with alphabet codes.
const char *RelResidues[21] = {
		"X","C", "G", "AS", "SATN", "TS",
		"NSDH", "DNE", "EDQK", "QEKR", "KEQR",
		"RQK", "HY", "WYF", "YHWF", "FWY",
		"VILM", "IVLM", "LVIM", "MVIL", "P"};

// ResidueSets[i]: a 0-terminated list of residue sets for row i (same row order as
// RelResidues).  Each row enumerates every subset of RelResidues[i] that contains the
// row's own residue (the first letter), from the single residue up to the full set;
// the longest rows hold 8 sets plus the terminating 0.
const char *ResidueSets[21][9] = {
		{"X",0,0,0,0,0,0,0,0},
		{"C",0,0,0,0,0,0,0,0},
		{"G",0,0,0,0,0,0,0,0},
		{"A","AS",0,0,0,0,0,0,0},
		{"S","SA","ST","SN","SAT","STN","SAN","SATN",0},
		{"T","TS",0,0,0,0,0,0,0},
		{"N","NS","ND","NH","NSD","NSH","NDH","NSDH",0},
		{"D","DN","DE","DNE",0,0,0,0,0},
		{"E","ED","EQ","EK","EDQ","EDK","EQK","EDQK",0},
		{"Q","QE","QK","QR","QEK","QER","QKR","QEKR",0},
		{"K","KE","KQ","KR","KEQ","KER","KQR","KEQR",0},
		{"R","RQ","RK","RQK",0,0,0,0,0},
		{"H","HY",0,0,0,0,0,0,0},
		{"W","WF","WY","WYF",0,0,0,0,0},
		{"Y","YH","YW","YF","YHW","YHF","YWF","YHWF",0},
		{"F","FW","FY","FWY",0,0,0,0,0},
		{"V","VI","VL","VM","VLM","VIM","VIL","VILM",0},
		{"I","IV","IL","IM","IVL","IVM","ILM","IVLM",0},
		{"L","LV","LI","LM","LVI","LVM","LIM","LVIM",0},
		{"M","MV","MI","ML","MVI","MVL","MIL","MVIL",0},
		{"P",0,0,0,0,0,0,0,0}};
#else
// Disabled alternative: an integer table with the same dimensions as ResidueSets.
Int4	ResidueSetsInt[21][9];
#endif

Int4	NumIndelsCMSA(char *filename,cma_typ cma)
// Report insertion/deletion statistics for the single-block alignment cma and write
// two derived alignments.
//   filename - prefix of the output files "<filename>_good.cma" and "<filename>_blks.cma".
//   cma      - alignment; must consist of exactly one block (asserted).
// Steps:
//   1. Histograms (to stdout) of inserted residues, insertions and deletions per column.
//   2. Columns (other than the last ~9) with more than N/3 inserted residues are
//      treated as insertion hot spots; all other columns are put into 'set'.
//   3. Indels are counted per sequence over the columns 2..Len-2 that are in 'set';
//      sequences without any are written to "<filename>_good.cma".
//   4. The block is split at every hot spot column (right to left) and the result is
//      written to "<filename>_blks.cma".
// Returns the total number of indels counted in step 3.  (The function previously had
// no return statement, which is undefined behaviour for a non-void function.)
{
	assert(nBlksCMSA(cma) == 1);
	Int4	nins,inslen,ins,del,pos[3],sq,N=NumSeqsCMSA(cma);
	Int4	max,col,*Nino,*Ndel,*Nins,Len=LengthCMSA(1,cma);
	Int4	total_indels=0;

	// Nino[c] == # sequences with an insertion at column c; Nins[c] == # residues
	// inserted at column c; Ndel[c] == # sequences deleted at column c (c == 1..Len).
	NEW(Nino,Len+5,Int4); NEW(Nins,Len+5,Int4); NEW(Ndel,Len+5,Int4);
	for(col=0; col < Len; col++){
	    inslen=del=nins=0;
	    for(sq=1; sq <= N; sq++){
	        PosSiteCMSA(1, sq, pos, cma);
	        ins = InsertionCMSA(sq, pos[1]+col,cma);
	        if(ins > 0){ nins++; inslen+=ins; }
	        if(IsDeletedCMSA(sq,pos[1]+col, cma)){ del++; }
	    }
	    Ndel[col+1]=del; 
	    if(col < (Len - 1)){ Nino[col+1]=nins; }
	    Nins[col+1]=inslen; 	// was always unconditional, despite the original layout.
	}

	h_type HG=Histogram("number of residues inserted",0,Len,1);
	for(max=0,col=1; col < Len; col++){	// ignore extensions on the the end!!
	    if(Nins[col] > 0){
		IncdMHist(col,Nins[col],HG);
		if(max < Nins[col]) max = Nins[col];
	    }
	} PutHist(stdout,60,HG); NilHist(HG);

	Int4 cut=N/3;
	Int4 inc=1 + max/50;
	set_typ set=MakeSet(Len+5); ClearSet(set);	// columns that are NOT insertion hot spots.
	HG=Histogram("residues inserted",0,max,inc);
	for(col=1; col < Len; col++){
	   if(Nins[col] > cut && col < (Len-9)) IncdHist(Nins[col],HG); 
	   else AddSet(col,set);
	}
	PutHist(stdout,60,HG); NilHist(HG);

	HG=Histogram("number of insertions",0,Len,1);
	for(col=1; col < Len; col++){
	    if(Nino[col] > 0) IncdMHist(col,Nino[col],HG);
	} PutHist(stdout,60,HG); NilHist(HG);

	HG=Histogram("number of deletions",0,Len,1);
	for(col=1; col <= Len; col++){
	    if(Ndel[col] > 0)  IncdMHist(col,Ndel[col],HG);
	} PutHist(stdout,60,HG); NilHist(HG);

	HG=Histogram("numbers of indels per sq",0,Len,1);
	set_typ sqSet=MakeSet(N+5); ClearSet(sqSet);	// sequences without indels.
	for(sq=1; sq <= N; sq++){
	    del=nins=0;
	    PosSiteCMSA(1, sq, pos, cma);	// does not depend on col.
	    for(col=2; col <= Len-2; col++)
	    {
		if(!MemberSet(col,set)) continue;
	        ins = InsertionCMSA(sq, pos[1]+col-1,cma);
	        if(ins > 0){ nins++; }
	        if(IsDeletedCMSA(sq,pos[1]+col-1, cma)){ del++; }
	    } if(nins == 0 && del == 0) AddSet(sq,sqSet);
	    IncdHist(nins+del,HG);
	    total_indels += nins+del;
	} PutHist(stdout,60,HG); NilHist(HG);
	fprintf(stdout,"%d out of %d sequences without indels\n", CardSet(sqSet),N);
	FILE *fp = open_file(filename,"_good.cma","w");
	PutInSetCMSA(fp,sqSet,cma);
	fclose(fp);

        long	min_len=4;
	// cma2 == the most recent alignment created here (owned); in_cma == the alignment
	// to split next, which is the caller's cma until the first successful split.
	cma_typ cma2=0,tmp_cma=0,in_cma=cma;
        for(col = Len-1; col > 1; col--){
	     if(!MemberSet(col,set)){
fprintf(stderr,"nblks=%d (column=%d)\n",nBlksCMSA(in_cma),col);
               tmp_cma=SplitBlkCMSA(1,col,min_len,in_cma); // always work on leftmost block.
               if(tmp_cma == 0){
		   fprintf(stderr," --> split operation failure (column=%d)...continuing\n",col);
	       } else {
	           if(cma2) NilCMSA(cma2);
	           in_cma=tmp_cma; cma2=in_cma;
	       }
	     }
        }
	fp = open_file(filename,"_blks.cma","w");
        PutCMSA(fp,in_cma); 
	fclose(fp);

	// Release what this routine owns (never the caller's cma).
	// NOTE(review): 'set' and 'sqSet' are still not destroyed; the destructor of the
	// set library is not visible in this function.
	if(cma2) NilCMSA(cma2);
	free(Nino); free(Nins); free(Ndel);
	return total_indels;
} 

Int4	NoIndelsSet(set_typ NoIndels, cma_typ cma)
// Fill NoIndels with the sequences of the single-block alignment cma that have neither
// insertions nor deletions within the retained columns.
//   NoIndels - output set; must satisfy SetN(NoIndels) >= NumSeqsCMSA(cma) (asserted).
//              It is cleared first.
//   cma      - alignment; must consist of exactly one block (asserted).
// Columns (other than the last ~9) with more than N/3 inserted residues are treated as
// insertion hot spots and are ignored; indels are counted over the remaining columns
// within 2..Len-2.  Histograms of the indel statistics are printed to stdout.
// Returns the number of sequences added to NoIndels.  (The function previously had no
// return statement, which is undefined behaviour for a non-void function.)
{
	assert(nBlksCMSA(cma) == 1);
	Int4	nins,inslen,ins,del,pos[3],sq,N=NumSeqsCMSA(cma);
	Int4	max,col,*Nino,*Ndel,*Nins,Len=LengthCMSA(1,cma);
	Int4	num_clean=0;

	assert(SetN(NoIndels) >= N);
	ClearSet(NoIndels);

	// Nino[c] == # sequences with an insertion at column c; Nins[c] == # residues
	// inserted at column c; Ndel[c] == # sequences deleted at column c (c == 1..Len).
	NEW(Nino,Len+5,Int4); NEW(Nins,Len+5,Int4); NEW(Ndel,Len+5,Int4);
	for(col=0; col < Len; col++){
	    inslen=del=nins=0;
	    for(sq=1; sq <= N; sq++){
	        PosSiteCMSA(1, sq, pos, cma);
	        ins = InsertionCMSA(sq, pos[1]+col,cma);
	        if(ins > 0){ nins++; inslen+=ins; }
	        if(IsDeletedCMSA(sq,pos[1]+col, cma)){ del++; }
	    }
	    Ndel[col+1]=del; 
	    if(col < (Len - 1)){ Nino[col+1]=nins; }
	    Nins[col+1]=inslen; 	// was always unconditional, despite the original layout.
	}

	h_type HG=Histogram("number of residues inserted",0,Len,1);
	for(max=0,col=1; col < Len; col++){	// ignore extensions on the the end!!
	    if(Nins[col] > 0){
		IncdMHist(col,Nins[col],HG);
		if(max < Nins[col]) max = Nins[col];
	    }
	} PutHist(stdout,60,HG); NilHist(HG);

	Int4 cut=N/3;
	Int4 inc=1 + max/50;
	set_typ set=MakeSet(Len+5); ClearSet(set);	// columns that are NOT insertion hot spots.
	HG=Histogram("residues inserted",0,max,inc);
	for(col=1; col < Len; col++){
	   if(Nins[col] > cut && col < (Len-9)) IncdHist(Nins[col],HG); 
	   else AddSet(col,set);
	}
	PutHist(stdout,60,HG); NilHist(HG);

	HG=Histogram("number of insertions",0,Len,1);
	for(col=1; col < Len; col++){
	    if(Nino[col] > 0) IncdMHist(col,Nino[col],HG);
	} PutHist(stdout,60,HG); NilHist(HG);

	HG=Histogram("number of deletions",0,Len,1);
	for(col=1; col <= Len; col++){
	    if(Ndel[col] > 0)  IncdMHist(col,Ndel[col],HG);
	} PutHist(stdout,60,HG); NilHist(HG);

	HG=Histogram("numbers of indels per sq",0,Len,1);
	for(sq=1; sq <= N; sq++){
	    del=nins=0;
	    PosSiteCMSA(1, sq, pos, cma);	// does not depend on col.
	    for(col=2; col <= Len-2; col++)
	    {
		if(!MemberSet(col,set)) continue;
	        ins = InsertionCMSA(sq, pos[1]+col-1,cma);
	        if(ins > 0){ nins++; }
	        if(IsDeletedCMSA(sq,pos[1]+col-1, cma)){ del++; }
	    }
	    if(nins == 0 && del == 0){ AddSet(sq,NoIndels); num_clean++; }
	    IncdHist(nins+del,HG);
	} PutHist(stdout,60,HG); NilHist(HG);
	// NOTE(review): 'set' is still not destroyed; the destructor of the set library is
	// not visible in this function.
	free(Nino); free(Nins); free(Ndel);
	return num_clean;
} 

/**************************** Global Variables ******************************/
int	main(Int4 argc,char *argv[])
{ 
	Int4	arg,i,j,s,cutoff=-999,blk=0,lenrm,mingap;
	Int4    time1,*len,t,n,N,left=0,right=0,x,z,TrimMax=3;
	UInt4	min, max,num_rpts=0;
	char	str[300],mode=' ',*rm=NULL,*seqid,*look=0,*keep=0,*Keep=0;
	float	Cut,*info,info_cut,look_cut=2.0,prob_cut=0.05;
	sma_typ MA=0,MA2;
	cma_typ	cmsa,cmsa2,cma=0;
	BooLean	*remove,shuffle=FALSE,verbose=FALSE,Newick=FALSE;
	Int4	*value;
	ss_type	data;
	e_type	*ListE;
	BooLean *good,*skip,putseqs=0,template_mode=FALSE,split_into_blks=FALSE;
	Int4	*blk_start,*blk_end,nblks;
	FILE	*fp;
	Int4	min_rpt=0,min_spacing=0;
	UInt4   seed=7061950;
	double	clust_cut=0.001, add_cut=0.001;
	char	*psm_arg=0;
	Int4	purge=-1,rm_seq=0,fuseblk=0;
	Int4	left_leng,min_len=1,block,percent_ident=0;
	Int4	put_consensus=0;
	// BooLean	IncludeFirst=TRUE;
	BooLean	IncludeFirst=FALSE,TaxID_Only=FALSE,PutTerms=FALSE; 
	char	front_back=' ';
	Int4	length_extend=0;
	char	chain,color;
	Int4	file;
	Int4	left_flank=-1,right_flank=-1;
	double	freq_cut;
	Int4	add_block=0,add_col=0;
	Int4	start_ins[100],end_ins[100],num_ins=0;
	Int4	start_add[100],len_add[100],num_add=0;
        Int4	grow_blk=0,grow_len=0;
	char	*selex_file=0,*split_cmafile=0;
	char	status=0,*Kingdom;
	char	*main_set_file=0;
	Int4	min_seq_cma=0,min_set_size=0;
	char	*defline_string=0;
	Int4	LastSeq,FirstSeq;
	char	*new_name=0;
	Int4	Mutated_pos=0;
	char	*phylum=0;
	Int4	deleted_pos=0,residue_pos=0;
	char	*merge_file_name=0,residue_str[100],*use_file_name=0;
	BooLean	mk_use_file=FALSE,sto_format=FALSE,fasta_format=FALSE,show_phyla=FALSE; 
	BooLean	full_seq=FALSE,seqlens=FALSE,show_var=FALSE;
	char	rm_csq=0;
	Int4	put_best=0,min_sq=0,max_sq=0,KeySeq=0;
	double	fraction=0.0;
	Int4	NoFragN=0,NoFragC=0;
	BooLean	PutCSQ=FALSE,output_first=FALSE,keep_first=FALSE;
	Int4	see_aa=0,see_blk=0;
	char	layer_keysq[100];
	layer_keysq[0]=0;
	BooLean	print_indel_trans=FALSE,BestPhylaOnly=TRUE;
	double	fractDeleted=0;
	BooLean	sort_using_profiles=FALSE,WriteCMA=FALSE,PrintDiversity=FALSE,WriteAllCMA=FALSE;
	char	profiles[200],*mm_tpl_file=0;
	BooLean	IronOut=FALSE,cobbled=FALSE,PrintLPR=FALSE;
	BooLean	FirstOnly=FALSE,FootPrint=FALSE;
	Int4	MinNumSeq=0,NumPoor=0;
	double	MinCol=0;
	BooLean	MinColHSW=FALSE;
	BooLean	MinColU=FALSE;
	Int4	cma_level=-1;
	char	seq_ids_file[200];
	BooLean	sort_using_seqids=FALSE,PutSingles=FALSE,PutHSW=FALSE,PutDoubles=FALSE;
	char	PutScores=0;
	BooLean PutPDB=FALSE,UseNumPhyla=TRUE,PutAsHMM =FALSE,FindSeedPttrn=FALSE,DistinctPhyla=TRUE;
	double  rm_sq_fract=-1.0;
	char	multi_cma_name[200];
	char	family_name[202];
	char	name_seed_aln[202],RmSeqId[200]; RmSeqId[0]=0;
	Int4	put_worst=0,pairA=0,pairB=0;
	Int4	CntPhyla=0;
	Int4	NumRandom=0;
	Int4	minlen=0,maxlen=0;
	BooLean	FileName2Sq1=FALSE,OutPutSMA=FALSE, SameIDs=FALSE,AddDupl=FALSE;
	Int4	OutFileNumber=0,OutSeqFile=0;
	char	*write_file_name=0;
	char	*seqid_file_name=0,*cdtree=0;
	BooLean	rm_flank=FALSE,LabelOut=FALSE,UnLabelOut=FALSE,RmUnKnown=FALSE;
	double	RptEval=-1.0;
	BooLean	SplitCMA=FALSE,ExcelFormat=FALSE,put_config=FALSE,PutRelEntropy=FALSE;
	Int4	TheSeqId=0,*TheSqIds=0,rm_all_but=0;
	Int4	rename_id=0;
	char	*rename=0;
	Int4	SpltSz=0;
	Int4	cdhit=0;
	char	*txfile=0;
	Int4	TxID=0;

	time1=time(NULL); 
	family_name[0]=0;
	name_seed_aln[0]=0;
	// if(argc < 2){ PrintLicenseStatement(); print_error(USAGE_START); }
	if(argc < 2){ print_error(USAGE_START); }
	if(argc == 2){
	   fprintf(stderr,"%s",USAGE_START);
	   print_error("\tFATAL: !!!!!!! You need to specify an option. !!!!!!!\n"); 
	}
	for(arg = 2; arg < argc; arg++){
	   if(argv[arg][0] != '-') print_error(USAGE_START);
	   switch(argv[arg][1]) {
             case 'A': mode = 'A'; 
                     if(argv[arg][2] != 0){
			if(sscanf(argv[arg],"-A%d:%d",&add_block,&add_col) != 2)
                        					print_error(USAGE_START); 
		     } else print_error(USAGE_START); 
                     break;
             case 'a': mode = 'a'; 
		     if(strcmp("-add",argv[arg])==0) {
			AddDupl=TRUE;
                     } else if(sscanf(argv[arg],"-a%lf:%lf",&clust_cut,&add_cut) != 2)
                        print_error(USAGE_START); 
                     break;
	     case 'B': {
			if(strcmp("-Best",argv[arg])==0) {
				put_best=1;
				BestPhylaOnly=FALSE;
                        } else {
			  if(sscanf(argv[arg],"-Best=%d:%d",&KeySeq,&put_best) == 2){
			        if(put_best < 1 || KeySeq < 1) print_error(USAGE_START);
				BestPhylaOnly=TRUE;
			  } else {
			    KeySeq=0;
			    if(sscanf(argv[arg],"-Best=%d",&put_best) != 1){
				if(argv[arg][2] != 0) print_error(USAGE_START);
			    } else if(put_best < 1) print_error(USAGE_START);
			    BestPhylaOnly=FALSE;
			  }
			}
			mode = 'B';
                     } break;
	     case 'b': mode = 'b';
		   if(sscanf(argv[arg],"-best=%d:%d",&KeySeq,&put_best) == 2){
			if(put_best < 1 || KeySeq < 1) print_error(USAGE_START);
			BestPhylaOnly=FALSE;
			mode='B';
		   } else if(sscanf(argv[arg],"-best=%d",&put_best) == 1){
			// BestPhylaOnly=FALSE;
			// BestPhylaOnly=TRUE;
			// DistinctPhyla=FALSE; // mode='B';
			DistinctPhyla=TRUE; // mode='B';
			KeySeq=0;
			mode='B';
		   } else { print_error(USAGE_START); }
		   break;
             case 'C': {
		  mode = 'C'; 
		  if(argv[arg][2] == '='){
                    if(sscanf(argv[arg],"-C=%d:%d",&percent_ident,&min_set_size) != 2){
                        print_error(USAGE_START); 
		    } 
		    if(percent_ident <= 0 || percent_ident > 100) print_error(USAGE_START);
		    if(min_set_size <= 0) print_error(USAGE_START);
		  } else if(strcmp("-CSQ",argv[arg])==0) {
		    PutCSQ=TRUE;
		  }
#if 0
		  if(isdigit(argv[arg][2])){
		    // percent_ident=IntOption(argv[arg],'C',10,100,USAGE_START); 
		  }
#endif
		} break;
	     case 'c': 
		// Options beginning with -c; every accepted form ends in mode 'c'.
		// UseNumPhyla is switched on first and switched off again only by the
		// plain -c<int> form handled in the last branch.
		UseNumPhyla=TRUE;
		if(strcmp("-config",argv[arg])==0) {
			put_config=TRUE;
		} else if(strcmp("-cobbled",argv[arg])==0) {
			cobbled=TRUE;
		} else if(argv[arg][2]==0) put_consensus=1;	// bare -c
		else if(strcmp("-csq",argv[arg])==0) {
			put_consensus=2;
		} else if(argv[arg][2]=='=' && argv[arg][3] != 0){	// -c=<text>
			main_set_file=argv[arg] + 3;
		} else if(strncmp(argv[arg],"-cdtree=",8)==0 && argv[arg][8] != 0){
			// -cdtree=<text>.  This has to be tested before the generic 'd'
			// branch below: that branch only accepts -cdhit=<int> and used to
			// reject every -cdtree= argument before it could be recognised.
			// The value is copied straight from argv, so no fixed-size scratch
			// buffer is written to.
			cdtree=AllocString(argv[arg] + 8);
		} else if(argv[arg][2]=='d'){	// -cdhit=<int>, accepted range 40..100
		   if(sscanf(argv[arg],"-cdhit=%d",&x) != 1 || x < 40 || x > 100){
                        print_error(USAGE_START); 
		   } else { cdhit=x; }
		} else if(argv[arg][2]==':'){	// -c:<int>:<int>
		   if(sscanf(argv[arg],"-c:%d:%d",&percent_ident,&min_seq_cma) != 2){
                        print_error(USAGE_START); 
		   } else IncludeFirst=FALSE;
		} else if(sscanf(argv[arg],"-c%d:%d",&percent_ident,&min_seq_cma) != 2){
			// Not -c<int>:<int>, so treat the argument as -c<int>.
			percent_ident=IntOption(argv[arg],'c',10,100,USAGE_START);
			UseNumPhyla=FALSE;
		} mode='c'; break;
             case 'D': mode = 'D'; 
		if(argv[arg][2]=='=' && argv[arg][3] != 0){
		  if(sscanf(argv[arg],"-D=%d",&deleted_pos) != 1) print_error(USAGE_START);
		} else if(argv[arg][2] != 0) print_error(USAGE_START);
		break;
             case 'd': mode = 'd'; 
			if(strcmp("-diversity",argv[arg])==0) {
				PrintDiversity=TRUE;
			} else if(strcmp("-doubles",argv[arg]) == 0){ 
				PutDoubles=TRUE; mode='s';
			} else print_error(USAGE_START); 
			break;
             case 'E': mode = 'E'; 
                     if(sscanf(argv[arg],"-E%d%c%lf%c",&file,&chain,&freq_cut,&color) != 4){
                        print_error(USAGE_START); 
		     }
		break;
	     case 'e': 
		// "-extend" selects mode 'e'.  Any other argument that has characters
		// after "-e" is rejected with the usage message.
		// NOTE(review): RealOption() is therefore reached only for a bare "-e"
		// (argv[arg][2] == 0), which carries no number to parse.  The test looks
		// inverted -- confirm whether -e<real> was meant to set prob_cut.
		if(strcmp("-extend",argv[arg])==0){
		   mode='e'; 
	        } else if(argv[arg][2] != 0) print_error(USAGE_START);
		else prob_cut = RealOption(argv[arg],'e',0.0,10000.0,USAGE_START);
                        break;
             case 'f': 
		if(strcmp("-first",argv[arg]) == 0){ 
		       output_first=TRUE; 
		} else if(strcmp("-fullseq",argv[arg]) == 0){ 
		       full_seq=TRUE; mode='D'; break;
		} else if(strcmp("-fasta",argv[arg]) == 0){ 
			fasta_format=TRUE;
		} else if(argv[arg][2]!= 0) print_error(USAGE_START);
		mode = 'f'; break;
	     case 'F': 
		if(strcmp("-First",argv[arg]) == 0){ mode = 'B'; FirstOnly=TRUE; }
		else if(strcmp("-FootPrint",argv[arg]) == 0){ mode = 'F'; FootPrint=TRUE; }
		else {
		  mode = 'F'; 
		  if(argv[arg][2] !=0){
			if(!isdigit(argv[arg][2])) print_error(USAGE_START);
			else fuseblk=IntOption(argv[arg],'F',1,1000,USAGE_START);
		  }
		}
		break;
	     case 'G': mode = 'G'; break;
	     case 'g': mode = 'g'; 
                if(sscanf(argv[arg],"-g%d:%d",&grow_blk,&grow_len) != 2)
                        print_error(USAGE_START); 
		break;
             case 'h': 
		mode = 'h'; 
		if(argv[arg][2] !=0){
		   if(strncmp(argv[arg],"-hsw",5) ==0){
			PutHSW=TRUE;
		   } else if(strncmp(argv[arg],"-hmm",5) ==0){
			PutAsHMM =TRUE;
		   } else { print_error(USAGE_START); }
		}
		break;
             case 'I': mode = 'I'; 
		if(argv[arg][2] == '=') {  // -I=<file>
		  if(sscanf(argv[arg],"-I=%d:%d",&left_flank,&right_flank) == 2){
		     if(left_flank < 0 || right_flank < 0) print_error(USAGE_START);
		  } else {
		    if(!isgraph(argv[arg][3])) print_error(USAGE_START);
		    seqid_file_name = argv[arg] + 3;
		  }
		} else {
#if 1
		     if(num_add >= 100) print_error("-I option error: limit exceeded.");
                     if(sscanf(argv[arg],"-I%d:%d",&start_add[num_add],&len_add[num_add]) != 2){
                        print_error("-I option syntax error"); 
		     } 
		     if(start_add[num_add] < 1 || len_add[num_add] < 1) print_error(USAGE_START);
#if 1
		     // Need to start from C-terminus and work toward N-terminus
		     if(num_add > 0 && start_add[num_add] > start_add[num_add-1])
			print_error("-I option error: work toward N-terminus.");
#else
		     // Need to start from N-terminus and work toward C-terminus
		     if(num_add > 0 && start_add[num_add] < start_add[num_add-1])
			print_error("-I option error: work toward C-terminus.");
#endif
		     num_add++;
		     // fprintf(stderr,"DEBUG1\n");
#else
                     if(sscanf(argv[arg],"-I%d:%d",&left,&right) != 2)
                        print_error(USAGE_START); 
#endif
		}
                     break;
             case 'i': mode = 'i'; 
		   if(strncmp(argv[arg],"-indel",7) ==0){
			print_indel_trans=TRUE;
		   } else if(sscanf(argv[arg],"-islen=%d:%d",&minlen,&maxlen) == 2){
			if(minlen > maxlen) print_error("islen input error");
		   } else if(sscanf(argv[arg],"-iron=%lf",&fractDeleted) == 1){
			if(fractDeleted <= 0.0 || fractDeleted > 1.0){
				print_error(USAGE_START);
			}
		   } else if(strncmp(argv[arg],"-iron",6) ==0){
			IronOut=TRUE;
		   } else if(argv[arg][2] != 0){
		      if(sscanf(argv[arg],"-i%d:%c%d",
				&blk,&front_back,&length_extend) != 3){
                        print_error(USAGE_START); 
		      }
		   } else { print_error(USAGE_START); }
		   break;
	     case 'K': 
		if(argv[arg][2]!='=' && argv[arg][2]!=0) keep=argv[arg]+2; 
		else if(argv[arg][2] == '=' && argv[arg][3]!=0) Keep=argv[arg]+3;
		else { print_error(USAGE_START); }
		mode = 'K'; break;
	     case 'k': 
		if(argv[arg][2]!=0) Kingdom=argv[arg]+2; 
		else Kingdom=0; // print_error(USAGE_START);
		mode = 'k'; 
		break;
	     case 'L': 
		if(argv[arg][2]!=0) look=argv[arg]+2; mode = 'L'; 
		break;
	     case 'l': 
		if(strcmp(argv[arg],"-label") == 0){
		   LabelOut=TRUE; mode='l';
		} else if(strcmp(argv[arg],"-lpr") == 0){
			PrintLPR=TRUE; mode='l';
		} else if(sscanf(argv[arg],"-level=%d",&cma_level) == 1){
		   if(cma_level < 0)  print_error(USAGE_START);
		   mode='l';
		} else {
		  if(sscanf(argv[arg],"-layer=%s",layer_keysq) != 1){
		     look_cut=RealOption(argv[arg],'l',-1000.0,1000.0,USAGE_START); 
		  } else {
		   if(!isprint(layer_keysq[0])) print_error("-layer input error"); 
		   mode='l';
		  }
		}
		break;
	     case 'M': 
                if(sscanf(argv[arg],"-MinColU=%d:%d",&x,&z) == 2){
		      if(x < 10 || x > 100) print_error(USAGE_START);
		      if(z < 10 || z >= 100) print_error(USAGE_START);
		      MinCol=(double)x/100.0; purge=z; MinColU=TRUE;
                } else if(sscanf(argv[arg],"-MinColHSW=%lf",&MinCol) == 1){
		    if(MinCol < 0.01 || MinCol > 1.0) print_error(USAGE_START);
		    MinColHSW=TRUE;
                } else if(sscanf(argv[arg],"-Mincol=%lf",&MinCol) == 1){
		    if(MinCol < 0.01 || MinCol > 1.0) print_error(USAGE_START);
		} else num_rpts=IntOption(argv[arg],'M',1,500,USAGE_START); 
		mode = 'M'; break;
	     case 'm': mode = 'm'; 
                if(sscanf(argv[arg],"-mincol=%lf",&MinCol) == 1){
		    if(MinCol < 0.01 || MinCol > 1.0) print_error(USAGE_START);
                } else if(sscanf(argv[arg],"-maxsq=%d",&max_sq) == 1){
		    if(max_sq < 1) print_error(USAGE_START);
                } else if(sscanf(argv[arg],"-minsq=%d",&min_sq) == 1){
		    if(min_sq < 1) print_error(USAGE_START);
                } else if(sscanf(argv[arg],"-mm=%s",str) == 1){
		    if(!isprint(str[0])) print_error(USAGE_START);
		    mm_tpl_file=AllocString(str);
		} else if(argv[arg][2]!=0) {  // -m=<file>
		   if(argv[arg][2]!='=') print_error(USAGE_START);
		   if(!isgraph(argv[arg][3])) print_error(USAGE_START);
		   merge_file_name = argv[arg] + 3;
		} break;
             case 'N': mode = 'N'; 
		if(strcmp("-Newick",argv[arg]) == 0){ 
			Newick=TRUE;
		} else {
			MinNumSeq=IntOption(argv[arg],'N',1,500000,USAGE_START); 
		} break;
             case 'n': mode = 'n'; 
		if(argv[arg][2] == 0 ){
			FileName2Sq1=TRUE;
		} else if(argv[arg][2] == '=' ){
			new_name=argv[arg] + 3;
			fprintf(stderr,"new_name = %s\n",new_name);
		} else if(sscanf(argv[arg],"-nofrag=%d:%d",&NoFragN,&NoFragC) == 2){
			// -nofrag=<int1>:<int2>
			if(NoFragN <=0 || NoFragC <= 0) print_error(USAGE_START);
		} else print_error(USAGE_START);
		break;
	     case 'O': mode = 'O'; 
		if(argv[arg][2] == '='){ 
			defline_string=argv[arg]+3;
		} else if(sscanf(argv[arg],"-Out=%d",&OutSeqFile) == 1){
			if(OutSeqFile < 1) print_error(USAGE_START);
		} else if(argv[arg][2]!=0) selex_file=argv[arg]+2;
		else if(argv[arg][2] == 0){
			Int4 Number;
			// if(cma) TotalNilCSMA(cma);
	    		fp=OpenFileToRead(argv[1]);
			a_type AB = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
			cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,AB);
			fclose(fp);
#if 0
	 trm_typ *trm = new trm_typ(IN_CMA);
	 trm->Scan4Terms(stdout);
	 delete trm;
	 return 0;
#endif
			for(i=1; i <= Number; i++){
			    PutSeqSetEs(stdout,TrueDataCMSA(IN_CMA[i])); 
			    TotalNilCMSA(IN_CMA[i]);
			} free(IN_CMA); NilAlpha(AB); 
			FreeSets();	// use at end of main() only!!!
		        return 0; 
		} else  print_error(USAGE_START);
		break;
	     case 'o': mode = 'o'; 
		if(sscanf(argv[arg],"-out=%s",multi_cma_name) == 1){
		} else multi_cma_name[0]=0;
		break;
	     case 'P': 
		if(strcmp("-Pttrn",argv[arg]) == 0){
		    mode='P'; FindSeedPttrn=TRUE;
		} else if(sscanf(argv[arg],"-Partition=%s",name_seed_aln) == 1){
		    mode='P';
		} else if(sscanf(argv[arg],"-P=%d",&CntPhyla) == 1){
			mode='P'; if(CntPhyla <= 0) print_error(USAGE_START);
		} else if(argv[arg][2] == '=' && argv[arg][3] == 'U' && argv[arg][4] == 0){
			mode='P'; mk_use_file=TRUE;
		} else if(argv[arg][2] != 0){ psm_arg=argv[arg]; } else mode = 'P'; 
		break;
	     case 'p': 
		if(sscanf(argv[arg],"-poor=%d",&NumPoor) == 1){
		  if(NumPoor < 1) print_error(USAGE_START);
		} else if(sscanf(argv[arg],"-pairs=%d:%d",&pairA,&pairB) == 2){
			if(pairA <= 0 || pairB <= 0) print_error(USAGE_START);
		} else if(strcmp("-pdb",argv[arg]) == 0){ 
			PutPDB=TRUE;
		} else if(strcmp("-phyla",argv[arg]) == 0){ 
		  show_phyla=TRUE; 
		} else if(argv[arg][2] == '='){
		    if(argv[arg][3] == 0) print_error(USAGE_START);
		    phylum=argv[arg] + 3;
		} else {
		   Mutated_pos=IntOption(argv[arg],'p',0,5000,USAGE_START); 
		} mode = 'p'; 
		break;
             case 'Q': mode = 'Q'; break;
             case 'T': mode = 'T'; 
		// Forms handled here:
		//   -T                 template mode
		//   -T=<regions>       split into blocks
		//   -T_<int>=<regions> split into blocks, each region shifted by <int>
		//   -T<int>:<int>      sets blk and lenrm (lenrm is stored negated)
		//   -T<int>            sets TrimMax and switches to mode '!'
		if(argv[arg][2]==0) template_mode=TRUE;
		else if(argv[arg][2]== '=' || argv[arg][2]== '_'){
		   fprintf(stderr,"split_into_blks mode\n");
		   split_into_blks=TRUE;
		   Int4 shift=0;
		   char *arg_str=0;
		   if(sscanf(argv[arg],"-T_%d=",&shift) == 1){
			// sscanf() reports 1 as soon as the integer converts, whether or
			// not the '=' that should follow is present.  Look for it with
			// strchr() so a malformed "-T_5" cannot walk off the end of argv.
			arg_str=strchr(argv[arg],'=');
			if(arg_str == 0) print_error(USAGE_START);
			else arg_str++;		// first character of the region list
		   } else if(argv[arg][2]== '='){
			shift=0; arg_str=argv[arg]+3; 
		   } else print_error(USAGE_START);
		   nblks=ParseRegions(arg_str,&blk_start,&blk_end,USAGE_START);
		   fprintf(stderr,"%d blocks: \n",nblks);
		   for(i=1; i <= nblks; i++){
			if(shift != 0) {
			  blk_start[i] = blk_start[i] + shift;
			  blk_end[i] = blk_end[i] + shift;
			} fprintf(stderr,"(%d..%d)\n",blk_start[i],blk_end[i]);
		   } fprintf(stderr,"\n\n");
		} else if(isdigit(argv[arg][2])){
		   if(sscanf(argv[arg],"-T%d:%d",&blk,&lenrm) != 2){
			mode = '!';
			if(sscanf(argv[arg],"-T%d",&TrimMax) != 1)
				print_error(USAGE_START); 
		   } else lenrm=-lenrm;
                } else print_error(USAGE_START);
		break;
	     case 't': 
	       { 
		// Options beginning with -t.  A bare -t selects table output (mode '#');
		// every other accepted form selects mode 't'.
		char txstr[200];
	        if(sscanf(argv[arg],"-tx=%d",&TxID) == 1){ mode = 't'; }
		// The field width keeps an over-long -tax=<text> argument from
		// overrunning txstr (199 characters plus the terminator).
		else if(sscanf(argv[arg],"-tax=%199s",txstr) == 1){
			txfile= AllocString(txstr);
			mode = 't'; 
	        } else if(strcmp("-terms",argv[arg]) == 0){ 
			PutTerms=TRUE; mode='t';
	        } else if(strcmp("-tax",argv[arg]) == 0){ 
			TaxID_Only=TRUE; mode = 't'; 
	        } else if(argv[arg][2]==0){
		       mode='#';  // output table mode...
	        } else {
			// Anything else is parsed as -t<real>.
			info_cut=RealOption(argv[arg],'t',0.0,10.0,USAGE_START); 
			mode = 't'; 
	        } 
	       } break;
             case 's': 
		if(strcmp("-stockholm",argv[arg]) == 0){ 
			sto_format=TRUE; 
		} else if(strcmp("-split",argv[arg]) == 0){ 
			SplitCMA=TRUE;
		} else if(sscanf(argv[arg],"-sq=%d",&TheSeqId) == 1){
			if(TheSeqId <= 0) print_error(USAGE_START);
		} else if(strcmp("-sma",argv[arg]) == 0){ 
			PutSingles=TRUE;
		} else if(strcmp("-scores",argv[arg]) == 0){ 
			PutScores='t';
		} else if(strcmp("-sameIDs",argv[arg]) == 0){ 
			SameIDs=TRUE;
		} else if(strcmp("-single",argv[arg]) == 0){ 
			PutSingles=TRUE;
		} else if(sscanf(argv[arg],"-sort=%s",profiles) == 1){
		     sort_using_profiles=TRUE;
		     if(!isprint(profiles[0])) print_error(USAGE_START); 
		} else if(strcmp("-seqlens",argv[arg]) == 0){ // i.e., option -seqlens
			seqlens=TRUE;
		} else if(sscanf(argv[arg],"-seed=%d",&seed) == 1){
		     break;
		} else if(sscanf(argv[arg],"-see=%d",&see_aa) == 1){
			if(see_aa <= 0) print_error(USAGE_START);
			see_blk = 1;
		} else if(sscanf(argv[arg],"-show=%d:%d,%s",&see_blk,&see_aa,family_name) == 3){
			if(see_blk <= 0 || see_aa <= 0) print_error(USAGE_START);
			see_aa = -see_aa; 
			fprintf(stderr,"family_name = %s\n",family_name);
		} else if(sscanf(argv[arg],"-show=%d:%d",&see_blk,&see_aa) == 2){
			if(see_blk <= 0 || see_aa <= 0) print_error(USAGE_START);
			see_aa = -see_aa; 
		} else if(sscanf(argv[arg],"-show=%d",&see_aa) == 1){
			if(see_aa <= 0) print_error(USAGE_START);
			see_blk=1; see_aa = -see_aa; 
		} else if(argv[arg][2] != 0) print_error(USAGE_START);
		mode='s'; break;
             case 'S': 
		if(sscanf(argv[arg],"-Splt=%d",&x) == 1){
			if(x < 10) print_error("-Splt Option input error");
			mode='S'; SpltSz=x;
		} else if(sscanf(argv[arg],"-Show=%d:%d,%s",&see_blk,&see_aa,family_name) == 3){
			if(see_blk <= 0 || see_aa <= 0) print_error(USAGE_START);
			see_aa = -see_aa;  mode='s'; ExcelFormat=TRUE;
			fprintf(stderr,"family_name = %s\n",family_name);
		} else if(sscanf(argv[arg],"-Show=%d:%d",&see_blk,&see_aa) == 2){
			if(see_blk <= 0 || see_aa <= 0) print_error(USAGE_START);
			see_aa = -see_aa; mode='s'; ExcelFormat=TRUE;
		} else if(strncmp(argv[arg],"-Sq=",4) == 0){
		   Int4 x=strlen(argv[arg]);
		   NEW(TheSqIds,x+5,Int4); mode='S';
		   x=ParseIntegers(argv[arg]+4, TheSqIds, "-Sq= input error");
		   // TheSqIds[0]=x;
		} else if(strcmp("-Scores",argv[arg]) == 0){ 
			PutScores='T'; mode = 's';
		} else if(sscanf(argv[arg],"-Sort=%s",seq_ids_file) == 1){
		     mode = 's'; 
		     sort_using_seqids=TRUE;
		     if(!isprint(seq_ids_file[0])) print_error(USAGE_START); 
		} else if(argv[arg][2] == 0) { mode = 'S'; shuffle = TRUE; }
		else if(argv[arg][2] == '=') {
		     mode = 'S'; 
		     if(!isprint(argv[arg][3])) print_error(USAGE_START); 
		     split_cmafile = argv[arg] + 3;
		} else {
		     mode = 'S'; 
                     if(sscanf(argv[arg],"-S%d:%d",&block,&left_leng) != 2)
                        print_error(USAGE_START); 
		     if(left_leng < 1 || block < 1){
			 print_error(USAGE_START);
		     }
		} break;
	     case 'r': 
		// Options beginning with -r.  Branches that end in 'break' have already
		// chosen their own mode; all others fall through to mode 'r' at the end.
		// The first test checks argv[arg][2] before argv[arg][3] so that a bare
		// "-r" is never indexed past its terminating NUL.
		if(argv[arg][2] != 0 && argv[arg][3] == '='){ 	// -rm=<int>
                     if(sscanf(argv[arg],"-rm=%d",&rm_all_but) != 1) 
				print_error(USAGE_START);
		     if(rm_all_but < 1) print_error(USAGE_START);
		     mode='r';
                } else if(sscanf(argv[arg],"-rmsq=%s",RmSeqId) == 1){
		    mode='x'; break;
		} else if(argv[arg][2] == '='){ 	// -r=<letters><int>
                     if(sscanf(argv[arg],"-r=%[A-Za-z]%d",residue_str,&residue_pos) != 2) 
				print_error(USAGE_START);
                } else if(sscanf(argv[arg],"-random=%d",&NumRandom) == 1){
		    mode='r';
		    break;
                } else if(sscanf(argv[arg],"-rpts=%lf",&RptEval) == 1){
		    mode='r';
		    if(RptEval <= 0.0 || RptEval > 10.0) print_error(USAGE_START);
		    break;
                } else if(sscanf(argv[arg],"-rand=%lf",&fraction) == 1){
		    mode='r';
		    if(fraction <= 0.0 || fraction > 1.0) print_error(USAGE_START);
		    break;
		} else if(strcmp("-re",argv[arg]) == 0){ // i.e., option -re
		   PutRelEntropy=TRUE; mode='r'; break;
		} else if(strcmp("-rcsq",argv[arg]) == 0){ // i.e., option -rcsq
		   rm_csq=2; mode='x'; break;
		} else if(strcmp("-rspsq",argv[arg]) == 0){ // i.e., retain swissprot and pdb only
		   rm_csq=3; mode='x'; break;
		} else if(strcmp("-rpdb",argv[arg]) == 0){ // i.e., rm pdb seqs
		   rm_csq=4; mode='x'; break;
		} else if(strcmp("-rmest",argv[arg]) == 0){ // i.e., remove ests and envnr seqs.
		   rm_csq=5; mode='x'; break;
		} else if(strcmp("-rmcsq",argv[arg]) == 0){ // i.e., option -rmcsq
		   rm_csq=1; mode='x'; break;
		} else if(strcmp("-rmflank",argv[arg]) == 0){ // i.e., option -rmflank
		   // print_error("Use gsq_typ V mode instead");
		   rm_flank=TRUE;
		   mode='x'; break;
	        } else if(strstr(argv[arg],",") != NULL){ // i.e., option -r<x>,<s>
		   if(sscanf(argv[arg],"-r%d,%d",&min_rpt,&min_spacing)!=2)
			print_error(USAGE_START);
		} else {	// anything else is parsed as -r<real>
		  Cut=RealOption(argv[arg],'r',-99999,500000,USAGE_START); 
		} mode = 'r'; break;
	     case 'R': 
		// Options beginning with -R; all of them select mode 'R'.
		//   -R=<int>            sets rm_seq
		//   -Rm=<text>          sets family_name
		//   -Rename=<int>:<text> sets rename_id and rename
		//   -R<text>            anything else: rm points at the text after -R
		mode = 'R'; 
		if(argv[arg][2] == '='){
                   if(sscanf(argv[arg],"-R=%d",&rm_seq) != 1) print_error(USAGE_START);
		// %s needs a pointer to the first character of the buffer, so pass
		// family_name itself (as the -show= / -Show= options do), not its address.
		}  else if(sscanf(argv[arg],"-Rm=%s",family_name) == 1){
		    if(strlen(family_name) < 1) print_error(USAGE_START);
		} else if(sscanf(argv[arg],"-Rename=%d:%s",&rename_id,str) == 2){
		    rename=AllocString(str);
		    if(0) fprintf(stderr,"-Rename=%d:%s\n",rename_id,rename);
		    if(strlen(rename) < 1) print_error(USAGE_START);
		    else if(rename_id < 1) print_error(USAGE_START);
		} else {
			rm = argv[arg]+2; 
		}
	        break;
	     case 'V': mode = 'V'; break;
	     case 'v': mode='v'; 
		if(strcmp("-var",argv[arg]) == 0){ // i.e., option variance...
			show_var=TRUE;
		} else verbose = TRUE; break;
	     case 'u': 
		if(strcmp(argv[arg],"-unlabel") == 0){
		   UnLabelOut=TRUE; mode='u';
		} else if(strcmp(argv[arg],"-unknwn") == 0){
		   RmUnKnown=TRUE; mode='u';
		}  else if(sscanf(argv[arg],"-u=%d",&purge) == 1){
		   if(purge > 100 || purge < 40) print_error(USAGE_START);
		   mode='u';
		} else { 
		  mode='u'; purge=0;
		  char tmp_buffer[1000];
		  if(sscanf(argv[arg],"-use=%s",tmp_buffer) == 1){
			use_file_name=AllocString(tmp_buffer);
		  } else if(argv[arg][2] != 0) print_error(USAGE_START);
		} 
	        break;
	     case 'U': 
		mode='U';
		if(argv[arg][2] == 0){
		  purge=0;
		}  else if(sscanf(argv[arg],"-U=%d",&purge) == 1){
		   keep_first=TRUE;
		} else {
		  purge=IntOption(argv[arg],'U',10,100,USAGE_START);
		}
		break;
	     case 'W': 
		mode = 'W'; 
		if(sscanf(argv[arg],"-W=%d",&OutFileNumber) == 1){
			if(OutFileNumber < 1) print_error(USAGE_START);
		} else if(strcmp("-Write",argv[arg]) == 0){ 
			WriteCMA=TRUE;
		}
		break;
	     case 'w': mode = 'w'; 
		if(strcmp("-write",argv[arg]) == 0){ 
			WriteAllCMA=TRUE; mode = 'W';
		} else if(sscanf(argv[arg],"-worst=%d:%d",&KeySeq,&put_worst) == 2){
			if(put_worst < 1 || KeySeq < 1) print_error(USAGE_START);
		} else if(argv[arg][2]!=0) {  // -w=<file>
		   if(argv[arg][2]!='=') print_error(USAGE_START);
		   if(!isgraph(argv[arg][3])) print_error(USAGE_START);
		   write_file_name= argv[arg] + 3; mode='m'; // do within merge.
	        } break;
	     case 'q': 
		if(argv[arg][2]==0){
			seqid=0; mode = 'i'; fractDeleted=-1; // remove columns for which 1st seq == '-'.
		} else if(argv[arg][2]== '=') { seqid=argv[arg]+3; mode = 'q'; }
		else print_error(USAGE_START);
		break;
	     case 'x': seqid=argv[arg]+2; mode = 'x'; break;
	     case 'Z': {
		mode = 'Z'; 
		if(sscanf(argv[arg],"-Z%d:%d",&FirstSeq,&LastSeq)!=2)
			print_error(USAGE_START);
		if(FirstSeq < 1 || FirstSeq > LastSeq) print_error(USAGE_START);
	        } break;
	     case 'z': seqid=argv[arg]+2; mode = 'z'; break;
	     default: print_error(USAGE_START);
	   }
	}
	if(seed == 7061950) seed = (UInt4) time(NULL);
	sRandom(seed);
	TurnOffLicenseStatement();
	a_type	A = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
	// Read the input alignment from argv[1] into cma unless one of the option
	// flags tested below is set; in that case cma is left null and the selected
	// mode is expected to do its own reading.
	// NOTE(review): the parenthesised mode test is always true, because
	// (mode != 'm' || mode != 'U') holds for every value of mode.  min_sq and
	// max_sq therefore have no influence here -- confirm what the intended
	// condition for modes 'm' and 'U' was.
	if(see_aa == 0 && !FindSeedPttrn && !put_consensus &&
	  (mode != 'm' || (mode=='m' && min_sq > 0) || (mode!='U') 
	  || (mode=='m' && max_sq > 0)) && !template_mode && !rm_csq
	  && !PutScores && deleted_pos==0 && !(mode == 'M' && MinCol > 0.0)){
#if 0
	  sprintf(str,"%s.cma",argv[1]);
	  cma=ReadCMSA2(str,A);
	  if(!cma) print_error("cma file read error");
#else
	  // fprintf(stderr,"Opening cma file\n");
	  fp=OpenFileToRead(argv[1]); cma=ReadCMSA(fp,A); fclose(fp);
	  if(cma==0) print_error("FATAL: input cma file read failed");
#endif
	} else cma=0;
	switch(mode) {
	  case 'p': {
	    if(pairA > 0 && pairB > 0){
		a_type AB=AlphabetCMSA(cma);
		if(pairA > NumColumnsCMSA(cma) || pairB > NumColumnsCMSA(cma)){
			print_error("FATAL: -pairs input out of range");
		} 
		if(pairA == pairB) print_error("FATAL: -pairs input error");
		Int4 i,j,resAB[25][25],resI[25],resJ[25];
		for(i=0; i <= nAlpha(AB); i++){ 
		   resI[i]=resJ[i]=0;
		   for(j=0; j <= nAlpha(AB); j++){ resAB[i][j]=0; }
		}
		Int4 n,rA,rB,sq,N=NumSeqsCMSA(cma);
        	for(Int4 sq=1;sq <= N; sq++) {
			rA=ResidueCMSA(1,sq,pairA,cma);
			rB=ResidueCMSA(1,sq,pairB,cma);
			resAB[rA][rB]++; resI[rA]++; resJ[rB]++;
		}
		dh_type dH=dheap(500,4);
		Int4 n_i[500],n_j[500];
		for(n=0,i=1; i <= nAlpha(AB); i++){ 
		   for(j=1; j <= nAlpha(AB); j++){
			n++;
			if(1 || resAB[i][j] > 0){
			   insrtHeap(n,-(keytyp)resAB[i][j],dH); n_i[n]=i; n_j[n]=j;
			}
		   }
		}
		Int4 *cumHits,totHits=0,sumHits=0; NEW(cumHits,n+10,Int4);
		dh_type dH2=dheap(500,4);
		// fprintf(stdout,"rank\tresA\tresB\tobs\texp\t X \tchi_sq\n");
		Int4 rank=0;
		double chi_sq=0.0;
		for(rank=1; (n=delminHeap(dH)) != 0; rank++){
			i=n_i[n]; j=n_j[n];
			char cA,cB; cA=AlphaChar(i,AB); cB=AlphaChar(j,AB);
			double dA=(double)resI[i]/(double)N;
			double dB=(double)resJ[j]/(double)N;
			double exp=dA*dB*(double)N;
			double D,dd,d;
			totHits += resAB[i][j];
			if(exp > 0.0){
			  d=(double) resAB[i][j]/exp;
			  dd=(double)resAB[i][j] - exp;
			  chi_sq += (dd*dd)/exp;
			  // D=(dd*dd)/exp;
D=(double)resAB[i][j];
			  insrtHeap(n,-D,dH2);
			}
#if 0
			if(resAB[i][j] > 0 || exp >= 0.9){
			    fprintf(stdout,"%d\t%c%d\t%c%d\t%d\t%.0f\t%.2f\t%.0f\n",
				rank,cA,pairA,cB,pairB,resAB[i][j],round(exp),d,round(dd)); 
			}
#endif
		} // fprintf(stdout,"\n\n\n");
		fprintf(stdout,"rank\tresA\tresB\tobs\texp\t X \tchi_sq\tNorm\n");
		for(rank=1; (n=delminHeap(dH2)) != 0; rank++){
			i=n_i[n]; j=n_j[n];
			char cA,cB; cA=AlphaChar(i,AB); cB=AlphaChar(j,AB);
			double dA=(double)resI[i]/(double)N;
			double dB=(double)resJ[j]/(double)N;
			double exp=dA*dB*(double)N;
			double D,obs,contrib,d=(double) resAB[i][j]/exp;
			obs=(double)resAB[i][j] - exp;
			sumHits+=resAB[i][j];
			contrib= (obs*fabs(obs))/exp;
			if(resAB[i][j] > 0 || exp >= 0.9){
			   fprintf(stdout,"%d\t%c%d\t%c%d\t%d\t%.0f\t%.2f\t%.0f\t%.1f%c\t%.1f%c\n",
				rank,cA,pairA,cB,pairB,resAB[i][j],round(exp),d,round(contrib),100.0*fabs(contrib)/chi_sq,'%',
				(100.0*(double)sumHits)/(double)totHits,'%'); 
			}
		} fprintf(stdout,"\n"); Nildheap(dH); Nildheap(dH2);
		free(cumHits);
	    } else if(PutPDB){
		BooLean *skip;
		Int4 sq,N=NumSeqsCMSA(cma);
		data = TrueDataCMSA(cma);  
		NEW(skip,N+3,BooLean);
        	for(Int4 i=1;i <= N; i++) {
			e_type	sE;
			sE=SeqSetE(i,data);
			if(!PdbSeq(sE)) skip[i]=TRUE;
		} PutSelectCMSA(stdout,skip,cma); free(skip);
	    } else if(NumPoor > 0){
		// -poor=<n>: write the n sequences with the lowest GetGappedProbCMSA()
		// value for block 1 to <argv[1]>.poor<n>.cma.
		Int4    N = NumSeqsCMSA(cma);
		ss_type data = TrueDataCMSA(cma);	// only used by the disabled debug output below
        	BooLean *skip; NEW(skip,N+3,BooLean);
		Int4 J,I;
		dh_type dH=dheap(N+2,4);
		// Key every sequence by its probability and mark it as skipped for now.
                for(J=1; J <= N; J++){
                   double prob = GetGappedProbCMSA(1,J,cma);
		   insrtHeap(J,(keytyp)prob,dH); 
		   skip[J]=TRUE;
                }
		// Un-skip the NumPoor smallest keys (fewer if the heap runs out).
		// NumPoor is > 0 on this branch, so this matches the former
		// pop-then-test loop.
		for(J=0; J < NumPoor && !emptyHeap(dH); J++){
			// The pop must not live inside assert(): with NDEBUG the whole
			// expression would vanish and I would never be assigned.
			I=delminHeap(dH);
			assert(I != 0);
                        // PutSeqSetE(stdout,I,data);
			skip[I]=FALSE;
		} Nildheap(dH); 
		char poorstr[50];
		sprintf(poorstr,".poor%d.cma",NumPoor);
		FILE *fp = open_file(argv[1],poorstr,"w");
		// FILE *fp = open_file(argv[1],".poor.cma","w");
		PutSelectCMSA(fp,skip,cma); free(skip);
		fclose(fp);
	    } else if(show_phyla){
	      Int4 f,Number,num_phyla,less_than4 = 0,more_than3=0;
	      // fp = open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      set_typ Set=MakeSet(Number+1); ClearSet(Set);
	      h_type HG=0;
	      if(Number > 2){ HG=Histogram("number of phyla", 0,50,1.0); }
	      for(f=1; f <= Number; f++) {
		// fprintf(stderr,"FILE: %s:\n",NameCMSA(IN_CMA[f]));
		num_phyla=PutPhylaSeqSet(stdout,0,TrueDataCMSA(IN_CMA[f]));
		if(num_phyla < 4){
		    less_than4++; AddSet(f,Set);
		    
		} else more_than3++;
		if(HG) IncdHist((double)num_phyla,HG);
	      }
	      if(HG){ PutHist(stdout,50,HG); NilHist(HG); }
	      for(f=1; f <= Number; f++) {
		if(MemberSet(f,Set)){
		  fprintf(stdout,"%s,",NameCMSA(IN_CMA[f]));
		}
	      } fprintf(stdout,"\n\n");
	      NilSet(Set);
	      fprintf(stdout,"%d sets have less than 4 phyla; %d more than 3\n",less_than4,more_than3);
	    } else if(phylum != 0){
		data = TrueDataCMSA(cma);  
		Int4	N=NSeqsSeqSet(data);
		e_type	sE;
		NEW(skip,N+3,BooLean); 
        	for(Int4 i=1;i <= N; i++) {
			sE=SeqSetE(i,data);
			if(PhylumSeq(sE) == 0) continue;
			if(strcmp(PhylumSeq(sE),phylum) == 0) skip[i]=FALSE;
			else skip[i]=TRUE;
			// fprintf(stderr,"%d: %s vs %s\n",i,PhylumSeq(sE),phylum);
		}
		FILE *fp = open_file(argv[1],".rm_phylum.cma","w");
		PutSelectCMSA(fp,skip,cma); fclose(fp); 
		free(skip); 
	   } else {
		Int4 pos[3];
		data = DataCMSA(cma);  // fake sequences
		Int4 s,fake_pos;
		e_type E=SeqSetE(1,TrueDataCMSA(cma));
		// Mutated_pos=RealToFakeCMSA(1,Mutated_pos-OffSetSeq(E),cma);
		fake_pos=RealToFakeCMSA(1,Mutated_pos,cma);
        	for(Int4 sq=1;sq <= NSeqsSeqSet(data); sq++) {
			E=SeqSetE(sq,TrueDataCMSA(cma));
			// Int4 s=TruePosCMSA(sq, Mutated_pos-OffSetSeq(E),cma);
			// s=TruePosCMSA(sq,Mutated_pos,cma);
			s=TruePosCMSA(sq,fake_pos,cma);
			// s=TruePosCMSA(sq,s,cma);
			if(s > 0 && s <= LenSeq(E)){
		  	   unsigned char *seq=SeqPtr(E); 
			   fprintf(stderr,"mutate %c in sq %d and pos %d to: ",
				AlphaChar(seq[s],A),sq,s);
			   seq[s]=RandomResSeq(E);
		  	   unsigned char *xsq=XSeqPtr(E); 
			   xsq[s]=seq[s];
			   // PutSeq(stdout,E,A);
			   fprintf(stderr,"%c\n",AlphaChar(seq[s],A));
			} else {
			   fprintf(stderr,"sq=%d; s=%d; fatal\n",sq,s); exit(1);
			}
		}
           	sprintf(str,"%s.mutate_%d",argv[1],Mutated_pos);
#if 1
		FILE *fp = open_file(str,".cma","w"); PutCMSA(fp,cma); fclose(fp);
#else
                WriteMtfCMSA(str, cma, NULL);
#endif
	   }
		} break;
	  case 'G': {
		for(Int4 b=0; b <= nBlksCMSA(cma); b++) PutBlockSpacingCMSA(stdout,b,cma);
		} break;
	  case 'g': 
	    {
		assert(grow_blk > 0 && grow_blk <= nBlksCMSA(cma));
		Int4 d;
		for(Int4 col=1; col <= abs(grow_len); col++){
		   if(!InsertColCMSA(grow_blk,(grow_len > 0), cma))
			print_error("Failed to insert a column at end of block");
		}
           	sprintf(str,"%s.add",argv[1]);
                WriteMtfCMSA(str, cma, NULL);
	    } break;
	  case 'B': {
	    // Mode 'B': for every alignment in argv[1], write out its "best" sequences.
	    //   FirstOnly          keep only sequence 1, relabelled "<name> consensus"
	    //   put_best > 0       rank sequences by PseudoAlnScoreCMSA() against KeySeq
	    //   otherwise          print the single sequence with the highest
	    //                      GetGappedProbCMSA() value
	    if(FirstOnly){
		Int4 J,Number;
	        // fp = open_file(argv[1],".cma","r");
	        fp=OpenFileToRead(argv[1]);
	        cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		// fp is already closed here; this branch must not close it again.
		for(Int4 f=1; f<=Number;f++) {
		  cma=IN_CMA[f];
		  data = TrueDataCMSA(cma);  
		  char new_info[200];
		  // Bounded write: the length of NameCMSA() is not checked here.
		  snprintf(new_info,sizeof(new_info),"%s consensus",NameCMSA(cma));
		  e_type sE=SeqSetE(1,data);
		  ChangeInfoSeq(new_info, sE);
		  Int4	N=NSeqsSeqSet(data);
	 	  NEW(skip,N+3,BooLean); 
		  for(J=2; J <= N; J++) skip[J]=TRUE;	// everything except sequence 1
		  PutSelectCMSA(stdout,skip,cma); 
		  free(skip);
		}
	    } else {
	   Int4 Number,KeySeq0=KeySeq;	// KeySeq0: the key sequence as given on the command line
	   fp=OpenFileToRead(argv[1]);
	   cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	   fp = stdout; // open_file(argv[1],"_best.cma","w");
	   for(Int4 f=1; f<=Number;f++) {
	     cma=IN_CMA[f];
	     double prob,best;
	     dh_type dH=NULL;
	     Int4    Score,I,J,best_J=0,N=NumSeqsCMSA(cma);
	     // combines the -Best=5 and -Best=25:5 forms
	     if(put_best > 0){	// put 'put_best' number of best hits to KeySeq.
		if(KeySeq0 == 0){	// then set KeySeq == to the best sequence...
		  if(nBlksCMSA(cma) != 1) print_error("this option requires only one blk");
		  if(N < 2){ print_error("only one sequence in cma file"); }
		  best=-9999999999.0;
		  for(J=1; J <= N; J++){
               		prob = GetProbCMSA(1,J,cma);
			// prob = GetGappedProbCMSA(1,J,cma);
			if(prob > best) { best=prob; KeySeq=J; }
		  }
		}
		// if(KeySeq > N) print_error("-Best option input error (key seq > number seqs");
		// NOTE(review): this only warns; KeySeq is still used below even when
		// it exceeds N -- confirm that the callees tolerate that.
		if(KeySeq > N) fprintf(stderr,"-Best option (key seq > number seqs)\n");
	      if(BestPhylaOnly){ // put out best in each phylum only...
		  fprintf(stderr,"KeySeq=%d\n",KeySeq);
		  dH=dheap(N+2,4);
	 	  NEW(skip,N+3,BooLean); 
		  // The key sequence gets a slightly better key than its own self
		  // score.  (A diagnostic that compared KeySeq with the still
		  // unassigned J and printed the unassigned Score was removed here:
		  // it read indeterminate values.)
		  Int4 SelfScore=PseudoAlnScoreCMSA(KeySeq, KeySeq,cma);
		  insrtHeap(KeySeq,-((keytyp)SelfScore + 0.01),dH); 
		  for(J=1; J <= N; J++){
			skip[J]=TRUE;
			if(J==KeySeq) continue;
			e_type sE=TrueSeqCMSA(J,cma);
			if(DistinctPhyla){
			  if(KingdomSeq(sE) == 'U') continue;	// kingdom is unknown...
			  if(KingdomSeq(sE) == 'X') continue;	// kingdom is unknown...
			  if(KingdomSeq(sE) == 0) continue;	// kingdom is unknown...
			}
			Score=PseudoAlnScoreCMSA(KeySeq, J,cma);
			insrtHeap(J,-(keytyp)Score,dH); 	// negated: highest score pops first
		  }

		  Int4	NumPhyla=0,NumHits,NumClass=0;
		  BooLean	*Printed=0,*PutClass=0;
		  Int4	*Class;
		  Int4    *phyla=GetPhylaSeqSet(stderr, NumPhyla, TrueDataCMSA(cma),
				Class,NumClass);
		  NEW(Printed,NumPhyla+3,BooLean);
		  NEW(PutClass,NumClass+3,BooLean);

		  Int4 *list; NEW(list, N+3,Int4); 

		  // Take up to put_best hits in score order; with DistinctPhyla set,
		  // a phylum / class that has already been used is not taken again.
		  for(NumHits=I=0; I < put_best && !emptyHeap(dH); ){
			// Keep the pop outside assert() so it survives NDEBUG builds.
			J=delminHeap(dH);
			assert(J != 0);
			if(!Printed[phyla[J]]) {
				skip[J]=FALSE; NumHits++;
				// fprintf(stderr,"Seq Num: %d\n",J);
				if(DistinctPhyla){
				   Printed[phyla[J]]=TRUE;
				   PutClass[Class[J]]=TRUE;
				}
				I++; list[I]=J;
			} else if(!PutClass[Class[J]]){
				skip[J]=FALSE; NumHits++;
				if(DistinctPhyla) PutClass[Class[J]]=TRUE;
				I++; list[I]=J;
			}
		  } Nildheap(dH); 
		  // Append the skipped sequences so that list covers all N entries.
		  for(J=1; J <= N; J++){
			if(skip[J]){  I++; assert(I <= N); list[I]=J; }
		  }
		  assert(I == N);
		  if(NumHits > 1) PutSelectOneCMSA(fp,skip,list,cma);
		  // if(NumHits > 1) PutSelectOneCMSA(stdout,skip,list,cma);
		  // PutSelectCMSA(stdout,skip,cma); 
		  free(list); free(skip); free(phyla); free(Printed);
		  free(Class); free(PutClass);
	       } else {
		  SortBySimilarityCMSA(fp,KeySeq, put_best,cma);
	       }
	     } else {	// put_best <= 0: print the single best sequence only.
		// The homolog-output code that used to sit in this branch was guarded
		// by put_best > 0 and so could never run here; it has been dropped.
		best=-9999999999.0;
		if(N < 2){ print_error("only one sequence in cma file"); }
		for(J=1; J <= N; J++){
		   for(Int4 m=1; m <= nBlksCMSA(cma); m++){
               		// prob = GetProbCMSA(m,J,cma);
			prob = GetGappedProbCMSA(m,J,cma);
			if(prob > best) { best=prob; best_J=J; }
		   }
		}
		if(best_J > 0){
		   e_type bestE=TrueSeqCMSA(best_J,cma);
		   PutSeq(stdout,bestE,A);
		}
	     }
	   }
	   // Only this branch owns an open output stream (currently always stdout).
	   // The check used to sit after the if/else and closed the already-closed
	   // input stream a second time on the FirstOnly path.
	   if(fp != stdout) fclose(fp);
	    }
	    }break;
	  case 'l': 
	     // Mode 'l':
	     //   LabelOut        label every alignment in argv[1] and write them to
	     //                   <argv[1]>_labeled.cma
	     //   PrintLPR        print LPR / map statistics for every alignment to stderr
	     //   cma_level >= 0  set the level of cma and print it to stdout
	     //   otherwise       -layer (not implemented; ends in print_error)
	     if(LabelOut){
	        Int4 Number;
	    	FILE *fp=OpenFileToRead(argv[1]);
	    	cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		fp = open_file(argv[1],"_labeled.cma","w"); 
		for(Int4 f=1; f<=Number; f++) {
		   LabelSeqsCMSA(IN_CMA[f]); PutCMSA(fp,IN_CMA[f]);
		} fclose(fp);
	    } else if(PrintLPR){
		Int4 Number;
	        // fp = open_file(argv[1],".cma","r");
	        fp=OpenFileToRead(argv[1]);
	        cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		for(Int4 f=1; f<=Number;f++) {
#if 1
		  cma_typ xcma=IN_CMA[f];
		  char dms_mode='T';
		  Int4    aa_per_io=20,aa_per_do=150, exp_ie=1,exp_de=1;
		  double  PriorWt=1.0,SqWtAdj=0.0,pn=1000;
// aa_per_io=30; aa_per_do=200;	// stage 4.
                  ssx_typ *ssx = new ssx_typ(aa_per_io,aa_per_do,exp_ie,
				exp_de, pn,xcma,dms_mode);
                    if(PriorWt > 0) ssx->SetPriorWt(PriorWt);
                    if(SqWtAdj > 0) ssx->SetSqWtAdjust(SqWtAdj);
		    double  dd,DD,gp,MaxWtSq=0;
		    MaxWtSq=ssx->RtnWtNumSeqs();
		    dd=ssx->AdjstDirichletLLR(MaxWtSq);    //for diff # weighted seqs.
		    DD=ssx->AdjstdBildLLR(MaxWtSq);
		    gp=ssx->RtnIndelPenalty();
		  fprintf(stderr,"LPR=%.2lf (%.2lf + %.2lf = %.2lf)\n",
			ssx->GapMap(),dd,gp,dd+gp);
		  delete ssx;
		  // The DD conversion was spelled "%.2fl", which printed a stray 'l'
		  // after the number; it is "%.2lf" like the line above.
		  fprintf(stderr,"%s: blk-based map = %g; DD = %.2lf\n",
			NameCMSA(IN_CMA[f]),RelMapCMSA(IN_CMA[f]),DD);
#else
			PutRelMapCMSA(stderr,IN_CMA[f]);
			fprintf(stderr,"%s: map = %g\n",NameCMSA(IN_CMA[f]),RelMapCMSA(IN_CMA[f]));
			fprintf(stderr,"Haussler Dirichlet map = %g\n",DirichletRelMap('h',IN_CMA[f]));
			fprintf(stderr,"BLOSUM62 Dirichlet map = %g\n",DirichletRelMap('b',IN_CMA[f]));
#endif
		}
	    } else if(cma_level >= 0){
		SetLevelCMSA(cma_level,cma);
		PutCMSA(stdout,cma);
	    } else {
		Int4 **gapscore=0;
		Int4    BlkScore;
		// 1. get seq.
		e_type	keyE=ReadSeqFA(layer_keysq, 0, A);
		// 2. align seq against cma.
		char *operation=GapAlignSeqCMSA(stderr,18,2,&BlkScore,keyE,gapscore,cma);
		// operation=GapAlignSeqCMSA(stderr,insert,extend,&BlkScore,alnE,gapscore,BG_CMA);
		// 3. output keyE seq in cma format.
		// 4. output blocks in cma format.
		//  PutInterBlockCMSA(FILE *fptr, Int4 t1, Int4 t2, cma_typ L);
		// 
		print_error(" -layer option not yet implemented");
		
	    } break;
	  case 'S': 
	    // 'S' options, tried in this order: split the sequences into files of
	    // SpltSz each, select sequences by id list, shuffle columns, split
	    // blocks to match another cma file, or split a single block.
	    if(SpltSz > 0){
		// Write consecutive runs of SpltSz sequences to <argv[1]>_<k>.cma.
		Int4 file=1,Nsq=NumSeqsCMSA(cma);
		if(SpltSz >= Nsq){
			fprintf(stderr,"SpltSz=%d >= Nsq=%d\n",SpltSz,Nsq);
			print_error("-Splt option input error");
		}
		char str[100];
		set_typ set=MakeSet(Nsq+4);
		// Fixed: the previous loop left the outer loop as soon as i reached
		// Nsq, even when sequence Nsq had not been added to any set yet, so
		// the last sequence was lost whenever Nsq % SpltSz == 1.
		for(file=1,i=1; i <= Nsq; file++){
		     ClearSet(set);
		     // Take the next SpltSz sequences (fewer for the final file).
		     for(j=1; j <= SpltSz && i <= Nsq; j++,i++) AddSet(i,set);
		     sprintf(str,"%s_%d",argv[1],file);
		     FILE *ofp=open_file(str,".cma","w");
		     PutInSetCMSA(ofp,set,cma); fclose(ofp);
		} NilSet(set);
	    } else if(TheSqIds){
		// Output only the sequences whose indices appear in the
		// zero-terminated TheSqIds list (indices out of range are ignored).
		BooLean	*skip;
		Int4	hits,i,sq;
		NEW(skip,NumSeqsCMSA(cma)+3,BooLean);
		data = TrueDataCMSA(cma);  // == True sequences...
		for(sq=1; sq <= NumSeqsCMSA(cma); sq++) skip[sq]=TRUE;
		for(hits=0,i=1; (sq=TheSqIds[i]); i++){
		    if(sq > 0 && sq <= NumSeqsCMSA(cma)){ hits++; skip[sq]=FALSE; }
		}
		// sprintf(str0,"%s_is%d",argv[1],deleted_pos);
		// fp = open_file(str0,".cma","w");
		if(hits > 0) PutSelectCMSA(stdout,skip,cma);
		// fclose(fp); 
		free(skip); free(TheSqIds);
	    } else if(shuffle){
		ShuffleColumnsCMA(cma); 
		PutCMSA(stdout, cma); 
	    } else if(split_cmafile != 0){
		// Split block 1 of cma repeatedly, working from the last block of
		// split_cma backwards, and write the result to <argv[1]>.blks.cma.
	        cma_typ split_cma=ReadCMSA2(split_cmafile,A);
		if(TotalLenCMSA(cma) != TotalLenCMSA(split_cma)){
			print_error("FATAL: split_cma and input cma of differenct lengths");
		}
		long blk_len,len=TotalLenCMSA(cma);
		for(long blk = nBlksCMSA(split_cma); blk > 1; blk--){
			blk_len=LengthCMSA(blk,split_cma);
			len = len-blk_len;
           		cma_typ cma2=SplitBlkCMSA(1, len, min_len, cma);
			if(cma2 == 0) print_error("fatal error with -S=<cmafile> option");
			NilCMSA(cma); cma=cma2;
		}
	        fp = open_file(argv[1],".blks.cma","w");
		PutCMSA(fp,cma); fclose(fp);
           	// sprintf(str,"%s.blks",argv[1]);
                // WriteMtfCMSA(str, cma, NULL);
	    } else {
		// Split one block and write the result to <argv[1]>.split.cma.
           	cma_typ cma2=SplitBlkCMSA(block, left_leng, min_len, cma);
		if(cma2 == 0) print_error(USAGE_START);
		//SaveBestCMSA(cma2); InitMAPCMSA(cma2);
           	sprintf(str,"%s.split",argv[1]);
		FILE *fp = open_file(str,".cma","w"); PutCMSA(fp,cma2); fclose(fp);
                // WriteMtfCMSA(str, cma2, NULL);
		NilCMSA(cma2);
	    } break;
	  case 'u': 
	    {
	      // 'u' options: CD-HIT based purge (40 <= purge <= 100), unlabel,
	      // or removal of sequences with unknown phylum. Each of these ends
	      // with its own break; otherwise control falls into case 'U'.
	      if(purge >= 40 && purge <=100){
		a_type AB=AlphabetCMSA(cma);
		RunCDHit(stdout,purge,argv[1],cma);
		// Read <argv[1]>_U<purge>.cma back in.
		// NOTE(review): presumably written by RunCDHit -- confirm.
		sprintf(str,"%s_U%d.cma",argv[1],purge);
		fprintf(stderr,"input=%s\n",str);
		FILE *fp = open_file(str,"","r");
                cma_typ cma2=ReadCMSA(fp,AB); fclose(fp);
		sprintf(str,"%s_u%d",argv[1],purge);
		fprintf(stderr,"output=%s.cma\n",str);
		fp = open_file(str,".cma","w");
                Int4 Nset=0; // Nsq=NumSeqsCMSA(cma2);
                PutFastRepSetCMSA(stderr,fp,purge,&Nset,cma2);
                // sprintf(str,"%s.purge.cma",argv[1]);
                // cma_typ cma2=ReadCMSA2(str,A);
                // sprintf(str,"%s.purge",argv[1]);
                // WriteMtfCMSA(str, cma2, NULL);
                // TotalNilCMSA(cma2);
                fclose(fp);
	        break;
	      } else if(UnLabelOut){
		// Remove labels from every alignment; write <argv[1]>_unlabeled.cma.
	        Int4 Number;
	    	FILE *fp=OpenFileToRead(argv[1]);
	    	cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		fp = open_file(argv[1],"_unlabeled.cma","w"); 
		for(Int4 f=1; f<=Number; f++) {
		   UnLabelSeqsCMSA(IN_CMA[f]); PutCMSA(fp,IN_CMA[f]);
		} fclose(fp);
	        break;
	      } else if(RmUnKnown){
		// Drop sequences whose phylum is missing or equals "unknown";
		// write the remainder to <argv[1]>_known.cma.
	        Int4 Number;
	    	FILE *fp=OpenFileToRead(argv[1]);
	    	cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		fp = open_file(argv[1],"_known.cma","w"); 
		for(Int4 f=1; f<=Number; f++) {
		  data = TrueDataCMSA(IN_CMA[f]);  
		  Int4	N=NSeqsSeqSet(data);
		  e_type sE;
		  NEW(skip,N+3,BooLean); 
        	  for(Int4 i=1;i <= N; i++) {
			sE=SeqSetE(i,data); skip[i]=FALSE;
			char *phylum=PhylumSeq(sE);
			// Fixed: a null phylum used to be handed to "%s" before the
			// null test below, which is undefined behaviour.
			fprintf(stderr,"%d: %s vs %s\n",i,(phylum ? phylum : "(null)"),"unknown");
			if(phylum == 0){ skip[i]=TRUE; continue; }
			if(strcmp(phylum,"unknown") == 0) skip[i]=TRUE;
		  }
		  PutSelectCMSA(fp,skip,IN_CMA[f]); free(skip); 
		} fclose(fp);
	        break;
	      }
	      // None of the options above applied: fall through to case 'U',
	      // which tests mode=='u' to choose the phylum-aware duplicate removal.
	    }
	  case 'U': 
	  {
	    // Also reached by fall-through from case 'u'. Reads every alignment
	    // in argv[1], then either (a) keeps only the families named in
	    // use_file_name, (b) purges each family at 'purge' percent, or
	    // (c) removes identical sequences from each family.
	    Int4 Number,N,num_names,namelen,NewNumber;
	    int  c;			// int, not char, so that EOF is distinguishable from data.
	    char *buffer[1000];	// family names; slots 1..999 are used.
	    char tmpname[1000];
	    // fp = open_file(argv[1],".cma","r");
	    fp=OpenFileToRead(argv[1]);
	    cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	    if(use_file_name != 0){
//*******************************************************************
		// Move all alignments into USE_CMA; the matches are copied back
		// into IN_CMA[1..NewNumber] below.
		cma_typ *USE_CMA;
		NEW(USE_CMA,Number+3,cma_typ);
		for(Int4 f=1; f<=Number;f++) {
		  USE_CMA[f]=IN_CMA[f]; IN_CMA[f]=0;
		}
#if 0
		ss_type P = SeqSet(use_file_name,A);
		num_names=0;
		for(n=1; n <= NSeqsSeqSet(P); n++){
			e_type E = SeqSetE(n,P);
			StrSeqID(tmpname,100,E);
			num_names++; 
			buffer[num_names]=NewString(tmpname); 
			fprintf(stderr,"Name %d: %s\n",num_names,buffer[num_names]);
		}
#else
	// 1. Get protein family names from input file:
	//    comma-separated names made of alphanumerics and '_'; newlines
	//    are ignored and any other character is an input error.
		fp = open_file(use_file_name,"","r");
		for(namelen=0,N=0,num_names=1; (c=getc(fp)) != EOF; N++){
		  if(N==0) if(!isalnum(c)) print_error("-m option input file error1");
		  if(c==','){
			// Fixed: guard the fixed-size name table.
			if(num_names >= 999) print_error("-m option input file error: too many names");
			tmpname[namelen]=0;
			buffer[num_names]=NewString(tmpname); 
			fprintf(stderr,"Name %d: %s\n",num_names,buffer[num_names]);
			namelen=0;
			num_names++; 
		  } else if(isalnum(c) || c=='_'){
			// Fixed: guard tmpname, leaving room for the terminator.
			if(namelen >= 999) print_error("-m option input file error: name too long");
			tmpname[namelen]=(char)c; namelen++;
		  } else if(c != '\n') print_error("-m option input file error2");
		} fclose(fp);
		if(namelen==0) print_error("-m option input file error3");
		tmpname[namelen]=0;
		buffer[num_names]=NewString(tmpname); 
		fprintf(stderr,"Name %d: %s\n",num_names,buffer[num_names]);
#endif

	// 2. remove all cma files that don't match a name from input file:
		NewNumber=0;
		for(Int4 f=1; f<=Number;f++){ 
		   BooLean found=FALSE;
		   char *str1=NameCMSA(USE_CMA[f]);
		   fprintf(stderr,"Name %d = '%s'\n",f,str1);
		   for(Int4 n=1; n <= num_names; n++){
		  	char *str2=buffer[n];
			if(strcmp(str1,str2)==0){	// 0 --> a match
				found=TRUE;
			fprintf(stderr,"===> Match: '%s' = '%s'\n",str1,str2);
				break;
			}
		   }
		   if(found){
			NewNumber++;
			IN_CMA[NewNumber]=USE_CMA[f];
		   } else {	// need to free data too...?
			NilCMSA(USE_CMA[f]);
		   }
		}
		Number=NewNumber;
		fprintf(stderr,"NewNumber = %d\n",NewNumber);
		for(Int4 n=1; n <= num_names; n++) free(buffer[n]);
		free(USE_CMA);	// Fixed: the pointer table itself was never released.
	    fp = open_file(argv[1],".use.cma","w");
	    for(Int4 f=1; f<=Number;f++){ 
		PutCMSA(fp,IN_CMA[f]);
	    } fclose(fp); break;
//*******************************************************************
	  } else {
		Int4 Nset;
		 if(purge > 0){
#if 0	// test amcBPPS time complexity.
	 time1=time(NULL); 
	 data = TrueDataCMSA(cma);  // fake sequences
	 Int4 I,J,N=NumSeqsCMSA(cma);
	 e_type  SeqI,SeqJ;
	 char    *phylumI,*phylumJ;
	 for(I=1; I < N; I++){
	   SeqI=SeqSetE(I,data); phylumI=PhylumSeq(SeqI); assert(I == SeqI(SeqI));
           if(KingdomSeq(SeqI) == 'U' || KingdomSeq(SeqI) == 'X') continue;
           if(phylumI==0) continue;
           unsigned char   *isq,*jsq;
           isq = SeqPtrCMSA(I,cma);  // fake seq...
           Int4 length=LengthCMSA(1,cma);
           for(J=I+1; J <= N; J++){
                // if(triedseeds && MemberSet(J,triedseeds)) continue;
                SeqJ=SeqSetE(J,data); phylumJ=PhylumSeq(SeqJ);
                if(KingdomSeq(SeqJ) == 'U' || KingdomSeq(SeqJ) == 'X') continue;
                if(phylumJ==0) continue;
                jsq = SeqPtrCMSA(J,cma);  // fake seq...
                Int4    s,si,sj,pos[3],scr;
                PosSiteCMSA(1,I,pos,cma); si=pos[1];
                PosSiteCMSA(1,J,pos,cma); sj=pos[1];
                for(scr=0,s=1; s <= length; s++,si++,sj++){
                        if(isq[si] == jsq[sj]) scr++;
                }
                double score = (double)scr*100.0/(double)length;
	   }
	 } fprintf(stderr, "\ttime: %d seconds (%0.2f minutes)\n",
                        time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
	 exit(1);
#endif
		   // Purge every family and write all results to
		   // <argv[1]>.purge<purge>.cma; a negated cutoff is passed when
		   // keep_first is set.
           	   sprintf(str,"%s.purge%d",argv[1],purge);
           	   FILE *fp = open_file(str,".cma","w");
	           for(Int4 f=1; f<=Number;f++){ 
			cma = IN_CMA[f];
			Int4 Nsq=NumSeqsCMSA(cma);
#if 0
			if(keep_first) PutRepSetCMSA(stderr,fp,-purge,&Nset,cma); 
			else PutRepSetCMSA(stderr,fp,purge,&Nset,cma); 
#else
			if(keep_first) PutFastRepSetCMSA(stderr,fp,-purge,&Nset,cma); 
			else PutFastRepSetCMSA(stderr,fp,purge,&Nset,cma); 
#endif
           	  	// sprintf(str,"%s.purge.cma",argv[1]);
           	  	// cma_typ cma2=ReadCMSA2(str,A);
		  	// sprintf(str,"%s.purge",argv[1]);
                  	// WriteMtfCMSA(str, cma2, NULL);
			// TotalNilCMSA(cma2);
			if(Number > 1) fprintf(stdout,"...file %d (\"%s\"): (%d/%d removed; %d remain).\n",
						f,NameCMSA(cma),Nsq-Nset,Nsq,Nset);
		   } fclose(fp); 
		 } else {
		   // Remove duplicate sequences from every family and write the
		   // results to <argv[1]>.unique.cma. The earliest copy is kept.
		   FILE *fp = open_file(argv[1],".unique.cma","w");
	           for(Int4 f=1; f<=Number;f++){ 
			cma = IN_CMA[f];
			Int4 i,j,k;
			data = TrueDataCMSA(cma);  // fake sequences
			Int4	N=NSeqsSeqSet(data);
			e_type	qE,sE;
			NEW(skip,N+3,BooLean); 
			if(mode=='u'){  // -u option
			  // Identical sequences are dropped unless both have a phylum
			  // and the two phyla differ.
        		  for(i=1;i < NSeqsSeqSet(data); i++) {
			   if(skip[i]) continue;
			   qE=SeqSetE(i,data);
        		   for(j=i+1;j <= NSeqsSeqSet(data); j++) {
				if(skip[j]) continue;
			   	sE=SeqSetE(j,data);
				if(IdentSeqs(qE,sE)){
				   if(PhylumSeq(qE)==0 && PhylumSeq(sE)==0){
					skip[j]=TRUE;
				   } else if(PhylumSeq(qE)==0 || PhylumSeq(sE)==0 ||
					strcmp(PhylumSeq(qE),PhylumSeq(sE))==0){ 
					skip[j]=TRUE;
				   }
				}
			   }
			  }
			} else {	// -U option
			 // Every identical sequence is dropped regardless of phylum;
			 // progress (percent) is printed every 1000 query sequences.
			 UInt4	NumSq=NSeqsSeqSet(data);
        		 for(i=1;i < NSeqsSeqSet(data); i++) {
			   if(skip[i]) continue;
			   qE=SeqSetE(i,data);
			   if(i % 1000 == 0) fprintf(stderr,"\r%.1f",100.0*((double)i/(double)NumSq));
#if 0		// This doesnt' seem to help...
			   unsigned char *q_sq=SeqPtr(qE);
			   Int4	q_n=LenSeq(qE);
        		   for(j=i+1;j <= NSeqsSeqSet(data); j++) {
				if(skip[j]) continue;
			   	sE=SeqSetE(j,data);
				if(IdentSeqs(qE,sE)) skip[j]=TRUE;
				// if(VeryFastIdentSeqs(qE,SeqSetE(j,data))) skip[j]=TRUE;
				// if(VeryFastIdentSeqs(q_sq, SeqPtr(sE), q_n,LenSeq(sE))) skip[j]=TRUE;
			   }
#else
        		   for(j=i+1;j <= NSeqsSeqSet(data); j++) {
				if(skip[j]) continue;
				if(IdentSeqs(qE,SeqSetE(j,data))) skip[j]=TRUE;
			   }
#endif
			 }
			}
			PutSelectCMSA(fp,skip,cma); 
			free(skip); 
		   } fclose(fp); 
		 }
		} break;
	     }
	  case 'a': {
		// Either add related sequences to cma (default) or, with AddDupl,
		// echo every input alignment to stdout, duplicating the sequence
		// of any alignment that holds exactly one.
		if(!AddDupl){
		  cma_typ expanded=AddRelatedCMSA(clust_cut,add_cut,psm_arg,cma);
		  fprintf(stderr,"map = %g\n",RelMapCMSA(expanded));
		  // Written twice: as <argv[1]>.new (motif form) and <argv[1]>.new.cma.
		  sprintf(str,"%s.new",argv[1]);
		  WriteMtfCMSA(str, expanded, NULL);
		  sprintf(str,"%s.new.cma",argv[1]);
		  WriteCMSA(str, expanded);
		  TotalNilCMSA(expanded);
		} else {
		  Int4 Number;
		  cma_typ OUT_CMA[5];
		  fp=OpenFileToRead(argv[1]);
		  cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		  fclose(fp);
		  for(Int4 idx=1; idx <= Number; idx++){
		    cma_typ one=IN_CMA[idx];
		    if(NumSeqsCMSA(one) != 1){ PutCMSA(stdout,one); continue; }
		    // Merge the alignment with itself through a temporary file,
		    // then read the two-sequence result back and print it.
		    OUT_CMA[1]=one;
		    OUT_CMA[2]=one;
		    fp = tmpfile();
		    PutMergedCMSA(fp,2,OUT_CMA);
		    rewind(fp);
		    one=ReadCMSA(fp,A);
		    fclose(fp);
		    PutCMSA(stdout,one);
		    TotalNilCMSA(one);
		  }
		} } break;
	  case 'N': 
		// With Newick: print "(1_<name>,2_<name>,...,N_<name>)0;" for the
		// alignments in argv[1]. Otherwise return 1 when cma holds at least
		// MinNumSeq sequences and 0 when it does not.
		if(!Newick){
		   if(MinNumSeq <= NumSeqsCMSA(cma)) return 1;
		   return 0;
		} else {
		   Int4 Number;
		   fp=OpenFileToRead(argv[1]);
		   cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		   fclose(fp);
		   fprintf(stdout,"(");
		   Int4 idx=1;
		   while(idx < Number){
		      fprintf(stdout,"%d_%s,",idx,NameCMSA(IN_CMA[idx]));
		      idx++;
		   }
		   // The last entry closes the list instead of adding a comma.
		   fprintf(stdout,"%d_%s)0;\n",Number,NameCMSA(IN_CMA[Number]));
		}
		break;
	  case 'f': { 
	    // 'f' options: print the first sequence of each alignment as its
	    // consensus (output_first), print cma in FASTA format
	    // (fasta_format), or print the full-length sequences stored in cma.
	    if(output_first){	// assume csq is used...
		Int4 Number;
	        //fp = open_file(argv[1],".cma","r");
	    	fp=OpenFileToRead(argv[1]);
	        cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
	        fclose(fp);
		for(Int4 f=1; f<=Number;f++){ 
		   char *cma_name=NameCMSA(IN_CMA[f]);
		   char *buffer;
		   data = TrueDataCMSA(IN_CMA[f]);  
		   e_type sE=SeqSetE(1,data);
		   NEW(buffer,LenSeq(sE)+5,char);
		   SeqToString(buffer, sE, A);
		   fprintf(stdout,">%s consensus\n%s\n\n",cma_name,buffer);
		   free(buffer);	// Fixed: one buffer per alignment used to leak.
		}
	    } else if(fasta_format){
		FILE *fa_fp=stdout;
		PutFastaCMSA(fa_fp,cma);
		// PutFastaAlnCMSA(fa_fp,cma); // == -A option
	    } else {
			ss_type FullSeq = FullSeqCMSA(cma);
			if(FullSeq == NULL){
				print_error("input cma file doesn't contain full seq");
			} else PutSeqSetEs(stdout,FullSeq); 
	    }
	   } break;
	  // 'd': with PrintDiversity, print the average residue diversity and the
	  // average relative entropy of every alignment in argv[1]; otherwise
	  // print the domains of cma when DomainsCMSA() returns a non-null object.
	  // Note that the opening "if(PrintDiversity){" sits inside the
	  // #if/#else below while its closing brace follows the #endif.
	  case 'd': {
#if 0
		if(PrintDiversity){
		   double  d=ResidueDiversityCMSA(stdout,0,cma);
		   fprintf(stdout,"Average diversity = %.2f\n\n",d);
		   d=AveRelEntropyCMSA((BooLean *) 0,cma);
		   fprintf(stdout,"Average RelEntropy = %.2f\n\n",d);
#else
		if(PrintDiversity){
		  Int4 Number;
	    	  fp=OpenFileToRead(argv[1]);
	          cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
	          fclose(fp);
		  for(Int4 f=1; f<=Number;f++){ 
		      // This local cma shadows the enclosing cma inside the loop.
		      cma_typ cma=IN_CMA[f];
		      double  d=ResidueDiversityCMSA(stdout,0,cma);
		      fprintf(stdout,"Average diversity (\"%s\")= %.2f\n\n",NameCMSA(cma),d);
		      d=AveRelEntropyCMSA((BooLean *) 0,cma);
		      fprintf(stdout,"Average RelEntropy (\"%s\")= %.2f nats\n\n",
				NameCMSA(cma),d);
		  }
#endif
		} else {
		  dom_typ *dom=DomainsCMSA(cma); if(dom) dom->Put(stdout);}
		}
		break;
	  // 'F': with FootPrint, print one "chn_pdb" command line per sequence of
	  // a one-block alignment; with fuseblk == 0, print the sequences returned
	  // by DataCMSA(); otherwise fuse blocks at fuseblk and write
	  // <argv[1]>.fuse via WriteMtfCMSA().
	  case 'F': { 
		if(FootPrint){	// Put domain foot print
		   assert(nBlksCMSA(cma) == 1);
		   for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			char str2[100],c;
			Int4 os,ex,len,e,blk=1,end=LengthCMSA(1,cma);
			e_type E=TrueSeqCMSA(sq,cma);
			os=OffSetSeq(E); ex=0; len=LenSeq(E)+os;
			// s and e start as the true positions of the first and last
			// aligned columns (s, i and j are enclosing-scope variables).
			s=TruePosCMSA(sq, blk, 1, cma);
			e=TruePosCMSA(sq, blk, end, cma);
			//fprintf(stdout,"%d:%d..%d --> ",sq,s,e);
			// Scan past deleted columns at the start; adjust s and add the
			// sequence offset, never going below os+1.
			for(i=1; i < end && IsDeletedCMSA(blk,sq,i,cma); i++) ;
			if(i > 1){ i--; s=MAXIMUM(Int4,os+1,s-i+os+1); } else s=os+s;
			// Same from the end; e is capped at len+os.
			for(j=end; j > 1 && IsDeletedCMSA(blk,sq,j,cma); j--) ;
			if(j < end){ j=end-j; e=MINIMUM(Int4,len+os,e+j+os); } else e=e+os;
			// fprintf(stdout,"%d:%d..%d\n",sq,s,e);
			StrSeqID(str,20,E);
			// sprintf(str,"%s",PutShortSeqID(E));
			// A line is printed only when the id has the form
			// "<text>|<char>".
			// NOTE(review): presumably a PDB id and chain letter -- confirm.
			if(sscanf(str,"%[^|]|%c",str2,&c) ==2){
			  fprintf(stdout,"chn_pdb %s -w=%d..%d%c > %s%c.pdb\n",str2,s,e,c,str2,c);
			}
		   }
		} else if(fuseblk == 0){
			// fprintf(stderr,"outputing fake seqs\n");
		    PutSeqSetEs(stdout,DataCMSA(cma)); 
		} else {
		  if(fuseblk >= nBlksCMSA(cma)) print_error("blocks cannot be fused");
		  cma_typ cma2=FuseBlksCMSA(fuseblk, 10000, cma);
		  if(cma2 == 0) print_error(USAGE_START);
           	  sprintf(str,"%s.fuse",argv[1]);
                  WriteMtfCMSA(str, cma2, NULL);
		  NilCMSA(cma2);
#if 0
		  gss_typ *gss=gssCMSA(cma2);
		  for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++) gss->Put(stdout,sq);
#endif
		}
	     } break;
	  case 'n': { 
	     // 'n' options: rename each alignment after its first sequence
	     // (FileName2Sq1), drop sequences deleted at either NoFrag column
	     // (NoFragN > 0), or rename cma to new_name and write
	     // <argv[1]>.renamed.cma.
	     if(FileName2Sq1){
		Int4 Number;
		// fp = open_file(argv[1],".cma","r");
		fp=OpenFileToRead(argv[1]);
		cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		fclose(fp);
		Int4 idx=1;
		while(idx <= Number){
		   cma=IN_CMA[idx];
		   data = TrueDataCMSA(cma);  // == True sequences...
		   e_type firstE=SeqSetE(1,data);
		   char seq_id[202];
		   StrSeqID(seq_id, 200, firstE);
		   RenameCMSA(seq_id,cma);
		   PutCMSA(stdout,cma);
		   idx++;
		}
	     } else if(NoFragN > 0){
		// Both checks are made on cma before the input file is re-read.
		if(NoFragN > LengthCMSA(1,cma) || NoFragC > LengthCMSA(1,cma))
		   print_error("nofrag outside of alignment");
		if(nBlksCMSA(cma) != 1)
		   print_error("this option requires one block alignments");
		Int4 Number;
		// fp = open_file(argv[1],".cma","r");
		fp=OpenFileToRead(argv[1]);
		cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		fclose(fp);
		for(Int4 idx=1; idx <= Number; idx++){
		   cma=IN_CMA[idx];
		   gss_typ *gss=gssCMSA(cma);
		   // Columns tested: NoFragN from the start, NoFragC from the end.
		   const Int4 firstCol = NoFragN;
		   const Int4 lastCol = LengthCMSA(1,cma) - (NoFragC - 1);
		   BooLean *drop;
		   NEW(drop,NumSeqsCMSA(cma)+3,BooLean);
		   data = TrueDataCMSA(cma);  // == True sequences...
		   Int4 kept=0;
		   for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			e_type sE=SeqSetE(sq,data);
			if(IsDeletedCMSA(sq,firstCol,cma) || IsDeletedCMSA(sq,lastCol,cma)){
			   drop[sq]=TRUE;
			} else {
			   drop[sq]=FALSE;
			   kept++;
			}
		   }
		   // sprintf(str0,"%s_is%d",argv[1],deleted_pos);
		   // fp = open_file(str0,".cma","w");
		   // Nothing is printed for an alignment with no surviving sequence.
		   if(kept > 0) PutSelectCMSA(stdout,drop,cma);
		   // fclose(fp); 
		   free(drop);
		}
	     } else {
		ReNameCMSA(new_name, cma);
		sprintf(str,"%s.renamed.cma",argv[1]);
		WriteCMSA(str, cma);
	     }
	    } break;
	  case 'O': { 
		// 'O' options: remove sequences whose info matches defline_string,
		// write cma in selex format, print the sequences of the
		// OutSeqFile-th alignment, or print the sequences of all alignments.
		if(defline_string){
			fprintf(stderr,"%s\n",defline_string);
        		Int4     i;
			data = TrueDataCMSA(cma);  
#if 0	// This is not necessary, done by calling environment...
			Int4 end=strlen(defline_string); end--;
			if(defline_string[0] == '\'' || defline_string[0] == '"'){
				if(defline_string[end] == '\'' || defline_string[end] == '"'){
					defline_string[end]=0;
					defline_string++;
				} else print_error("input error for option -O=<string>");
			} else if(defline_string[end] == '\'' || defline_string[end] == '"')
				print_error("input error for option -O=<string>");
#endif
			// fprintf(stderr,"string=%s\n",defline_string);
			Int4	N=NSeqsSeqSet(data);
			NEW(skip,N+3,BooLean); 
			dh_type dH=dheap(N+2,4);
			e_type	sE; 
			// Print every matching sequence to stdout, mark it for removal
			// and queue it keyed on negated length.
        		for(j=0,i=1;i<=NSeqsSeqSet(data); i++) {
				sE=SeqSetE(i,data);
				if(StringInSeqInfo(defline_string,sE)){
					PutSeqSetE(stdout,i,data); fprintf(stdout,"\n");
					insrtHeap(i,-(keytyp)LenSeq(sE),dH);
					skip[i] = TRUE;
					status=1;
				}
        		}
			// List the ids of the removed sequences on stderr in heap order.
			while(!emptyHeap(dH)){
				// Fixed: the heap pop used to live inside assert(), so with
				// NDEBUG it was not executed and the loop never ended.
				i=delminHeap(dH);
				assert(i != 0);
				sE=SeqSetE(i,data);
				fprintf(stderr,"===============================\n");
				// PutSeqInfo(stdout,qE); fprintf(stdout," versus\n"); PutSeqInfo(stdout,sE);
				//PutSeqID(stdout,qE); fprintf(stdout," versus ");
				PutSeqID(stderr,sE); fprintf(stderr,"\n");
				// Int4 score=AlnSeqSW(stderr,11,1,qE,sE,A);
				// fprintf(stdout,"score = %d\n",score);
				fprintf(stderr,"===============================\n");
			} Nildheap(dH); 
			// exit(1);
			// The remaining sequences go to <argv[1]>.rm.cma.
			fp = open_file(argv[1],".rm.cma","w");
			PutSelectCMSA(fp,skip,cma); fclose(fp); 
			free(skip); 
		} else if(selex_file){
			FILE *ofp = open_file(selex_file,"","w");
			// CMSAToPHYLIP(ofp,cma);
			CMSAToSelex(ofp,cma);
			fclose(ofp);
		} else if(OutSeqFile > 0){
			Int4 Number;
	    		fp=OpenFileToRead(argv[1]);
			cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
			fclose(fp);
			// Fixed off-by-one: alignments are indexed 1..Number, so the
			// last one (OutSeqFile == Number) is a valid request.
			if(OutSeqFile <= Number) PutSeqSetEs(stdout,TrueDataCMSA(IN_CMA[OutSeqFile])); 
	  	} else {
			Int4 Number;
			// if(cma) TotalNilCSMA(cma);
	    		fp=OpenFileToRead(argv[1]);
			cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
			fclose(fp);
			for(i=1; i <= Number; i++){
			    PutSeqSetEs(stdout,TrueDataCMSA(IN_CMA[i])); 
			}
		}
	    } break;
	  case 'P': { 
	    // 'P' options: find a seed pattern, partition cma by a seed
	    // alignment, test the number of phyla, print the family names as a
	    // comma-separated list, or print the repeat partition of cma.
	    if(FindSeedPttrn){
		Int4 Number;
		// fp = open_file(argv[1],".cma","r");
	    	fp=OpenFileToRead(argv[1]);
		cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		// Fixed: fp used to be closed before being handed to
		// FindSeedPattern(); using a closed stream is undefined behaviour.
		// NOTE(review): fp is still the read-mode input stream here; if
		// FindSeedPattern() expects an output stream, pass that instead.
		char *pttrn=FindSeedPattern(fp,Number,IN_CMA);
		fclose(fp);
	    } else if(name_seed_aln[0]!=0){
		// Partition cma by the seed alignments; output goes to
		// <argv[1]>_part.mma.
		Int4 Number;
		fp = open_file(name_seed_aln,".cma","r");
		cma_typ *SeedCMA=MultiReadCMSA(fp,&Number,A);
		fclose(fp);
		fp = open_file(argv[1],"_part.mma","w");
		PartitionBySeedAlnCMSA(fp,cma,Number,SeedCMA);
		fclose(fp);
	    } else if(CntPhyla){
		// Return 1 when cma has at least CntPhyla phyla, else 0.
		Int4	NumPhyla=0;
		Int4    *phyla=GetPhylaSeqSet(stderr, &NumPhyla, TrueDataCMSA(cma));
		if(NumPhyla >= CntPhyla) return 1; else return 0;
	    } else if(mk_use_file){
		Int4 Number;
		//fp = open_file(argv[1],".cma","r");
	    	fp=OpenFileToRead(argv[1]);
		cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		fclose(fp);
		// Fixed: the loop used to run through Number, so the last name
		// was printed twice (once with a comma and once with the newline).
		for(Int4 II=1; II < Number; II++){
		   fprintf(stdout,"%s,",NameCMSA(IN_CMA[II]));
		} fprintf(stdout,"%s\n",NameCMSA(IN_CMA[Number]));
	    } else {
		PutRptPartionCMSA(stdout,cma); 
	    }
	  } break;
	  case 'E': { // Exact test...
	// WARNING: assumes PSI-BLAST generated input file...
	// For every column of block 1, look at the residue of sequence 1. Print
	// a line when that residue's column frequency reaches freq_cut, or else
	// when the summed frequency of all residues scoring > 0 against it does.
	double  **colfreq = ColResFreqsCMSA(1, cma);
	for(Int4 col=1; col <= LengthCMSA(1,cma); col++){
	  Int4 qres = ResidueCMSA(1,1,col,cma);
	  e_type queryE=TrueSeqCMSA(1,cma);
	  if(colfreq[col][qres] >= freq_cut){
		fprintf(stdout,"%d%c%d%c.%c\t\t// %.2f\n",
			file,AlphaChar(qres,A),col+OffSetSeq(queryE),
			chain,color,colfreq[col][qres]);
		continue;
	  }
	  // Exact residue is below the cutoff: pool the similar residues.
	  double pooled=0.0;
	  for(Int4 res=0; res <= nAlpha(A); res++){
		if(valAlphaR(res,qres,A) > 0) pooled+=colfreq[col][res];
	  }
	  if(pooled < freq_cut) continue;
	  fprintf(stdout,"%d%c%d%c.%c\t\t// %.2f (%.2f)\n",
		file,AlphaChar(qres,A),col+OffSetSeq(queryE),
		chain,color,pooled,colfreq[col][qres]);
	}
		} break;
	  // 'e': the extend operation is compiled out (#if 0); the live branch
	  // only reports that the option has moved to gsq_typ.
	  case 'e': 
		{
#if 0
			ExtendFakeToRealCMSA(cma); 
	      		fp = open_file(argv[1],"_extend.cma","w");
			PutCMSA(fp,cma); fclose(fp);
#else
			print_error("This option moved to gsq_typ");
#endif
		} break;
	  // case 'o': { PutRptSpacingsCMSA(stdout,cma); } break;
	  case 'o': 
		// With multi_cma_name: copy to <argv[1]>_MPA.cma every alignment of
		// that file whose first sequence is identical to some sequence of
		// cma, then print to stderr the sequences of cma that matched none.
		// Otherwise print the repeat spacing of cma.
		if(multi_cma_name[0]){
		  Int4 Number;
		  FILE *ofp=0;
		  fp = open_file(multi_cma_name,"","r");
		  cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
		  fclose(fp);
		  BooLean *found=0;
		  NEW(found,NumSeqsCMSA(cma) + 3, BooLean);
		  for(Int4 i=1; i <= Number; i++){
			fprintf(stderr,"file %d:",i);
			BooLean okay=FALSE;
			e_type csqE = TrueSeqCMSA(1,IN_CMA[i]);
			// Only the first identical sequence of cma is recorded.
		        for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			  e_type sE=TrueSeqCMSA(sq,cma);
			  if(IdentSeqs(csqE,sE)){
				found[sq]=TRUE;
				okay=TRUE; break;
			  }
			}
			if(okay){
			  fprintf(stderr," hit.\n");
			  // The output file is opened on the first hit only.
			  if(ofp == 0){
		  		ofp = open_file(argv[1],"_MPA.cma","w");
			  }
			  PutCMSA(ofp,IN_CMA[i]);
			} else fprintf(stderr,"-\n");
		  }
		  if(ofp){
			fclose(ofp);
		        for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
				if(!found[sq]) PutSeq(stderr,TrueSeqCMSA(sq,cma),A);
			}
		  }
		  free(found);	// Fixed: the hit table was never released.
		} else { PutRptSpacingCMSA(stdout,cma); } 
		break;
	  case '!': {
		 // Trim a one-block alignment with TrimMax as the limit for block 1
		 // and write the result under the name <argv[1]>.new.
		 if(nBlksCMSA(cma) != 1) print_error(USAGE_START);
		 Int4 TrimLimit[3];
		 Int4 *RmLeft;
		 Int4 *RmRight;
		 NEW(RmLeft,nBlksCMSA(cma)+3,Int4);
		 NEW(RmRight,nBlksCMSA(cma)+3,Int4);
		 TrimLimit[1] = TrimMax;	// only slot 1 is assigned
		 cma_typ trimmed = TrimCMSA(info_cut,TrimLimit,RmLeft,RmRight,cma);
		 sprintf(str,"%s.new",argv[1]);
		 PutAlnCMSA(str,trimmed,NULL);
		 NilCMSA(trimmed);
		 free(RmLeft);
		 free(RmRight);
		} break; 
	  // '#': write cma to stdout through PutTableOfCMSA().
	  case '#': {	// output table mode...
		  PutTableOfCMSA(stdout, cma);
		} break;
	  case 't': {
	     // 't' options: scan for terms, select by taxonomy id, copy taxonomy
	     // from another sequence file, drop sequences with no taxonomy id, or
	     // (default) trim the alignment and write it as <argv[1]>.trim.
	     if(PutTerms){
		// Uses its own alphabet and returns from the enclosing function.
		Int4 Number;
	    	fp=OpenFileToRead(argv[1]);
		a_type AB = MkAlpha(AMINO_ACIDS,PROT_BLOSUM62);
		cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,AB);
		fclose(fp);
	 	trm_typ *trm = new trm_typ(IN_CMA);
	 	trm->Scan4Terms(stdout);
	 	delete trm;
		for(i=1; i <= Number; i++){ TotalNilCMSA(IN_CMA[i]); }
		free(IN_CMA); NilAlpha(AB); 
	 	return 0;
	     } else if(TxID != 0){
		// Keep only the sequences whose taxonomy id equals TxID.
		BooLean *skip=0;
		Int4	id,sq,N=NumSeqsCMSA(cma);
		NEW(skip,N+5,BooLean);
		for(sq=1; sq <= N; sq++) skip[sq]=TRUE;
		e_type	sE=0;
		for(sq=1; sq <= N; sq++){
			sE=TrueSeqCMSA(sq,cma);
			id=TaxIdentSeq(sE);
			if(id == TxID) skip[sq]=FALSE;
		}
		PutSelectCMSA(stdout,skip,cma); free(skip);
	     } else if(txfile != 0){
		// Copy phylum, kingdom and class from the sequences in txfile
		// onto the sequences of cma; both must list the same ids in the
		// same order.
		Int4	sq,N=NumSeqsCMSA(cma);
		e_type	sE=0,tE=0;
		data = TrueDataCMSA(cma);  // == True sequences...
		a_type AB=AlphabetCMSA(cma);
		ss_type Data = MkSeqSet(txfile,AB);
		if(N != NSeqsSeqSet(Data)) print_error("-tax=<file> option input error");
		for(sq=1; sq <= N; sq++){
			sE=SeqSetE(sq,data);
			tE=SeqSetE(sq,Data);
			if(!IsSameSeqID(tE,sE)){
			    print_error("-tax=<file> option input error");
			}
			char *phylum=0,kingdom=0,*Class=0;
			phylum=PhylumSeq(tE);
			Class=ClassSeq(tE);
			kingdom=KingdomSeq(tE);
			TaxAssignSeq(phylum, kingdom, sE,Class);
#if 0
			StrSeqInfo(char *str,e_type E);
			ChangeInfoSeq(info,sE);
#endif
		} PutCMSA(stdout,cma);
		NilSeqSet(Data);
	     } else if(TaxID_Only){
		// Drop the sequences whose taxonomy id is 0.
		// NOTE(review): relies on NEW() returning zero-filled storage so
		// that the other entries read as FALSE -- confirm.
		BooLean *skip;
		Int4 sq,N=NumSeqsCMSA(cma);
		NEW(skip,N+3,BooLean);
		e_type sE=0;
		data = TrueDataCMSA(cma);  // == True sequences...
		for(sq=1; sq <= N; sq++){
			sE=SeqSetE(sq,data);
			if(TaxIdentSeq(sE) == 0) skip[sq]=TRUE;
		} PutSelectCMSA(stdout,skip,cma);
		free(skip);
	     } else {
		 Int4 *RmLeft,*RmRight,*TrimLimit;
		 NEW(RmLeft,nBlksCMSA(cma)+3,Int4);
		 NEW(RmRight,nBlksCMSA(cma)+3,Int4);
		 NEW(TrimLimit,nBlksCMSA(cma)+3,Int4);
		 cma_typ cma2 = TrimCMSA(info_cut,TrimLimit,RmLeft,RmRight,cma);
		 sprintf(str,"%s.trim",argv[1]); PutAlnCMSA(str,cma2,NULL);
		 NilCMSA(cma2);
		 // Fixed: TrimLimit was allocated above but never released.
		 free(RmLeft); free(RmRight); free(TrimLimit);
		}
	     } break;
	  // 'h' options: write an .hsw file (PutHSW), print every alignment in
	  // argv[1] as an HMM on stdout (PutAsHMM), or otherwise time and report
	  // the ungapped map and the indel penalty of cma on stderr.
	  case 'h':	
	    if(PutHSW){
	        fp = open_file(argv[1],".hsw","w");
		CreateWriteHSW(fp,cma,argv[1]); fclose(fp);
#if 0
		Int4 time1=time(NULL); 
	        fp = open_file(argv[1],".hsw","r");
		hsw_typ hsw=FReadHSW(fp,A,cma); fclose(fp);
		fprintf(stderr, "\ttime hsw read: %d seconds (%0.2f minutes)\n",
                        time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
#endif
	    } else if(PutAsHMM){
		// HMM_typ(Int4 maxrpts,cma_typ cma,double *exp_rpt_gap);
		// HMM_typ(Int4 maxrpts,char *pssm_arg,cma_typ cma,Int4 pernats,double *exp_rpt_gap);
#if 0
		HMM_typ hmm(1,"-P200..1300,20..100:500..500,35..250",cma,200,0);
		hmm.Put(stdout);
#else
            // dms_mode is assigned three times; the final value 'F' is used.
            char dms_mode='T';       // nearly as good as 'F'.
            // dms_mode='O';
            dms_mode='f';       // nearly as good as 'F'.
            dms_mode='F';       // best setting...
            // Int4   aa_per_io=200,aa_per_do=200,exp_ie=1,exp_de=1;
            Int4    aa_per_io=20,aa_per_do=150, exp_ie=1,exp_de=1;      // best setting...
	    double  SqWtAdj=0,PriorWt=1.0,pn;
	    pn=1443;  // This is what Sean Eddy is using: ln(x)*1.443 * 1000; 1/1443 nats == 1/1000 bits.
	    Int4 Number;
	    fp=OpenFileToRead(argv[1]);
	    cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	    // One HMM per alignment: match emissions come from the first matrix
	    // returned by RtnStraightSMX(), transitions from GetTransProb();
	    // ins_emit and me are passed as null.
	    for(Int4 f=1; f <= Number; f++) { 
		cma=IN_CMA[f]; // PutStockholmCMSA(stdout,cma); 
                ssx_typ *ssx = new ssx_typ(aa_per_io,aa_per_do,exp_ie, exp_de, pn,cma,dms_mode);
                if(PriorWt > 0) ssx->SetPriorWt(PriorWt);
                if(SqWtAdj > 0) ssx->SetSqWtAdjust(SqWtAdj);
#if 0
	        smx_typ *smx = ssx->RtnWtCntsSMX( );
#else
		smx_typ *smx = ssx->RtnStraightSMX( );
#endif
		Int4 **mat_emit= ValuesSMatrix(smx[1]),**ins_emit=0;
                char *name= NameCMSA(cma);
                Int4 len=LengthCMSA(1,cma);
                Int4 *mm,*mi,*md,*ii,*im,*dd,*dm,*bm,*me=0;
                ndl_typ *ndl=ssx->RtnNDL();
                ndl->GetTransProb(&mm,&mi,&md,&ii,&im,&dd,&dm,&bm);
                hmm_typ *hmm = new hmm_typ(name,len,mat_emit,ins_emit,mm,mi,md,ii,im,dd,dm,bm,me,A,'R');
                hmm->Put(stdout);
	        delete hmm; delete ssx;
	    } fprintf(stderr,"Use hmmcalibrate to add EVD parameters. Use hmmsearch to search for hits.\n");
#endif
	    } else {		// Test Jun Liu's statistical model for GISMO...
	     // numiter is 0, so both "iter < numiter" timing loops below run
	     // zero times and each quantity is computed exactly once.
	     Int4 iter,numiter=0; // 100000;
	     gss_typ *gss=gssCMSA(cma);
	     double map;
	     time1=time(NULL); 
	     for(iter=1; iter < numiter; iter++){
		// fprintf(stderr,"map = %g\n",RelMapCMSA(cma));
		// map=RelMapCMSA(cma);
	     }
	     map=UnGappedRelMapCMSA(cma);
	     // NOTE(review): "%d" is given the difference of time() values here
	     // and below; confirm the type of time1 matches the format.
	     fprintf(stderr,"\ttime map: %d seconds (%0.2f minutes)\n",
                        time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
	     time1=time(NULL); 
#if 1
	     double penalty=0.0;
	     for(iter=1; iter < numiter; iter++){
	     	penalty=JunLiuHMM_PenaltyCMA(NULL,cma);
	     } penalty=JunLiuHMM_PenaltyCMA(stderr,cma);
	     fprintf(stderr,"map = %g; penalty=%g\n",map + penalty,penalty);
	     fprintf(stderr,"current indel_penalty = %g; map = %g\n",
			-IndelPenaltySeqSet(DataCMSA(cma)),map);
	     fprintf(stderr,"\ttime indels: %d seconds (%0.2f minutes)\n",
                        time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
#else
	     assert(gss->Gapped());
	     double penalty=0.0;
	     UInt4 Nmm,Nmd,Nmi,Nm;
	     UInt4 Nii,Ndd,Nim,Ndm;
	     UInt4 Nid,Ndi;
	     Int4 ae,be,ao,bo,n0,n1,n2;
	     for(iter=1; iter <=numiter; iter++){
		Nmm=Nmi=Nmd=Nii=Nim=Ndd=Ndm=Nid=Ndi=0;
		Int4 *pos;
		Int4	*lens = LengthsCMSA(cma);
		NEW(pos,nBlksCMSA(cma)+3,Int4);
		for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
		  assert(PosSites(sq,pos,SitesCMSA(cma)) == nBlksCMSA(cma));
		  gsq_typ *gsq=gss->GetGSQ(sq);
		  gsq->FindIndels(nBlksCMSA(cma),pos,lens,Nmm,Nmi,Nmd,Nii,Nim,Ndd,Ndm,Ndi,Nid,Nsd,Nsm);
		} free(pos);

		n0=10000;
		ao=10;	// one insertion in 1000 residues.
		bo=10;	// one deletion every 1000 residues 
		n1=n2=100;
		ae=20;	// L1 = 20/100 --> 5 = average insert length
		be=50;	// L2 = 50/100 --> 2 = average deletion length

		Nm=Nmm+Nmi+Nmd;

		penalty=0.0;
		// numerator...
		penalty += lngamma((double)(Nmi+ao));
		penalty += lngamma((double)(Nmd+bo));
		penalty += lngamma((double)(Nmm+n0-ao-bo));
		penalty += lngamma((double)(n0));
		// denominator...
		penalty -= lngamma((double)(Nm+n0));
		penalty -= lngamma((double)(ao));
		penalty -= lngamma((double)(bo));
		penalty -= lngamma((double)(n0-ao-bo));

		// numerator...
		penalty += lngamma((double)(Nii+ae));
		penalty += lngamma((double)(Nim+n1-ae));
		penalty += lngamma((double)(n1));
		// denominator...
		penalty -= lngamma((double)(Nim+Nii+n1));
		penalty -= lngamma((double)(ae));
		penalty -= lngamma((double)(n1-ae));

		// numerator...
		penalty += lngamma((double)(Ndd+be));
		penalty += lngamma((double)(Ndm+n2-be));
		penalty += lngamma((double)(n2));
		// denominator...
		penalty -= lngamma((double)(Ndd+Ndm+n2));
		penalty -= lngamma((double)(be));
		penalty -= lngamma((double)(n2-be));
	     }
	     fprintf(stdout,"Nmm = %d; Nmi = %d; Nmd = %d; Nm=%d; Nii = %d; Nim = %d\n",
			Nmm,Nmi,Nmd,Nm,Nii,Nim);
	     fprintf(stdout,"Ndd = %d; Ndm = %d; Ndi = %d; Nid = %d\n",Ndd,Ndm,Ndi,Nid);

	     fprintf(stderr,"penalty=%g; Nmm = %d\n",penalty,Nmm);
	     fprintf(stderr,"current indel_penalty = %g; map = %g\n",
			-IndelPenaltySeqSet(DataCMSA(cma)),map);
	     fprintf(stderr,"\ttime indels: %d seconds (%0.2f minutes)\n",
                        time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
#endif
	    } break;
	  case 's': 
	    if(sto_format){
#if 0	// set option not yet implemented in PutStockholmCMSA();
		set_typ set=MakeSet(NumSeqsCMSA(cma) + 5); ClearSet(set);
		AddSet(1,set); AddSet(5,set); AddSet(6,set); AddSet(7,set);
		AddSet(10,set); AddSet(15,set); AddSet(16,set); AddSet(17,set);
		PutStockholmCMSA(stdout,cma,set);
#elif 1
	      Int4 Number;
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      for(Int4 f=1; f <= Number; f++) { cma=IN_CMA[f]; PutStockholmCMSA(stdout,cma); }
#else
		PutStockholmCMSA2(stdout,cma);
#endif
	    } else if(TheSeqId > 0){
		Int4 I,J,N=NumSeqsCMSA(cma);
		data = TrueDataCMSA(cma);  // == True sequences...
		if(TheSeqId <= N){
		  e_type sE=SeqSetE(TheSeqId,data);
		  PutSeq(stdout,sE,A);
		}
	    } else if(SameIDs){
		Int4 I,J,N=NumSeqsCMSA(cma);
		data = TrueDataCMSA(cma);  // == True sequences...
		e_type	E1,E2;
		for(I=1; I < N; I++){
		   Int4 NumRpts=0;
		   E1=SeqSetE(I,data);
		   for(J=I+1; J <= N; J++){
		        E2=SeqSetE(J,data);
			if(IsSameSeqID(E1,E2)){
				NumRpts++;
				PutShortSeqID(stderr,E2); fprintf(stderr,"\n"); 
				char	info[1005],id[205],new_info[1225];
				Int4	i,j;
				if(NumRpts==1){
				    StrSeqID(id,200,E1); StrSeqDescript(info,1000,E1);
				    sprintf(new_info,"%s%d %s",id,NumRpts,info);
				    ChangeInfoSeq(new_info, E1); NumRpts++;
				}
				StrSeqID(id,200,E2); StrSeqDescript(info,1000,E2);
				sprintf(new_info,"%s%d %s",id,NumRpts,info);
				ChangeInfoSeq(new_info, E2);
				PutShortSeqID(stderr,E2); fprintf(stderr,"\n"); 
				// printf("\nID: %s",id); printf("\ninfo: %s",info); printf("\n"); 
			}
		   }
		} fp = open_file(argv[1],".idfix.cma","w"); PutCMSA(fp,cma); fclose(fp);
	    } else if(SplitCMA){ 
#if 0
		Int4 N=NumSeqsCMSA(cma);
		set_typ sqSet=MakeSet(N+5); ClearSet(sqSet);
		NoIndelsSet(sqSet,cma);
		fprintf(stdout,"%d out of %d sequences without indels\n", CardSet(sqSet),N);
		exit(1);
#else
		NumIndelsCMSA(argv[1],cma); 
#endif
	    } else if(PutScores){
	      // -scores: for every alignment in the input file, print a histogram of
	      // the per-sequence log-odds scores (gapped when PutScores == 'T',
	      // ungapped otherwise).
	      Int4 Number;
	      // fp = open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      for(Int4 f=1; f <= Number; f++) {
		cma=IN_CMA[f];
		if(nBlksCMSA(cma) > 1) print_error("-scores option requires single blk cma file");
		Int4    N = NumSeqsCMSA(cma);
		mh_type mH=Mheap(N+3,3);
		// The insertion must not live inside assert(): with NDEBUG defined the
		// call would be compiled out and the heap would stay empty.
		for(Int4 J=1; J <= N; J++){
		   if(PutScores == 'T'){
			if(!InsertMheap(GetGappedProbCMSA(1,J,cma),mH))
				print_error("-scores: heap insertion failed");
		   } else {
			if(!InsertMheap(GetProbCMSA(1,J,cma),mH))
				print_error("-scores: heap insertion failed");
		   }
		}
		// Histogram range spans the observed scores, split into 50 bins.
		Int4 start= (Int4) floor(MinKeyMheap(mH));
		Int4 end= (Int4) ceil(MaxKeyMheap(mH));
		double Inc=ceil((double)(end-start+1)); Inc = Inc/50.0;
		h_type HG;
		if(PutScores == 'T') HG=Histogram("gapped log-odds scores",start,end,Inc);
		else HG=Histogram("ungapped log-odds scores",start,end,Inc);
		// Drain the heap, adding each score to the histogram.
		while(ItemsInMheap(mH) > 0){
			IncdHist((double)MinKeyMheap(mH),HG);
			DelMinMheap(mH);
		}
		printf("%d.%s:\n",f,NameCMSA(cma));
		// for(Int4 J=1; J <= N; J++)IncdHist(GetGappedProbCMSA(1,J,cma),HG); 
		PutHist(stdout,60,HG); NilHist(HG); NilMheap(mH);
	      }
	    } else if(PutSingles || PutDoubles){
		// PutSingles: write each sequence as its own single-sequence alignment
		//   (renamed after the sequence id) to <input>.sma.
		// PutDoubles: for every sequence after the first, write a template
		//   alignment record containing that sequence twice to <input>_maps.cma.
		BooLean *skip;
		Int4 sq,N=NumSeqsCMSA(cma);
		NEW(skip,N+3,BooLean);
		for(sq=1; sq <= N; sq++) skip[sq]=TRUE;
		// char str0[108];
		// sprintf(str0,"%s_S%d",argv[1],sq);
		// FILE *fp = open_file(str0,".cma","w");
		FILE *fp = 0;
		if(PutSingles) fp=open_file(argv[1],".sma","w");
		else fp=open_file(argv[1],"_maps.cma","w");
		// Size the text buffer for the longest true sequence.
		Int4 MaxLen=0;
		e_type sE=0;
		data = TrueDataCMSA(cma);  // == True sequences...
		for(sq=1; sq <= N; sq++){
		   sE=SeqSetE(sq,data);
		   MaxLen=MAXIMUM(Int4,LenSeq(sE),MaxLen);
		}
		char *buffer;
		NEW(buffer,MaxLen+9,char);
		for(sq=1; sq <= N; sq++){
			sE=SeqSetE(sq,data);
			char str[202];
			StrSeqID(str, 200, sE);
			if(PutSingles){
			   // un-skip only the current sequence while it is written out
			   RenameCMSA(str,cma); skip[sq]=FALSE;
			   PutSelectCMSA(fp,skip,cma);
			   skip[sq]=TRUE;
			} else if(sq > 1){	// first sequence is the template concensus
			   // PutDoubles: creates an input file for gapmaps from a template only.
			   // if(nBlksCMSA(cma) == 1) return PutSelectOneCMSA(fp,skip,cma);
			   Int4 len=SeqToString(buffer, sE, A);
			   fprintf(fp,"[0_(1)=%s(2){go=10000,gx=2000,pn=1000.0,lf=0,rf=0}:\n", str);
			   fprintf(fp,"(%d)",len);
			   for(Int4 x=1; x <= len; x++){ fprintf(fp,"*"); }
			   fprintf(fp,"\n\n");

			   // the same sequence record is emitted twice
			   for(Int4 copy=1; copy <= 2; copy++){
			      fprintf(fp,"$%d=%d(%d):\n",sq,len,len);
			      fprintf(fp,">%s \n{()%s()}*\n\n",str,buffer);
			   }

			   fprintf(fp,"\n_0].\n");
			}
		}
		free(buffer);	// previously leaked
		free(skip);
		fclose(fp);
	    } else if(sort_using_seqids){
		SortBySeqID_CMA(cma,seq_ids_file);
	    } else if(sort_using_profiles){
#if 1
		cma_typ cma2=ReadCMSA2(profiles,A);
		cma_typ rcma=SortByCMA(cma2, cma); PutCMSA(stdout,rcma);
#else	
	        SortTemplateCMSA(cma,profiles);
#endif
	    } else if(see_aa != 0){
	      Int4 Number;
	      // fp = open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
	      fclose(fp);
#if 0
	      if(see_aa < 0) see_aa=-see_aa;
	      PutResFreqCMA(stdout,IN_CMA,Number,see_aa,A,ExcelFormat);
#else
	      BooLean  UseFakePos=FALSE;
	     if(see_aa < 0){ UseFakePos=TRUE; see_aa = -see_aa; }
	     double maxRE=0.0;
	     for(Int4 f=1; f <= Number; f++) {
		cma=IN_CMA[f];
		if(family_name[0] != 0 && strncmp(family_name,NameCMSA(cma),200) != 0) continue;
	        // make into a routine...
		Int4	i,j,r,aa,**obs,fake_pos,pos[5];
		Int4	*ordered,total;
		if(see_blk > nBlksCMSA(cma)) print_error(USAGE_START);
		// fprintf(stdout,"\n");
		if(UseFakePos){	// then pick column in alignment
		   fake_pos=see_aa;
		   fprintf(stdout,"============ file %d (\"%s\"): Blk %d column %d ===========\n",
				f,NameCMSA(cma),see_blk,see_aa);
		} else {	// pick position in first seq.
		   if(nBlksCMSA(cma) > 1) print_error("-see= option requires single blk cma file");
		   fake_pos=RealToFakeCMSA(1,see_aa,cma);
		   fprintf(stdout,"============ file %d (\"%s\"): Blk %d residue %d (col %d) ===========\n",
				f,NameCMSA(cma),see_blk,see_aa,fake_pos);
		}
		if(fake_pos > LengthCMSA(see_blk,cma)) print_error("residue position exceeds end of sequence");
		if(fake_pos < 1) print_error("residue position precedes start of sequence");
#if 1
		double	*tfrq= tFreqSeqSet(DataCMSA(cma));
#elif 1
		double  *tfrq= tFreqSeqSet(TrueDataCMSA(cma));
#else
		double	tfrq[]= { 0.0,0.05,0.05,0.05,0.05,0.05, 0.05,0.05,0.05,0.05,0.05,
				      0.05,0.05,0.05,0.05,0.05, 0.05,0.05,0.05,0.05,0.05};
#endif
		double d,**frq = ColResFreqsCMSA(see_blk,&obs, cma),cum_frq;
		dh_type dH=dheap(nAlpha(A)+2,4);
		Int4 r_start=1;
		if(ExcelFormat) r_start=1; 
		for(r=r_start; r <= nAlpha(A); r++){
			if(ExcelFormat) d=frq[fake_pos][r]/tfrq[r];
			else d= frq[fake_pos][r];
			// insrtHeap(r+1,(keytyp)-frq[fake_pos][r],dH);
			insrtHeap(r+1,(keytyp)-d,dH);
		}	// r must be > 0
		h_type HG=Histogram("Observed amino acid types", 0, nAlpha(A)+3,1.0);
		aa = 0;
		NEW(ordered,nAlpha(A)+3,Int4);
		while((r=delminHeap(dH)) != 0){
		    aa++; r--; ordered[aa] = r;
		    IncdMHist((double) aa, obs[fake_pos][r],HG);
		}
		Nildheap(dH);
		Int4 *height=RtnHist(50,HG);
		if(height==0){
			fprintf(stdout,"no hits\n");
			// Release everything allocated for this alignment before moving
			// on to the next one; mirrors the cleanup at the end of the loop.
			NilHist(HG);
			free(ordered);
			for(i=1; i <= LengthCMSA(see_blk,cma); i++){ free(obs[i]); free(frq[i]); }
			free(obs); free(frq);
			continue;
		}
		total=0;
#if 0
		Int4	counts[30];
		for(r=0; r <= nAlpha(A); r++){
			counts[r]= CountsSeqSet(r,data)
		}
#endif
	        double p,RE=0.0,entropy=0.0;
		if(ExcelFormat){
		  fprintf(stdout,"res: Obsrvd  percent -ln(K)\n");
		  // BooLean split=FALSE;
		  for(i=1; i <= aa; i++){
		    r=ordered[i];
		    char chr=AlphaChar(r,A);
		    if(chr=='X') continue; 
		    if(chr=='X') chr='-';
		    // fprintf(stdout,"%c\t%d\t%.2f",
		    fprintf(stdout,"%c %d %.2f",
			chr,obs[fake_pos][r],100.0*frq[fake_pos][r]);
			// AlphaChar(r,A),obs[fake_pos][r],100.0*frq[fake_pos][r]);
		    total+=obs[fake_pos][r];
		    p=frq[fake_pos][r];
		    if(p > 0.0){ 
			RE += p * log(p/tfrq[r]); 
			entropy += -p * log(p); 
		    }
		    d=tfrq[r]/frq[fake_pos][r];
		    // if(d > 1.0) fprintf(stdout,"\t+%.1f\n",log(d));
		    // else fprintf(stdout,"\t%.1f\n",log(d));
		    if(d > 1.0) fprintf(stdout," +%.3f\n",log(d));
		    else fprintf(stdout," %.3f\n",log(d));
		  }
		} else {
		  // One line per residue type, most frequent first: count, percent,
		  // cumulative percent, a bar scaled by height[], and the log-ratio of
		  // the observed column frequency to tfrq[].
		  fprintf(stdout,"res: Obsrvd  percent (cumul)\n");
		  for(cum_frq=0.0, i=1; i <= aa; i++){
// fprintf(stderr,"height[%d]=%d\n",i,height[i]);
		    r=ordered[i];
		    cum_frq += frq[fake_pos][r];
		    char chr=AlphaChar(r,A);
		    if(chr=='X') chr='-';
		    // "%%" prints a literal percent sign; the former "\%" was an invalid
		    // escape that left a bare "%)" conversion in the format string.
		    fprintf(stdout,"  %c: %-8d %-7.2f (%3.0f%%) |",
			chr,obs[fake_pos][r],100.0*frq[fake_pos][r],100.0*cum_frq);
			// AlphaChar(r,A),obs[fake_pos][r],100.0*frq[fake_pos][r]);
		    total+=obs[fake_pos][r];
		    p=frq[fake_pos][r];
		    if(p > 0.0){
			// accumulate relative entropy and entropy over observed residues only
			RE += p * log(p/tfrq[r]); 
			entropy += -p * log(p); 
		    }
		    for(j=1 ;j <= height[i]; j++){ fprintf(stdout,"="); }
		    d=frq[fake_pos][r]/tfrq[r];
		    if(d > 1.0) fprintf(stdout," (+%.1f)\n",log(d));
		    else fprintf(stdout," (%.1f)\n",log(d));
		  }
		}
		maxRE=MAXIMUM(double,RE,maxRE);
		fprintf(stdout,"  total=%d; relative entropy=%.2f; entropy=%.2f\n\n",total,RE,entropy);
        	// PutHist(stdout,50,HG); 
		NilHist(HG); 
		free(ordered);
		free(height);
		for(i=1; i <= LengthCMSA(see_blk,cma); i++){ free(obs[i]); free(frq[i]); }
		free(obs); free(frq);
	     } fprintf(stdout,"  maximum relative entropy=%.2f\n\n",maxRE);
#endif
	    } else if(seqlens){
		Int4 min=MinTrueSeqCMSA(cma),max=MaxTrueSeqCMSA(cma);
		double inc = ceil((double)(max -min +1)/40.0);
		h_type H=Histogram("Sequence lengths", min, max,inc);
		for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			e_type E = TrueSeqCMSA(sq,cma);
			IncdHist(LenSeq(E),H);
        	} PutHist(stdout,60,H); NilHist(H); 
	    } else {
#if 1
		gss_typ *gss=gssCMSA(cma);
		for(Int4 bk = 1 ; bk <= nBlksCMSA(cma); bk++){
		  h_type HG=Histogram("number of hits for each repeat",0,LengthCMSA(bk,cma),2);
		  Int4 totins=0,totdel=0,totilen=0;
		  Int4 nins,inslen,ins,del,pos[3];
		  fprintf(stdout,"Block %3d:  ins(num)  del\n",bk);
		  for(Int4 col=0; col < LengthCMSA(bk,cma); col++){
		    inslen=del=nins=0;
		    for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			PosSiteCMSA(bk, sq, pos, cma);
			ins = InsertionCMSA(sq, pos[1]+col,cma);
			if(ins > 0){ nins++; inslen+=ins; }
			if(IsDeletedCMSA(sq,pos[1]+col, cma)){ del++; }
		    } fprintf(stdout,"%3d         %3d(%3d)  %3d\n",col+1,inslen,nins,del);
		    if((col+1) < LengthCMSA(bk,cma)) IncdMHist(col+1,inslen,HG);
		    totins+=nins; totdel+=del; totilen+=inslen;
		  } fprintf(stdout,"\n");
		  fprintf(stdout,"Total:      %3d(%3d)  %3d\n",totilen,totins,totdel);
		  PutHist(stdout,60,HG); NilHist(HG);
		}
#else
		PutIndelsCMSA(stderr,cma);	// in cma_gmb.cc
#endif
	    } break;
	  case 'r': 
	    if(PutRelEntropy){
		// PutRelEntropy: print the relative entropy of every column of every
		// block (labelled with the consensus residue), a histogram of those
		// values per block, and finally a "Y=" list of column numbers taken
		// from a heap keyed on negated relative entropy.
		// NOTE(review): Csq is not released anywhere in this branch -- confirm
		// whether MkConsensusCMSA() hands ownership to the caller.
		e_type Csq=MkConsensusCMSA(cma);
		// d is only referenced by the commented-out filter further down.
		double d=AveRelEntropyCMSA((BooLean *) 0,cma);
		Int4	i,j,col;
		// NOTE(review): the heap is sized from N, which is not assigned in this
		// branch -- confirm it is at least the largest column index inserted.
		dh_type dH=dheap(N+2,4);
		// Qsq is only referenced by the disabled (#if 0) debugging output below.
		e_type Qsq=TrueSeqCMSA(1,cma);
		// x counts columns across all blocks and indexes the consensus sequence.
	        for(Int4 x=0,bk = 1 ; bk <= nBlksCMSA(cma); bk++){
		    float *RE=RelEntropyCMA(bk,cma);
		    h_type HG=Histogram("relative entropy",0,100,0.1);
		    for(col=1; col <= LengthCMSA(bk,cma); col++){
			// NOTE(review): the heap item is the within-block column number, so
			// with more than one block the same item is inserted again -- verify.
#if 0
			Int4 nd=NumDeletionsCMSA(1,col,cma);
			insrtHeap(col,-(keytyp)nd,dH);	// pick sites with most deletions.
#elif 0
			double fd=FractDeletionsCMSA(1,col,cma);
			insrtHeap(col,-(keytyp)fd,dH);	// pick sites with most deletions.
#else
			insrtHeap(col,-(keytyp)RE[col],dH);	// pick highest RE.
			// insrtHeap(col,(keytyp)RE[col],dH);	// lowest RE.
		        // insrtHeap(col,(keytyp) Random(),dH);
#endif
		        IncdHist(RE[col],HG);
			x++;
			// if(d >= (double)RE[col]) continue;
#if 0	// Debugging specific alignment for STARC paper.
			unsigned char r=ResSeq(x,Qsq); 
			fprintf(stdout,"%d.%c%d: RE=%.3f\n",bk,AlphaChar(r,A),col+10,RE[col]);
#else
			unsigned char r=ResSeq(x,Csq); 
			fprintf(stdout,"%d.%c%d: RE=%.3f\n",bk,AlphaChar(r,A),col,RE[col]);
#endif
		    }
		    // mean and stdev are computed but only used by the commented-out print.
		    double mean=MeanHist(HG);
		    double stdev=sqrt(VarianceHist(HG));
		    // fprintf(stdout,"\n  mean = %.3f; stdev = %.3f\n",mean,stdev); 
		    PutHist(stdout,60,HG); NilHist(HG); free(RE);
		}
		fprintf(stdout,"Y=");
		// Drain the heap; the loop stops when delminHeap() returns 0, so the
		// value printed after the loop is always that terminating 0.
		for(i=1; (col=delminHeap(dH)) != 0; i++){
			fprintf(stdout,"%d,",col);
		} fprintf(stdout,"%d\n",col);
		Nildheap(dH);
	    } else if(RptEval > 0.0){
		// RptEval: score every true sequence for internal repeats and report
		// those whose E-value (Karlin-Altschul statistics over the residue
		// composition of the input set) is at or below RptEval.
		Int4	os,sq,N=NumSeqsCMSA(cma);
		double	NN,*pr,*freq,lambda,K,H,p_val,e,exp_length;
		Int4    score,*counts,v,total,low,high,len,hits=0;

		// Residue frequencies from the observed counts.
		ss_type	data=TrueDataCMSA(cma);
		NEW(freq,nAlpha(A)+2,double);
		counts=CntsSeqSet(data);
		for(total=0,i=0; i<=nAlpha(A); i++) total += counts[i];
		for(i=0; i<=nAlpha(A); i++) freq[i] = (double)counts[i]/(double)total;
		// Probability of each pairwise score value, offset so that index 0 == low.
		low = lowAlphaR(A); high = highAlphaR(A); len = high - low + 1;
		NEW(pr,len+1,double);
		for(i=0; i<=nAlpha(A); i++){
		   for(j=0; j<=nAlpha(A); j++){
			v = (Int4) valAlphaR(i,j,A) - low;
			pr[v] += freq[i] * freq[j];
		   }
		}
		if(!karlin(low,high,pr,&lambda,&K,&H)) { print_error("fatal"); }
		fprintf(stderr,"K=%g; lambda=%g; H=%g\n",K,lambda,H); /****/
		fprintf(stderr,"set E = %g (adjusted: %g)\n",RptEval,RptEval/(double)N);
		free(pr);
		free(freq);	// previously leaked

		for(sq=1; sq<= N; sq++){
		   e_type sE=TrueSeqCMSA(sq,cma);
		   score = RepeatScoreSeq(sE,A,&os);
		   // effective search space: (sequence length - expected alignment length)^2 / 2
		   exp_length = (((double)score*lambda/H)+0.5);
		   NN = (double)LenSeq(sE) - exp_length;
		   if(NN < 1) NN = 1;
		   NN = (NN*NN)/2.0;
		   /*** expm1(x) computes (e**x)-1 accurately even for tiny x. ***/
		   p_val = - expm1(-K*NN*exp(-lambda*(double)score));
		   e = p_val*(double)N;
		   if(e <= RptEval){
			// PutSeq(stdout,sE,A);
			fprintf(stderr," (S=%d; E=%1.2g: %d aa) ", score,e,LenSeq(sE));
			// terminate the hit line on the stream it was written to
			PutSeqID(stderr,sE); fprintf(stderr,"\n");
			hits++;
		   }
		} fprintf(stdout,"Repeats in %.0f%c of seqs\n",100*(double)hits/(double)N,'%');
		
	    } else if(NumRandom > 0){
		PutRandomCMA(stdout, blosum62freq, LengthCMSA(1,cma), NumRandom, AlphabetCMSA(cma));
	    } else if(rm_all_but > 0){	// remove all but this many sequences from alignment at random.
		Int4 m,sq,N=NumSeqsCMSA(cma);
		dh_type dH=dheap(N+2,4);
		for(sq=2; sq<= N; sq++){ insrtHeap(sq,(keytyp) Random(),dH); }
		BooLean	*skip;
		NEW(skip,N+3,BooLean);
		for(m=0; (sq=delminHeap(dH)) != 0; ){
			m++; skip[sq]=TRUE;
			if((N-m) <= rm_all_but) break;
		} Nildheap(dH);
		PutSelectCMSA(stdout,skip,cma); free(skip);
	    } else if(fraction > 0){	// randomize fraction
		cma_typ fakecma=ShuffleSeqCMSA(fraction, cma);
		char str0[108];
		Int4 percent=(Int4)floor(fraction*100.0);
		sprintf(str0,"%s_sim%d",argv[1],percent);
		FILE *fptr = open_file(str0,".cma","w");
    		PutCMSA(fptr,fakecma); fclose(fptr);
	    } else if(residue_pos > 0){ // Remove sequences without residue at position x
	        if(residue_pos > LengthCMSA(1,cma))
		   print_error("deleted_pos outside of alignment");
		if(nBlksCMSA(cma) != 1)
		   print_error("this option requires one block alignments");

		fprintf(stderr,"save %s at %d\n",residue_str,residue_pos);
		char str0[108];
		sprintf(str0,"%s_%s%d",argv[1],residue_str,residue_pos);
		BooLean	Negate=FALSE;
		for(Int4 r=0; residue_str[r]; r++){
			if(Negate && isupper(residue_str[r])) print_error(USAGE_START);
			else if(islower(residue_str[r])){
				residue_str[r]=toupper(residue_str[r]);
				Negate=TRUE;
			}
	        }
		gss_typ *gss=gssCMSA(cma);
		BooLean	*skip;
		NEW(skip,NumSeqsCMSA(cma)+3,BooLean);
		data = TrueDataCMSA(cma);  // == True sequences...
		for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			char	str[10];
			Int4 res=ResidueCMSA(1,sq,residue_pos,cma);
			str[0]=AlphaChar(res,A); str[1]=0;
			if(Negate){
			  if(strstr(residue_str,str) != 0) skip[sq]=TRUE;
			  else skip[sq]=FALSE;
			} else {
			  if(strstr(residue_str,str) == 0) skip[sq]=TRUE;
			  else skip[sq]=FALSE;
			}
		}
		fp = open_file(str0,".cma","w");
		PutSelectCMSA(fp,skip,cma);
		fclose(fp); free(skip);
	    } else if(min_rpt > 0){ // i.e., option = -r<x>,<s>
		sprintf(str,"%s.new",argv[1]);
		FILE *fp = open_file(str,".cma","w");
		PutGoodRptsCMSA(fp,min_rpt,min_spacing,cma);
	    	fclose(fp);
		sprintf(str,"%s.new.cma",argv[1]);
		cma_typ cma2=ReadCMSA2(str,A);
		sprintf(str,"%s.new",argv[1]);
		PutAlnCMSA(str,cma2,NULL);
		TotalNilCMSA(cma2);
	    } else {
#if 0
		sprintf(str,"%s.new",argv[1]);
		WriteGoodCMSA(str,Cut,cma);
		sprintf(str,"%s.new.cma",argv[1]);
		cma_typ cma2=ReadCMSA2(str,A);
		sprintf(str,"%s.new",argv[1]);
                PutAlnCMSA(str,cma2,NULL);
		TotalNilCMSA(cma2);
#else
		{
	          Int4 Number;
	          fp=OpenFileToRead(argv[1]);
	          cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	          fp = open_file(argv[1],"_good.cma","w");
	          for(Int4 I=1; I <= Number; I++){
			// PutGoodCMSA(fp,Cut,IN_CMA[I]);
fprintf(stderr,"====== %s =====\n",NameCMSA(IN_CMA[I]));
		// PutGoodCMSA_X(fp,cutoff,purge_trigger_size,percentID,min_size,max_size, cma);
			PutGoodCMSA_X(fp,Cut,500000,10,12,500000,IN_CMA[I]);
			// PutGoodCMSA_X(fp,Cut,10000,10,12,10000,IN_CMA[I]);
			// PutGoodCMSA_X(fp,Cut,100,70,12,200,IN_CMA[I]);
		  } fclose(fp);
		}
#endif
	    } break;
	  case 'i': {
	     if(minlen > 0 && maxlen > 0){
		Int4 x=LengthCMSA(1,cma);
		if(x >= minlen && x <= maxlen) return 1; else return 0;
	     } else if(IronOut){
#if 0
		cma_typ	cma2=RmWrinklesCMSA(cma);
		PutCMSA(stdout,cma2); 
#elif 1		// modify to iron out multiple cma files 
	      Int4 Number;
	      // fp = open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      cma_typ *OUT_CMA; NEW(OUT_CMA,Number +3, cma_typ);
	      for(Int4 I=1; I <= Number; I++){
		OUT_CMA[I]=RmWrinklesCMSA(IN_CMA[I]); 
	      }
	      fp = open_file(argv[1],"_iron.cma","w");
	      for(Int4 I=1; I <= Number; I++)PutCMSA(fp,OUT_CMA[I]);
	      fclose(fp);
#else
			print_error("This option moved to gsq_typ");
#endif
	     } else if(fractDeleted < 0){
		Int4 col_deleted=RmQueryGapsCMSA(fractDeleted, cma);
		fprintf(stderr,"\n %d columns removed",col_deleted);
		if(col_deleted > 0){
           		sprintf(str,"%s.query",argv[1]);
			FILE *fp = open_file(str,".cma","w"); 
			PutCMSA(fp,cma); fclose(fp);
		}
	     } else if(fractDeleted > 0){
		if(0) fprintf(stderr,"Removing columns with %.3lf%c deletions\n",
				fractDeleted,'%');
		Int4 col_deleted=RmGappyColumnsCMSA(fractDeleted, cma);
		// fprintf(stderr,"\n %d columns removed",col_deleted);
		if(col_deleted > 0){
           		sprintf(str,"%s.delete%d",argv[1],(Int4)(100.0*fractDeleted));
			FILE *fp = open_file(str,".cma","w"); 
			PutCMSA(fp,cma); fclose(fp);
		}
	     } else if(print_indel_trans){
		Int4 sq_hits,indel;
		sq_hits=InsDelTransCMSA(&indel,cma);
		fprintf(stdout,"\nfile '%s': %d indel transitions in %d out of %d seqs.\n\n",
			NameCMSA(cma),indel,sq_hits,NumSeqsCMSA(cma));
	     } else if(blk == 0){ fprintf(stderr,"map = %g\n",RelMapCMSA(cma));
	     } else {
		if(blk < 1 || blk > nBlksCMSA(cma)) print_error(USAGE_START);
		if(front_back == 'f'){
		 for(Int4 col=1; col <= length_extend; col++)
		   InsertColCMSA(blk, FALSE,cma);
		} else if(front_back == 'b'){
		 for(Int4 col=1; col <= length_extend; col++)
		   InsertColCMSA(blk, TRUE,cma);
		} else print_error(USAGE_START);
		sprintf(str,"%s.new",argv[1]);
		PutAlnCMSA(str,cma,NULL);
	    }
	   } break;
	  case 'T': {
	     if(template_mode){
		Int4 Number;
		// fp = open_file(argv[1],".cma","r");
	        fp=OpenFileToRead(argv[1]);
		cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		IN_CMA[0]=IN_CMA[Number]; IN_CMA[Number]=0;
		cma_typ *OUT_CMA=ConversionViaTemplateCMSA(IN_CMA[0],IN_CMA);
#if 0
		for(Int4 II=1; II < Number; II++){
		   if(OUT_CMA[II]){
			char str2[500];
            		sprintf(str2,"%s_%d.cma",argv[1],II);
            		WriteCMSA(str2,OUT_CMA[II]);
        	   }
		}
#endif
		fp = open_file(argv[1],"_merged.cma","w");
		PutMergedCMSA(fp,Number-1,OUT_CMA); fclose(fp);
	     } else if(split_into_blks){	// 
		if(nBlksCMSA(cma) != 1) print_error(USAGE_START);
		unsigned short RmLeft,RmRight,End;
		// print_error("-T= option still being implemented...");
#if 1
		// 1. trim from end of cma to blk_end[nblks].
		// 1. for all blocks: split at start[i] and trim block 1 from end[i-1] 
			// if(!TrimCMSA(1,RmLeft,RmRight,cma);
		cma_typ	*out_cma;
		
		NEW(out_cma,nblks+2,cma_typ);
		out_cma[nblks]=cma;
		for(i=nblks; i > 0; i--){
		   End=LengthCMSA(1,out_cma[i]);
		   // 1a. trim from end[i] of cma to blk_end[nblks].
		   if(End > blk_end[i]) RmRight=End-blk_end[i];
		   else RmRight=0;
		   fprintf(stderr,"Block %d: %d..%d\n",i,blk_start[i],blk_end[i]);
		   fprintf(stderr,"RmRight=%d; End=%d\n",RmRight,End);
		   if(RmRight > 0){
			   if(!TrimCMSA(1,0,RmRight,out_cma[i]))
				print_error("-T= TrimCMSA input error");
		   }
			// 1b. split block 1 at blk_start[i].
		   if(blk_start[i] > 1){
           	  	out_cma[i-1]=SplitBlkCMSA(1,blk_start[i]-1,TrimMax,out_cma[i]);
		  	if(out_cma[i-1] == 0) print_error("-T= SplitBlkCMSA input error");
		   } else out_cma[i-1] = CopyCMSA(out_cma[i]);
		}
		// 2. trim from first residue to blk_start[1] of cma to blk_end[nblks].
		RmLeft=blk_start[1] -1;
		if(RmLeft > 0){
		    cmsa2 = RmBlkCMSA(1, out_cma[i]);
		    // if(!TrimCMSA(1,RmLeft,0,out_cma[i]))
		    // 		print_error("-T= input error");
		} else cmsa2=out_cma[0];
           	sprintf(str,"%s_%d_blks",argv[1],nblks);
		//SaveBestCMSA(cmsa2); InitMAPCMSA(cmsa2);
                WriteMtfCMSA(str,cmsa2,NULL);
		// PutAlnCMSA(str,cmsa2,NULL);

		for(i=0; i < nblks; i++) NilCMSA(out_cma[i]);
#endif
	     } else {
	      Int4 Number;
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      fp = open_file(argv[1],"_trim.cma","w");
	      for(Int4 f=1; f <= Number; f++) {
		  cma_typ tcma=IN_CMA[f];
		  if(blk < 1 || blk > nBlksCMSA(tcma)) print_error(USAGE_START);
		  if(lenrm < 0){
			Int4 lemon; lenrm = abs(lenrm);
			for(i=1; i<=lenrm; i++){
			   lemon = LengthCMSA(blk,tcma);
			   if(lemon <= 3) print_error(USAGE_START);
			   RmColumnMSA(blk, lemon, tcma);
			}
		  } else {
			for(i=1; i<=lenrm; i++){
			   if(LengthCMSA(blk,tcma) <= 3) print_error(USAGE_START);
			   RmColumnMSA(blk, 1, tcma);
			}
		  } 
#if 0
		  sprintf(str,"%s.new",argv[1]);
		  PutAlnCMSA(str,tcma,NULL);
#elif 0
		  // SaveBestCMSA(tcma); InitMAPCMSA(tcma);
		  FILE *fptr = open_file(argv[1],".edit.tcma","w");
    		  PutCMSA(fptr,tcma); fclose(fptr);
#else
    		  PutCMSA(fp,tcma); 
#endif
	       } fclose(fp);
	     } 
	    } break;
	  case 'R':
	    if(rename != 0){
		if(rename_id > NumSeqsCMSA(cma)) print_error(USAGE_START);
		sprintf(str,"%s pdbid",rename);
		e_type E=TrueSeqCMSA(rename_id,cma);
		ChangeInfoSeq(str,E); 
		E=FakeSeqCMSA(rename_id,cma);
		ChangeInfoSeq(str,E); 
		PutCMSA(stdout,cma); free(rename);
	    } else if(family_name[0] != 0){
	      Int4 Number;
	      // fp = open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      for(Int4 II=1; II <= Number; II++){
		cma=IN_CMA[II];
		if(strcmp(NameCMSA(cma),family_name) != 0){
		   PutCMSA(stdout,cma);
		} else fprintf(stderr,"\n ------------ '%s' removed! ------------\n",NameCMSA(IN_CMA[II]));
	      }
	    } else if(rm==NULL){
		data = DataCMSA(cma);  // == Fake sequences...
	    	N = NSeqsSeqSet(data);
	      if(rm_seq > 0 && rm_seq <= N){
		NEW(skip,N+3,BooLean); 
		skip[rm_seq] = TRUE;
		fp = open_file(argv[1],".new.cma","w");
		PutSelectCMSA(fp,skip,cma); fclose(fp); 
		free(skip); 
	      } else print_error(USAGE_START);
	    } else {
		NEW(remove,nBlksCMSA(cma) + 2, BooLean);
		NEW(value,nBlksCMSA(cma) + 10, Int4);
		i = 1;
		n=ParseIntegers(rm, value, USAGE_START);
		for(i=1; i <=n; i++){
		   s=value[i];
		   if(s < 0 || s > nBlksCMSA(cma)) print_error(USAGE_START);
		   else remove[s]=TRUE;
		}
		for(i=nBlksCMSA(cma); i > 0; i--){
		   if(remove[i]){
			cmsa = RmBlkCMSA(i, cma);
	    		NilCMSA(cma); cma=cmsa;
		   }
		}
		sprintf(str,"%s.new",argv[1]);
		PutAlnCMSA(str,cma,NULL);
		free(remove); free(value);
	     } break;
	  case 'D':
	    if(full_seq){		// retain only full length sequences
		if(nBlksCMSA(cma) != 1)
		   print_error("this option requires one block alignments");
		gss_typ *gss=gssCMSA(cma);
		Int4	pos[3];
		BooLean	*skip;
		NEW(skip,NumSeqsCMSA(cma)+3,BooLean);
		data = TrueDataCMSA(cma);  // == True sequences...
		for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			if(IsDeletedCMSA(sq,1,cma)) skip[sq]=TRUE;
			else if(IsDeletedCMSA(sq,LengthCMSA(1,cma),cma)) skip[sq]=TRUE;
		} PutSelectCMSA(stdout,skip,cma);
		free(skip);
	    } else if(deleted_pos > 0){ // Remove sequences with deletions at position x
	     {
	      Int4 Number,NumLeft;
	      // fp = open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
	      cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      char str0[108]; sprintf(str0,"%s_is%d",argv[1],deleted_pos);
	      fp = open_file(str0,".cma","w");
	      for(Int4 II=1; II <= Number; II++){
		fprintf(stdout,"%s,",NameCMSA(IN_CMA[II]));
		cma=IN_CMA[II];
	        if(deleted_pos > LengthCMSA(1,cma)) print_error("deleted_pos outside of alignment");
	        if(nBlksCMSA(cma) != 1) print_error("this option requires one block alignments");
		gss_typ *gss=gssCMSA(cma);
		Int4	pos[3];
		BooLean	*skip;
		NEW(skip,NumSeqsCMSA(cma)+3,BooLean);
		data = TrueDataCMSA(cma);  // == True sequences...
		NumLeft=0;
		for(Int4 sq=1; sq <= NumSeqsCMSA(cma); sq++){
			e_type sE=SeqSetE(sq,data);
			PosSiteCMSA(1, sq, pos, cma);
			if(IsDeletedCMSA(sq,pos[1]+deleted_pos-1, cma)) skip[sq]=TRUE;
			// if(IsDeletedCMSA(sq,deleted_pos,cma)) skip[sq]=TRUE;
			else { skip[sq]=FALSE; NumLeft++; }
		} if(NumLeft >= 1) PutSelectCMSA(fp,skip,cma); free(skip);
	      } fclose(fp); 
	     }
	    } else if(deleted_pos==0){
	         PutAsDomTypCMSA(stdout,"void",cma);
		 // print_error("PutAsDomTypCMSA( ) not yet implemented");
	    } else print_error("deleted_pos outside of alignment");
	    break;
	  case 'M':
	   char matstr[50];
	   if(MinColU){
		sprintf(matstr,"_Match%.0f.cma",100.0*MinCol);
		Int4 left=PutUniqueMergedMinColCMSA(argv[1],matstr,MinCol,A);
		if(left > 0){
		  fprintf(stderr,"opening %s%s\n",argv[1],matstr);
	          fp = open_file(argv[1],matstr,"r");
		  cma_typ xcma=ReadCMSA(fp,A); fclose(fp); 
		  sprintf(matstr,"_Match%.0f_U%d.cma",100.0*MinCol,purge);
	          fp = open_file(argv[1],matstr,"w");
		  Int4 Nset,Nsq=NumSeqsCMSA(xcma);
		  set_typ InSet=MakeSet(Nsq+4); FillSet(InSet);
		  set_typ Set=RtnFastRepSetCMSA(stderr,purge,InSet,xcma);
		  PutInSetCMSA(fp,Set,xcma); Nset=CardSet(Set); NilSet(Set); NilSet(InSet);
		  fprintf(stdout,"File \"%s\": (%d/%d removed; %d remain).\n",
						NameCMSA(xcma),Nsq-Nset,Nsq,Nset);
		  fclose(fp); TotalNilCMSA(xcma);

		  fprintf(stderr,"opening %s%s\n",argv[1],matstr);
	          fp = open_file(argv[1],matstr,"r");
		  xcma=ReadCMSA(fp,A); fclose(fp); 

		  sprintf(matstr,"_Match%.0f_U%d.hsw",100.0*MinCol,purge);
		  fprintf(stderr,"creating %s%s\n",argv[1],matstr);
	          fp = open_file(argv[1],matstr,"w");
		  CreateWriteHSW(fp,xcma,argv[1]); fclose(fp);
		  TotalNilCMSA(xcma);
		}
	   } else if(MinColHSW){
		sprintf(matstr,".Match%.0f.cma",100.0*MinCol);
		Int4 left=PutUniqueMergedMinColCMSA(argv[1],matstr,MinCol,A);
		if(left > 0){
		  fprintf(stderr,"opening %s%s\n",argv[1],matstr);
	          fp = open_file(argv[1],matstr,"r");
		  cma_typ xcma=ReadCMSA(fp,A); fclose(fp); 

		  sprintf(matstr,".Match%.0f.hsw",100.0*MinCol);
		  fprintf(stderr,"creating %s%s\n",argv[1],matstr);
	          fp = open_file(argv[1],matstr,"w");
		  CreateWriteHSW(fp,xcma,argv[1]); fclose(fp);
		  TotalNilCMSA(xcma);
		}
	   } else if(MinCol > 0.0){
		sprintf(matstr,".Match%.0f.cma",100.0*MinCol);
		PutUniqueMergedMinColCMSA(argv[1],matstr,MinCol,A);
	   } else {
		sprintf(str,"%sx%d",argv[1],num_rpts);
		PutAlnCMSA(str, cma,NULL,num_rpts);
	   } break;
	  case 'm': 
	  // -m option.  The first applicable mode below is run:
	  //   MinCol > 0.0  : per input family, keep the sequences whose fraction of
	  //                   non-deleted aligned columns is >= MinCol.
	  //   max_sq > 0    : print cma to stdout only if it has <= max_sq sequences.
	  //   min_sq > 0    : print cma to stdout only if it has >= min_sq sequences.
	  //   mm_tpl_file   : for each template sequence, merge the input families
	  //                   whose names match that sequence's identifier.
	  //   otherwise     : merge (or, with write_file_name, select) input families,
	  //                   optionally restricted to a comma-separated list of names.
	  if(MinCol > 0.0){
	    Int4 Number,blk;
	    char matstr[20];
	    // fp = open_file(argv[1],".cma","r");
	    fp=OpenFileToRead(argv[1]);
	    cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	    sprintf(matstr,".match%.0f.cma",100.0*MinCol);
	    fp = open_file(argv[1],matstr,"w");
	    for(Int4 f=1; f<=Number;f++) {
		cma=IN_CMA[f];
		Int4    N = NumSeqsCMSA(cma);
        	BooLean *skip; NEW(skip,N+3,BooLean);
	        h_type HG=Histogram("percentage of matching columns",0,100,1.0);
		Int4 J,n,Len,na,TotLen=TotalLenCMSA(cma);
                for(J=1; J <= N; J++){ skip[J]=TRUE; }
                for(n=0,J=1; J <= N; J++){
		  // na = number of aligned positions (over all blocks) not deleted in sequence J.
		  for(na=0,blk=1; blk <= nBlksCMSA(cma); blk++){
		    Len=LengthCMSA(blk,cma);
		    for(s=1; s <= Len; s++){
			if(!IsDeletedCMSA(blk,J,s,cma)){ na++; }
		    }
		  }
		  double fr=(double)na/(double)TotLen;
		  IncdHist(100.0*fr,HG);
		  if(fr >= MinCol){ skip[J]=FALSE; n++; }	// n counts retained sequences
		}
		if(n > 0) PutSelectCMSA(fp,skip,cma);
		free(skip);	// always release the mask, also when nothing was retained
		PutHist(stdout,60,HG); NilHist(HG);
	     } fclose(fp);
	  } else if(max_sq > 0){
		if(NumSeqsCMSA(cma) <= max_sq) PutCMSA(stdout,cma);
	  } else if(min_sq > 0){
		if(NumSeqsCMSA(cma) >= min_sq) PutCMSA(stdout,cma);
	  } else if(mm_tpl_file != 0){
	    Int4 Number,N;
	    fp=OpenFileToRead(argv[1]);
	    cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	    // OUT_CMA is filled at indices 1..N with N <= Number, so it needs more
	    // than Number slots.
	    cma_typ *OUT_CMA; NEW(OUT_CMA,Number+3,cma_typ);
	    fp = open_file(mm_tpl_file,"","r");
	    cma_typ tplcma=ReadCMSA(fp,A); fclose(fp); 
	    fp = open_file(argv[1],".mm.cma","w");
	    BooLean *skip; NEW(skip, NumSeqsCMSA(tplcma) + 5, BooLean);
	    for(i=1; i <= NumSeqsCMSA(tplcma); i++){
		skip[i]=TRUE;
		e_type E=TrueSeqCMSA(i,tplcma);
		StrSeqID(str,25,E);
		fprintf(stderr,"%d: %s\n",i,str);
		// Collect every input family whose name agrees with this id
		// over the first 20 characters.
	        for(N=0,j=1; j <= Number; j++){
		   char *name=NameCMSA(IN_CMA[j]);
		   if(strncmp(str,name,20) == 0){
			skip[i]=FALSE;
			fprintf(stderr,"   %d: %s\n",j,name);
			N++; OUT_CMA[N]=IN_CMA[j];
		   }
		} if(N > 0) PutMergedCMSA(fp,N,OUT_CMA); 
	    } fclose(fp); free(OUT_CMA);
	    // Write the template restricted to the matched sequences; the first
	    // template sequence is always kept.
	    fp = open_file(argv[1],".mm.tpl","w");
	    skip[1]=FALSE; PutSelectCMSA(fp,skip,tplcma);
	    fclose(fp); free(skip);
	  } else {
	    Int4 Number,N,num_names,namelen,NewNumber;
	    int c;			// int, not char, so that EOF is distinguishable from data
	    char *buffer[1000];		// names are stored in slots 1..999
	    char tmpname[1000];
	    // fp = open_file(argv[1],".cma","r");
	    fp=OpenFileToRead(argv[1]);
	    cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
	    fclose(fp);
	    if(merge_file_name || write_file_name){	// then ouput only those on list.
		cma_typ *USE_CMA;
		NEW(USE_CMA,Number+3,cma_typ);
		for(Int4 f=1; f<=Number;f++) {
		  USE_CMA[f]=IN_CMA[f]; IN_CMA[f]=0;
		}
	// 1. Get protein family names from input file:
	//    comma-separated names of [A-Za-z0-9_-]; newlines are ignored.
	        if(write_file_name) fp = open_file(write_file_name,"","r");
		else fp = open_file(merge_file_name,"","r");
		for(namelen=0,N=0,num_names=1; (c=getc(fp)) != EOF; N++){
		  if(N==0) if(!isalnum(c)) print_error("-m option input file error1");
		  if(c==','){
			tmpname[namelen]=0;
			buffer[num_names]=NewString(tmpname); 
	fprintf(stderr,"Name %d: %s\n",num_names,buffer[num_names]);
			namelen=0;
			num_names++; 
			if(num_names >= 1000) print_error("-m option input file error: too many names");
		  } else if(isalnum(c) || c=='_' || c == '-'){
			// leave room for the terminating 0
			if(namelen >= 999) print_error("-m option input file error: name too long");
			tmpname[namelen]=(char)c; namelen++;
		  } else if(c != '\n') print_error("-m option input file error2");
		} fclose(fp);
		if(namelen==0) print_error("-m option input file error3");
		tmpname[namelen]=0;
		buffer[num_names]=NewString(tmpname); 
	fprintf(stderr,"Name %d: %s\n",num_names,buffer[num_names]);

	// 2. remove all cma files that don't match a name from input file:
		NewNumber=0;
		for(Int4 f=1; f<=Number;f++){ 
		   BooLean found=FALSE;
		   char *str1=NameCMSA(USE_CMA[f]);
		   fprintf(stderr,"Name %d = '%s'\n",f,str1);
		   for(Int4 n=1; n <= num_names; n++){
		   	char *str2=buffer[n];
			if(strcmp(str1,str2)==0){	// 0 --> a match
				found=TRUE;
			fprintf(stderr,"===> Match: '%s' = '%s'\n",str1,str2);
				break;
			}
		   }
		   if(found){
			NewNumber++;
			IN_CMA[NewNumber]=USE_CMA[f];
		   } else {	// need to free data too...?
			NilCMSA(USE_CMA[f]);
		   }
		}
		Number=NewNumber;
		fprintf(stderr,"NewNumber = %d\n",NewNumber);
		for(Int4 n=1; n <= num_names; n++) free(buffer[n]);
		free(USE_CMA);	// every entry was either moved back to IN_CMA or released above
	    }
	    if(Number==0) print_error("Error: zero files merged");
	    if(write_file_name){
	    	fp = open_file(argv[1],".select.cma","w");
		for(Int4 f=1; f<= Number; f++){ PutCMSA(fp,IN_CMA[f]); }
		fclose(fp);
	    } else {
		// Merging requires the first block of every family to have the same length.
	    	fp = open_file(argv[1],".merged.cma","w");
		for(Int4 f=1; f<= Number; f++){ 
		   if(LengthCMSA(1,IN_CMA[1]) != LengthCMSA(1,IN_CMA[f])){
			print_error("FATAL: input cma files not of the same length!");
		   }
		} PutMergedCMSA(fp,Number,IN_CMA); fclose(fp);
	    }
	    // PutModelsCMSA(stdout,cma); 
	   } break;
	  case 'A':
	     // -A option: insert |add_col| columns into block add_block and write the
	     // result to <argv[1]>.new.cma.  A negative add_col is negated and the columns
	     // are inserted with right=FALSE; otherwise right=TRUE.
	     // Nothing is done when add_block is 0.
	     // NOTE(review): the original nested a second, always-true if(add_block)
	     // whose else-branch (print RelMapCMSA and rewrite the file unchanged) could
	     // never run; that unreachable branch has been removed.  Confirm whether the
	     // outer test was meant to be a different condition.
	     if(add_block){	
		fprintf(stderr,"add_block = %d; add_col = %d\n",add_block,add_col);
		if(add_block < 1 || add_block > nBlksCMSA(cma))
					print_error(USAGE_START);
		Boolean right=TRUE;
		if(add_col < 0){ add_col = -add_col; right=FALSE; }
		for(Int4 col=1; col<= add_col; col++){
			if(InsertColCMSA(add_block,right,cma) ==FALSE)
				fprintf(stderr,"InsertColCMSA() inserted gaps\n");
		}
                sprintf(str,"%s.new.cma",argv[1]);
                WriteCMSA(str, cma);
	     } break;
#if 0	// below needs lots more work...
	  case 'A':	// realign sequence i
		{
		  char *operation;
		  Int4 lenM= LengthCMSA(1,cma);
		  NEW(operation,lenM+9,char);
		  operation[0]='E';
		  operation[1]='M';
		  for(i=2; i <= lenM; i++) operation[i] = 'm';
		  operation[i] = 'E'; i++; operation[i] = 0; 
#if 0
	gsq_typ *gsq; gsq = new gsq_typ[1];
	Int4 trace_length=strlen(operation);
	gsq->initialize(gss->LeftFlank(),gss->RightFlank(), operation,trace_length,
                start,E);
	ReplaceCMSA(sq, gsq, cma);
	// private SubToFull --> public...
#endif
#if 0
		gss_typ     *gss=gssCMSA(cma);
		operation=gapped_aln_seq_smatrixSW(a,b,
                                LenSeq(E),SeqPtr(E),NmaxPrtnModel(PM),
                                SMatricesPrtnModel(PM),
                                GapScoresPrtnModel(PM),&start,&score);
		// BooLean ReAlignGSqCMSA(Int4 seq,char *operation, Int4 start, cma_typ *oldcma);
		ReAlignGSqCMSA(J,operation, start, &cma);
#endif
		  free(operation);
		} break;
#endif
	  case 'L': 
	     {
		// -L option (single-block alignments with more than one sequence only).
		//   look == 0 : histogram of how many other aligned segments each segment
		//               hits when it is used as an ungapped scoring matrix.
		//   look != 0 : per-repeat report (to stderr) for the sequences whose id
		//               equals 'look', followed by a "-x<look>:" list of the repeat
		//               numbers that met the rejection criteria.
		assert(nBlksCMSA(cma) == 1);
		data = DataCMSA(cma); N = NSeqsSeqSet(data);
		assert(N > 1);
		if(look==0){  // then output histogram of all hits
		  Int4		qst,start,score;
		  double	d;
		  st_type	sites=SitesCMSA(cma);
		  h_type HG=Histogram("number of hits for each repeat",0,N,1);
		  // scale the cutoff by the number of unordered sequence pairs
		  prob_cut /= ((double)N*(N-1)/2.0);
		  // prob_cut /= (double)N;
		  Int4 nhits,lenM= LengthCMSA(1,cma);
	          for(i=1; i<= N; i++) {
		    e_type qE = SeqSetE(i,data);
		    smx_typ smx = MkSMatrix(2.0,lenM,tFreqSeqSet(data),A);
		    double	prob;
		    unsigned char *pqsq = SeqPtr(qE);
		    qst = SitePos(1,i,1,sites);
		    // Fill the matrix from the aligned segment of sequence i; with
		    // 'shuffle' the columns are filled in reverse order.
		    for(s=qst,j=1; j <= lenM; j++,s++) {
			  for(Int4 c=0; c <= nAlpha(A); c++){
				if(shuffle) SetSMatrix(c,lenM-j+1,valAlphaR(pqsq[s],c,A),smx);
				else SetSMatrix(c,j,valAlphaR(pqsq[s],c,A),smx);
			  }
		    }
		    // Count the other sequences whose aligned segment scores at or
		    // below the probability cutoff.
		    for(nhits=0,j=1; j <= N; j++) {
			if(j == i) continue;
			e_type sE = SeqSetE(j,data);
			Int4 sts = SitePos(1,j,1,sites);
			score = ScoreSMatrix(SeqPtr(sE),sts, smx);
			if(score > 0){
			     prob = SMatrixProb(score, smx);
			     if(prob <= prob_cut){
				nhits++;
			     }
			}
		    } IncdHist(nhits,HG);
		    NilSMatrix(smx);	// one matrix is built per sequence; release it here
		  } PutHist(stdout,60,HG); NilHist(HG);
		} else {
		  char str2[108];
		  Int4 v,n2,end,end0,end1,qst,st0,st1,j,start,score;
		  Int4 gap0,gap1;
		  double	g_like,ug_like;
		st_type	sites=SitesCMSA(cma);
		gss_typ     *gss=gssCMSA(cma);
		NEW(value,N+3,Int4);
		// prob_cut /= (double)N;
		Int4	sum,nhits,lenM= LengthCMSA(1,cma);
		Int4	q_start,s_start;
		Int4	diag=0;
		hsp_typ hsp;
		// n2 counts the repeats found for 'look'; v counts those recorded in value[].
	        for(end=v=n2=0,i=1; i<= N; i++) {
		   e_type qE = SeqSetE(i,data);
		   StrSeqID(str2,100,qE);
		   if(strncmp(str2,look,100) == 0){
			if(n2 == 0) {
			  fprintf(stderr,"%s = seq %d\n",look,i);
			  end0 = 0; 
			} else { end0 = end; }	// end of the previous repeat
			n2++;
			qst = SitePos(1,i,1,sites);
			end = qst + lenM - 1;
			// Determine start/end of this repeat and st1, the start of the next
			// repeat of the same sequence (or the sequence end if there is none).
			if(gss) {
			  g_like = GetGappedProbCMSA(1,i,cma);
			  ug_like = GetProbCMSA(1,i,cma);
			  start = gss->TrueSite(i, qst);
			  end = gss->TrueSite(i, end);
			  if(i < N){
			     StrSeqID(str2,100,SeqSetE(i+1,data));
			     if(strncmp(str2,look,100) == 0){
			       st1 = SitePos(1,i+1,1,sites);
			       st1 = gss->TrueSite(i+1, st1);
			     } else {
			       st1 = gss->TrueSite(i,LenSeq(qE));
			     }
			  } else st1 = gss->TrueSite(i,LenSeq(qE)); 
			} else { 
			  g_like = ug_like= GetProbCMSA(1,i,cma);
			  start = qst + OffSetSeq(qE);
			  end = end + OffSetSeq(qE);
			  if(i < N){
				StrSeqID(str2,100,SeqSetE(i+1,data));
				if(strncmp(str2,look,100) == 0) st1 = SitePos(1,i+1,1,sites);
				else st1 = LenSeq(qE);
			  } else st1 = LenSeq(qE);
			}

			// Ungapped scoring matrix built from the aligned segment of sequence i.
			smx_typ smx = MkSMatrix(2.0,lenM,tFreqSeqSet(data),A);
			double	prob;
			unsigned char *pqsq = SeqPtr(qE);
			for(s=qst,j=1; j <= lenM; j++,s++) {
			  for(Int4 c=0; c <= nAlpha(A); c++){
				score = valAlphaR(pqsq[s],c,A);
				SetSMatrix(c,j,score,smx);
			  }
			}
			// gapxdrop routines...
			double	evalue;
			Int4	gap_open=10,gap_extend=1;
			ss_type	fulldata = FullSeqCMSA(cma);
			Int4 fqsq = SubToFullCMSA(i,cma);
			e_type fqE = SeqSetE(fqsq,fulldata);
			sbp_typ sbp=GBLAST_ScoreBlkNew(A,SeqPtr(fqE)+1,LenSeq(fqE),gap_open,
                		gap_extend, TotalSeqSet(fulldata), NSeqsSeqSet(fulldata));
			SetStdStatsSBP(sbp);    // use standard statistics
			// double x_parameter_final=25.0;
			double x_parameter_final=10.0;
			gab_typ gap_align= GABNew(fqE,gap_open,gap_extend,SMatrixSBP(sbp),
			     -(GBLAST_SCORE_MIN),GapXDropoffSBP(x_parameter_final,sbp),0); 

// h_type HG=Histogram("number of hits for each repeat",-500,500,5.0);
			double j_like,max_like=-99999.0,max_evalue = 999999999.0;
			for(sum=nhits=0,j=1; j <= N; j++) {
			   if(j == i) continue;
			   e_type sE = SeqSetE(j,data);
			   if(verbose) StrSeqID(str2,100,sE);
			   Int4 sst = SitePos(1,j,1,sites);
			   if(shuffle){
			     unsigned char *sseq = ShuffleSeqArray(LenSeq(sE), SeqPtr(sE));
			     score = ScoreSMatrix(sseq,sst,smx); free(sseq);
			   } else {
			     score = ScoreSMatrix(SeqPtr(sE),sst, smx);
			   }
// IncdHist(score,HG);
			   if(score > 0){
			     prob = SMatrixProb(score, smx);
			     if(prob <= prob_cut){
				//  gapxdrop routines...
				// NOTE(review): gss is dereferenced here without the null
				// check that guards it above -- confirm gss cannot be 0 here.
				Int4 word_score;
				Int4 os=FindMaxWordSeq(qst,sst,lenM, qE, sE, &word_score, A);
			        q_start = gss->TrueSite(i, qst+os);
			        s_start = gss->TrueSite(j, sst+os);

				Int4 fssq = SubToFullCMSA(j,cma);
				e_type fsE = SeqSetE(fssq,fulldata);
				// SetSubjectGAB(q_start, s_start, fsE, gap_align);
#if 1	// fix...
				// Restrict the subject to the aligned segment plus up to
				// lenM residues on either side, clipped to the full sequence.
				Int4 ssq_start = gss->TrueSite(j,sst)-lenM;
				ssq_start = MAXIMUM(Int4,ssq_start,1);

				Int4 ssq_end = gss->TrueSite(j,sst+lenM-1)+lenM;
				ssq_end = MINIMUM(Int4,ssq_end,LenSeq(fsE));
				e_type ssqE = MkSubSeq(ssq_start, ssq_end, fsE);

				SetSubjectGAB(q_start,s_start-ssq_start+1,ssqE, gap_align);
#endif
				PerformGappedAlignment(gap_align);
				evalue= GappedScoreToEvalueSBP(gap_align->score,sbp);
#if 0
				if(verbose && evalue < prob_cut) {
				   PutSeqID(stderr,fsE); fprintf(stderr," "); 
				   fprintf(stderr,"gapped score = %d; E = %g\n",
					gap_align->score,evalue);
				}
#endif
				if(evalue <= prob_cut){
			  	   j_like = GetGappedProbCMSA(1,j,cma);
				   // remember the likelihood of the hit with the lowest E-value
				   if(max_evalue > evalue){
					 max_evalue = evalue;
					 max_like = j_like;
				   }
			           sum+=gap_align->score;
			           //sum+=score;
				   nhits++;
	// if(verbose && strncmp(str2,look,100) == 0){
	if(verbose){
#if 1	// Do full alignment
#if 0
				PutDiagonalSeq(stderr,qst-sst,qE,sE,A);
		fprintf(stderr,"seq %d: %g (q_start=%d; s_start=%d)\n",j,prob, q_start,s_start);
unsigned char *fqsp=SeqPtr(fqE),*fssp=SeqPtr(fsE);
fprintf(stderr,"\n%c%c%c fqE (%3d-%-3d)\n",AlphaChar(fqsp[q_start],A),
				AlphaChar(fqsp[q_start+1],A), AlphaChar(fqsp[q_start+2],A),
				q_start,q_start+2);
fprintf(stderr,"%c%c%c fsE (%3d-%-3d) score = %d\n",AlphaChar(fssp[s_start],A),
				AlphaChar(fssp[s_start+1],A), AlphaChar(fssp[s_start+2],A),
				s_start,s_start+2,word_score);
#endif
		// hhp = MkHSPHeap(nhits+2);
		brh_typ results;
		PerformGappedAlignmentWithTraceback(gap_align);
		hsp=MakeHSPTyp(gap_align,sbp);
		// results = MakeGBLASTResultHitlist(n+1,fsE);
		results = MakeGBLASTResultHitlist(2,ssqE);
                AddHspBRH(hsp,results);
  		sap_typ sap;
		// Int4 n=PurgeHSPHeap(hhp);  ... use to eliminate weird alignments.
        	sap = ExtractAlnBRH(results,fqE);
		PutGSeqAlignList(stderr, sap, 60, A);
		fprintf(stderr,"Likelihood(%d) = %.2f\n",j,j_like);
		results=NilBRH(results);
		FreeGSeqAlignList(sap);
#endif
	}
				} else {
				}
				NilSeq(ssqE);
			     }
			   }
			}
			if(verbose && nhits) fprintf(stderr,"\n");
			GBLAST_ScoreBlkDestruct(sbp); GABDelete(gap_align);

// PutHist(stdout,60,HG); NilHist(HG);
			// sum = (Int4)(0.5+((double)sum/(double)(N-1)));
			// sum becomes the rounded mean gapped score over the hits
			if(nhits > 0) sum = (Int4)(0.5+((double)sum/(double)nhits));
			else sum=0;
			double  **colfreq = ColResFreqsCMSA(1, cma);
			Int4 expscore = (Int4) ExpScoreSMatrix(colfreq,smx);
			for(j=1; j <= lenM; j++) free(colfreq[j]); free(colfreq);
			double adj_like = ug_like + 0.30103*(2*expscore);
			// sum -= ExpScoreSMatrix(smx);
			NilSMatrix(smx);
			gap0 = start-end0-1;	// residues between the previous repeat and this one
			gap1 = st1-end-1;	// residues between this repeat and the next one / end
			fprintf(stderr,"%3d: %s ",n2,look);
			// if(n2 > 1) fprintf(stderr," (%3d(%3d)) ",gap0,gap1);
			if(n2 > 1) fprintf(stderr," (%3d) ",gap0);
			else fprintf(stderr,"       ");
			char c1,c2,c3,c4;
			// Record the repeat for the "-x" list: low gapped likelihood,
			// no hits, and more than 20 residues of gap on both sides.
			if(g_like < look_cut && nhits < 1 && (gap0 > 20 && gap1 > 20)) {
				value[v] = n2; v++; 
			} 
			// Each criterion prints '*' (2 strikes), '?' (1 strike) or ' '.
			char strikes=0;
			if(g_like < look_cut){ c1='*'; strikes +=2; }
			else if(g_like < (look_cut + 2.0)){ c1='?'; strikes ++; } else c1=' ';
			fprintf(stderr,"%c",c1); 
			if(gap0 > 20 && gap1 > 20){ c2='*'; strikes +=2; }
			else if(gap0 > 20 || gap1 > 20){ c2='?'; strikes ++; } else c2=' ';
			fprintf(stderr,"%c",c2); 
			// if(nhits < 1){ c3='*'; strikes +=2; }
			// else if(nhits < 2){ c3='?'; strikes ++; } else c3=' ';
			// fprintf(stderr,"%c",c3); 
			if(adj_like < -2.0){ c4='*'; strikes +=2; }
			else if(adj_like < 0.0){ c4='?'; strikes ++; } else c4=' ';
			fprintf(stderr,"%c%d ",c4,strikes); 
			fprintf(stderr,"%4d-%-4d ", start,end);
			fprintf(stderr,"[%.2f] (raw: %.2f; adj: %.2f)",g_like,ug_like,adj_like);
			if(nhits > 1) fprintf(stderr," %d hits [%.2f](%.2f) ",
					nhits,max_like,-log10(max_evalue));
			else if(nhits > 0) fprintf(stderr," 1 hit [%.2f](%.2f) ",
					max_like,-log10(max_evalue));
			else fprintf(stderr," no hits ");
			fprintf(stderr,"(%d[%d])\n",sum,expscore);
		   } 
		} 
		// Emit the recorded repeat numbers as a comma-separated "-x<look>:" list.
		if(v){
		  fprintf(stderr,"-x%s:",look);
		  for(i=0; i < v-1; i++) fprintf(stderr,"%d,",value[i]);
		  fprintf(stderr,"%d\n",value[i]); 
		} free(value);
		} 
	    } break;
	  case 'k': 	// output cma with only sequence from kingdom 
	    {
	     if(Kingdom){
		// For every family in argv[1], keep the sequences whose upper-cased
		// kingdom letter occurs in Kingdom; sequences with a 0 kingdom are kept
		// when Kingdom contains 'U'.  Output goes to <argv[1]>_<Kingdom>.cma.
		char	*str0;
		BooLean	*skip;
	    	Int4 i,j,Number;
	    	FILE *fp=OpenFileToRead(argv[1]);
	    	cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
		// Size the name from its parts; a fixed-length buffer overflows on long paths.
		NEW(str0,strlen(argv[1])+strlen(Kingdom)+3,char);
		sprintf(str0,"%s_%s",argv[1],Kingdom);
		fp = open_file(str0,".cma","w");
		for(i=1; i <= Number; i++){
		  cma_typ xcma=IN_CMA[i];
		  fprintf(stderr,"------> '%s'\n",NameCMSA(xcma));
		  NEW(skip,NumSeqsCMSA(xcma)+3,BooLean);
		  data = TrueDataCMSA(xcma);  // == True sequences...
		  Int4 hits=0;
		  for(j=1; j<=NumSeqsCMSA(xcma); j++){
			e_type sE=SeqSetE(j,data);
			char kingdom=toupper(KingdomSeq(sE)); 
			if(kingdom && strchr(Kingdom,kingdom)) { skip[j]=FALSE; hits++; }
			else if(!kingdom && strchr(Kingdom,'U')){ skip[j]=FALSE; hits++; }
			else skip[j]=TRUE;
		  }
		  // families without any selected sequence are omitted from the output
		  if(hits > 0) PutSelectCMSA(fp,skip,xcma); free(skip);
		} fclose(fp);
		free(str0);
	     } else {
		// No kingdom given: write cma reordered by SortByKingdomPhylumCMSA().
		// Int4 *list=SortByKingdomCMSA(cma);
		Int4 *list=SortByKingdomPhylumCMSA(cma);
		fp = open_file(argv[1],".k_sort.cma","w");
		PutSelectOneCMSA(fp,0,list,cma);
		free(list); fclose(fp);
	     }
	    } break;
	  case 'K': 
		{
		// -K option: write to <argv[1]>.new.cma only those sequences of cma whose
		// identifier also occurs in a reference set.  The reference set is read
		// with SeqSet() from 'keep' when that is set, otherwise it is the data of
		// the alignment read from 'Keep'.
		char	alnID[108],refID[108];
		ss_type	refSet=0;
		cma_typ	refCMA=0;
		if(keep == 0){
			refCMA=ReadCMSA2(Keep,A);
			refSet=DataCMSA(refCMA);
		} else {
			refSet=SeqSet(keep,A);
		}
		data = DataCMSA(cma);  // == Fake sequences...
		N = NSeqsSeqSet(data);
		Int4 nRef = NSeqsSeqSet(refSet);
		NEW(skip,N+3,BooLean); 
		for(i=1; i<= N; i++){ skip[i] = TRUE; }
		// A sequence is retained as soon as any reference id equals its own
		// (ids are compared over at most 100 characters).
		i=1;
		while(i <= N){
		   StrSeqID(alnID,100,SeqSetE(i,data));
		   Int4 k=1;
		   while(k <= nRef){
		     StrSeqID(refID,100,SeqSetE(k,refSet));
		     if(strncmp(refID,alnID,100) == 0){ skip[i] = FALSE; }
		     k++;
		   }
		   i++;
		} 
		fp = open_file(argv[1],".new.cma","w");
		PutSelectCMSA(fp,skip,cma);
		fclose(fp); 
		free(skip); 
		// Release the reference set the same way it was obtained.
		if(refCMA != 0) TotalNilCMSA(refCMA);
		else NilSeqSet(refSet);
		} break;
	  case 'v': 
	    {
		if(!show_var){
		  // Without show_var: print every gapped sequence of cma to stderr.
		  gss_typ *gapped=gssCMSA(cma);
		  Int4 sq=1;
		  while(sq <= NumSeqsCMSA(cma)){
			gsq_typ *one=gapped->GetGSQ(sq);
			one->Put(stderr,A); 
			sq++;
		  }
		} else {
		   // With show_var: for each examined family, score every sequence
		   // against the family consensus and report the relative standard
		   // deviation (100*sqrt(variance)/mean) of those scores.
	    	   Int4 Number;
	    	   FILE *infp=OpenFileToRead(argv[1]);
	    	   cma_typ *IN_CMA=MultiReadCMSA(infp,&Number,A);
		   fclose(infp);
		   h_type rsdHG=Histogram("relative std dev. of seq-to-consensus scores",-200,100,2.0);
		   // A lone family is examined by itself; otherwise families
		   // 2..Number-1 are examined (skip root and random sets).
		   Int4 first = (Number == 1) ? 1 : 2;
		   Int4 stop  = (Number == 1) ? 2 : Number;
		   double RelStdDev,binWidth;
		   for(Int4 fam=first; fam < stop; fam++){
			cma_typ famCMA=IN_CMA[fam];
	          	e_type  csq=MkConsensusCMSA(famCMA);
			// self-score of the consensus; used as the histogram upper bound
			Int4 selfScore=0;
			for(j=1; j <= LenSeq(csq); j++){
			     unsigned char res=ResSeq(j,csq);
			     if(res){ selfScore += valAlphaR(res,res,A); }
			}
			binWidth=ceil((double)selfScore/25.0);
			h_type	scoreHG=Histogram("sequence scores",-200,selfScore,binWidth);
			Int4 nsq=1;
			while(nsq <= NumSeqsCMSA(famCMA)){
				Int4 sqScore=PseudoAlnScoreSqToCMSA(csq,nsq,famCMA);
	         		IncdHist((double)sqScore,scoreHG);
				nsq++;
			}
			RelStdDev=100.0*sqrt(VarianceHist(scoreHG))/MeanHist(scoreHG);
			IncdHist(RelStdDev,rsdHG);
			fprintf(stdout,"%d.%s %.2f\n",fam,NameCMSA(famCMA),RelStdDev);
			NilSeq(csq);
			PutHist(stdout,50,scoreHG);
			NilHist(scoreHG);
		   } 
		   // Single family: one summary line; several: the histogram of values.
		   if(Number != 1) PutHist(stdout,50,rsdHG); 
		   else fprintf(stdout,"%s RSD: %.2f\n",argv[1],RelStdDev);
		   NilHist(rsdHG);
		}
	    } break;
	  case 'V': 
	    {
		// -V option: leave-one-out analysis.  For each sequence i the alignment
		// is written without i to a temporary file, read back, and the drop in
		// RelMapCMSA() relative to the full alignment is printed to stdout.
		data = TrueDataCMSA(cma);  // == Real sequences...
	     	N = NSeqsSeqSet(data);
		double	map=RelMapCMSA(cma);
		NEW(skip,N+3,BooLean); 
	        for(i=1; i<= N; i++) {
    		 FILE    *fp=tmpfile();
		 // tmpfile() returns NULL on failure; do not write through a null stream
		 if(fp == NULL) print_error("-V option: unable to create temporary file");
		 // skip[] is set for sequence i only while the reduced alignment is written
		 skip[i]=TRUE; PutSelectCMSA(fp,skip,cma); rewind(fp); skip[i]=FALSE;
		 cma_typ tcma=ReadCMSA(fp,A); fclose(fp); 
		 fprintf(stdout,"%3d: contribution to map = %5.1f\t",i,map-RelMapCMSA(tcma));
		 PutSeqID(stdout,SeqSetE(i,data));
		 fprintf(stdout,"\n");
		 TotalNilCMSA(tcma);
		} fprintf(stdout,"\n"); free(skip);
		fprintf(stdout,"total map = %5.1f\n\n",map);
	    } break;
	  case 'w':
	   {
	     // -w option.
	     //   put_worst > 0 : drop the put_worst sequences that PseudoAlnScoreCMSA()
	     //                   scores highest against KeySeq (scores are inserted
	     //                   negated into a min-heap) and print the rest to stdout.
	     //   otherwise     : copy cma, then write <argv[1]>.new (WriteMtfCMSA) and
	     //                   <argv[1]>.new.cma (WriteCMSA).
	     // NOTE(review): the original comment says "lowest scoring homologs", but
	     // negating the key makes delminHeap() return the highest score first --
	     // confirm which is intended.
	     dh_type dH=NULL;
	     Int4    Score,I,J,N = NumSeqsCMSA(cma);
	     if(put_worst > 0) {	// Output lowest scoring homologs 
	 	  NEW(skip,N+3,BooLean); 
		  if(nBlksCMSA(cma) > 1) print_error("this option requires only 1 blk");
		  dH=dheap(N+2,4);
		  if(KeySeq < 1 || KeySeq > N) print_error("key sequence out of range");
		  for(J=1; J <= N; J++){ 
			Score = PseudoAlnScoreCMSA(KeySeq, J, cma);
			insrtHeap(J,-(keytyp)Score,dH); 
		  }
		    for(I=1; I <= put_worst && !emptyHeap(dH); I++){
			// Pop outside of assert(): with NDEBUG the assert expression is
			// not evaluated, which would leave J stale and the heap untouched.
			J=delminHeap(dH);
                	assert(J != 0); skip[J]=TRUE;
		  } Nildheap(dH); 
		  PutSelectCMSA(stdout,skip,cma); free(skip);
	     } else {
#if 0
 {	// -w option...use to test new routine...
	gss_typ		*gss = gssCMSA(cma);
	char aln[2000];
	Int4	sq,len=LengthCMSA(1,cma),pos[5],max_id_len=30;
	FILE *fp=stdout;
	for(sq=1; sq <= N; sq++){ 
	   e_type sqE= TrueSeqCMSA(sq,cma);
	   StrSeqID(aln, max_id_len, sqE);
	   fprintf(fp, "[0_(1)=%s(1){go=10000,gx=2000,pn=1000.0,lf=0,rf=0}:\n",aln);
	   fprintf(fp,"(%d)",len);
           for(Int4 i=1; i <= len; i++) fprintf(fp,"*"); 
	   fprintf(fp,"\n\n$1=%d(%d):\n",len + gss->NumIns(sq) - gss->NumDel(sq),len);
	   // PutSeqInfo2(fp,sqE);
	   StrSeqID(aln, 30, sqE);
	   fprintf(fp,">%s ",aln);
	   StrSeqDescript(aln,50, sqE);
	   fprintf(fp,"%s\n",aln);
	   Int4 x=PosSiteCMSA(1, sq, pos, cma);
	   Int4 LenStr=gss->Region(sq,aln,pos[1],len);
	   fprintf(fp,"{()%s()}*\n\n_0].\n",aln);
	}
 }
#else
		// for(Int4 o=1; o <=100; o++)
		fprintf(stderr,"map = %g\n",RelMapCMSA(cma));
		sprintf(str,"%s.new",argv[1]);
		// replace cma by its copy before writing
		cma_typ cma3=CopyCMSA(cma);
		NilCMSA(cma); cma=cma3;
                WriteMtfCMSA(str, cma, NULL);
                sprintf(str,"%s.new.cma",argv[1]);
                WriteCMSA(str, cma);
#endif
	     } 
	    } break;
	  case 'W': 
	    {
	    	Int4 Number,N,num_names,namelen,NewNumber;
	    	FILE *fp=OpenFileToRead(argv[1]);
	    	cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	fprintf(stderr,"----- %d input cma files ----\n",Number);
		if(WriteAllCMA){
		   FILE *ifp = open_file(argv[1],".ids","w");
		   for(Int4 i=1; i <= Number; i++){
		      char str[500];
		      sprintf(str,"%s_%s",argv[1],NameCMSA(IN_CMA[i]));
		      fprintf(ifp,"%s\n",str);
#if 1
		      fp = open_file(str,".cma","w");
		      // fp = open_file(NameCMSA(IN_CMA[i]),".cma","w");
		      PutCMSA(fp,IN_CMA[i]); fclose(fp);
#else
		      fp = open_file(str,".seq","w");
		      PutSeqSetEs(fp,TrueDataCMSA(IN_CMA[i])); fclose(fp);
#endif
		   } fclose(ifp);
	        } else if(WriteCMA){
		   FILE *ifp = open_file(argv[1],".ids","w");
		   for(Int4 i=1; i <= Number; i++){
		      char str[500];
		      sprintf(str,"%s_%s",argv[1],NameCMSA(IN_CMA[i]));
		      fprintf(ifp,"%s\n",str);
		      fp = open_file(str,".cma","w");
		      // fp = open_file(NameCMSA(IN_CMA[i]),".cma","w");
		      PutCMSA(fp,IN_CMA[i]); fclose(fp);
		   } fclose(ifp);
		} else if(OutFileNumber > 0){
		   if(OutFileNumber > Number) print_error("-W=<int> input error");
		   PutCMSA(stdout,IN_CMA[OutFileNumber]);
		} else {
		  FILE *lfp=0,*ufp=0;
		  for(Int4 x=1; x <= Number; x++){
		    cma_typ tcma=IN_CMA[x];
	fprintf(stderr,"%d/%d: ----- %s ----\n",x,Number,NameCMSA(tcma));
		    Int4 Lbld=0,UnLbld=0;
		    data = TrueDataCMSA(tcma); N = NSeqsSeqSet(data);
		    NEW(skip,N+3,BooLean); 
		    for(i=1; i<= N; i++){
			  if(LabeledSeq(SeqSetE(i,data))){ skip[i] = FALSE; Lbld++; }
			  else { skip[i] = TRUE; UnLbld++; }
		    }
		    if(Lbld > 0){
		    	if(lfp == 0) lfp = open_file(argv[1],".labeled.cma","w");
		    	PutSelectCMSA(lfp,skip,tcma); 
		    } 
		    if(UnLbld > 0){
		    	for(i=1; i<= N; i++) if(skip[i]) skip[i]=FALSE; else skip[i]=TRUE;
		    	if(ufp == 0) ufp = open_file(argv[1],".unlabeled.cma","w");
		    	PutSelectCMSA(ufp,skip,tcma); 
		    } free(skip);
		  } if(lfp) fclose(lfp);  if(ufp) fclose(ufp);
		} break;
	     }
	  case 'Q': 
		{
			// -Q option: obtain an ordering from SortByQueryOneCMSA() and write
			// cma in that order to <argv[1]>.sort.cma.
			Int4 *order=SortByQueryOneCMSA(cma);
			fp = open_file(argv[1],".sort.cma","w");
			PutSelectOneCMSA(fp,0,order,cma);
			fclose(fp);
			free(order);
		} break;
	  case 'q': 
	    // -q option: print to stdout every sequence of the alignment whose
	    // identifier equals 'seqid', then return the number of matches (capped
	    // at 100) from the enclosing function.  Without a seqid the option is
	    // rejected as not implemented.
	    if(seqid == 0){	// then make alignment query centric.
		  print_error("query centric mode not yet fully implemented");
#if 0		// query centric code...
		UInt4 n=1;	// first sequence.
		unsigned short ins=InsertionCMSA(n,UInt4 i,cma_typ cma);

		unsigned short InsertionCMSA(UInt4 blk,UInt4 n,UInt4 i,
        cma_typ cma);
		nins = InsertionCMSA(i,p+j-1,cma);

// BooLean IsDeletedCMSA(UInt4 n, UInt4 r, cma_typ cma);
// BooLean IsDeletedCMSA(UInt4 blk, UInt4 n, UInt4 r, cma_typ cma);

#endif
	    } else {
		char str2[108];
		data = TrueDataCMSA(cma);  // == Fake sequences...
		Int4 v,n2;
	    	N = NSeqsSeqSet(data);
		// 1. get sequence id
#if 0
		NEW(value,N+3,Int4);
		while(seqid[0] != ':') seqid++; seqid++;
		n=ParseIntegers(seqid, value, USAGE_START);
		// n=ParseIntegers2(seqid, value, USAGE_START);
		if(n > N) print_error(USAGE_START);
#if 0	// debug...
fprintf(stderr,"str='%s'; n = %d; '%s'\n",str,n,seqid);
		for(i=1; i<= n; i++){
			fprintf(stderr,"value[%d]=%d\n",i,value[i]);
		}
#endif
		if(sscanf(seqid,"|%[^:]|",str) != 1) print_error(USAGE_START);  
		sprintf(str,"gi|%s|",seqid);
#endif
		// the id is used verbatim; the match below is exact (strcmp)
		sprintf(str,"%s",seqid);
		e_type	FirstSq=0;
		// n2 counts the matching sequences; each one is printed as it is found
	        for(n2=0,i=1; i<= N; i++) {
		   e_type SubSq = SeqSetE(i,data);
		   StrSeqID(str2,100,SubSq);
		   //if(strstr(str2,seqid) != 0)
		   // if(strstr(str2,str) != 0)
		   if(strcmp(str2,str) == 0)
		   {
			n2++;
			if(n2==1) FirstSq=SubSq;
		   	PutSeq(stdout,SubSq,A); 
#if 0
// fprintf(stderr,"str2='%s'; i = %d\n",str2,i);
			for(Int4 j=1; j<= n; j++){
			   v = value[j];
		   	   if(v == n2){ PutSeq(stdout,SubSq,A); break; }
			}
#endif
		   }
		} 
		// NOTE(review): a unique match is printed a second time here, so the
		// output then holds two copies of it -- presumably deliberate; confirm.
		if(n2==1) PutSeq(stdout,FirstSq,A); 
		// free(value);
		// the match count doubles as the function's return value, limited to 100
		if(n2 > 100) return 100;
		else return n2;
	    } break;
	  case 'Z': {
		// -Z option: print to stdout the alignment restricted to sequences
		// FirstSeq..LastSeq (the upper end is clipped to the number of sequences).
		data = TrueDataCMSA(cma);  // == Fake sequences...
		N = NSeqsSeqSet(data);
		NEW(skip,N+3,BooLean);
		NEW(value,N+3,Int4);
		// start with everything excluded ...
		i=1;
		while(i <= N){ skip[i]=TRUE; i++; }
		// ... then re-include the requested range
		for(i=FirstSeq; !(i > LastSeq || i > N); i++){
			skip[i]=FALSE;
		}
		// (an earlier variant wrote to <argv[1]>.new.cma instead of stdout)
		PutSelectCMSA(stdout,skip,cma);
		free(skip);
		free(value);
	    } break;
	  case 'z': 
		{
		// -z option: write to <argv[1]>.new.cma the first sequence plus every
		// sequence whose identifier contains "gi|<number>|" for one of the
		// integers listed in seqid.
		char str2[108];
		Int4 v,n2;
		data = TrueDataCMSA(cma);  // == Fake sequences...
	    	N = NSeqsSeqSet(data);
		NEW(skip,N+3,BooLean); NEW(value,N+3,Int4);
		// 1. get sequence id
		// Parse outside of assert(): with NDEBUG the assert expression is not
		// evaluated, which would leave n unset.
		n=ParseIntegers(seqid, value, USAGE_START);
		assert(n <= N);
	        for(i=2; i<= N; i++) skip[i] = TRUE;
		skip[1]=FALSE;	// keep first sequence regardless...
	        for(n2=0,i=2; i<= N; i++) {
		   e_type SubSq = SeqSetE(i,data);
		   StrSeqID(str2,100,SubSq);
		   for(Int4 j=1; j<= n; j++){
		      sprintf(str,"gi|%d|",value[j]);
		      if(strstr(str2,str)){  // found gi number
			n2++;
			fprintf(stderr,"%d(%d): added %s to output alignment\n",
						n2,value[j],str2);
			skip[i] = FALSE; 
			break;
		      }
		   }
		} 
		fp = open_file(argv[1],".new.cma","w");
		PutSelectCMSA(fp,skip,cma);
		fclose(fp); free(skip); free(value);
		} break;
	  case 'x': 
	    // -x option: remove material from the alignment(s).  First applicable mode:
	    //   rm_flank  : print each family with overhangs removed (RemoveOverhangsCMSA).
	    //   RmSeqId   : drop sequences whose identifier contains RmSeqId.
	    //   rm_csq    : filter sequences by kind (see the per-value comments below).
	    //   otherwise : seqid is "<id>:<int list>"; drop the listed occurrences
	    //               (1st, 2nd, ...) of sequences whose identifier equals <id>.
	    if(rm_flank){		// remove flanking regions. 
	       Int4 Number;
	       fp=OpenFileToRead(argv[1]);
	       cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
	       fclose(fp);
	       for(Int4 fam=1; fam <= Number; fam++){
	         cma=IN_CMA[fam];
	         cma_typ new_cma=RemoveOverhangsCMSA(cma,TRUE);	// TRUE == add 'X' on either end if a deletion.
		 // cma_typ new_cma=RmOverHangsCMSA(cma);	// works better...
	         PutCMSA(stdout,new_cma); TotalNilCMSA(new_cma);
	       }
	    } else if(RmSeqId[0] != 0){		// strip out sequences with RmSeqId string in defline.
	       Int4 Number;
	       // fp = open_file(argv[1],".cma","r");
	       fp=OpenFileToRead(argv[1]);
	       cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	       char str2[108];
	       fp = open_file(argv[1],".rmsq.cma","w");
	       for(Int4 fam=1; fam <= Number; fam++){
	        cma=IN_CMA[fam];
		fprintf(stderr,"----> %s",NameCMSA(cma));
		data = DataCMSA(cma);  // == Fake sequences...
	    	N = NSeqsSeqSet(data);
		Int4 hits=0,miss=0;
		NEW(skip,N+3,BooLean); 
	        for(i=1; i<= N; i++) {
		   e_type SubSq = SeqSetE(i,data);
		   StrSeqID(str2,100,SubSq);
		   if(strstr(str2,RmSeqId) == 0){ skip[i]=FALSE;	// --> string absent
			miss++;
		   } else {
			// removed identifiers are listed on stdout
		   	fprintf(stdout,"%s\n",str2);
			skip[i]=TRUE; hits++; }
		} 
		fprintf(stderr,"(%d) %d hits(%.1f%c); %d misses(%.1f%c).\n",N,
				hits, 100.0*(double)hits/(double)(hits+miss),'%',
				miss,100.0*(double)miss/(double)(hits+miss),'%');
		// PutSelectCMSA(stdout,skip,cma);
		// a family is written only if at least one sequence survives
		if(miss > 0) PutSelectCMSA(fp,skip,cma); free(skip); 
	       } fclose(fp);
	    } else if(rm_csq){		// strip out consensus sequences...or other options
	       Int4 Number;
	       // fp = open_file(argv[1],".cma","r");
	       fp=OpenFileToRead(argv[1]);
	       cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A);
	       fclose(fp);
	       // the output suffix depends on the filter selected by rm_csq
	       if(rm_csq == 1) fp = open_file(argv[1],".rmcsq.cma","w");
	       else if(rm_csq == 3) fp = open_file(argv[1],".rspsq.cma","w");
	       else if(rm_csq == 4) fp = open_file(argv[1],".rmpdb.cma","w");
	       else if(rm_csq == 5) fp = open_file(argv[1],".rest.cma","w");
	       else fp = open_file(argv[1],".rcsq.cma","w");
	       for(Int4 fam=1; fam <= Number; fam++){
	        cma=IN_CMA[fam];
		data = DataCMSA(cma);  // == Fake sequences...
	    	N = NSeqsSeqSet(data);
		// NOTE(review): only TRUE is ever stored below, so this relies on NEW()
		// returning zeroed memory -- confirm.
		NEW(skip,N+3,BooLean); 
		Int4 hits=0;	// number of sequences retained
	        for(i=1; i<= N; i++) {
		  if(rm_csq == 1){		// remove consensus sequences
		   if(StringInSeqInfo("consensus",SeqSetE(i,data))) skip[i] = TRUE;
		   else hits++;
		  } else if(rm_csq == 3){	// retain swissprot and pdb only
		   e_type sE=SeqSetE(i,data);
		   if(!PdbSeq(sE) && !SwissSeq(sE)) skip[i] = TRUE; else hits++;
		  } else if(rm_csq == 4){	// remove pdb sequences (retain all others)
		   e_type sE=SeqSetE(i,data);
		   if(PdbSeq(sE)) skip[i] = TRUE; else hits++;
		  } else if(rm_csq == 5){	// remove ests and env_nr.
		   e_type sE=SeqSetE(i,data);
		   if(EST_Seq(sE)) skip[i]=TRUE; else if(PhylumSeq(sE)==0) skip[i]=TRUE;
		   else hits++;
		  } else {			// retain consensus sequences only
		   if(!StringInSeqInfo("consensus",SeqSetE(i,data))) skip[i] = TRUE;
		   else hits++;
		  }
		} 
		// PutSelectCMSA(stdout,skip,cma);
		if(hits > 0) PutSelectCMSA(fp,skip,cma);
		free(skip); 
	       } fclose(fp);
	    } else {
		char str2[108];
		Int4 v,n2;
		data = DataCMSA(cma);  // == Fake sequences...
	    	N = NSeqsSeqSet(data);
		NEW(skip,N+3,BooLean); NEW(value,N+3,Int4);
		// 1. get sequence id
		if(sscanf(seqid,"%[^:]:",str) != 1) print_error(USAGE_START);  
		// sscanf() returns 1 even when no ':' follows the id, so stop at the
		// end of the string instead of scanning past it.
		while(seqid[0] != ':'){
		   if(seqid[0] == 0) print_error(USAGE_START);
		   seqid++;
		} seqid++;
		// Parse outside of assert(): with NDEBUG the assert expression is not
		// evaluated, which would leave n unset.
		n=ParseIntegers(seqid, value, USAGE_START);
		assert(n <= N);
		// n2 numbers the occurrences of <id>; listed occurrences are removed.
	        for(n2=0,i=1; i<= N; i++) {
		   e_type SubSq = SeqSetE(i,data);
		   StrSeqID(str2,100,SubSq);
		   if(strncmp(str2,str,100) == 0){
			n2++;
			for(Int4 j=1; j<= n; j++){
			   v = value[j];
		   	   if(v == n2){ 
				fprintf(stderr,"rm %s {|%d(%d)|}\n",
				   str2,OffSetSeq(SubSq),TaxIdentSeq(SubSq));
				skip[i] = TRUE; break; 
			   }
			}
		   }
		} 
		fp = open_file(argv[1],".new.cma","w");
		PutSelectCMSA(fp,skip,cma);
		fclose(fp); free(skip); free(value);
	    } break;
#if 0
	  case 'S': print_error(USAGE_START);
		data = SeqSet(argv[1],AlphaSMA(MA));
	    	N = NSeqsSeqSet(data);
		NEW(good,N+3,BooLean);
		fp = open_file(argv[1],".new","w");
	        for(n=0,i=1; i<= N; i++) {
		   x = LenSeq(SeqSetE(i,data));
		   if(x >= min && x <= max){
			good[i] = TRUE; n++;
			PutSeq(fp,SeqSetE(i,data),A);
		   }
		} fclose(fp);
		if(n < 1) print_error("fatal!: removes all sequences!");
		MA2 = RmSMA(good,n,MA);
		fp = open_file(argv[1],".new.msa","w");
		PutSMA(fp, MA2); fclose(fp);
		NilSMA(MA2); free(good);
		break;
	  case 'p': 
		print_error(USAGE_START);
		data = SeqSet(argv[1],AlphaSMA(MA));
		fp = open_file(argv[1],".new.msa","w");
		good = PutPurgeSMA(fp,cutoff,MA); fclose(fp);
		N = NSeqsSeqSet(data);
		fp = open_file(argv[1],".new","w");
		for(i=1; i<= N; i++) {
		   if(good[i]) PutSeq(fp,SeqSetE(i,data),AlphaSMA(MA));
		}
		fclose(fp); free(good);
		break;
#endif
	  case 'I': 
	 // -I option.  First applicable mode:
	 //   both flanks >= 0 : print the sequences with the requested flank lengths.
	 //   seqid_file_name  : print the sequences selected through that file.
	 //   otherwise        : insert len_add[k] columns after position start_add[k]
	 //                      for each of the num_add requests and write the result
	 //                      to <argv[1]>.added.cma (single-block alignments only).
	 if(left_flank >= 0 && right_flank >= 0){
#if 0
            cma_typ cma0=TrimFlanksCMSA(left_flank,right_flank,cma);
	    PutCMSA(stdout,cma0); NilCMSA(cma0);
#else
	    PutSeqWithFlanksCMSA(stdout,left_flank,right_flank,cma);
#endif
	   
	 } else if(seqid_file_name){
		PutBySeqID_CMA(stdout,cma,seqid_file_name);
	 } else 
#if 1
	 {
	  assert(nBlksCMSA(cma) == 1);
	  e_type qE=SeqSetE(1,TrueDataCMSA(cma));
	  for(Int4 add=0; add < num_add; add++){
	    // subtract the first sequence's offset from the requested position
	    start_add[add] = start_add[add] - OffSetSeq(qE);
	    // 1. Split block right after start_ins
            cma_typ cma0=SplitBlkCMSA(1, start_add[add], min_len, cma);
	    // fprintf(stderr,"DEBUG2\n");
	    if(cma0 == 0){
		fprintf(stderr,"start=%d; min_len=%d\n",start_add[add],min_len);
		print_error("SplitBlkCMSA() returned NULL.");
	    }
            // WriteCMSA("debug0.cma", cma0);

	    // 2. Append 'len_add' columns to first block right after start_ins
	    Boolean right=TRUE;
	    // if(add_col < 0){ add_col = -add_col; right=FALSE; }
	    for(Int4 col=1; col<= len_add[add]; col++){
		if(InsertColCMSA(1,right,cma0) == FALSE)
			fprintf(stderr,"InsertColCMSA() inserted gaps\n");
	    	// fprintf(stderr,"DEBUG3\n");
	    }
            // WriteCMSA("debug1.cma", cma0);

	    // 3. Fuse blocks again.
	    cma_typ cma1=SimpleFuseBlksCMSA(1, 10000, cma0);
	    // fprintf(stderr,"DEBUG4\n");
	    if(cma1 == 0) print_error(USAGE_START);
	    // fprintf(stderr,"DEBUG5\n");
            // WriteCMSA("debug2.cma", cma1);

	    // the fused alignment replaces cma for the next request
	    NilCMSA(cma); NilCMSA(cma0);
	    cma=cma1;
	  }
          sprintf(str,"%s.added.cma",argv[1]);
          WriteCMSA(str, cma);
	 }
#else
	    print_error(USAGE_START);
		data = SeqSet(argv[1],AlphaSMA(MA));
	    N = NSeqsSeqSet(data);
	    NEW(ListE,N+2,e_type);
	    for(i=1; i<= N; i++) ListE[i] = SeqSetE(i,data);
	    RePutSeqsSMA(stdout, ListE, left, right, MA);
	    free(ListE);
#endif
		break;
	  case 'C': 
	    if(PutCSQ){ 
	      Int4 Number;
	      FILE *fp = 0; // open_file(argv[1],".cma","r");
	      fp=OpenFileToRead(argv[1]);
              cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      for(Int4 I=1; I <= Number; I++) PutCsqWithCMSA(stdout,IN_CMA[I]);
	      break; 
	    } else putseqs=1;
	  case 'c': 	// Output a cobbled fasta alignment with first seq as query
	   if(cdhit > 0){
		RunCDHit(stdout,cdhit,argv[1],cma);
	   } else if(put_config){
		PutConfigCMSA(stdout,cma);
	   } else if(cdtree != 0){
		cma_typ new_cma=RtnCDTreeCMSA(cdtree, cma);
		fp = open_file(argv[1],"_cdtree.cma","w"); PutCMSA(fp,new_cma); fclose(fp);
		TotalNilCMSA(new_cma);
	   } else if(mode == 'c' && cobbled){
		a_type AB=AlphabetCMSA(cma);
		e_type tE= TrueSeqCMSA(1,cma);
		PutSeq(stdout,0,tE,AB,FALSE);
		Int4    N = NumSeqsCMSA(cma);
                ss_type data = TrueDataCMSA(cma);
                Int4 J,I,n,Len;
                double prob;
                Len=LengthCMSA(1,cma);
                for(J=2; J <= N; J++){
                   e_type E=FakeSeqCMSA(J,cma);
		   printf("\n>");
                   PutSeqInfo(stdout,E);
                   for(n=0,s=1 ; s <= Len;s++){
                     if(!IsDeletedCMSA(1,s,cma)){
                        Int4 r=ResidueCMSA(1,J,s,cma);
                        if(r != UndefAlpha(A)) printf("%c",AlphaChar(r,AB));
			else printf("-");
                     }
                     Int2 ins = InsertionCMSA(1,s,cma);
                     if(ins > 0){
                        for(n=1; n <=ins; n++){
                                printf("-");
                        }
                     }
                   } printf("\n");
                } printf("\n\n");
	   } else if(mode=='C' && percent_ident==0){
                sprintf(str,"%s.new.cma",argv[1]);
                WriteCMSA(str, cma);
	   } else if(mode=='c' && main_set_file){
		fp = open_file(argv[1],".chn","w");
		PutCMSA(fp,cma);
#if 0
           	cma_typ cma2=ReadCMSA2(main_set_file,A);
		PutCMSA(fp,cma2);
#else
		FILE *ms_fp=open_file(main_set_file,"","r");
		char ms_c;
		while((ms_c=fgetc(ms_fp)) != EOF){
		  if(!isprint(ms_c) && !(ms_c == '\n')){
			fprintf(stderr,"non-printable character found: '%c'\n",ms_c); 
			print_error("Fatal error!");
		  } fprintf(fp,"%c",ms_c); 
		}
		fclose(ms_fp);
#endif
		BooLean	*skip;
		NEW(skip,NumSeqsCMSA(cma)+3,BooLean);
		for(i=1; i<=NumSeqsCMSA(cma); i++) skip[i]=TRUE;
		for(i=1; i<=NumSeqsCMSA(cma); i++){
			skip[i]=FALSE;
			PutSelectCMSA(fp,skip,cma);
			skip[i]=TRUE;
		} fclose(fp);
	   } else if(put_consensus){
	      Int4 Number;
	      FILE *fp=0;
	      fp=OpenFileToRead(argv[1]);
	      // sprintf(str,"%s.cma",argv[1]);
	      // if((fp=fopen(str,"r")) == NULL){ fp = open_file(argv[1],".mma","r"); } 
              cma_typ *IN_CMA=MultiReadCMSA(fp,&Number,A); fclose(fp);
	      for(Int4 I=1; I <= Number; I++){
	       cma=IN_CMA[I];
	       if(put_consensus==1){
	        e_type  cE=MkConsensusCMSA(cma);
		PutSeq(stdout,cE,A);
		NilSeq(cE);
	       } else { // output in cma format
#if 0
	          e_type  cE=MkConsensusCMSA(cma);
		  // fprintf(stderr,"%d: %s\n",I,NameCMSA(IN_CMA[I])); 
		  fprintf(stdout, "[0_(1)=%s(1){go=10000,gx=2000,pn=1000.0,lf=0,rf=0}:\n",
			NameCMSA(cma));
		  fprintf(stdout,"(%d)",LenSeq(cE));
		  for(Int4 i=1; i <= LenSeq(cE); i++) fprintf(stdout,"*");
		  fprintf(stdout,"\n\n$1=%d(%d):\n",LenSeq(cE),LenSeq(cE));
		  // fprintf(stdout,">consensus seq\n");
		  fprintf(stdout,">%s consensus\n",NameCMSA(cma)); 
		  char *seq; NEW(seq,LenSeq(cE) +3, char);
		  SeqToString(seq, cE, A);
		  fprintf(stdout,"{()%s()}*\n\n_0].\n",seq);
		  free(seq); 
		  NilSeq(cE);
#else
		  PutConsensusCMSA(stdout,cma);
#endif
	       } 
	      }
	   } else if(mode == 'c'){
	      Int4 min_size=3,Nset;	
	      if(min_seq_cma > 0) min_size=min_seq_cma;
	      Nset=PutClusterOfCMSA(argv[1], percent_ident,min_size,IncludeFirst,UseNumPhyla,cma);
	      status=(char)Nset;
	   } else {   // mode == 'C' and percent_ident != 0
	      char str2[108];
	      Int4 Nset;
	      e_type **ListE=GetRepSetCMSA(stderr, percent_ident,&Nset,cma);
	      for(j=i=1; i<=Nset; i++){
		Int4 sq;
		BooLean okay=FALSE;
		for(sq=0; ListE[i][sq]; sq++) ;
		if(putseqs && sq >= min_set_size){
			sprintf(str2,"%s.set%d",argv[1],j); j++;
			fp = open_file(str2,"","w"); okay = TRUE;
		} else fp=0;
		if(okay)fprintf(stdout,"Set %d(%d seqs):\n",i,sq);
		for(sq=0; ListE[i][sq]; sq++){
			if(okay) PutSeqInfo(stdout,ListE[i][sq]); 
			// PutSeqID(stdout,ListE[i][sq]); fprintf(stdout,"\n");
			if(fp) PutSeq(fp,ListE[i][sq],A);
		} if(okay) fprintf(stdout,"\n");
		if(fp) fclose(fp);
	      }
	  } break;
	  default : print_error(USAGE_START); break;
	}
	if(cma) TotalNilCMSA(cma);
	// if(data != NULL) NilSeqSet(data);
	NilAlpha(A);
	FreeSets();	// use at end of main() only!!!
	if(mode != 'T') fprintf(stderr,
		"\ttime: %d seconds (%0.2f minutes)\n",
                        time(NULL)-time1,(float)(time(NULL)-time1)/60.0);
	return status;
}

