/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "scc_typ.h"

sst_typ	*scc_typ::UnionOfSSTs(sst_typ *sst1,sst_typ *sst2)
// Build and return a new array (allocated here with NEW) whose entry at each
// position 1..Length is the union of sst1 and sst2 at that position.
// Neither input array is modified.
{
	sst_typ *merged;
	NEW(merged,Length+3,sst_typ);
	Int4 pos=1;
	while(pos <= Length){
		merged[pos]=UnionSset(sst1[pos],sst2[pos]);
		pos++;
	}
	return merged;
}

#if 0
// Disabled (compiled out by the enclosing #if 0).
// For each position 1..Length, sets both sst1[s] and sst2[s] to the union of
// the two sets at that position.
void    scc_typ::MergeTheseSSTs(sst_typ *sst1,sst_typ *sst2)
{
	for(Int4 s=1; s <= Length; s++){
		sst1[s]=UnionSset(sst1[s],sst2[s]); sst2[s]=sst1[s];
	}
}
#endif

BooLean	scc_typ::ConsistentSSTs(sst_typ *sst1, sst_typ *sst2)
// Return FALSE if, at some position 1..Length, both arrays hold a non-empty
// set and those two sets have an empty intersection; otherwise return TRUE.
// Positions where either array holds an empty set are ignored.
{
	for(Int4 s=1; s <= Length; s++){
	    // a position with a pattern in only one (or neither) array cannot conflict.
	    if(sst1[s] == 0 || sst2[s] == 0) continue;
	    if(IntersectSset(sst1[s],sst2[s]) == 0) return FALSE;
	}
	return TRUE;
}

void    scc_typ::MergeSSTs(sst_typ *sst1,sst_typ *sst2)
// Overwrite both arrays in place so that, at every position 1..Length,
// each holds the union of the two sets originally found there.
{
	for(Int4 pos=1; pos <= Length; pos++){
		sst_typ both=UnionSset(sst1[pos],sst2[pos]);
		sst1[pos]=both;
		sst2[pos]=both;
	}
}

void	scc_typ::PutOptSST(FILE *fp)
{
	Int4 i,j,n;
	fprintf(fp,"\nSettings:\n1.Set1\n");
        for(Int4 s=2; s <= NumOptimizedSets; s++){
           fprintf(fp,"%d.Set%d -P=",s,s);
           for(n=0,i=1; i <= Length; i++){
	  	sst_typ sst = OptimizedSST[s][i];
		if(sst){
			char *tmp=GetPatternFromSST(sst);
			if(n > 0) fprintf(fp,","); fprintf(fp,"%s%d",tmp,i); free(tmp); n++;
		}
	   } fprintf(fp,"\n");
        } fprintf(fp,"\n\n");
}

void	scc_typ::PutOptPattern(FILE *fp, Int4 st)
// Print one settings line for optimized set st (1..NumOptimizedSets):
// "<st>.Set<id> " where id = OptimizedSetToInSet[st], followed by "-P=" and
// the pattern (via PrintPattern) when OptimizedSST[st] is non-null, or by a
// bare newline otherwise.
{
	assert(st > 0 && st <= NumOptimizedSets);
	Int4 id=OptimizedSetToInSet[st];
	fprintf(fp,"%d.Set%d ",st,id);
	if(!OptimizedSST[st]){
		fprintf(fp,"\n");
		return;
	}
	fprintf(fp,"-P=");
	PrintPattern(fp,OptimizedSST[st]);	// PrintPattern supplies the newline.
}

Int4	scc_typ::PatternLength(sst_typ *sst)
// Return the number of positions in 1..Length at which sst holds a non-empty set.
{
	Int4 count=0;
	for(Int4 pos=1; pos <= Length; pos++){
		count += (sst[pos] != 0) ? 1 : 0;
	}
	return count;
}

Int4	scc_typ::LengthPattern(sst_typ *sst)
// Count the non-empty entries of sst over positions 1..Length.
{
	Int4 pos=1,total=0;
	while(pos <= Length){
		if(sst[pos] != 0) total++;
		pos++;
	}
	return total;
}


void	scc_typ::PutPatternFromSST(FILE *fp,sst_typ *xsst)
{
	for(Int4 s=1; s <= Length; s++){
	     if(xsst[s]){
		char *tmp=GetPatternFromSST(xsst[s]); fprintf(fp,"%s%d ",tmp,s); free(tmp); 
	     }
	} fprintf(fp,"\n");
}

Int4	scc_typ::GetPatternScore(sst_typ *sst, Int4 sq)
// Count how many of the pattern positions 1..Length are matched by sequence
// sq of the (single-block) alignment cma: position i counts when the residue
// at offset i-1 from SitePos(1,sq,1,...) in the sequence is a member of sst[i].
// Positions with an empty sst[i] are tested like any other.
{
	assert(nBlksCMSA(cma) ==1);
	Int4	N=NumSeqsCMSA(cma);
	assert(sq > 0 && sq <= N);
	unsigned char *seq = SeqSeqSet(sq,DataCMSA(cma));
	const Int4 start = SitePos(1,sq,1,SitesCMSA(cma));
	Int4	hits=0;
	for(Int4 col=1; col <= Length; col++){
	   if(MemSset(seq[start+col-1],sst[col])) hits++;
	}
	return hits;
}

void	scc_typ::PrintPattern(FILE *fp, sst_typ *xsst)
{
    Int4 n=0;
    for(Int4 j=1; j <= Length; j++){
	if(xsst[j]){
	  char *tmp=GetPatternFromSST(xsst[j]);
	  if(n > 0) fprintf(fp,",");
	  fprintf(fp,"%s%d",tmp,j); n++; free(tmp);
	}
    } fprintf(fp,"\n");
}

void    scc_typ::PrintOptimizedCSQ(FILE *fp, Int4 i,Int4 id)
// print out a concensus sequence as a cma file
{
        fprintf(fp, "[0_(1)=Set%d(1){go=10000,gx=2000,pn=1000.0,lf=0,rf=0}:\n",id);
        fprintf(fp,"(%d)",Length);
        for(Int4 j=1; j <= Length; j++) fprintf(fp,"*");
        fprintf(fp,"\n\n$1=%d(%d):\n",Length,Length);
        fprintf(fp,">Set%d consensus seq\n{()",id);
        for(Int4 j=1; j <= Length; j++){
		sst_typ sst=OptimizedSST[i][j];
		if(sst && !MemSset(OptimizedCSQ[i][j],sst)){
		  	char *tmp=GetPatternFromSST(sst);
			fprintf(stderr,"%d(%d): %c not in %s\n",
				id,i,AlphaChar(OptimizedCSQ[i][j],AB),tmp);
			free(tmp);
		}
		fprintf(fp,"%c",AlphaChar(OptimizedCSQ[i][j],AB));
	}
        fprintf(fp,"()}*\n\n_0].\n");
}

double	scc_typ::PatternIntersection(FILE *fp, Int4 Length, double *lprI, double *lprJ,
							sst_typ *sstI, sst_typ *sstJ,a_type AB)
// Score the similarity of two patterns (sstI with per-position values lprI,
// sstJ with lprJ) over positions 1..Length.
//   - Returns 0.0 at once if lprI[0] or lprJ[0] is <= 0.
//   - Each pattern's values are summed over the positions where that pattern
//     is non-empty; a position's value divided by that sum is its "fraction".
//   - For every position where both patterns are non-empty and intersect, the
//     sum of the two fractions is added with weight 1.0 (identical sets),
//     0.5 (one set a subset of the other) or 0.25 (partial overlap).
//   - The result is the weighted sum times 10.
// fp and AB are not used by this routine.
{
	Int4	s;
	double	score=0.0,TotalLPR_I=0.0,TotalLPR_J=0.0;

	if(lprI[0] <= 0.0 || lprJ[0] <= 0.0) return 0.0;
	for(s=1; s <= Length; s++){
	    if(sstI[s]) TotalLPR_I += lprI[s];
	    if(sstJ[s]) TotalLPR_J += lprJ[s];
	}
	// A zero total would make the fractions below inf or NaN; treat this as
	// "no measurable similarity" instead of returning a non-finite score.
	if(TotalLPR_I == 0.0 || TotalLPR_J == 0.0) return 0.0;
	for(s=1; s <= Length; s++){
	    if(!sstI[s] || !sstJ[s]) continue;		// pattern missing in one of them.
	    if(IntersectSset(sstI[s],sstJ[s]) == 0) continue;	// disjoint: no contribution.
	    double fract_lprI = lprI[s]/TotalLPR_I;
	    double fract_lprJ = lprJ[s]/TotalLPR_J;
	    if(sstI[s] == sstJ[s]){			// identical sets.
		score += fract_lprI + fract_lprJ;
	    } else if(SubSset(sstI[s],sstJ[s]) || SubSset(sstJ[s],sstI[s])){
		score += 0.5*(fract_lprI + fract_lprJ);	// subset/superset.
	    } else {					// overlapping but not subset/superset.
		score += 0.25*(fract_lprI + fract_lprJ);
	    }
	}
	return score*10.0;
}

sst_typ	*scc_typ::PatternIntersection(FILE *fp, Int4 N, sst_typ **in_sst,Int4 &Score)
// Return a new array (allocated here with NEW) holding, at each position
// 1..Length, the intersection of in_sst[1..N] at that position -- but only
// where every one of the N sets is non-empty and the intersection itself is
// non-empty; all other entries are left as NEW created them.
// Score is set to the number of positions stored.  If fp is non-null, the
// stored positions are also written to fp as comma-separated
// "<residues><position>" items (no trailing newline).
// With N < 1 there is nothing to intersect: nothing is stored and Score is 0.
{
	Int4	i,s,r,score=0;
	sst_typ sst=0,*ISST;

	NEW(ISST,Length+3,sst_typ);
	if(N < 1){		// avoid reading in_sst[1] when no sets were passed in.
		Score=0; return ISST;
	}
	for(s=1; s <= Length; s++){
	    for(i=1; i<=N; i++) if(!in_sst[i][s]) break;
	    if(i <= N) continue;			// some set lacks a pattern here.
	    sst=in_sst[1][s];
	    for(i=2; i<=N; i++) sst=IntersectSset(sst,in_sst[i][s]);
	    if(sst == 0) continue;			// nothing in common.
	    ISST[s]=sst;
	    if(fp){	// print out the common residue(s) and the position.
		if(score > 0) fprintf(fp,",");
		for(r=1; r <= nAlpha(AB); r++){
		    if(MemSset(r,sst)) fprintf(fp,"%c",AlphaChar(r,AB));
		}
		fprintf(fp,"%d",s);
	    }
	    score++;
	}
	Score=score;
	return ISST;
}

sst_typ	*scc_typ::UnionizePatterns(FILE *fp, Int4 N, sst_typ **in_sst,Int4 &Score)
// Return a new array (allocated here with NEW) of combined patterns over
// positions 1..Length.  At each position where at least two of in_sst[1..N]
// are non-empty:
//   1. a graph on the N sets gets an edge for every pair whose sets intersect;
//   2. edges (i,j) are added whenever i and j are both joined to a common k,
//      repeating until a full sweep adds nothing;
//   3. grf.Bron_Kerbosch() is called and, if it reports any cluster, the union
//      of the input sets of the vertices in clique[1] is stored at that position.
// If fp is non-null the stored sets are written as comma-separated
// "<residues><position>" items.  Score is set to the total number of residues
// (over 1..nAlpha(AB)) contained in the stored sets.
#if 0
ToDO:
	1. Check cma to see how many of the sequences conserve each pattern at each position.
	2. Use exact test to combine patterns?
#endif
{
	Int4	i,j,k,n,num,v,s,r,score;
	sst_typ sst=0,*USST;

	NEW(USST,Length+3,sst_typ);
	for(score=0,s=1; s <= Length; s++){
	    for(num=0,i=1; i<=N; i++){
		 if(in_sst[i][s]) num++;	// count # patterns at this position.
	    }
	    if(num > 1){	// at least two sets have a pattern.
	        grf_typ grf(N+1);
	        for(i=1; i<=N; i++){
	          for(j=i+1; j<=N; j++){
		    sst=IntersectSset(in_sst[i][s],in_sst[j][s]);
		    if(sst) grf.AddEdge(i,j);
		  }
		}
		// take the transitive closure of relationships...
		do {
	         for(num=0,i=1; i<=N; i++){
	          for(j=i+1; j<=N; j++){
		     if(grf.IsEdge(i,j)) continue;
	             for(k=1; k<=N; k++){	// i.e., !grf.IsEdge(i,j)
		        if(k==i || k==j) continue;
		        if(grf.IsEdge(i,k) && grf.IsEdge(j,k)){ num++; grf.AddEdge(i,j); break; }
		     }
		  }
		 }
		} while(num > 0);	// if added an edge, then need to check for more.
	        Int4 numclust=0;
		// NOTE(review): the meaning of the Bron_Kerbosch arguments and the
		// indexing of the returned array are not visible here -- confirm.
		vst_typ **clique=grf.Bron_Kerbosch(N,100,&numclust,0.01,0,N);
		if(numclust > 0){	// Take the 1st (a largest) clique...
		  for(sst=0,j=0; j < clique[1]->Size(); j++){
		     v=clique[1]->Vertex(j);
		     sst=UnionSset(sst,in_sst[v][s]);
		  } USST[s]=sst;
		  if(score > 0 && fp) fprintf(fp,","); 
		  for(n=0,r=1; r <= nAlpha(AB); r++){
			if(MemSset(r,sst)){ n++; if(fp) fprintf(fp,"%c",AlphaChar(r,AB)); }
		  } if(fp) fprintf(fp,"%d",s); 
		  score += n;
		// NOTE(review): the cleanup below deletes clique[0..numclust-1], while
		// the code above treats clique[1] as the first clique.  If the array is
		// 1-based, clique[numclust] is never deleted; if it is 0-based, clique[1]
		// is the second clique (and invalid when numclust == 1).  Verify against
		// grf_typ::Bron_Kerbosch.
		} for(j=0; j< numclust; j++) delete clique[j]; free(clique);
	   }
	} Score=score;
	return USST;
}

sst_typ	*scc_typ::ConsensusPattern(FILE *fp, Int4 N, sst_typ **in_sst,Int4 &Score)
// Return a new array (allocated here with NEW) of consensus patterns over
// positions 1..Length.  A position receives the union of in_sst[1..N] there
// only when all of the following hold:
//   - at least two of the N sets are non-empty at that position;
//   - no two non-empty sets at that position are disjoint;
//   - the number of sets that overlap some other set is a clear majority,
//     i.e. the integer quotient N/(that number) is less than 2.
// Score is set to the number of positions stored.  If fp is non-null, each
// stored position is written as "<residues><position>", comma separated.
{
	sst_typ	*USST;
	Int4	npos=0;

	NEW(USST,Length+3,sst_typ);
	for(Int4 s=1; s <= Length; s++){
	    Int4 present=0;
	    for(Int4 a=1; a <= N; a++){ if(in_sst[a][s]) present++; }
	    if(present <= 1) continue;		// need at least two patterns here.

	    BooLean conflict=FALSE;		// some pair of patterns is disjoint.
	    Int4    supported=0;		// # patterns overlapping another one.
	    for(Int4 a=1; a <= N; a++){
		if(in_sst[a][s] == 0) continue;
		BooLean overlaps=FALSE;
		for(Int4 b=1; b <= N; b++){
		    if(b == a || in_sst[b][s] == 0) continue;
		    if(IntersectSset(in_sst[a][s],in_sst[b][s]) != 0) overlaps=TRUE;
		    else conflict=TRUE;
		}
		if(overlaps) supported++;
	    }
	    if(conflict || supported == 0) continue;	// skip this position.
	    if((N/supported) >= 2) continue;		// require a clear majority pattern.

	    sst_typ merged=in_sst[1][s];
	    for(Int4 a=2; a <= N; a++) merged=UnionSset(merged,in_sst[a][s]);
	    USST[s]=merged;
	    if(fp){
		if(npos > 0) fprintf(fp,",");
		for(Int4 r=1; r <= nAlpha(AB); r++){
		    if(MemSset(r,merged)) fprintf(fp,"%c",AlphaChar(r,AB));
		}
		fprintf(fp,"%d",s);
	    }
	    npos++;
	}
	Score=npos;
	return USST;
}

char	*scc_typ::GetPatternFromSST(sst_typ sst)
// Return a newly allocated (via AllocString) null-terminated string holding
// the letters, in order r = 1..nAlpha(AB), of the residues that are members
// of sst.  Returns 0 when sst is empty.  Callers in this file release the
// result with free().
// At most 29 letters are emitted: the scratch buffer is fixed-size, so
// any further members are dropped rather than written past its end.
{
   char tmp[30];
   const Int4 maxlen=(Int4)sizeof(tmp)-1;	// leave room for the terminator.
   Int4 c=0;
   if(sst == 0) return 0;
   for(Int4 r=1; r <= nAlpha(AB) && c < maxlen; r++){
	if(MemSset(r,sst)){ tmp[c]=AlphaChar(r,AB); c++; }
   }
   tmp[c]=0;
   return AllocString(tmp);
}

double	scc_typ::SimilarPatterns(FILE *fp,double cutoff,Int4 I,Int4 J)
// Taken from ptrn[Iter]->SimilarPatterns(fp,5.0,i,j,ptrn[Jter],AB);
{
	Int4	i,j,r;
	double	N;
	char	IsMatch,str[5000],str2[100],str1[100];

	assert(I > 0 && I <= NumSets && J > 0 && J <= NumSets);
// 	if(LPR[I][0] <= 0 || LPR[J][0] <= 0) return -1.0;
	str[0]=0; N=0.0; str1[0]=0; str2[0]=0;
// dh_type dH=dheap(Length+2,4);
        for(i=1; i <= Length; i++){
// if(SST[I][i] != 0 && SST[J][i] != 0) insrtHeap(i,-(keytyp)LPR[I][i],dH);
// 	}
// while((i=delminHeap(dH)) != 0){
		if(SST[I][i] == 0 || SST[J][i] == 0) continue;
		sst_typ seti=SST[I][i], setj=SST[J][i]; IsMatch=0;
		if(setj == seti) IsMatch='I';    // identical.
		else  if(SubSset(seti,setj) || SubSset(setj,seti)) IsMatch='S';	// subset.
		else if(IntersectSset(seti,setj) != 0) IsMatch='O';	// Overlap.
		if(IsMatch){
			switch (IsMatch){
			  case 'I': N += 2.0; sprintf(str2," !\n"); break;
			  case 'S': N += 1.0; sprintf(str2,"\n"); break;
			  case 'O': N += 0.5; sprintf(str2," ?\n"); break;
			  default: print_error("scc_typ::SimilarPatterns( ) error!");
			}
			// Int4 rowI=RowID(I)+2,rowJ=RowID(J)+2;
			Int4 rowI=RowID(I),rowJ=RowID(J);
			char *strJ=GetPatternFromSST(setj),*strI=GetPatternFromSST(seti);
			sprintf(str1,"%d%s%d vs %d%s%d%s",rowI,strI,i,rowJ,strJ,i,str2);
		        strcat(str,str1); free(strI); free(strJ);
		}
	}
	if(fp && N == 0.0 && cutoff <=0.0) fprintf(fp,"(no pattern similarity)\n");
	if(fp && N >= cutoff) fprintf(fp,"%s  score = %.1f\n\n",str,N);
// Nildheap(dH);
	return N;
}

