/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

#include "edc_typ.h"

void	dci_typ::PutVSI(FILE *fp,Int4 End)
{
	// output the pairs with the best DCA scores. 
	Int4 s1,s2,i; End=MINIMUM(Int4,End,this->N);
	for(i=1; i <= End; i++){ 
		char c1,c2;
		float dd,dc; rtn(i,c1,s1,c2,s2,dd,dc);
		fprintf(fp,"%c%d.W,%c%d.W",c1,s1,c2,s2);
		if(i < End) fprintf(fp,","); else fprintf(fp,"\n");
	}
}

#if 0	// PyMOL script...
	Q182=S214,A190=V203,L118=M186.
	set dash_width, 0
	set dash_round_ends, off
	dist ec171, chain A and name ca and resid 76, chain A and name ca and resid 56, label=0
	set dash_radius, 0.500, ec1
	set dash_gap, 0.075, ec1
	set dash_length, 0.925, ec1
	color green, ec1
#endif

void	dci_typ::PyMolLines(FILE *fp,Int4 End, char chn, char adj_chn)
{
	// Write PyMOL commands to fp that draw a dashed "dist" object between the
	// C-alpha atoms of each of the first End stored pairs (End is capped at N).
	//   chn:     chain used for the first residue of every pair, and for the
	//            second residue as well when the stored second chain is 0.
	//   adj_chn: when this is not a whitespace character, pairs whose two
	//            residues lie on the same chain are skipped.
	// Pairs spanning two chains at a distance > 15.0 are skipped too.
	Int4	s1,s2,i; End=MINIMUM(Int4,End,this->N);
	float	radius=0.300,inc=0.005;
	char	c1,c2,chn2,str[25];
	float	dd,dc;
	for(i=1; i <= End; i++){
	  // The dash radius shrinks by inc for every rank visited (skipped pairs
	  // included) until it reaches a floor of 0.250.
	  radius = radius - inc;
	  if(radius <  0.250) radius=0.250;
	  rtn(i,c1,s1,c2,s2,dd,dc,chn2);
	  if(chn2==0) chn2=chn;
	  if(chn != chn2 && dd > 15.0) continue;	// ignore couplings at great distances between chains.
	  // isspace() is undefined for negative values other than EOF, so the
	  // plain char is converted to unsigned char first.
	  if(!isspace((unsigned char) adj_chn) && chn == chn2) continue;  // ignore couplings internal to adjacent chains.
	  if(0) fprintf(stderr,"  %d(%d):%c%d%c=%c%d%c=%.3f; adj='%c'\n",
			i,rank[i],c1,s1,chn,c2,s2,chn2,dd,adj_chn);
	  // Name of the PyMOL distance object; bounded so that unexpectedly wide
	  // residue numbers cannot overrun str.
	  snprintf(str,sizeof(str),"dc%d%c_%d%c",s1,chn,s2,chn2);
	  fprintf(fp,"dist %s, chain %c and name ca and resid %d, ",str,chn,s1);
	  fprintf(fp,"chain %c and name ca and resid %d, label=0\n",chn2,s2);
	  if(dd > 10.0){	// distant pairs: thin, widely spaced dashes.
		fprintf(fp,"set dash_radius, 0.1, %s\n",str);
	  	fprintf(fp,"set dash_gap, 0.4, %s\n",str);
	  } else {		// close pairs: rank-dependent radius, tight dashes.
		fprintf(fp,"set dash_radius, %.3f, %s\n",radius,str);
	  	fprintf(fp,"set dash_gap, 0.075, %s\n",str);
	  }
	  fprintf(fp,"set dash_length, 0.925, %s\n",str);
	  // between-chain pairs are colored differently from within-chain pairs.
	  if(chn != chn2) fprintf(fp,"color lightorange, %s\n",str);
	  else fprintf(fp,"color lime, %s\n",str);
	  // fprintf(fp,"color palecyan, %s\n",str);	// disabled alternative for dd > 10.0
	  // fprintf(fp,"color lightblue, %s\n",str);
	  // fprintf(fp,"%c%d%c=%c%d%c",rank[i],c1,s1,c2,s2,dd);
	  // if(i < End) fprintf(fp,","); else fprintf(fp,"\n");
	  // fprintf(fp,"cmd.disable(\"%s\")\n",str);
	}
#if 0	// this does not work when treating distance lines as objects.
	fprintf(fp,"cmd.create(\"Couplings\",\"");
	for(i=1; i < End; i++){ 
		rtn(i,c1,s1,c2,s2,dd,chn2); 
		if(chn2==0) chn2=chn;
		fprintf(fp,"dc%d%c_%d%c ", s1,chn,s2,chn2);
	} rtn(i,c1,s1,c2,s2,dd,chn2); 
	if(chn2==0) chn2=chn;
	fprintf(fp,"dc%d%c_%d%c\")\n",s1,chn,s2,chn2);

	fprintf(fp,"cmd.order(\"");
	for(i=1; i < End; i++){ 
		rtn(i,c1,s1,c2,s2,dd,chn2); 
		if(chn2==0) chn2=chn;
		fprintf(fp,"dc%d%c_%d%c ", s1,chn,s2,chn2);
	} rtn(i,c1,s1,c2,s2,dd,chn2); 
	if(chn2==0) chn2=chn;
	fprintf(fp,"dc%d%c_%d%c\",",s1,chn,s2,chn2);
	fprintf(fp,"\"no\",\"bottom\")\n");
#endif
}

void	dci_typ::PutBest(FILE *fp,Int4 End, set_typ SetB, char chn,FILE *pfp,
		BooLean PutVSI)
{
	// output the pairs with the best DCA scores. 
	char	c1,c2,chn2;
	float	dd,dc; 
	Int4 s1,s2,i; End=MINIMUM(Int4,End,this->N);
	fprintf(fp,"rank\tRes1\tRes2\tdist\tDC-score\n");
	for(i=1; i <= End; i++){ 
        	char x1=' ',x2=' ';
		rtn(i,c1,s1,c2,s2,dd,dc,chn2); 
		if(SetB){ if(MemberSet(s1,SetB)) x1='*'; if(MemberSet(s2,SetB)) x2='*'; }
		fprintf(fp,"%d\t%c%d%c\t%c%d%c\t%.2f\t%.3f",rank[i],c1,s1,x1,c2,s2,x2,dd,-dc);
		if(x1=='*' && x2=='*') fprintf(fp," <-!\n"); else fprintf(fp,"\n");
	}
	if(PutVSI){
	    for(i=1; i <= End; i++){ 
		rtn(i,c1,s1,c2,s2,dd,dc);
		fprintf(fp,"%c%d.W,%c%d.W",c1,s1,c2,s2);
		if(i < End) fprintf(fp,","); else fprintf(fp,"\n");
	    }
	}
#if 1
	char str[25];
	// FILE *pfp=open_file(dca_file,".pml","w");
    if(pfp){
	float radius=0.300,inc=0.005;
	for(i=1; i <= End; i++){ 
	  rtn(i,c1,s1,c2,s2,dd,dc,chn2);
	  if(chn2==0) chn2=chn;
	  // fprintf(pfp,"dist dc_%c%d%c_%c%d%c ",c1,s1,chn,c2,s2,chn2);
	  fprintf(pfp,"dist dc%d%c_%d%c, ",s1,chn,s2,chn2);
	  fprintf(pfp,"chain %c and name ca and resid %d, ", chn,s1);
	  fprintf(pfp,"chain %c and name ca and resid %d, label=0\n",chn2,s2);
	  sprintf(str,"dc%d%c_%d%c",s1,chn,s2,chn2);
	  if(dd > 10.0){ 
		fprintf(pfp,"set dash_radius, 0.1, %s\n",str);
	  	fprintf(pfp,"set dash_gap, 0.4, %s\n",str);
	  } else {
		fprintf(pfp,"set dash_radius, %.3f, %s\n",radius,str);
	  	fprintf(pfp,"set dash_gap, 0.075, %s\n",str);
	  } radius = radius - inc;
	} fprintf(pfp,"set dash_color, red\n"); 
    } // fclose(pfp);
#endif
}

Int4	dci_typ::PutSameDiff(FILE *fp,dci_typ *that, char mode,set_typ SetB)
{
	// mode == 'D': output the pairs of this that do not occur in that;
	// any other mode: output the pairs of this that do occur in that.
	// Pairs are compared by their packed array values.
	// Returns the number of pairs output.
	const bool want_absent = (mode == 'D');
	Int4	count=0;
	for(Int4 a=1; a <= this->N; a++){
		bool	found=false;
		for(Int4 b=1; b <= that->N; b++){
			if(this->array[a] == that->array[b]){ found=true; break; }
		}
		// report when (absent && want_absent) or (present && !want_absent).
		if(found == want_absent) continue;
		this->put(fp,a,-1,SetB);
		count++;
	}
	return count;
}

Int4	dci_typ::PutDiff(FILE *fp,dci_typ *that, set_typ SetB, double cutoff)
{
	// Output the pairs of this (among its first min(this->N,that->N) entries)
	// that do not occur anywhere in that and whose distance is not below cutoff.
	// When SetB is given, residues that are members of SetB are flagged with '*'
	// and a summary line plus two sets are printed after the pairs.
	// Returns the number of pairs counted.
	// TODO (from the original author): sort these using a heap; use rtn() to get values.
	Int4	count=0;
	set_typ	partnersOutB=0;	// non-members of SetB paired with a member.
	set_typ	hitsInB=0;	// members of SetB seen in a counted pair.
	if(SetB){
		partnersOutB=CopySet(SetB); ClearSet(partnersOutB);
		hitsInB=CopySet(partnersOutB);
	}
	for(Int4 a=1; a <= this->N && a <= that->N; a++){
		bool	found=false;
		for(Int4 b=1; b <= that->N; b++){
			if(this->array[a] == that->array[b]){ found=true; break; }
		}
		if(found) continue;

		assert(a > 0 && a <= N);
		char	resA,resB,flagA=' ',flagB=' ';
		Int4	posA,posB;
		float	dist_a,score_a;
		rtn(a,resA,posA,resB,posB,dist_a,score_a);
		if(dist_a < cutoff) continue;
		if(SetB){
			if(MemberSet(posA,SetB)) flagA='*';
			if(MemberSet(posB,SetB)) flagB='*';
			const bool inA=(flagA == '*'),inB=(flagB == '*');
			if(inA) AddSet(posA,hitsInB);
			if(inB) AddSet(posB,hitsInB);
			if(inA && !inB) AddSet(posB,partnersOutB);
			if(inB && !inA) AddSet(posA,partnersOutB);
		}
		// With SetB the distance test is repeated; it can only fail here
		// for a distance that compares neither below nor at/above cutoff.
		if(!SetB || dist_a >= cutoff){
			fprintf(fp,"%d. %c%d%c vs %c%d%c: %.2f\n",
				rank[a],resA,posA,flagA,resB,posB,flagB,dist_a);
		}
		count++;
	}
	if(SetB){
		fprintf(fp,"%d pairs: # with 1 residue from BPPS: %d; # with both: %d/%d\n",
				count,CardSet(partnersOutB),CardSet(hitsInB),CardSet(SetB));
		PutSet(fp,partnersOutB); PutSet(fp,hitsInB);
		NilSet(partnersOutB); NilSet(hitsInB);
	}
	return count;
}

Int4	dci_typ::PutSame(FILE *fp,Int4 NN,dci_typ **that)
{
	// Output every pair of this that also occurs in each of that[1..NN]
	// (note the 1-based indexing of that); returns the number of pairs output.
	char	*shared; NEW(shared, this->N+5, char);
	Int4	a,b,t;
	for(a=1; a <= this->N; a++) shared[a]=1;	// shared until some that[t] lacks the pair.
	for(t=1; t <= NN; t++){
		for(a=1; a <= this->N; a++){
			bool	found=false;
			for(b=1; b <= that[t]->N; b++){
				if(this->array[a] == that[t]->array[b]){ found=true; break; }
			}
			if(!found) shared[a]=0;
		}
	}
	Int4	count=0;
	for(a=1; a <= this->N; a++){
		if(shared[a]){ this->put(fp,a); count++; }
	}
	free(shared);
	return count;
}

Int4	dci_typ::add(Int4 rnk, char c1, Int2 s1, char c2, Int2 s2, float dd,float dc,char chn2)
{
	// Append one pair and return its (1-based) index.
	// The two residue characters and positions are packed into one UInt8:
	//   bits 40..47 = c1, bits 32..39 = c2, bits 16..31 = s1, bits 0..15 = s2.
	// rnk, chn2, dd and dc are stored in the parallel arrays
	// rank[], chain2[], dist[] and DCscore[].
	// print_error() is called when the array already holds max entries.
	if(N == max) print_error("FATAL: dci_typ array is full");
	// Each field is masked to its width before shifting: converting a negative
	// char or Int2 to UInt8 sets all of its high bits, which would otherwise
	// be OR-ed over the neighbouring fields.
	UInt8 X = ((UInt8) c1 & 0xFF) << 40;
	X = X | ((UInt8) c2 & 0xFF) << 32;
	X = X | ((UInt8) s1 & 0xFFFF) << 16;
	X = X | ((UInt8) s2 & 0xFFFF);
	N++; array[N] = X; rank[N]=rnk; chain2[N]=chn2; dist[N]=dd; DCscore[N]=dc;
	return N;
}

void	dci_typ::Put(FILE *fp,Int4 n)
{
	// n in 1..N: print pair n.
	// n == 0:    print every pair, then the string of non-zero characters
	//            returned by put() (a newline is inserted whenever its length
	//            is a multiple of 100), then the graph summary via PutGraph(fp).
	// Any other n: do nothing.
	// NOTE(review): the pair lines go to stderr regardless of fp; only
	// PutGraph() receives fp.  Kept as in the original -- confirm this is intended.
	Int4	i,j;
	if(n > 0 && n <= this->N) this->put(stderr,n);
	else if(n==0){
	  // At most two characters are stored per pair, plus the terminating NUL.
	  // (The former size of 2*N left no room for the NUL when N == 0, and the
	  // local was named 'array', hiding the member of the same name.)
	  char *marks; NEW(marks,this->N * 2 + 2, char);
	  for(i=1,j=0; i <= this->N; i++){
		char c=this->put(stderr,i);
		if(c){ marks[j]=c; j++; }
		if(j%100==0){ marks[j]='\n'; j++; }
	  } marks[j]=0; fprintf(stderr,"%s\n",marks); free(marks);
	  this->PutGraph(fp);
	}
}

Int4	dci_typ::PutGraph(FILE *fp)
// Treat the stored pairs as edges of an undirected graph over residue positions.
// Three histograms (pair distances, node degree, shared-neighbor counts) go to
// stderr; a one-line total goes to fp.  Always returns 0.
{
	char	resA,resB,chainB;
	Int4	posA,posB,k,v,w,top=0;
	float	dist_k,score_k;

	// Pass 1: distance histogram; find the largest residue position.
	h_type	hist=Histogram("distances",0,50,0.5);
	for(k=1; k <= this->N; k++){
		this->rtn(k,resA,posA,resB,posB,dist_k,score_k,chainB);
		IncdHist(dist_k,hist);
		top=MAXIMUM(Int4,top,posA);
		top=MAXIMUM(Int4,top,posB);
	}
	PutHist(stderr,60,hist); NilHist(hist);

	// Pass 2: one neighbor set per position (seeded with the position itself)
	// and a disjoint-set structure linking the two ends of every pair.
	set_typ	touched=MakeSet(top+2);
	set_typ	*nbrs=0; NEW(nbrs,top+2,set_typ);
	for(v=1; v <= top; v++){ nbrs[v]=MakeSet(top+2); AddSet(v,nbrs[v]); }
	ds_type	comps=DSets(top+2);
	Int4	numEdges=0;
	for(k=1; k <= this->N; k++){
		this->rtn(k,resA,posA,resB,posB,dist_k,score_k,chainB);
		AddSet(posB,nbrs[posA]); AddSet(posA,nbrs[posB]);
		numEdges++;
		Int4 rootA=findDSets(posA,comps);
		Int4 rootB=findDSets(posB,comps);
		linkDSets(rootA,rootB,comps);
	}

	// Node degrees: a position counts as a node when its set holds more than itself.
	hist=Histogram("node degree",0,100,1);
	set_typ	active=MakeSet(top+2); ClearSet(active);
	Int4	numActive=0;
	for(v=1; v <= top; v++){
		if(CardSet(nbrs[v]) <= 1) continue;
		IncdHist(CardSet(nbrs[v])-1,hist);
		numActive++;
		UnionSet(touched, nbrs[v]); AddSet(v,active);
	}
	fprintf(fp,"Total = %d; %d nodes; %d edges; N=%d.\n",
			numActive,CardSet(touched),numEdges,this->N);
	fflush(fp);
	PutHist(stderr,60,hist); NilHist(hist);

	// For every pair of active positions, tally the size of the intersection
	// of their neighbor sets when it exceeds 1.
	hist=Histogram("contacting node intersections",0,100,1);
	for(v=1; v < top; v++){
		if(!MemberSet(v,active)) continue;
		for(w=v+1; w <= top; w++){
			if(!MemberSet(w,active)) continue;
			Int4 common=CardInterSet(nbrs[v],nbrs[w]);
			if(common > 1) IncdHist(common,hist);
		}
	}
	NilSet(active);
	PutHist(stderr,60,hist); NilHist(hist);

	// release everything allocated above.
	for(v=1; v <= top; v++) NilSet(nbrs[v]);
	free(nbrs);
	NilSet(touched); NilDSets(comps);
	return 0;
}

Int4	dci_typ::rtn(Int4 n, char &c1, Int4 &s1, char &c2, Int4 &s2, float &dd, float &dc, char &chn2)
{
	// Retrieve entry n.  Returns 1 on success; returns 0 and leaves every
	// output argument untouched when n lies outside 1..N.
	if(n < 1 || n > N) return 0;

	// unpack the two residue characters and the two positions from array[n].
	const UInt8 packed=array[n];
	c1 = (char) ( cmask & (packed >> 40));
	c2 = (char) ( cmask & (packed >> 32));
	s1 = (Int4) ( smask & (packed >> 16));
	s2 = (Int4) ( smask & packed);

	// the remaining fields come from the parallel arrays.
	dd = dist[n];
	dc = DCscore[n];
	chn2 = chain2[n];
	return 1;
}

char	dci_typ::put(FILE *fp, Int4 n, float DD,set_typ SetB)
{
	assert(n > 0 && n <= N);
        char	rtrn=0,c1,c2,x1=' ',x2=' ',chn2;
	Int4 s1,s2;
	float dd,dc; this->rtn(n,c1,s1,c2,s2,dd,dc,chn2); if(DD > 0) dd=DD;
	if(SetB){ 
		if(MemberSet(s1,SetB)) x1='*'; 
		if(MemberSet(s2,SetB)) x2='*';
	}
	if(!SetB || dd >= 0.33 || 1){
		if(chn2){
		  fprintf(fp,"%d. %c%d%c vs %c%d%c%c: %.2f %.5f\n",
			rank[n],c1,s1,x1,c2,s2,chn2,x2,dd,-dc);
		} else {
#if 0	// DEBUG
		   // if(0 || (s1 <= 304 && s2 > 304 && dd < 90.0))
		   if(0 || (s1 <= 481 && s2 > 481 && dd < 90.0))
		   {
		     if(dd < 5.0) rtrn='*'; else rtrn='.';
		     fprintf(fp,"%d. %c%d%c vs %c%d%c: %.2f %.5f\n",
			rank[n],c1,s1,x1,c2,s2,x2,dd,-dc);
		   }
#else
		   fprintf(fp,"%d. %c%d%c vs %c%d%c: %.2f %.5f\n",
			rank[n],c1,s1,x1,c2,s2,x2,dd,-dc);
#endif
		}
	} return rtrn;
}

long double dci_typ::PutResults(FILE *fp,char Mode,char *Chns,char *pdb_id,double S0)
{
	// Print a summary of the stored statistics to fp and return the score
	// S = -log10(pval) (S is 0 when pval == 1).
	//   Mode:   'M' or 'm' print no column header and may append dS to the row;
	//           'C' uses the header/row layout without the chain column;
	//           every other mode uses the layout with the chain column.
	//   Chns:   chain label(s) for the row; when null a "Summary:" line is
	//           printed instead of a table row.
	//   pdb_id: row identifier (table rows show at most its first 6
	//           characters); when null dca_file is used.
	//   S0:     reference score; dS = S - S0, forced to 0 when S0 == 0.
	// The column header is printed at most once per process (static flag).
	long double S,S_nats;
	static BooLean	print_header=TRUE;
	if(pval == 1) S=S_nats=0; 
	// NOTE(review): S_nats is computed with log10l just like S, despite its
	// name; it is only read by the disabled debug block below.
	else { S=-log10l(pval); S_nats=-log10l(pval); }
// assert(std::isfinite(S));
	if(0 && Mode != 'M' && Mode != 'm' && Mode != 'C'){ // debug only...
		fprintf(fp," Optimal ICA: ");
		double pc_d_D=100*(double)d/(double)D,pc_d_X=100*(double)d/(double)X;
		double pc_X_L=100*(double)X/(double)L;
		double pc_Dmd_LmX=100*(double)(D-d)/(double)(L-X);
		fprintf(fp,
		 "L=%d; D=%d; X=%d; d(#D before X)=%d; d/D=%.1f%c; X/L=%.1f%c.\n",
                     L,D,X,d,pc_d_D,'%',pc_X_L,'%');
		if(S > 0) fprintf(fp,
		 "           ...d/X=%.1f%c; (D-d)/(L-X)=%.1f%c; ratio = %.3f.\n",
					pc_d_X,'%',pc_Dmd_LmX,'%',pc_d_X/pc_Dmd_LmX);
        	if(Mode=='I'){
		    fprintf(fp,
			"  ICA p-value = %.3Lg; Score = %.2Lf; Normalized=%.2Lf.\n",
					pval,S,100*S_nats/(long double) L);
        	} else { 
        	  fprintf(fp," Unadjusted Ball-in-urn p-value = %Lg;",pBiU);
		  fprintf(fp,"  Unified = %.3Lg.\n",pval);
        	  fprintf(fp,
		   " Adjusted Unified P-value = %Lg; Score = %.2Lf; Normalized=%.2LF.\n",
					pval,S,100*S_nats/(long double) L);
		}
	}
	if(pdb_id==0) pdb_id=dca_file;
	double dS = -S0 + (double) S; 
	// if(S0 == 0 || fabs(dS) < 2.0) dS=0; 
	if(S0 == 0) dS=0; 
	// dd = -log10(pBiU), never negative.
	long double dd; 
	if(pBiU == 1) dd=0; 
	else dd=-log10l(pBiU);
	if(dd < 0.0) dd=0.0;
//**************************************
	if(0) {	// for Figure 2 in paper
             	     fprintf(fp,"Summary: "); fprintf(fp,"%.2f\t%.2Lf\t",Dmax,S);
//**************************************
	} else if(Chns){
             	if(Mode != 'M' && Mode != 'm' && Mode != 'C'){
		    if(print_header){
		      // fprintf(fp,"mthd\tchn\tr(A)\tscore\tBIU\tL\tD\tX\td\tR\tF\n");
		      fprintf(fp,"mthd\tchn\tr(A)\tscore\tBIU\tL\tD\tX\td\tF\n");
		      print_header=FALSE;
		    }
		} else if(Mode == 'C'){
		    if(print_header){
		      fprintf(fp,"mthd\tr(A)\tscore\tBIU\tL\tD\tX\td\tF\n");
		      print_header=FALSE;
		    }
		}
#if 0	
		double key =100.0*S/(double)L;
		if(key < 1.0) fprintf(fp,"%.3f\t",key);
                else if(key < 10.0) fprintf(fp,"%.2f\t",key);
                else if(key < 100.0) fprintf(fp,"%.1f\t",key);
                else fprintf(fp,"%.0f\t",key);
#endif
		char str[40],Str[8],name[10];
		// strncpy() writes no terminator when pdb_id has 6 or more characters,
		// so terminate explicitly before name is printed with %s.
		strncpy(name,pdb_id,6); name[6]='\0';
		// bounded writes: Str in particular is only 8 bytes wide.
		if(R < 0.0){ snprintf(str,sizeof(str),"n.a."); snprintf(Str,sizeof(Str),"n.a."); }
		else { snprintf(str,sizeof(str),"%.1Lf",dd); snprintf(Str,sizeof(Str),"%d",R); }
#if 1
		if(Mode == 'C'){
             	   fprintf(fp,"%s\t%.1f\t%.1Lf\t%s\t%d\t%d\t%d\t%d",
				name,Dmax,S,str,L,D,X,d);
		} else {
             	   fprintf(fp,"%s\t%s\t%.1f\t%.1Lf\t%s\t%d\t%d\t%d\t%d",
				name,Chns,Dmax,S,str,L,D,X,d);
		}
#elif 1	// print out R
             	fprintf(fp,"%s\t%s\t%.1f\t%.1Lf\t%s\t%d\t%d\t%d\t%d\t%s",
				name,Chns,Dmax,S,str,L,D,X,d,Str);
#else	// greater precision for simulations
             	fprintf(fp,"%s\t%s\t%.1f\t%.9Lf\t%s\t%d\t%d\t%d\t%d\t%s",
				name,Chns,Dmax,S,str,L,D,X,d,Str);
#endif
		// optional trailing columns, then end the row.
		if(F_factor > 0.0) fprintf(fp,"\t%.1f",F_factor); 
             	if((Mode == 'M' || Mode == 'm') && fabs(dS) > 0){
			fprintf(fp,"\t%.1lf\n",dS);
		} else fprintf(fp,"\n");
		char cc=color_code; if(cc==0) cc='?';
		// extra "bpps" row, printed only when bpL, bpX and bpd are all
		// positive and at least one of bpS / bpBIU reaches 2.0.
		if(bpL > 0 && bpX > 0 && bpd > 0){
		   if(bpS >= 2.0 || bpBIU >= 2.0){
		      if(bpS <= 0.0){
			   fprintf(fp," bpps\t(%c)\t(3D)\t-\t(%.2Lf)\t-\t-\t-\t-\n",
				cc,bpBIU);
		      } else {
			   fprintf(fp,
			     " bpps\t(%c)\tDC(3D)\t%.3Lf\t(%.2Lf)\t%d\t%d\t%d\t%d\n",
                		cc,bpS,bpBIU,bpL,bpD,bpX,bpd);
		      }
		   }	// bpS = BPPS-pairs vs DC-pairs; bpBIU = BPPS-pairs vs 3D-contacts <= Dmax.
		}
	} else {
		// NOTE(review): the Summary line already ends in '\n', so F_factor
		// lands on a line of its own (or an empty line follows).  Output is
		// kept as in the original -- confirm whether this is intended.
             	fprintf(fp,"Summary: %s\t%.1f\t%.2Lf\t%.2Lf\t%d\t%d\t%d\t%d\n",
                		pdb_id,Dmax,S,dd,L,D,X,d);
		if(F_factor > 0.0) fprintf(fp,"\t%.3f\n",F_factor); else fprintf(fp,"\n");
	} return S;
}

