/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/

/* gblast.h - generic blast program. */
#if !defined (GBLAST)
#define GBLAST
#include "stdinc.h"
#include "afnio.h"
#include "random.h"
#include "residues.h"
#include "alphabet.h"
#include "dheap.h"
#include "mheap.h"
#include "sequence.h"
#include "probability.h"
#include "mlist.h"
#include "sumstat.h"
#include "wdigraph.h"

/*********************** Finite State Machine ************************
FSM = (Q,q0,A,Sigma,d)
	Q = States
	q0 e Q = start state
	A subset Q = accepting states
	Sigma = input alphabet
	d = function from Q x Sigma -> Q (transition function)

if in an accepting state then execute appropriate action.
	- go to positions on list and try extending alignment

input text T[1..n];  pattern to be found P[1..m]
 n = input string length; m = pattern length 

	(note: lex is a FSM)

  input tokens = A-Y and '-' 'x' '*' '\0'

 if q = a then  go to query sequence:
   pos[q][1..n][A...Y] = list of positions matching pattern in accepting 
	state = NULL if not an accepting state.

  blast method:
	1. compile list of high scoring words and make fsm.
	2. scan database for hits.
	3. extend hits.
(for purge, extend only until the score is found to be >= cutoff.)

	QWL
	..:  -> S = R(Q,N) + R(W,Y) + R(L,L).
	NYL	
		if(S > Threshold) then extend hit to find MSP.

		need drop-score.
 *********************************************************************/

/*************************** generic gblast type **************************/
/*
 * gblast_type - working state for one gblast search object.
 *
 * Groups (1) the finite state machine used to recognise words (nQ, d, mlist,
 * tmp, T), (2) the alphabet and query sequence, (3) the arrays and heap that
 * record hits, and (4) per-diagonal bookkeeping.  gb_typ is the pointer type
 * taken by the functions declared in this header.
 */
typedef struct {
	Int4	nQ;		/** number of states in the FSM **/
	Int4	**d;		/** transition table: d[q][r] maps Q x A -> Q **/
	ml_type	mlist;		/** lists for accepting states **/
	Int4	*tmp;		/** temporary list **/
	Int4	T;		/** presumably the word-score threshold ("Threshold" in the FSM notes) - confirm **/
	a_type  A;		/** alphabet **/
	e_type  E;              /** query sequence **/
	Int4	x_dropoff;	/** presumably the drop-off score used when extending hits - confirm **/
	/******************************************************/
	Int4	cutoff;		/** HSP score cutoff **/
	Int4	*hit_s;		/** hit start **/
	Int4	*hit_e;		/** hit end **/
	Int4	*hit_d;		/** hit diagonal **/
	Int4	*hit_s2;	/** hit start, as read by StartGBlast() **/
	Int4	*hit_e2;	/** hit end, as read by EndGBlast() **/
	Int4	*hit_d2;	/** hit diagonal, as read by DiagGBlast() **/
	BooLean	update;		/** when set, the accessor macros call UpdateHitsGBlast() before reading **/
	double	*score;		/** key storage **/
	Int4	nhits;		/** number of hits, as read by nHitsGBlast() **/
	mh_type	mH;		/** heap for best hits **/
	wdg_typ	G;		/** graph for finding consistent HSPs **/
	/******************************************************/
	Int4	zero;		/** diagonal zero value: e.g. -25,000 **/
	Int4	*diag0,*diag;	/** diagonal list [1,4,0,0,2] **/
	Int4	*ed0,*extdiag;	/** limit to which diagonal was extended **/
} gblast_type;
/* Handle type used by every gblast function and macro in this header. */
typedef gblast_type *gb_typ;
/*********************************************************************/
/*
 * Compile-time limits for the gblast module.  None of them is referenced
 * inside this header; the notes below are inferred from the names only.
 * NOTE(review): confirm each meaning against the implementation file.
 */
#define MAX_SEQ_LENG_GB 100000	/* presumably the maximum sequence length */
#define MAX_ID_LENG_GB  10000	/* presumably the maximum identifier length */
#define MAX_OVERLAP_GB	9 
#define MAX_FRACTION_GB	7 

/******************************* private *******************************/
/*
 * Internal helpers of the gblast module.  Only the declarations live here;
 * the definitions are not part of this header.
 *
 * The `register` storage-class specifier has been removed from the
 * parameter lists: it has no effect in a prototype, does not take part in
 * the function type (so existing definitions remain compatible), and is
 * rejected by C++17 compilers.
 *
 * Shared parameter names, as far as this header shows:
 *   E1         sequence object (e_type) for the first sequence
 *   i1, i2     positions in the first / second sequence
 *   len2,seq2  length and residue array of the second sequence
 *   R          two-dimensional char table; presumably the residue scoring
 *              matrix (see R(Q,N) in the FSM notes) - confirm
 *   x_dropoff  presumably the score drop at which extension stops - confirm
 */

/* Extend a hit; *left and *right are output parameters (presumably the
   extension limits - confirm).  Returns an Int4, presumably the score. */
Int4	ExtendGBlastStr(e_type E1, Int4 i1, Int4 len2, unsigned char *seq2,
	Int4 i2, Int4 *left, Int4 *right, char **R, Int4 x_dropoff);

/* Error reporting routine taking a message string. */
void	gblast_error(char *s);

/* Called by the nHitsGBlast/StartGBlast/EndGBlast/DiagGBlast macros
   whenever B->update is set, before they read the hit data. */
void	UpdateHitsGBlast(gb_typ B);

/* Variant of ExtendGBlastStr without the left/right outputs and without
   an explicit drop-off argument. */
Int4	ExtendGBlast(e_type E1, Int4 i1, Int4 len2, unsigned char *seq2,
	Int4 i2, char **R);

/* Boolean variant taking a target score; presumably reports whether an
   extension reaches `score` - confirm. */
BooLean	FastExtendGBlastStr(e_type E1, Int4 i1, Int4 len2, unsigned char *seq2,
	Int4 i2, char **R, Int4 score);
/****************************** macros ********************************/
/*
 * StateGB(q,t): combine q with t shifted left by nine bits.  q is OR'ed in
 * unmasked, so it is expected to fit in the low nine bits (0..0777).
 */
#define StateGB(q,t)		((q)|((t)<<9))
/*
 * MaxStateGB(n): n shifted left by nine bits with all nine low bits set,
 * i.e. the largest value StateGB(q,n) can produce for a nine-bit q.
 */
#define MaxStateGB(n)		(0x1FF|((n)<<9))

/******************************* PUBLIC *******************************/
/*
 * Public gblast interface.  Every routine works on a gb_typ handle.
 * Descriptions are limited to what the signatures show; anything marked
 * "presumably" should be confirmed against the implementation.
 *
 * `register` has been dropped from the MatcherGBlastStr parameters: it has
 * no effect in a prototype, is not part of the function type, and is
 * rejected by C++17 compilers.
 */

/* Constructors: build a gb_typ for query E over alphabet A with threshold T.
   MkGBlast additionally takes hpsz (presumably the hit-heap size) and the
   HSP score cutoff; MkGBlast2 further takes xdrop (presumably x_dropoff). */
gb_typ	MakeGBlast(Int4 T, e_type E, a_type A);
gb_typ	MkGBlast(Int4 hpsz, Int4 cutoff, Int4 T, e_type E, a_type A);
gb_typ	MkGBlast2(Int4 hpsz, Int4 cutoff, Int4 T, e_type E, a_type A,
	Int4 xdrop);

/* Run the matcher over a raw residue array seq of length len, using table R. */
Int4	MatcherGBlastStr(Int4 len, unsigned char *seq, gb_typ B, char **R);

/* Takes sequence lengths N and M, an Int4 array segs, and the statistical
   parameters lambda, H and K; presumably selects consistent HSPs - confirm. */
Int4	ConsistentGBlast(Int4 N, Int4 M, Int4 *segs, double lambda,
	double H, double K, gb_typ B);

/* Presumably releases B and everything it owns - confirm. */
void	NilGBlast(gb_typ B);

/* Run the matcher on sequence object E; fptr is an output/diagnostic stream
   as far as the signature suggests - confirm. */
Int4	MatcherGBlast(FILE *fptr, e_type E, gb_typ B);

/* Boolean matchers taking a target score, for a sequence object or for a
   raw residue array of the given length. */
BooLean	FastMatcherGBlast(e_type E, gb_typ B, Int4 score);
BooLean	FastMatcherGBlastStr(unsigned char *seq, Int4 length, gb_typ B, Int4 score);

/* Sum-statistic routines returning a double; the Str variant takes the
   statistical parameters explicitly. */
double	SumStatGBlastStr(double N, double M, double lambda, double H, double K,
	gb_typ B);
double	FastSumStatGBlast(Int4 N, Int4 M, gb_typ B);
#if 0
Int4    PutHitsGBlastStr(FILE *fp, Int4 N, Int4 M, Int4 dbslen, gb_typ B);
Int4    ColBlcksGBlastStr(Int4 N, Int4 M, Int4 number, unsigned char *seq,
	Int4 *col, char **R, gb_typ B);
e_type  SubSeqGBlast(char *id, Int4 len, unsigned char *seq, Int4 flank, 
	gb_typ B);
#endif
/******************************** MACROS *****************************/
/*
 * Hit accessors.  Each one first calls UpdateHitsGBlast(B) if B->update is
 * set and then yields the requested value:
 *   nHitsGBlast(B)    -> B->nhits
 *   StartGBlast(r,B)  -> B->hit_s2[r]
 *   EndGBlast(r,B)    -> B->hit_e2[r]
 *   DiagGBlast(r,B)   -> B->hit_d2[r]
 * B is expanded more than once, so pass an expression without side effects.
 */
#define nHitsGBlast(B)		\
	(((B)->update ? UpdateHitsGBlast(B) : (void)0), (B)->nhits)
#define StartGBlast(r,B)	\
	(((B)->update ? UpdateHitsGBlast(B) : (void)0), (B)->hit_s2[(r)])
#define EndGBlast(r,B)		\
	(((B)->update ? UpdateHitsGBlast(B) : (void)0), (B)->hit_e2[(r)])
#define DiagGBlast(r,B)		\
	(((B)->update ? UpdateHitsGBlast(B) : (void)0), (B)->hit_d2[(r)])

#endif

