/**************************** gibbs.h - *******************************
   Gibbs Sampling algorithms for local multiple alignment.
*************************************************************************/
#if !defined(GIBBS)
#define GIBBS
#include "time.h"
#include "afnio.h"
#include "stdinc.h"
#include "fmodel.h"
#include "betaprior.h"
#include "sites.h"
#include "dheap.h"
#include "mheap.h"
#include "seqset.h"
#include "histogram.h"
#include "probability.h"
#include "residues.h"
#include "order.h"
#include "random.h"

/********************************* PRIVATE ********************************/
typedef struct {
	/****** input ******/
	ss_type		data;		/** input data **/
	char		*name;		/** input file name **/
	a_type		A;		/** alphabet used **/
	Boolean		test[10];	/** test new items (-T option) **/
	UInt4		seed;		/** random seed **/

	/****** sites ******/
	st_type		sites;		/** working sites **/
	Int4		ntyps;		/** NOTE(review): presumably number of element types - confirm **/
	Int4		*order;
	Int4		*site_len;
	Int4		*pos;
	sti_typ		best;		/** archived sites **/
	sti_typ		map;		/** archived sites **/
	sti_typ		start;		/** archived sites **/

	/****** fmodel ******/
	fm_type		*model;
	Int4		*ncol;
	Int4		*maxlen;
	Boolean		*null;
	Boolean		**bestnull;
	Boolean		**mapnull;
	double		pseudo;		/** NOTE(review): presumably the -p pseudo count - confirm **/
	double		qseudo;		/** NOTE(review): presumably the -q pseudo count - confirm **/

	/****** sampling parameters ******/
	Int4		nruns;
	Int4		nconverge;
	Int4		ncycles;
	Int4		**tmpfreq;
	double		*tmpratio;
	Boolean		move;
	Boolean		fragment;
	Boolean		verbose;
	Int4		wilcoxon;	/** = 0 if not wilcoxon **/
	FILE		*fptr;
	FILE		*ifptr;
	FILE		*sfptr;
	FILE		*mfptr;
	FILE		*gosfp;
	FILE		*cmafp;

	/****** fgibbs items ******/
	Int4		limit;
	Boolean		use_order;

	/****** motif sampler items ******/
	Int4		stop;
	double		***readprob;
	double		readcutoff;
	double		*sumprob;
	double		nread;
	double		*p;
	double		*map_p;
	double		*best_p;
	bp_type		*prior;
	Int4		*expect;
	double		weight;

	/****** output options ******/
	Int4		NtermX;		/** NOTE(review): presumably the -O N-terminal extension - confirm **/
	Int4		CtermX;		/** NOTE(review): presumably the -O C-terminal extension - confirm **/
} gibbs_sampler_type;

/** handle type: the prototypes in this header take the sampler state as a gs_type **/
typedef gibbs_sampler_type *gs_type;

/******************************** private ********************************/
/*
 * Prototypes listed under this header's "private" banner.  Their
 * definitions are not in this header, so the notes below go no further
 * than what the signatures themselves show.
 *
 *   G - the sampler state (gs_type, a pointer to gibbs_sampler_type).
 *   t - NOTE(review): presumably an element-type index (compare the
 *       ntyps member of the sampler state) - confirm in the definitions.
 *   M - a single fm_type model.
 *
 * The Boolean / Int4 / double return values are not described here
 * because their meaning cannot be determined from the declarations.
 */

/* column moves; NOTE(review): role of `lemon` is unclear from here - confirm */
Boolean move_column_gibbs(gs_type G, Int4 lemon, Int4 t);
/* returns a pointer to Int4; ownership of the result is not visible here */
Int4	*GetSiteFreq(gs_type G, Int4 t, Int4 d);
/* both return a pointer to fm_type (G->model has the same type) */
fm_type *InitGibbs(gs_type G);
fm_type *InitMAPGibbs(gs_type G);
Boolean MoveColumn(gs_type G, Int4 t);
Boolean MoveMultiCols(gs_type G, Int4 t, Int4 num);
Boolean Metropolis(gs_type G, Int4 t, fm_type M);
Int4     SaveBestGibbs(gs_type G);
Boolean	SaveFinalGibbs(gs_type G);
Boolean ShiftGibbs(gs_type G, Int4 t, fm_type M);
/* takes an argc/argv style argument vector plus the sampler state */
Boolean OptionsGibbs(Int4 argc, char *argv[], gs_type G);
Int4     GetFreqProb(Int4 t, Int4 n, fm_type M, gs_type G, o_type R);
double	motif_sampler(gs_type G);
/* double-valued functions of the sampler state */
double  MapBGibbs(gs_type G);
double  NetMapBGibbs(gs_type G);
double  NetMapBGibbs1(Int4 t, fm_type M, gs_type G);
double  NullMapBGibbs(gs_type G);
/* `model` is a pointer to fm_type, the same type as G->model */
Boolean TransferColumn(gs_type G, Int4 ntyp, fm_type *model);

/********************************* PUBLIC ********************************/
/*
 * Public entry points.  Definitions are not in this header; the
 * descriptions are limited to the signatures, with purposes hedged.
 */

/* Returns a gs_type given an option vector (nopt entries in options[])
 * and a sites object S.  NOTE(review): presumably the constructor paired
 * with NilGibbs() - confirm. */
gs_type MkGibbs(Int4 nopt, char *options[], st_type S);
/* Returns an st_type from an argc/argv style argument vector. */
st_type StartSitesGibbs(Int4 argc, char *argv[]);
/* Takes an output stream and the sampler state; meaning of the Int4
 * result is not visible here. */
Int4     RunGibbs(FILE *fptr, gs_type G);
/* Takes a stream, a first/last pair, a value t and the sampler state.
 * NOTE(review): presumably related to the -w wilcoxon option - confirm. */
void    PutWilcoxBGibbs(FILE *fptr,Int4 first, Int4 last, Int4 t, gs_type G);
/* NOTE(review): presumably releases a sampler obtained from MkGibbs() -
 * confirm what it frees before relying on it. */
void    NilGibbs(gs_type G);

/**************************** site sampler *******************************/
/* Takes a stream and the sampler state; returns nothing.  Definition is
 * not in this header.  NOTE(review): presumably the "sites sampler" mode
 * named in GIBBS_USAGE0 - confirm. */
void	SiteSampler(FILE *fptr, gs_type G);

/***************************** motif sampler *****************************/
/* Takes a stream and the sampler state; returns an Int4 whose meaning is
 * not visible from this header.  NOTE(review): presumably the "motif
 * sampler" mode named in GIBBS_USAGE0 - confirm. */
Int4	MotifSampler(FILE *fptr, gs_type G);

/*********************** near optimum sampler ****************************/
/* Returns a three-level pointer to double (the same type as the readprob
 * member of the sampler state) for a requested niter.  NOTE(review):
 * ownership of the returned storage is not visible here - confirm. */
double  ***NearOptimumSampler(Int4 niter, gs_type G);
/* Takes a stream and the sampler state; meaning of the Int4 result is
 * not visible from this header. */
Int4     NearOptFit(FILE *fptr, gs_type G);

/********************************* MACROS ********************************/
/*
 * Command-line help text, as one string constant.
 *
 * Written as adjacent string literals (one per output line) which the
 * compiler concatenates, so every whitespace character that reaches the
 * output is visible inside a quoted literal.  The "(default: 1)" line
 * under -c starts with two tab characters followed by six spaces.
 */
#define GIBBS_USAGE0 \
	"\nUsage(sites sampler): gibbs file lengths [options] \n" \
	"\nUsage(motif sampler): gibbs file lengths expect [options] \n\n" \
	"  lengths = <Int4>[,<Int4>]: lengths of elements for each type\n" \
	"  expect = <Int4>[,<Int4>]: expected number of elements for each type \n" \
	"  <Int4>[,<Int4>] = numbers for each element (e.g. \"10,12,15\" for 3 types)\n" \
	"  options:\n" \
	"\t-a               - create a cma output file (motif sampler with 1 block only)(file.cma)\n" \
	"\t-C<real>         - prob. cutoff (0 < C <= 1) for near optimum sampling (default: 0.5)\n" \
	"\t-c<int>          - number of cycles between shifts (sites sampler)\n" \
	"\t                    or maximum number of cycles per run (motif sampler) \n" \
	"\t\t      (default: 1)\n" \
	"\t-d               - DON'T use fragmentation (i.e., column sampler)\n" \
	"\t-f               - create a scan output file (file.sn)\n" \
	"\t-g               - create a goscan output file (motif sampler with 1 block only)(file.msa)\n" \
	"\t-I               - interactively specify #sites/sequence (sites sampler)\n" \
	"\t                    (default: one site of each type per sequence)\n" \
	"\t-L<int>          - set rapid convergence limit (higher = more time to converge)(default: 10)\n" \
	"\t-m<int>          - set maximum number of cycles in each run (sites sampler)(default: 500)\n" \
	"\t-n               - use nucleic acid alphabet\n" \
	"\t-O=<int1>:<int2> - Output a fasta file of the repeats with <int1> N-terminal extensions\n" \
	"\t                     and <int2> C-terminal extensions.\n" \
	"\t-o               - use element order in probabilities (sites sampler)\n" \
	"\t                   (each sequence must contain the same number of elements)\n" \
	"\t-p<float>        - number of pseudo counts for product multinomial model (default: 0.10)\n" \
	"\t-q<float>        - pseudo counts for ordering model (sites sampler)(default: 0.05)\n" \
	"\t-R<int>          - set number of near-optimum readings taken (default: 500)\n" \
	"\t-r               - randomly shuffle input sequences\n" \
	"\t-s<int>          - give seed for random number generator\n" \
	"\t-t<int>          - maximum number of sampling runs (default: 10)\n" \
	"\t-x               - DON'T remove protein low complexity regions\n" \
	"\t-w               - output wilcoxon rank test information (motif sampler)\n" \
	"\t-W<float>        - set fractional weight (0 to 1.0) on priors (motif sampler)\n\n" \
	"  REFERENCE: Neuwald A.F., Liu J.S. and C. E. Lawrence. 1995. Gibbs motif sampling: \n" \
	"       detection of bacterial outer membrane protein repeats. Protein Science 4: 1618-1632.\n" \
	"\n"

#endif

