#if !defined (_TWEAK_CMA_USAGE_)

#define _TWEAK_CMA_USAGE_

/******************************************************************************************
    Copyright (C) 1997-2014 Andrew F. Neuwald, Cold Spring Harbor Laboratory
    and the University of Maryland School of Medicine.

    Permission is hereby granted, free of charge, to any person obtaining a copy of 
    this software and associated documentation files (the "Software"), to deal in the 
    Software without restriction, including without limitation the rights to use, copy, 
    modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
    and to permit persons to whom the Software is furnished to do so, subject to the 
    following conditions:

    The above copyright notice and this permission notice shall be included in all 
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
    PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 
    LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 
    OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
    OTHER DEALINGS IN THE SOFTWARE.

    For further information contact:
         Andrew F. Neuwald
         Institute for Genome Sciences and
         Department of Biochemistry & Molecular Biology
         University of Maryland School of Medicine
         801 West Baltimore St.
         BioPark II, Room 617
         Baltimore, MD 21201
         Tel: 410-706-6724; Fax: 410-706-1482; E-mail: aneuwald@som.umaryland.edu
 ******************************************************************************************/
/*
 * USAGE_START - usage/help text for tweakcma: version banner, synopsis and
 * the list of options.
 *
 * The macro expands to a single string literal assembled with
 * backslash-newline continuations, so the leading whitespace (spaces and
 * tabs) of every continuation line is part of the text.  When adding an
 * option, end the line with an explicit newline escape followed by the
 * continuation backslash, exactly like the existing entries.
 */
#define	USAGE_START	"\
  ----------------------------------------------------\n\
     TweakCMA v1.1.6 (February 21, 2022)\n\
     www.igs.umaryland.edu/labs/neuwald/software/auxiliary/\n\
     Copyright 2022 The University of Maryland\n\
     Freely distributed under the MIT License\n\
  ----------------------------------------------------\n\
   Tweak Colinear Multiple Alignment (CMA)\n\
   Usage: tweakcma cmafile [options]\n\
      Performs operations on a multiple sequence alignment in CMA format.\n\
   Options:\n\
     -A<int1>:<int2> - add <int2> columns to end or (if < 0) start of block <int1>\n\
     -Best         - output the Best sequence as a cma file\n\
     -Best=<int>   - output cma file of the best seq and <int> highest scoring homologs\n\
     -Best=<int1>:<int2> - output cma file of seq <int1> and <int2> highest scoring homologs\n\
			from distinct phyla\n\
     -best=<int>   - output cma file of best seq and <int> highest scoring homologs from distinct phyla\n\
     -best=<int1>:<int2> - output cma file of seq <int1> and <int2> highest scoring homologs\n\
     -csq          - output the consensus sequence in cma format\n\
     -CSQ          - output each cma input file with a consensus sequence added\n\
     -cdhit=<int>  - Purge seqs at <int> percent identity with cd-hit (range: 40-100)\n\
     -diversity    - show average residue diversity over all positions in input file\n\
     -fullseq      - output a cma file of those sequences that span the full alignment\n\
     -hmm          - convert cma to HMM profile format\n\
     -hsw          - create a hsw (Henikoff sequence weighting) file\n\
     -I=<filename> - print out a cma file of the sequences with IDs in <filename>\n\
     -iron         - 'iron out' deletions preceding insertions.\n\
     -iron=<real> - remove columns with fraction >= real deletions from the alignment\n\
		     (also 'irons out' deletions preceding insertions)\n\
     -K<fafile>    - keep only those sequences in <fafile> in the alignment\n\
     -K=<cmafile>   - Keep only those sequences in <cmafile> in the alignment\n\
     -k            - output a cma file with sequences sorted by kingdom\n\
		      (Requires NCBI taxonomy annotations)\n\
     -k<str>       - output a cma file with only those sequences from kingdoms in <str>\n\
                     possible kingdoms = MFVEBA\n\
     -label        - label all of the sequences in input cma file\n\
		      (this is required by some programs)\n\
     -lpr          - Print out log-probability ratio\n\
     -m            - merge (multiple) input cma files into one cma file (must be consistent input files)\n\
     -Mincol=<real> - output cma file with -mincol option after merging files; remove identical seqs afterwards\n\
     -MinColHSW=<real> - Run -Mincol=<real> and hsw and create two files (*.Match?.cma & *.hsw)\n\
     -MinColU=<int1>:<int2> - Run -Mincol=0.<int1> & -U=<int2> and hsw (creates *_Match?_U?.cma & .hsw)\n\
     -mincol=<real> - output cma files for seqs in input cma files with >= <real> fraction of column matches\n\
     -minsq=<int>  - output the input cma file if it contains at least <int> seqs\n\
     -maxsq=<int>  - output the input cma file if it contains no more than <int> seqs\n\
     -n=<string>   - rename the input cmafile to <string> and print it out\n\
     -pairs=<int1>:<int2> - show freqs of pairs of residues at columns <int1> and <int2> of alignment\n\
     -phyla        - show phyla in cma file\n\
     -p=<str>      - remove sequences NOT from phylum <str> and output new cma file\n\
     -poor=<int>   - print out the <int> worst sequences and output into 'poor.cma' file\n\
     -pdb          - output a cma file with pdb sequences only\n\
     -Q            - sort sequences in cmafile by similarity to 1st seq (into <infile>.sort.cma)\n\
     -random=<int> - generate a cma file with <int> random sequences of the same length as cma\n\
     -rmcsq        - remove consensus sequences from alignment\n\
     -rmflank      - remove overhanging flanking regions within input cma file \n\
     -rpdb         - remove pdb sequences from alignment\n\
     -rm=<int>     - randomly remove all but <int> seqs from alignment (keeps 1st seq)\n\
     -sameIDs      - output sequences having the same seqIDs\n\
		        (creates *.idfix.cma file with unique seqids)\n\
     -s            - see number of insertions and deletions in alignment at each position\n\
     -S            - shuffle non-null residues within columns as a control for DCA & BPPS\n\
     -see=<int>    - show freqs of amino acids at position <int> of 1st seq\n\
     -show=<int>   - show freqs of amino acids in column <int> of alignment\n\
     -seqlens      - plot histogram of sequence lengths\n\
     -tax	   - print out a cma file only of those sequences with taxids\n\
     -tax=<fafile> - add taxonomic info to the input cma file; get tax info from \n\
                        the corresponding sequences in <fafile> ordered the same\n\
     -terms	   - print out predominant terms in sequence deflines\n\
     -tx=<int>	   - print out a cma file only of those sequences with taxid = <int>\n\
     -U<int>       - Purge cma with a percent identity cutoff of <int>\n\
     -U=<int>      - same as -U<int> option except keep first seq in output file\n\
     -U            - remove identical sequences from cma file (keep first occurrences)\n\
     -u=<int>      - Run -cdhit option with purge=<int> and then -U<int> option\n\
     -unknwn       - remove sequences whose phylum is unknown\n\
     -unlabel      - unlabel all of the sequences in input cma file\n\
     -W            - write cma file containing labeled sequences only\n\
     -write        - write all cma files within a hiMSA from bpps (<prefix>_new.mma)\n\
\n"

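// Unused usage entry kept for reference (no trailing backslash, so this comment cannot splice into the next line):
//     -A<i>:<s>,<e> - ReAlign ith sequence against model using subseq <s>,<e>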


/*
 * USAGE_START_PRIVATE - extended tweakcma option listing.
 *
 * NOTE(review): the name suggests this is the fuller, non-public variant of
 * the usage text; confirm at the place where it is printed.
 *
 * Like the other usage macro in this header, it expands to a single string
 * literal built with backslash-newline continuations, so the leading
 * whitespace (spaces and tabs) of each continuation line is part of the text.
 */
#define	USAGE_START_PRIVATE	"USAGE: tweakcma fafile [options]\n\
   options:\n\
     -A<int1>:<int2> - Lengthen block <int1> by adding <int2> columns\n\
                       <int2> < 0 --> add to left; <int2> > 0 --> add to right\n\
     -A              - output alignment in fasta format with overhangs\n\
     -a<real>:<real> - Add related sequences to the alignment with cutoffs <real>:<real>\n\
     -add          - Add a duplicate sequence to alignments with only one sequence\n\
     -B            - output the Best (most characteristic) sequence\n\
     -Best         - output the Best sequence as a cma file\n\
     -Best=<int>   - output cma file of the best seq and <int> highest scoring homologs\n\
     -Best=<int1>:<int2> - output cma file of seq <int1> and <int2> highest scoring homologs\n\
			from distinct phyla\n\
     -best=<int>   - output cma file of best seq and <int> highest scoring homologs from distinct phyla\n\
     -best=<int1>:<int2> - output cma file of seq <int1> and <int2> highest scoring homologs\n\
     -c            - output the consensus sequence\n\
     -config       - output the cma block configuration\n\
     -csq          - output the consensus sequence in cma format\n\
     -cdtree=<file> - merge in CDTree seed alignment (<file>.sma) into main cma file.\n\
     -CSQ          - output each cma input file with a consensus sequence added\n\
     -c=<cmafile>  - create a .chn file using fafile.cma as Display set and <cmafile> as Main set\n\
     -cdhit=<int>  - Purge seqs at <int> percent identity with cd-hit (range: 40-100)\n\
     -c<int1>:<int2> - cluster sequences into related sets at <int1> percent identity\n\
                     and output sets with more than <int2> phyla into cma files\n\
     -c:<int1>:<int2> - cluster sequences into related sets at <int1> percent identity\n\
                     and output sets with more than <int2> phyla into cma files\n\
                     Don't include the first sequence in each set\n\
     -c<int>       - cluster sequences into related sets at <int> percent identity\n\
                     and output each set with more than 2 seqs into a separate cma file\n\
     -C            - create a copy of input cma file and write it out as <infile>.new.cma\n\
     -cobbled      - create fasta alignment of first seq with other seqs cobbled in\n\
     -C=<int1>:<int2> - cluster sequences into related sets with <int1> percent identity \n\
                     and output sets with at least <int2> sequences to files (*.set*)\n\
     -D            - convert cma alignment into dom_typ format\n\
     -D=<int>      - remove all sequences with a deletion at alignment position <int>\n\
     -d            - put dom_typ file to stdout\n\
     -doubles      - output a cma file for each sequence in input file (for use with mkmaps & gapmap)\n\
     -diversity    - show average residue diversity over all positions in input file\n\
     -e<real>      - set e-value for pairwise comparisons to <real> (default: 0.05)\n\
     -extend       - extend fake sequences to correspond to real sequences\n\
     -E<int><char><real><char> - Emit CRS-format lines for file <int> chain <char>\n\
                     freq_cutoff <real> and color <char>\n\
     -First        - output the first sequence as a cma file\n\
     -first        - output the first sequence renaming it the same as input cma file.\n\
                     This assumes that a consensus sequence is first...\n\
                     NOTE: only used with -m (merge) option.\n\
     -F            - output Fake sequence set\n\
     -FootPrint    - output subsequences corresponding to domain foot print\n\
     -F<int>       - Fuse block <int> with block <int>+1\n\
     -f            - output full sequence set\n\
     -fullseq      - output a cma file of those sequences that span the full alignment\n\
     -fasta        - output alignment in fasta format\n\
     -G            - output histograms of gaps between blocks\n\
     -g<blk>:<int> - grow block <blk> by <int> columns to right (negative -->to left)\n\
     -h            - look at Jun Liu's HMM parameters\n\
     -hmm          - convert cma to Sean Eddy's hmm format\n\
     -hsw          - create a hsw (Henikoff sequence weighting) file\n\
     -I=<int>:<int> - Put MSA with Lft/Rght flanks trimmed to <int> and <int>\n\
     -I<int>:<int> - Add <int2> columns to alignment starting at position <int1> \n\
                   - NOTE: can enter more than 1 but these need to be in decreasing order\n\
     -I=<filename> - print out a cma file of the sequences with IDs in <filename>\n\
     -i<blk>:<char><int> -insert <int> columns in front ('f') or behind ('b') block\n\
     -indel        - print out the number of sequences with (illegal) indel transitions\n\
     -iron         - 'iron out' deletions preceding insertions.\n\
     -islen=<int>:<int> - return 1 if cma length >= <int1> && <= <int2>; else return 0.\n\
     -iron=<real> - remove columns with fraction >= real deletions from the alignment\n\
		     (also 'irons out' deletions preceding insertions)\n\
     -K<fafile>    - keep only those sequences in <fafile> in the alignment\n\
     -K=<cmafile>   - Keep only those sequences in <cmafile> in the alignment\n\
     -k            - output a cma file with sequences sorted by kingdom\n\
     -k<str>       - output a cma file with only those sequences from kingdoms in <str>\n\
                     possible kingdoms = MFVEBA\n\
     -L            - look at histogram of number of hits for each subsequence\n\
     -label        - label all of the sequences in input cma file\n\
     -L<seqid>     - look at subsequences of <seqid> from the alignment\n\
     -lpr          - Print out log-probability ratio\n\
     -level=<int>  - Set cma level to <int>\n\
     -l<real>      - look cutoff gapped log10(likelihood)(default: 3.0)\n\
     -layer=<fafile> - layer motifs onto sequence in fafile and output as single block cma file\n\
     -M<int>       - output *.msa file with <int> repeats of model\n\
     -m            - merge (multiple) input cma files into one cma file (must be consistent input files)\n\
     -mm=<file>    - multi-merge input cma files into one mma file based on template <file>\n\
     -m=<file>     - merge (multiple) cma files named in <file> into one cma\n\
                      (sample file format: 'Rab11,Rab1,Rab1Like,Rab2,Rab5,Rab6,Rab7,Arf1')\n\
     -Mincol=<real> - output cma file with -mincol option after merging files; remove identical seqs afterwards\n\
     -MinColHSW=<real> - Run -Mincol=<real> and hsw and create two files (*.Match?.cma & *.hsw)\n\
     -MinColU=<int1>:<int2> - Run -Mincol=0.<int1> & -U=<int2> and hsw (creates *_Match?_U?.cma & .hsw)\n\
     -mincol=<real> - output cma files for seqs in input cma files with >= <real> fraction of column matches\n\
     -minsq=<int>  - output the input cma file if it contains at least <int> seqs\n\
     -maxsq=<int>  - output the input cma file if it contains no more than <int> seqs\n\
     -N<int>       - return 1 if at least <int> seqs in cma file else return 0\n\
     -Newick       - Create a simple Newick tree from input (multiple) cma files\n\
     -n            - rename the input cmafile to match the name of the first seq\n\
     -n=<string>   - rename the input cmafile to <string> and print it out\n\
     -nofrag=<int1>:<int2> - remove sequences from cma file that harbor a \n\
                       deletion at positions <int1> from N-term and <int2> from C-term\n\
     -O            - Output True sequence set\n\
     -O=<string>   - Output True sequences with <string> in defline\n\
     -Out=<int>    - output sequences in the <int>th cmafile\n\
     -O<file>      - (turned off) Output a phylip protdist input file to construct a tree \n\
                              (e.g., using neighbor)\n\
     -O<file>      - Output a Selex file of the input cma file\n\
     -o            - output histogram of repeat spacings\n\
     -out=<cma>    - output those cmafiles in <cma> that match sequences in the commandline cmafile\n\
     -P            - Show repeat numbers on either side of shuffled/real partition\n\
     -P=U          - print use_file with names of all functionally divergent groups\n\
     -P=<int>      - return 1 if # phyla >= <int> else return 0\n\
     -P<int>..<int>,<int>..<int>/<int>..<int>:<int>,<int>..<int>\n\
                - specify gap penalties \n\
                  (default: iol=100,ioh=1200,ixl=20,ixh=120,ixal=0,ixah=0,od=500,dxl=40,dxh=400)\n\
     -pairs=<int1>:<int2> - show freqs of pairs of residues at columns <int1> and <int2> of alignment\n\
     -phyla        - show phyla in cma file\n\
     -p<int>       - randomize residues at position <int> of first sequence\n\
     -p=<str>      - remove sequences NOT from phylum <str> and output new cma file\n\
     -poor=<int>   - print out the <int> worst sequences and output into 'poor.cma' file\n\
     -Partition=<str> - partition main cma file based on consensus sequences in <str>.cma \n\
     -Pttrn	   - find a seed pattern that distinguishes input cma[2] from cma[3] with display set cma[1]\n\
     -p            - show pseudo map for HMM_typ\n\
     -pdb          - output a cma file with pdb sequences only\n\
     -Q            - sort sequences in cmafile by score to query seq (into <infile>.sort.cma)\n\
     -q=<seqid>    - output True sequences with seqid and return the number found\n\
     -q            - query centric: remove columns that contain deletions in first sequence\n\
                      so as to make alignment query centric\n\
     -r<y>,<s>     - remove sequences from alignment with less than x repeats\n\
     -rand=<real>  - randomize <real> fraction of sites in each sequence, output as cma\n\
     -random=<int> - generate a cma file with <int> random sequences of the same length as cma\n\
     -r=<string><int>   - remove seqs from cma lacking residues in string at pos <int>\n\
     -r<real>      - remove poor sequences from alignment with LogOdds < <real>\n\
     -rmcsq        - remove consensus sequences from alignment\n\
     -rmflank      - remove overhanging flanking regions within input cma file \n\
     -rmsq=<str>   - remove sequences from alignment with <str> in sequence id.\n\
     -rcsq         - retain consensus sequences only from alignment\n\
     -re           - Put the relative entropy at each position\n\
     -rspsq        - retain swissprot and pdb sequences only from alignment\n\
     -rpdb         - remove pdb sequences from alignment\n\
     -rpts=<real>  - return the number of sequences with a repeat with E-value <= <real>\n\
     -rmest         - remove ests and environmental sequences from alignment\n\
     -rm=<int>     - randomly remove all but <int> seqs from alignment (keeps 1st seq)\n\
     -Rm=<string>  - Remove the cma file named <string> from the multiple cma input file\n\
     -Rename=<int>:<string>  - Rename the <int>th sequence to <string>\n\
     -R<array>     - Remove blocks from alignment\n\
     -R=<int>      - Remove the <int>th sequence from the alignment\n\
     -S            - shuffle non-null residues within columns (control for CHAIN analysis)\n\
     -S<int>       - to shuffle a certain percentage of positions in each cma seq\n\
                     use the shuffle program with the -F option\n\
                     (NOTE: need to emit a fafile of the Fake seqs in cmafile)\n\
     -S<x>:<int>   - split block x into two with left block of length <int>\n\
     -S=<cmafile>  - split single cma into blocks of the same length as in <cmafile>\n\
     -Splt=<int>   - split a single cma into multiple cmas each with no more than <int> seqs\n\
     -single       - output cma files, one for each sequence in input cma file\n\
     -sameIDs      - output sequences having the same seqIDs\n\
     -scores       - print histogram of ungapped log-odds scores for the alignment\n\
     -Scores       - print histogram of gapped log-odds scores for the alignment\n\
     -s            - see number of insertions and deletions in alignment at each position\n\
     -sma          - convert input cma file into a sma file.\n\
     -split        - split a single block alignment into multiple blocks based on insertions\n\
     -sq=<int>     - output the <int> sequence in input cma file\n\
     -Sq=<str>    - output a cma file of the sequence ids in <str> e.g. = '24,28,345,98,45'\n\
     -sort=<cmafile>  - sort input cma based on cma file <cmafile>\n\
     -Sort=<file>  - sort input (template) cma based on seqids in <file>\n\
                      (sample file format: 'Rab11,Rab1,Rab1Like,Rab2,Rab5,Rab6,Rab7,Arf1')\n\
     -see=<int>    - show freqs of amino acids at position <int> of 1st seq\n\
     -show=<int1>:<int2> - show freqs of amino acids at column <int2> in blk <int1> of alignment\n\
     -show=<int1>:<int2>,<str> - show freqs in column <int2> in blk <int1> of <str> cma file\n\
     -Show=<int1>:<int2> - same as show option above but output in excel input format\n\
     -seqlens      - plot histogram of sequence lengths\n\
     -stockholm    - output alignment in stockholm format\n\
     -t<real>      - trim models at ends up to columns of <real> bits\n\
     -tax          - Retain only sequences with taxids\n\
     -T            - convert input cmafiles using template cma at end of file (Default: 3)\n\
     -T<res>       - TrimMax residues from single block (Default: 3)\n\
     -T<blk>:<n>   - remove n residues from right end of block (negative = left end)\n\
     -T=<start>..<end>,[<start>..<end>] or \n\
     -T_<int>=<start>..<end>,[<start>..<end>] - split into blocks (shift res by <int>)\n\
     -U<int>       - Purge cma with a percent identity cutoff of <int>\n\
     -U=<int>      - same as -U<int> option except keep first seq in output file\n\
     -U            - remove identical sequences from cma file (keep first occurrences)\n\
     -u            - remove identical sequences from the same phylum\n\
     -unknwn       - remove sequences whose phylum is unknown\n\
     -unlabel      - unlabel all of the sequences in input cma file\n\
     -use=<file>   - output only those cma files listed in <file>\n\
     -v            - verbose mode\n\
     -var          - output histogram of relative std deviation of seqs from the consensus\n\
     -V            - output contribution of each sequence to the map\n\
     -W            - write cma file containing labeled sequences only\n\
     -W=<int>      - write to stdout the <int>th cma file\n\
     -Write        - write cma file containing all sequences\n\
     -write        - write all N cma files in input as fafile_1.cma ... fafile_N.cma\n\
     -w            - write msa files\n\
     -w=<file>     - write those cma files named in <file>\n\
     -worst=<int1>:<int2> - output cma file lacking <int2> seqs with highest scores against the <int1>th seq.\n\
     -X            - mask out both known domains and aligned regions in sequences\n\
     -x<seqid>:<int_list>  - eliminate list of subsequences of <seqid> from the alignment\n\
     -Z<int1>:<int2> - put the <int1>th to <int2>th sequences in output alignment\n\
     -z<seqid>,...,<seqid>  - output only sequences with one of the <seqid> in alignment\n\
\n\n"

#endif

