/***************************************************************************
 *                                                                         *
 *   alignment.h (begin: Feb 20 2003)                                      *
 *                                                                         *
 *   Parallel IQPNNI - Important Quartet Puzzle with NNI                   *
 *                                                                         *
 *   Copyright (C) 2005 by Le Sy Vinh, Bui Quang Minh, Arndt von Haeseler  *
 *   Copyright (C) 2003-2004 by Le Sy Vinh, Arndt von Haeseler             *
 *   {vinh,minh}@cs.uni-duesseldorf.de                                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#ifndef ALI_H
#define ALI_H

#include <iostream>
#include <fstream>

#include "vec.h"
#include "seq.h"
#include "ptn.h"
#include "mat.h"
#include "dvec20.h"
#include "dmat20.h"

using namespace std;

/**
	Create the temporary char-to-BASE map.
	Only the declaration lives in this header; the definition is elsewhere.
	NOTE(review): presumably this fills the global nto[] table declared at the
	end of this header -- confirm against the implementation.
*/
void createPosGenChar ();


/**
* The Alignment class contains all information and methods for DNA alignment and Protein alignment.
* Remark: The Alignment class does not inherit from Sequence class.
*/
class Ali {
public :
	/**
		alignment file name
	*/
	char fileName_[FILENAME_LEN];

	/**
		prefix for output file names
	*/
	char* out_prefix;

	/**
		constructor, initializes the data
	*/
	Ali ();

	/**
		use this function for constructing this class
	*/
	void doConstructor ();

	/**
		clean all content of this class
	*/
	void clean ();

	/**
		everything is initialized here
	*/
	void init ();

	/**
		detect the data source
		@return REAL or SIMULATION
	*/
	PAM_TYPE detectDataSource ();

	/**
		detect the data type
		@return NUCLEOTIDE or AMINO_ACID
	*/
	DATA_TYPE detectDataType ();

	/**
		convert the input data from characters to numbers,
		e.g. A, C, G and T to indices starting from 0 (BASE_A, BASE_C, BASE_G, BASE_T)
		@param data_type the data type to convert to
	*/
	void convertDataType(DATA_TYPE data_type);

	/**
		convert the protein-coding sequences into numbers from 0 to 60
	*/
	void convertToCodons();

	/**
		get the length of the longest sequence name
		@return the length of the longest sequence name
	*/
	int getLongestSeqNameLen ();

	/**
		get the content at the given site; only active sequences are taken into account
		@param siteNo (IN) index of the wanted site
		@param ptn (OUT) the content at the given site
	*/
	void getPtn (const int siteNo, Ptn &ptn);

	/**
		get the number of sequences in this alignment
		@return the number of sequences in this alignment
	*/
	int getNSeq ();

	/**
		get the number of sites in the alignment
		@return the number of sites in the alignment
	*/
	int getNSite ();

	/**
		get the sequence at the given index
		@param index the index of the sequence
		@return the sequence at that index
	*/
	Seq &getSeq (const int index);

	/**
		sort sequences by their names; it serves for debugging only
	*/
	void sortSeq ();

	/**
		read the input alignment, which must be in extended PHYLIP format
		@param fileName the alignment file name
		@return 1 on success, 0 otherwise
	*/
	int readInput (const char *fileName);

	/**
		get the genetic distance between two sequences
		@param seqNo1 the first sequence
		@param seqNo2 the second sequence
		@return distance between them
	*/
	double getGenDis (const int seqNo1, const int seqNo2);

	/**
		get the address of the genetic distance matrix
		@return the address of the genetic distance matrix
	*/
	Mat<double> *getGenDis ();

	/**
		create the distance matrix for all pairs of sequences based on maximum likelihood
		@param out_prefix name of the file to which the distance matrix will be written
	*/
	void cmpGenDis (char* out_prefix);

	/**
		set the pattern index of a site (pattern[siteNo] := ptnNo)
		@param siteNo site index
		@param ptnNo pattern index
	*/
	void setPtn (int siteNo, int ptnNo);

	/**
		NOTE(review): undocumented in the original header; judging by the name this
		computes the number (or proportion) of constant sites -- confirm against the
		implementation, in particular why the result is a double.
	*/
	double calcNumConstSites();

	/**
		get the pattern index of a site (return pattern[siteNo])
		@param siteNo site index
		@return pattern index
	*/
	int getPtn (int siteNo);

	//--------------------------------------------------------------------
	/**
		write all information of this alignment to a stream
		@param out the output stream
	*/
	ostream &write (ostream &out);

	//--------------------------------------------------------------------
	/**
		write this alignment to a file.
		The first line holds the number of sequences and the number of sites;
		each following line holds the label T<index>, three spaces, and the
		sequence as printed by its writeItem method.
		If fileName is null or the file cannot be opened, a message is printed
		to standard error and nothing is written.
		@param fileName name of the output file
	*/
	void write (const char *fileName) {
		// a null name must never reach the stream constructor
		if (!fileName) {
			std::cerr << "Cannot write alignment: no file name given" << std::endl;
			return;
		}

		std::ofstream out_ (fileName);
		// previously a failed open was ignored and the output silently dropped
		if (!out_.is_open ()) {
			std::cerr << "Cannot open file " << fileName << " for writing" << std::endl;
			return;
		}

		// '\n' instead of endl: no need to flush after every line, close() flushes
		out_ << nSeq_ << " " << nSite_ << '\n';
		for (int seqNo_ = 0; seqNo_ < nSeq_; seqNo_ ++) {
			out_ <<'T' << seqNo_ <<"   ";
			items_[seqNo_].writeItem (out_) << '\n';
		}
		out_.close ();
	}

	/**
		get the name of a node in the tree.
		@param ndNo the node number
		@return sequence name if at external node, ndNo otherwise
	*/
	Vec<char> &getName (int ndNo);

	/**
		get the name of a node in the tree.
		@param ndNo the node number
		@param name (OUT) sequence name if at external node, ndNo otherwise
	*/
	void getName (int ndNo, Vec<char> &name);

	/**
		get the index of the sequence with the given name
		@param name the name of the sequence
		@return the sequence index with this name
	*/
	int getSeqNo (Vec<char> &name);

	/**
		allocate memory for this class
		@param nSeq number of sequences
		@param nSite number of sites
	*/
	void setLimit (int nSeq, int nSite);

	/**
		make all frequencies a little different (zero frequency, equal frequency)
		@param freqemp the frequency vector to adjust
		@param num_state number of states
	*/
	void convfreq(DVec20 &freqemp, int num_state);


	/**
		get the nucleotide or amino acid frequencies of this alignment
		@param stateFrqArr (OUT) frequency vector
	*/
	void estimateStateFrq (DVec20 &stateFrqArr);

	/**
		estimate the codon frequencies
		@param stateFrqArr (OUT) frequency vector
	*/
	void estimateCodonFrq(DVec20 &stateFrqArr);

	/**
		count the frequency of changes from one base to another base
		@param baseX the first base
		@param baseY the second base
		@return the frequency baseX <-> baseY
	*/
	double estimateProb (BASE baseX, BASE baseY);

	/**
		count the frequency of changes between all pairs of bases
		@param tsAG (OUT) transition frequency A<->G
		@param tsCT (OUT) transition frequency C<->T
		@param tvAC (OUT) transversion frequency A<->C
		@param tvAT (OUT) transversion frequency A<->T
		@param tvCG (OUT) transversion frequency C<->G
		@param tvGT (OUT) transversion frequency G<->T
	*/
	void estimateGenPam (double &tsAG, double &tsCT, double &tvAC, double &tvAT, double &tvCG, double &tvGT);

	/**
		estimate the ratio between transitions and transversions
		@return the ratio between transitions and transversions
	*/
	double estimateTsTvRatio ();

	/**
		get the appearance of a state
		@param stateNo the state
		@param appArr (OUT) appearance vector of the state
	*/
	void getApp (int stateNo, DVec20 &appArr);

	/**
		check if the alignment is correct;
		print an error message and quit the program immediately if it is not.
		NOTE(review): the meaning of the int return value is not documented here.
	*/
	int checkAli ();

	/**
		release all memory of this class
	*/
	void release ();


	/**
		destructor, releases the data
	*/
	~Ali ();

	/**************************************************************************
	**************************************************************************
	**************************************************************************
	**************************************************************************/

	/**
		resample the alignment with bootstrap
	*/
	void generateBootstrap();

	/**
		load the bootstrap alignment
	*/
	void loadBootstrap();

	/**
		@return true if this alignment was bootstrapped
	*/
	bool isBootstrapped();

	/**
		flag telling whether or not the genetic distance matrix of this class was computed
	*/
	int isGenDisMatCmped_;

	/**
		NOTE(review): undocumented in the original header; presumably looks up the
		original site ID of a site via origin_site -- confirm against the implementation.
	*/
	int getOriginSite(int site);

	/**
		NOTE(review): undocumented in the original header; presumably loads the genetic
		distance matrix from a file derived from prefix -- confirm against the implementation.
	*/
	void loadGenDis(char *prefix);

private :
	/**
		the number of sequences of this alignment
		(original note: we do not use it, nActSeq is used instead)
	*/
	int nSeq_;

	/**
		length of the longest sequence name
	*/
	int longestSeqNameLen_;

	/**
		the pattern vector which maps each site to a pattern
	*/
	Vec<int> ptnArr_;

	/**
		the number of sites of this alignment
	*/
	int nSite_;

	/**
		all sequences are stored here
	*/
	Vec<Seq> items_;

	/**
		original sequences are saved here, in case of bootstrapping
	*/
	Vec<Vec<char> > orig_items_;


	/**
		entry (i, j) of genDisMat_ is the distance between sequence i and sequence j
	*/
	Mat<double> genDisMat_;

	/**
		map to original site IDs, in case of codon model
	*/
	Vec<int> origin_site;

	/**
		true if this alignment was bootstrapped
	*/
	bool bootstrapped;

}; //end class ali

/**
	global alignment variable (declared here, defined in another translation unit)
*/
extern Ali alignment;
/**
	global table of NUM_CHAR integers (declared here, defined in another translation unit).
	NOTE(review): presumably maps an input character code to its numeric state index --
	confirm against the implementation of createPosGenChar.
*/
extern int nto[NUM_CHAR];


#endif
