/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* othcreatealign.c
 *
 * ER, Tue Jun  1 09:31:19 CDT 1999 [St. Louis]
 * 
 * creates an alignment according to the OTH model
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif


static void realloc_strings(int L, int *ret_len, int k, int **ret_seqX, int **ret_seqY, char **ret_charX, char **ret_charY);

/* Function:  SimulateOTHAlign()
 * Date:      ER, Sat Feb  5 14:13:23 CST 2000 [St. Louis]
 *
 * Purpose:  Sample a pairwise alignment from the OTH model.
 *           The walk begins in state stFL at position "start"; for every
 *           state visited a transition is drawn from oth->t, the state's
 *           emission (if any) is recorded, and the next state is queued.
 *           Nothing is queued after stFR, which ends the walk.
 *
 * Args:     ofp         - stream receiving the traceback and the alignment
 *           sqinfoX/Y   - handed unchanged to PrintNewCharSeqs()
 *           seqX, seqY  - in/out: addresses of the digitized sequence buffers;
 *                         the buffers may be reallocated, and the (possibly
 *                         new) pointers are stored back on return
 *           L           - length used to allocate the character buffers, and
 *                         the increment by which all buffers are grown
 *           start       - position attached to the first state
 *           ret_len     - RETURN: allocated length after any growth
 *           oth         - OTH model (transitions t; emissions mem, xem, yem)
 *           traceback   - if TRUE, print each state visited to ofp
 *           alignment   - if TRUE, print the sampled alignment to ofp
 *           string_name - handed unchanged to PrintNewCharSeqs()
 *
 * Note:     presumably *seqX and *seqY hold at least L entries on entry and
 *           were obtained from the heap (they are passed to a realloc-style
 *           call) - confirm against the caller.
 *
 * Returns:  (void)
 */
void
SimulateOTHAlign(FILE *ofp, SQINFO *sqinfoX, int **seqX, SQINFO *sqinfoY, int **seqY, int L, int start, int *ret_len, 
		 struct othmodel_s *oth, int traceback, int alignment, 
		 char *string_name)
{
  struct tracer_s      *root;     /* root of the trace tree                 */
  struct tracer_s      *node;     /* trace node currently being processed   */
  struct tracerstack_s *pending;  /* stack of trace nodes still to process  */

  int   *digX = *seqX;            /* digitized X row (may be reallocated)   */
  int   *digY = *seqY;            /* digitized Y row (may be reallocated)   */
  char  *txtX;                    /* character X row                        */
  char  *txtY;                    /* character Y row                        */

  int    alloc_len = L;           /* current allocated length               */
  int    ali_len;                 /* columns generated so far               */
  int    resX, resY;              /* residues emitted by the current state  */
  int    state, next_state;
  int    pos, next_pos;
  int    trans;                   /* index of the transition drawn          */
  int    pair;                    /* joint emission index drawn in stM      */
  int    verbose = FALSE;

  AllocCharSeqs(alloc_len, &txtX, &txtY);

  /* Start a trace tree and a stack to traverse it; the first node is
   * state stFL at position "start".
   */
  root    = InitTracer();
  pending = InitTracerstack();

  node = AttachTracer(root, start, stFL);
  PushTracerstack(pending, node);

  for (node = PopTracerstack(pending); node != NULL; node = PopTracerstack(pending))
    {
      state = node->type;

      /* draw the outgoing transition before looking at the emission */
      trans      = DENChoose(oth->t + IdxTransOTH[state], TransPerOTHState[state]);
      next_state = ToStateOTH[IdxTransOTH[state] + trans];
      if (next_state >= OSTATES) Die ("invalid transition in SimulateOTHAlign()");

      pos = node->emit;

      switch (state) {
      case stFL:
      case stFR:
      case stB:
      case stE:
	/* silent states: nothing emitted, position does not advance */
	resX     = -1;
	resY     = -1;
	next_pos = pos;
	break;

      case stFJ:
	/* marks the column with '*' in both rows and advances */
	resX      = -1;
	resY      = -1;
	txtX[pos] = '*';
	txtY[pos] = '*';
	next_pos  = pos + 1;
	break;

      case stM:
	/* joint draw over 16 pairs: row index is X, column index is Y */
	pair      = DLog2Choose(oth->mem, 16);
	resX      = pair / 4;
	resY      = pair % 4;
	txtX[pos] = DNAAlphabet[resX];
	txtY[pos] = DNAAlphabet[resY];
	next_pos  = pos + 1;
	break;

      case stX:
	/* residue in X, gap (code 4, '.') in Y */
	resX      = DLog2Choose(oth->xem, 4);
	resY      = 4;
	txtX[pos] = DNAAlphabet[resX];
	txtY[pos] = '.';
	next_pos  = pos + 1;
	break;

      case stY:
	/* gap (code 4, '.') in X, residue in Y */
	resX      = 4;
	resY      = DLog2Choose(oth->yem, 4);
	txtX[pos] = '.';
	txtY[pos] = DNAAlphabet[resY];
	next_pos  = pos + 1;
	break;

      default:
	Die("invalid state in SimulateOTHAlign()");
      }

      if (traceback) fprintf(ofp,"emiting %s (%d) [%d %d] \n", ostNAME[state], pos, resX, resY);

      /* silent states store -1 here; a later emitting state at the same
       * position overwrites it */
      digX[pos] = resX;
      digY[pos] = resY;

      if (state == stFR) {
	if (traceback) fprintf(ofp," %s-> END \n", ostNAME[state]);
      }
      else {
	if (traceback) fprintf(ofp," %s->%s \n", ostNAME[state], ostNAME[next_state]);
	PushTracerstack(pending, AttachTracer(node, next_pos, next_state));
      }

      /* grow all four buffers by L once the generated length reaches the
       * last allocated slot.
       * NOTE(review): the test uses the length relative to "start" while
       * the buffers are indexed by absolute position - confirm this is
       * safe for callers that pass start > 0.
       */
      ali_len = next_pos - start;
      if (ali_len >= alloc_len - 1) {
	realloc_strings(L, &alloc_len, pos, &digX, &digY, &txtX, &txtY);
	if (verbose) Warn ("SimulateOTHAlign(): reallocate\n");
      }
    }

  if (alignment) PrintNewCharSeqs(ofp, sqinfoX, sqinfoY, start, ali_len, txtX, txtY, string_name);

  /* hand the (possibly moved) digitized buffers back to the caller */
  *seqX    = digX;
  *seqY    = digY;
  *ret_len = alloc_len;

  free(txtX);
  free(txtY);
  FreeTracer(root);
  FreeTracer(node);   /* node is NULL here: the loop only exits on an empty stack */
  FreeTracerstack(pending);

}

/* Function:  SimulateOTHSequence()
 * Date:      ER, Wed Mar  1 13:55:51 CST 2000 [St. Louis]
 *
 * Purpose:  Given a sequence, generate a partner sequence and the
 *           alignment between the two by walking the M/X/Y states of the
 *           OTH model. The X row is read from "seq" (positions that
 *           isitagap() flags are skipped); the Y row is sampled, in stM
 *           from log2 P(y|x) = oth->mem[x,y] - oth->xem[x], in stY from
 *           oth->yem. The walk ends when no residue of "seq" is left.
 *
 * Args:     ofp         - stream receiving the traceback and the alignment
 *           seq         - the given (digitized) sequence
 *           sqinfoX/Y   - handed unchanged to PrintNewAlign()
 *           seqX, seqY  - filled with the digitized alignment rows
 *                         (gap = 4; -1 is stored for the silent stB state)
 *           start       - position attached to the first state
 *           L           - in:  number of entries of seq to consider
 *                         out: position following the last state processed
 *           oth         - OTH model (transitions t; emissions mem, xem, yem)
 *           ali         - charX/charY receive the character alignment rows
 *           traceback   - if TRUE, print each state visited to ofp
 *           alignment   - if TRUE, print the alignment to ofp
 *           string_name - handed unchanged to PrintNewAlign()
 *
 * Note:     seqX, seqY, ali->charX and ali->charY must be large enough for
 *           the alignment produced; no bound is checked here.
 *
 * Returns:  (void)
 */
void
SimulateOTHSequence(FILE *ofp, int *seq, SQINFO *sqinfoX, int *seqX, SQINFO *sqinfoY, int *seqY, int start, 
		    int *L, struct othmodel_s *oth, struct ali_s *ali, int traceback, 
		    int alignment, char *string_name)
{
  struct tracer_s      *tr;      /* the traceback tree under construction  */
  struct tracer_s      *cur_tr;  /* ptr to node of tr we're working on     */
  struct tracerstack_s *dolist;  /* pushdown stack of active tr nodes      */

  double pcond[4][4];            /* log2 conditional probabilities P(y|x)  */
  int    cur_x,   cur_y;         /* nucleotides at those positions         */
  int    cur_st,  nxt_st;
  int    cur_pos, nxt_pos;
  int    seq_pos = 0;            /* next unread position of seq            */
  int    len_seq; 
  int    t, x;

  /* calculate pcond[x][y]= P(y|x) = P(x,y) / P(x)   (in log2 space)
   */
  for (x = 0; x < 16; x++) 
    pcond[x/4][x%4] = oth->mem[x] - oth->xem[x/4];

  /* Initialize
   * Start at pos "start" with state "stB"
   */
  tr     = InitTracer();       /* start a trace tree */
  dolist = InitTracerstack();  /* start a stack for traversing the trace tree */
  
  cur_tr = AttachTracer(tr, start, stB); 
  PushTracerstack(dolist, cur_tr);

  len_seq = *L;
  cur_pos = start;
  nxt_pos = start;

  while ((cur_tr = PopTracerstack(dolist)) != NULL)
    {
      cur_st = cur_tr->type;
      
      /* only the first TransPerOTHState-1 transitions are sampled, and
       * they are taken to lead to M, X and Y in that order */
      t = DENChoose(oth->t + IdxTransOTH[cur_st], TransPerOTHState[cur_st]-1);
      if      (t == 0) nxt_st = stM;
      else if (t == 1) nxt_st = stX;
      else if (t == 2) nxt_st = stY;
      else Die ("invalid transition in SimulateOTHSequence()");

      cur_pos = cur_tr->emit;

      switch (cur_st){
      case stB:
	cur_x = -1;
	cur_y = -1;
	nxt_pos = cur_pos;
	break;

      case stM:
	/* seq[seq_pos] is an in-range non-gap entry: gaps were skipped and
	 * the bound was checked before this state was queued */
	cur_x = seq[seq_pos++];
	/* pcond has exactly four rows, so anything outside 0..3 is fatal */
	if (cur_x < 0 || cur_x > 3) Die ("hold your horses, stM needs a real base here");
	cur_y = DLog2Choose(pcond[cur_x], 4);

	nxt_pos = cur_pos + 1;
	ali->charX[cur_pos] = DNAAlphabet[cur_x];
	ali->charY[cur_pos] = DNAAlphabet[cur_y];
	break;

      case stX: 
	cur_x = seq[seq_pos++];
	if (cur_x < 0 || cur_x > 3) Die ("hold your horses, stX needs a real base here");
	cur_y = 4;

	nxt_pos = cur_pos + 1;
	ali->charX[cur_pos] = DNAAlphabet[cur_x];
	ali->charY[cur_pos] = '.';
	break;

      case stY: 
	cur_x = 4;
	cur_y = DLog2Choose(oth->yem, 4);

	nxt_pos = cur_pos + 1;
	ali->charX[cur_pos] = '.';
	ali->charY[cur_pos] = DNAAlphabet[cur_y];
	break;

      default:
	Die("invalid state in SimulateOTHSequence()");
      }

      if (traceback) fprintf(ofp,"emitting %s (%d) [%d %d] \n", ostNAME[cur_st], cur_pos, cur_x, cur_y);
      
      seqX[cur_pos] = cur_x;
      seqY[cur_pos] = cur_y;

      /* Skip gap positions now, never reading past len_seq, so that the
       * test below means "a real residue is still available". This keeps
       * stM/stX from running off the end of seq when it ends in gaps.
       */
      while (seq_pos < len_seq && isitagap(seq[seq_pos]))
	seq_pos++;
      
      if (seq_pos < len_seq ) {
	if (traceback) fprintf(ofp," %s->%s \n", ostNAME[cur_st], ostNAME[nxt_st]);
	/* nxt_pos already includes "start" (it derives from the root node's
	 * position), so it must not be added again */
	PushTracerstack(dolist, AttachTracer(cur_tr, nxt_pos, nxt_st));
      }
    }
  
  if (alignment) PrintNewAlign(ofp, sqinfoX, sqinfoY, start, cur_pos, ali, string_name);
  
  *L = nxt_pos;

  FreeTracer(tr);
  /* NOTE(review): cur_tr is NULL here (the loop only exits on an empty
   * stack); call kept from the original - confirm FreeTracer() accepts
   * NULL or drop it. */
  FreeTracer(cur_tr);
  FreeTracerstack(dolist);
}


/* Function: SimulateMutSequence()
 * Date:     ER, Wed Mar 22 17:11:19 CST 2000 [St. Louis]
 *
 * Purpose:  Given a sequence, simulate a second sequence related to it
 *           by the 4x4 joint log2 probabilities in mutpxy. Each residue y
 *           is drawn from log2 P(y|x) = mutpxy[x,y] - log2 sum_y' P(x,y').
 *           The result is an ungapped alignment of L columns.
 *
 * Args:     ofp         - stream receiving the alignment
 *           seq         - the starting (digitized) sequence; every entry
 *                         indexes a 4-row table, so it must be in 0..3
 *           sqinfoX/Y   - handed unchanged to PrintNewAlign()
 *           seqX        - filled with a copy of seq
 *           seqY        - filled with the simulated sequence
 *           start       - handed unchanged to PrintNewAlign()
 *           L           - length of seq
 *           mutpxy      - 16 joint log2 probabilities, indexed 4*x + y
 *           ali         - charX/charY receive the character alignment rows
 *           alignment   - if TRUE, print the alignment to ofp
 *           string_name - handed unchanged to PrintNewAlign()
 *
 * Returns:  void
 */
void
SimulateMutSequence(FILE *ofp, int *seq, SQINFO *sqinfoX, int *seqX, SQINFO *sqinfoY, int *seqY, int start, 
		    int L, double *mutpxy, struct ali_s *ali, int alignment, char *string_name)
{
  double cond[4][4];	 /* log2 P(y | x) conditionals                  */
  double marg[4];	 /* P(x), converted to log2 P(x) once summed    */
  int    resX, resY;
  int    i, j;
  int    col;

  /* marginalize the joint over y, row by row, then move to log2 space */
  for (i = 0; i < 4; i++)
    {
      marg[i] = 0.;
      for (j = 0; j < 4; j++)
	marg[i] += EXP2(mutpxy[4*i + j]);
      marg[i] = LOG2(marg[i]);
    }

  /* cond[x][y] = log2 P(y|x) = log2 P(x,y) - log2 P(x) */
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      cond[i][j] = mutpxy[4*i + j] - marg[i];

  col = 0;
  while (col < L)
    {
      resX = seq[col];
      resY = DLog2Choose(cond[resX], 4);

      ali->charX[col] = DNAAlphabet[resX];
      ali->charY[col] = DNAAlphabet[resY];

      seqX[col] = resX;
      seqY[col] = resY;

      col++;
    }

  if (alignment) PrintNewAlign(ofp, sqinfoX, sqinfoY, start, L, ali, string_name);

}

/* Function: SimulateMut5Sequence()
 * Date:     ER, Wed Mar 22 15:55:28 CST 2000 [St. Louis]
 *
 * Purpose:  Given a sequence, simulate a gapped alignment to a second
 *           sequence using the 5x5 joint log2 probabilities in mut5pxy
 *           (index 4 is the gap). For every column a gap is placed in X
 *           with probability P(x = gap), otherwise the next residue of
 *           seq is used; y is then drawn from log2 P(y|x). Columns are
 *           produced until every entry of seq has been consumed.
 *
 * Args:     ofp         - stream receiving the alignment
 *           seq         - the starting (digitized) sequence
 *           sqinfoX/Y   - handed unchanged to PrintNewAlign()
 *           seqX, seqY  - filled with the digitized alignment rows (gap = 4)
 *           start       - handed unchanged to PrintNewAlign()
 *           L           - in:  length of seq
 *                         out: number of alignment columns produced
 *           mut5pxy     - 25 joint log2 probabilities, indexed 5*x + y
 *           ali         - charX/charY receive the character alignment rows
 *           alignment   - if TRUE, print the alignment to ofp
 *           string_name - handed unchanged to PrintNewAlign()
 *
 * Note:     the alignment can be longer than seq; seqX, seqY and the ali
 *           rows must be large enough, no bound is checked here.
 *
 * Returns:  void
 */
void
SimulateMut5Sequence(FILE *ofp, int *seq, SQINFO *sqinfoX, int *seqX, SQINFO *sqinfoY, int *seqY, int start, 
		    int *L, double *mut5pxy, struct ali_s *ali, int alignment, char *string_name)
{
  double cond[5][5];	 /* log2 P(y | x) conditionals                  */
  double marg[5];	 /* P(x), converted to log2 P(x) further down   */
  double gap_odds[2];	 /* [0] no gap in X, [1] gap in X               */
  int    len_seq = *L;
  int    seq_pos = 0;    /* next unread position of seq                 */
  int    resX, resY;
  int    i, j;
  int    col;

  /* marginalize the joint over y, row by row (still probabilities) */
  for (i = 0; i < 5; i++)
    {
      marg[i] = 0.;
      for (j = 0; j < 5; j++)
	marg[i] += EXP2(mut5pxy[5*i + j]);
    }

  /* the gap row's marginal is the per-column chance of a gap in X;
   * it must be taken before the marginals are moved to log2 space */
  gap_odds[0] = 1. - marg[4];
  gap_odds[1] = marg[4];

  /* cond[x][y] = log2 P(y|x) = log2 P(x,y) - log2 P(x) */
  for (i = 0; i < 5; i++)
    {
      marg[i] = LOG2(marg[i]);
      for (j = 0; j < 5; j++)
	cond[i][j] = mut5pxy[5*i + j] - marg[i];
    }

  for (col = 0; seq_pos < len_seq; col++)
    {
      if (DChoose(gap_odds, 2)) { /* put a gap in seqX; seq is not advanced */
	resX = 4;
	ali->charX[col] = '.';
      }
      else {                      /* use the next residue of seq */
	resX = seq[seq_pos];
	ali->charX[col] = DNAAlphabet[resX];
	seq_pos++;
      }

      resY = DLog2Choose(cond[resX], 5);
      ali->charY[col] = (resY < 4) ? DNAAlphabet[resY] : '.';

      seqX[col] = resX;
      seqY[col] = resY;
    }

  if (alignment) PrintNewAlign(ofp, sqinfoX, sqinfoY, start, col, ali, string_name);

  *L = col; /* length of the alignment */
}



/* Function: realloc_strings()
 *
 * Purpose:  Grow the four parallel alignment buffers by L entries and
 *           open a block of L blank columns right after index k: entries
 *           k+1 .. len-1 are moved up by L, and entries k+1 .. k+L are
 *           set to -1 (digitized rows) and '.' (character rows).
 *           Does nothing when L is 0.
 *
 * Args:     L          - number of entries to add
 *           ret_len    - in: current allocated length; out: new length
 *           k          - index after which the blank block is inserted
 *           ret_seqX/Y - in/out: digitized rows (pointers may change)
 *           ret_charX/Y- in/out: character rows (pointers may change)
 *
 * Returns:  void
 */
void
realloc_strings(int L, int *ret_len, int k, int **ret_seqX, int **ret_seqY, char **ret_charX, char **ret_charY)
{    
  int  *digX    = *ret_seqX;
  int  *digY    = *ret_seqY;
  char *txtX    = *ret_charX;
  char *txtY    = *ret_charY;
  int   old_len = *ret_len;
  int   grown_len;
  int   i;

  /* nothing to add: leave buffers and length exactly as they are */
  if (L == 0) return;

  grown_len = old_len + L;

  digX = (int  *) ReallocOrDie(digX, sizeof(int ) * grown_len);
  digY = (int  *) ReallocOrDie(digY, sizeof(int ) * grown_len);
  txtX = (char *) ReallocOrDie(txtX, sizeof(char) * grown_len);
  txtY = (char *) ReallocOrDie(txtY, sizeof(char) * grown_len);

  /* move the tail (old_len-1 down to k+1) up by L; walking from the top
   * keeps source entries from being overwritten before they are copied */
  i = old_len;
  while (--i > k)
    {
      digX[i+L] = digX[i];
      digY[i+L] = digY[i];

      txtX[i+L] = txtX[i];
      txtY[i+L] = txtY[i];
    }

  /* blank the L columns k+1 .. k+L that were just opened */
  for (i = 1; i <= L; i++)
    {
      digX[k+i] = -1;
      digY[k+i] = -1;

      txtX[k+i] = '.';
      txtY[k+i] = '.';
    }

  *ret_seqX  = digX;
  *ret_seqY  = digY;
  *ret_charX = txtX;
  *ret_charY = txtY;

  *ret_len = grown_len;
}
