/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* evd.c
 * 
 * Sun Feb 22 15:23:32 CST 2004 [STL]
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <time.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

static void   lambda_f_and_df (double lambda, int num, double *score, double *ret_f, double *re_df);
static void   lambda_f_and_df_from_histo (double censor, int n_censor, double lambda, int N, double k, int shift, int *sc_histo, double *ret_f, double *ret_df);
static void   cal_censorship_param (double censor_cutoff, int N, double k, int shift, int *histo, double *ret_censor, int *ret_n_censor);
static double ml_lambda_evd (int num, double *score);
static double ml_lambda_evd_from_histo (double censor, int n_censor, int N, double k, int shift, int *sc_histo);
static double ml_mu_evd (double lambda, int num, double *score);
static double ml_mu_evd_from_histo (double censor, int n_censor, double lambda, int N, double k, int shift, int *sc_histo);

 
/* Function: AllocHistogram()
 *
 * Purpose:  Allocate a score histogram with every bin set to zero.
 *
 * Args:     N         - together with k fixes the top bin index, dim = (int)(N*k)
 *           k         - bins per score unit (bin i starts at i/k - shift in the
 *                       filling/printing routines of this file)
 *           ret_histo - RETURN: the newly allocated histogram; caller frees it
 *
 * Note:     Every routine in this file walks the histogram with
 *           "for (i = 0; i <= dim; i++)" and FillHistogram() writes histo[dim],
 *           so the array must hold dim+1 counters, not dim.
 */
void
AllocHistogram (int N, double k, int **ret_histo)
{
  int *histo;
  int  dim;
  int  i;

  dim = (int)(N*k);

  /* a negative top index would turn into a huge unsigned allocation size */
  if (dim < 0) Die ("AllocHistogram(): negative histogram dimension %d\n", dim);

  /* bins 0..dim inclusive => dim+1 slots */
  histo = (int *) MallocOrDie (sizeof(int) * (dim + 1));

  for (i = 0; i <= dim; i ++) histo[i] = 0;

  *ret_histo = histo;
}

/* Function: FillScoreHistograms()
 *
 * Purpose:  Reduce the three diagonal Viterbi scores held in sc (oth, cod, rna)
 *           to one value with SigmoidalLog2() and record that value in histo.
 *
 * Args:     ofp   - passed through to FillHistogram()
 *           sc    - score structure; only sc->vidiag->{oth,cod,rna}->{pl,mn} are read
 *           N, k, shift - histogram geometry, passed through to FillHistogram()
 *           histo - histogram to update
 *           ones  - if nonzero, use the "pl" scores alone; if zero, combine the
 *                   "pl" and "mn" scores as pl + log2(1 + 2^(mn-pl)) - 1
 *
 * Dies if FillHistogram() reports that no bin was incremented.
 */
void
FillScoreHistograms (FILE *ofp, struct scores_s *sc, int N, double k, int shift, int *histo, int ones)
{
  double othsc;
  double codsc;
  double rnasc;
  double sigm_r;
  int    filled;

  if (ones) {
    /* a single score per model: take "pl" untouched */
    othsc = sc->vidiag->oth->pl;
    codsc = sc->vidiag->cod->pl;
    rnasc = sc->vidiag->rna->pl;
  }
  else {
    /* merge the "pl" and "mn" scores of each model into one value */
    othsc = sc->vidiag->oth->pl + LOG2(1.0 + EXP2(sc->vidiag->oth->mn - sc->vidiag->oth->pl)) - 1.0;
    codsc = sc->vidiag->cod->pl + LOG2(1.0 + EXP2(sc->vidiag->cod->mn - sc->vidiag->cod->pl)) - 1.0;
    rnasc = sc->vidiag->rna->pl + LOG2(1.0 + EXP2(sc->vidiag->rna->mn - sc->vidiag->rna->pl)) - 1.0;
  }

  sigm_r = SigmoidalLog2(rnasc, othsc, codsc);

  filled = FillHistogram (ofp, sigm_r, N, k, shift, histo);
  if (!filled) Die ("Bad filling of sc histogram %f\n", sigm_r);
}


/* Function: FillHistogram()
 *
 * Purpose:  Add one count to the histogram bin that contains score.
 *           Bin i covers the half-open interval [i/k - shift, (i+1)/k - shift).
 *           Scores at or above N-shift are piled into the top bin (index
 *           (int)(N*k)); scores at or below -shift are piled into bin 0.
 *
 * Args:     ofp   - not used by this function
 *           score - value to record
 *           N, k, shift - histogram geometry
 *           histo - histogram to update; must have bins 0..(int)(N*k) inclusive
 *
 * Returns:  TRUE if a bin was incremented, FALSE otherwise.
 */
int
FillHistogram (FILE *ofp, double score, int N, double k, int shift, int *histo)
{
  int top;
  int found = FALSE;
  int bin;

  top = (int)N*k;

  /* out-of-range scores go straight to the edge bins */
  if (score >=  N-shift) { histo[top] ++; return TRUE; }
  if (score <=  -shift)  { histo[0]   ++; return TRUE; }

  /* in-range score: look for the bin whose interval holds it */
  bin = 0;
  while (bin <= top) {
    if (score >= bin/k-shift && (bin+1)/k-shift > score) {
      histo[bin] ++;
      found = TRUE;
    }
    bin ++;
  }

  return found;
}

/* Function: FitEVD()
 *
 * Purpose:  Fit lambda and mu to the L null scores in scnull, turn the
 *           requested tail probability fp into a score cutoff
 *               cutoff = mu - log(-log(1 - fp)) / lambda,
 *           and report what fraction of the L scores in sc reach that cutoff.
 *
 * Args:     fp     - tail probability used to place the cutoff
 *           L      - number of entries in both sc and scnull
 *           sc     - scores to be counted against the cutoff
 *           scnull - scores used for the fit
 *
 * Returns:  (number of sc[n] >= cutoff) / L.
 */
double 
FitEVD (double fp, int L, double *sc, double *scnull)
{
  double lambda;
  double mu;
  double cutoff;
  int    hits = 0;
  int    n;

  /* parameters come from the null sample only */
  lambda = ml_lambda_evd (L, scnull);
  mu     = ml_mu_evd (lambda, L, scnull);

  cutoff = mu - 1.0/lambda*log(-log(1.0-fp));

  /* count the scores at or beyond the cutoff */
  for (n = 0; n < L; n++) {
    if (sc[n] >= cutoff) hits ++;
  }

  return (double)hits / (double)L;
}

/* Function: FitHistoEVD()
 *
 * Purpose:  Fit lambda and mu to a binned score histogram, treating the lowest
 *           censor_cutoff fraction of the counts as censored, then print the
 *           histogram together with the fitted parameters.
 *
 * Args:     ofp           - output stream for PrintHistogram()
 *           censor_cutoff - cumulative fraction of counts to censor
 *           N, k, shift   - histogram geometry
 *           sc_histo      - the histogram, bins 0..(int)(N*k)
 */
void 
FitHistoEVD (FILE *ofp, double censor_cutoff, int N, double k, int shift, int *sc_histo)
{
  double censor_score;   /* score at which censoring stops      */
  int    censored;       /* number of counts that were censored */
  double lambda;
  double mu;

  /* step 1: translate the fractional cutoff into a score and a count */
  cal_censorship_param (censor_cutoff, N, k, shift, sc_histo, &censor_score, &censored);

  /* step 2: lambda first, then mu given lambda */
  lambda = ml_lambda_evd_from_histo (censor_score, censored, N, k, shift, sc_histo);
  mu     = ml_mu_evd_from_histo (censor_score, censored, lambda, N, k, shift, sc_histo);

  /* step 3: report */
  PrintHistogram (ofp, censor_cutoff, censor_score, censored, lambda, mu, N, k, shift, sc_histo);
}

/* Function: PrintHistogram()
 *
 * Purpose:  Write three '#' header lines (geometry, censoring, fitted
 *           parameters) followed by one tab-separated row per bin:
 *               bin_start  count  cumulative_count  [count/total  cumulative/total]
 *           Rows are printed only from one bin below the lowest occupied bin
 *           to one bin above the highest occupied bin. The two fraction
 *           columns are left out when the histogram holds no counts.
 *
 * Args:     ofp      - output stream
 *           censor_cutoff, censor, n_censor, lambda, mu - echoed in the header
 *           N, k, shift - histogram geometry; bin i starts at i/k - shift
 *           sc_histo - the histogram, bins 0..(int)(N*k)
 */
void
PrintHistogram (FILE *ofp, double censor_cutoff, double censor, int n_censor, double lambda, double mu, int N, double k, int shift, int *sc_histo)
{  
  int    top;
  int    total     = 0;
  int    running   = 0;
  int    first_hit = -1;   /* lowest occupied bin, -1 if none  */
  int    last_hit  = -1;   /* highest occupied bin, -1 if none */
  int    lo, hi;
  int    b;

  top = (int)N*k;

  fprintf(ofp, "# N = %d k = %.4f shift = %d\n", N, k, shift);
  fprintf(ofp, "# censor_cutoff = %.4f censor = %.4f n_censored = %d\n", censor_cutoff, censor, n_censor);
  fprintf(ofp, "# lambda = %.4f mu = %.4f\n", lambda, mu);

  /* a single sweep gathers the grand total and the occupied range */
  for (b = 0; b <= top; b++) {
    if (sc_histo[b] > 0) {
      if (first_hit < 0) first_hit = b;
      last_hit = b;
    }
    total += sc_histo[b];
  }

  /* printing window: one bin of padding on each side of the occupied range */
  lo = (first_hit < 0) ? top : first_hit - 1;
  hi = (last_hit  < 0) ? 0   : last_hit  + 1;

  for (b = 0; b <= top; b++) {
    running += sc_histo[b];

    if (b < lo || b > hi) continue;

    fprintf(ofp, "%.4f\t%d\t%d\t", (double)b/k-(double)shift, sc_histo[b], running);

    if (total > 0) fprintf(ofp, "%.4f\t%.4f\n", (double)sc_histo[b]/(double)total, (double)running/(double)total);
    else           fprintf(ofp, "\n");
  }

}


/* Function: cal_censorship_param()
 *
 * Purpose:  Convert a fractional censoring cutoff into a censoring score and
 *           a censored count. Walking the bins upward, the last bin whose
 *           cumulative fraction of the total is still <= censor_cutoff
 *           decides both outputs.
 *
 * Args:     censor_cutoff - cumulative fraction of counts to censor
 *           N, k, shift   - histogram geometry; bin i starts at i/k - shift
 *           histo         - the histogram, bins 0..(int)(N*k)
 *           ret_censor    - RETURN: start score of the deciding bin, or -shift
 *                           when no bin qualifies (or the histogram is empty)
 *           ret_n_censor  - RETURN: cumulative count up to and including the
 *                           deciding bin, or 0
 */
void
cal_censorship_param (double censor_cutoff, int N, double k, int shift, int *histo, double *ret_censor, int *ret_n_censor)
{
  double total     = 0.0;
  double cut_score;
  int    cut_count = 0;
  int    running   = 0;
  int    top;
  int    b;

  top = (int)N*k;

  /* defaults used when nothing falls under the cutoff */
  cut_score = -(double)shift;

  for (b = 0; b <= top; b ++) total += histo[b];

  if (total > 0.0) {
    for (b = 0; b <= top; b ++) {
      running += histo[b];

      /* still within the censored fraction: move the cut up to this bin */
      if (running/total <= censor_cutoff) {
        cut_score = (double)b/k-(double)shift;
        cut_count = running;
      }
    }
  }

  *ret_censor   = cut_score;
  *ret_n_censor = cut_count;
}

/* Function: lambda_f_and_df()
 *
 * Purpose:  Evaluate, for a trial lambda and a list of raw scores, the target
 *           function whose root ml_lambda_evd() searches for, and its first
 *           derivative with respect to lambda:
 *
 *             f  = 1/lambda - mean(x) + sum(x e^{-lambda x}) / sum(e^{-lambda x})
 *             df = -1/lambda^2 - sum(x^2 e^{-lambda x}) / sum(e^{-lambda x})
 *                              + (sum(x e^{-lambda x}) / sum(e^{-lambda x}))^2
 *
 * Args:     lambda - trial value of lambda (must be nonzero)
 *           num    - number of scores
 *           score  - the scores, score[0..num-1]
 *           ret_f  - RETURN: f(lambda)
 *           ret_df - RETURN: df/dlambda
 *
 * Dies if any score is exactly zero, or if sum(e^{-lambda x}) is not positive
 * (which includes num == 0).
 */
void
lambda_f_and_df (double lambda, int num, double *score, double *ret_f, double *ret_df) 
{

   double f;
   double df;
   double mean  = 0.0;
   double z     = 0.0;   /* sum of       e^{-lambda x} */
   double u     = 0.0;   /* sum of x   * e^{-lambda x} */
   double v     = 0.0;   /* sum of x^2 * e^{-lambda x} */
   double log_z;
   double exp_u = 0.0;   /* u/z */
   double exp_v = 0.0;   /* v/z */

   double x;
   double y;
   int    n;
   
   double inv_lambda = 1.0 / lambda;


   for (n = 0; n < num; n++) 
   {
       /* NOTE(review): an exact 0.0 score is rejected here; presumably it marks an
	* unfilled entry -- confirm against the callers. */
       x = score[n]; if (!x) Die("lambda_f_and_df(): wrong histogram? (n=%d, total=%d)\n", n, num); 
       y =  exp(-lambda * x);
       
       mean += x;
       z    += y;
       u    += x * y;
       v    += x * x * y;
       
   }
   
   if (num > 0) mean /= num; 
   
   /* z and v are always positive; u can be positive or negative
    */
   if (!(z > 0.0)) Die("lambda_f_and_df(): weird, z = sum(exp(-lambda*x_i)) is zero (z=%f lambda=%f)\n", z, lambda); 
   log_z = log(z);

   /* the ratios u/z and v/z are formed in log space */
   if (v > 0.0) exp_v =  exp(log(v)  - log_z); 
   if (u > 0.0) exp_u =  exp(log(u)  - log_z); 
   if (u < 0.0) exp_u = -exp(log(-u) - log_z); 

   f  = inv_lambda - mean + exp_u;
   df = -inv_lambda*inv_lambda - exp_v + exp_u*exp_u;
   
   *ret_f  = f;
   *ret_df = df;
}

/* Function: lambda_f_and_df_from_histo()
 *
 * Purpose:  Histogram version of lambda_f_and_df(): evaluate the target
 *           function f(lambda) and its derivative from binned scores with
 *           censoring. Each occupied bin i with start score x = i/k - shift
 *           above the censoring score contributes sc_histo[i] observations at
 *           x. The n_censor censored observations contribute to the
 *           exponential sums as if they all sat at the censoring score, and
 *           do not enter the mean.
 *
 * Args:     censor   - censoring score; bins with x <= censor are skipped
 *           n_censor - number of censored observations
 *           lambda   - trial value of lambda (must be nonzero)
 *           N, k, shift - histogram geometry
 *           sc_histo - the histogram, bins 0..(int)(N*k)
 *           ret_f    - RETURN: f(lambda)
 *           ret_df   - RETURN: df/dlambda
 *
 * Dies if the total of the exponential sums is not positive.
 */
void
lambda_f_and_df_from_histo (double censor, int n_censor, double lambda, int N, double k, int shift, int *sc_histo, double *ret_f, double *ret_df) 
{

   double f;
   double df;
   double mean  = 0.0;
   double z     = 0.0;   /* sum of       e^{-lambda x} */
   double u     = 0.0;   /* sum of x   * e^{-lambda x} */
   double v     = 0.0;   /* sum of x^2 * e^{-lambda x} */
   double add_z = 0.0;   /* censored-data contributions to z, u, v */
   double add_u = 0.0;
   double add_v = 0.0;
   double log_z;
   double exp_u = 0.0;   /* u/z */
   double exp_v = 0.0;   /* v/z */

   double num = 0.0;     /* number of uncensored observations */
   double val;

   double x;
   double y;
   int    i;
   int    dim;
   
   double inv_lambda = 1.0 / lambda;

   dim = (int)N*k;

   if (n_censor > 0) {
       add_z = n_censor*exp(-lambda*censor);
       add_u = add_z * censor;
       add_v = add_u * censor;
   }


   for (i = 0; i <= dim; i++) 
   {
       x = (double)i/k - (double)shift;
       
       if (x > censor && sc_histo[i] > 0) {
	 y =  exp(-lambda * x);
	 
	 val = (double)sc_histo[i];
	 
	 num += val;
	 
	 mean += val * x;
	 z    += val * y;
	 u    += val * x * y;
	 v    += val * x * x * y;
       }
   }

   if (num > 0.0) mean /= num; 
   
   u += add_u;
   z += add_z;
   v += add_v;
   
   /* z and v are always positive; u can be positive or negative
    */
   if (!(z > 0.0)) Die("lambda_f_and_df_from_histo(): weird, z = sum(exp(-lambda*x_i)) is zero (z=%f lambda=%f)\n", z, lambda); 
   log_z = log(z);

   /* the ratios u/z and v/z are formed in log space */
   if (v > 0.0) exp_v =  exp(log(v)  - log_z); 
   if (u > 0.0) exp_u =  exp(log(u)  - log_z); 
   if (u < 0.0) exp_u = -exp(log(-u) - log_z); 

   f  = inv_lambda - mean + exp_u;
   df = -inv_lambda*inv_lambda - exp_v + exp_u*exp_u;
   
   *ret_f  = f;
   *ret_df = df;

}

/* Function: ml_lambda_evd()
 *
 * Purpose:  Find lambda for a list of raw scores by Newton-Raphson iteration
 *           on the function computed by lambda_f_and_df(). The search starts
 *           at lambda = 0.1 and stops once |f| <= MARGIN.
 *
 * Args:     num   - number of scores
 *           score - the scores, score[0..num-1]
 *
 * Returns:  the lambda at which the iteration stopped.
 *
 * Dies if the derivative falls within MARGIN of zero, or if the iterate
 * exceeds 50.
 *
 * NOTE(review): there is no cap on the number of iterations and no check that
 * the iterate stays positive.
 */
double
ml_lambda_evd (int num, double *score)
{
  
    double lambda;
    double it = 0.1;   /* current Newton-Raphson iterate */

    double f;
    double df;

    lambda_f_and_df (it, num, score, &f, &df);
    /*printf("IT:%f %f %f\n", it, f, df);*/

    while (f > MARGIN || f < -MARGIN) {

	if (df < MARGIN && df > -MARGIN) Die("weird; derivative is zero\n"); 
	else                                    
	  it -= f/df;               

	/* report the offending value itself */
	if (it > 50) Die("careful; lambda is getting very large (%f)\n", it);

	lambda_f_and_df (it, num, score, &f, &df);
	/*printf("IT:%f %f %f\n", it, f, df);*/
   
   }

    lambda = it;

    return lambda; 
}

/* Function: ml_lambda_evd_from_histo()
 *
 * Purpose:  Find lambda for a binned, censored score histogram by
 *           Newton-Raphson iteration on the function computed by
 *           lambda_f_and_df_from_histo(). The search starts at lambda = 0.1
 *           and stops once |f| <= MARGIN.
 *
 * Args:     censor   - censoring score
 *           n_censor - number of censored observations
 *           N, k, shift - histogram geometry
 *           sc_histo - the histogram, bins 0..(int)(N*k)
 *
 * Returns:  the lambda at which the iteration stopped.
 *
 * Dies if the derivative falls within MARGIN of zero, or if the iterate
 * exceeds 50.
 *
 * NOTE(review): there is no cap on the number of iterations and no check that
 * the iterate stays positive.
 */
double
ml_lambda_evd_from_histo (double censor, int n_censor, int N, double k, int shift, int *sc_histo) 
{
  
    double lambda;
    double it = 0.1;   /* current Newton-Raphson iterate */

    double f;
    double df;

    lambda_f_and_df_from_histo (censor, n_censor, it, N, k, shift, sc_histo, &f, &df);
    /*printf("IT:%f %f %f\n", it, f, df);*/

    while (f > MARGIN || f < -MARGIN) {

	if (df < MARGIN && df > -MARGIN) Die("weird; derivative is zero\n"); 
	else                                    
	  it -= f/df;               

	/* report the offending value itself */
	if (it > 50) Die("careful; lambda is getting very large (%f)\n", it);

	lambda_f_and_df_from_histo (censor, n_censor, it, N, k, shift, sc_histo, &f, &df);
	/*printf("IT:%f %f %f\n", it, f, df);*/
	
    }

    lambda = it;

    return lambda; 
}


double
ml_mu_evd (double lambda, int num, double *score) 
{
   double mu = 0.0;
   int    n;

    for (n = 0; n < num; n ++) mu += exp(-lambda * score[n]);

    if (num > 0) mu = log(mu/num);
    mu /= -lambda;

    return mu;
}

double
ml_mu_evd_from_histo (double censor, int n_censor, double lambda, int N, double k, int shift, int *sc_histo) 
{
  double score;
  double num    = 0.0;
  double add_mu = 0.0;
  double mu     = 0.0;
  double val;
  int    dim;
  int    i;
  
  dim = (int)N*k;
  
  if (n_censor > 0) add_mu = n_censor*exp(-lambda*censor);
  
  
  for (i = 0; i <= dim; i ++) {
    
    score = (double)i/k - (double)shift;
    
    if (score > censor) {
      val = (double)sc_histo[i];
      
      num += val;
      mu  += val * exp(-lambda * score);
    }
  }
  
  mu += add_mu;
  
  if (num > 0) mu = log(mu/num);
  mu /= -lambda;
  
  return mu;
}
