/* FILE anova */
/* From David Bradley, modified by me */

#define NON_PARAMETRIC	0		/* Parametric or non-parametric? */


static double gammln (double xx);
#if NON_PARAMETRIC == 1
static void rank_dmatrix(double **data, double **rankdata, int rows, int cols);
static double chiprob (double chisq, int dof);
static double gammq(double a, double x);
static void gser(double *gamser, double a, double x, double *gln);
static void gcf(double *gammcf, double a, double x, double *gln);
#endif
#if NON_PARAMETRIC == 0
static double betai(double a, double b, double x);
static double betacf(double a, double b, double x);
static double fprob (double f, int model_dof, int error_dof, int tails);
#endif

#include <math.h>
#include <float.h>
#include "deffs.h"


#if NON_PARAMETRIC == 1
/* FUNCTION rank_dmatrix */
         /* ranks a matrix of doubles.  Note: data and rankdata MUST be 2D
            arrays defined by declaring double **data and double **rankdata
            (then allocating with dmatrix).  
	    */
static void rank_dmatrix (double **data, double **rank_data, int rows, int cols) {
 int i, j, k, l;
 double rank;

 for (i=0; i<rows; i++)
  for (j=0; j<cols; j++) {
    rank = 1.0;

    if ((int)data[i][j] == FAIL) {
      rank_data[i][j] = (double)FAIL;
      continue;
      }

    for (k=0; k<rows; k++)
     for (l=0; l<cols; l++) {
       if ( (k == i && l == j) || (int)data[k][l] == FAIL) continue;
       if (data[k][l] < data[i][j]) rank += 1.0;
       else if (fabs(data[k][l] - data[i][j]) < .000001) rank += 0.5;
       }

    rank_data[i][j] = rank;
    }
 }

/* The following routines are from or modified from Numerical Recipes.  */

/* FUNCTION gammq */
	 /* Gives Q, the probability that your model
	    really is valid but the chisq value just happened to be this lousy.
	    If Q is less than the significance level, that means you have to
	    reject the model.  To use gammq, enter (as parameter 'a') the
	    degrees of freedom divided by 2.0 (e.g. dof/2.0), followed by
	    (as parameter 'x') the chisq value for the model, also divided by
	    2.0 (e.g. chisq/2.0).  The function returns Q, the probability that
	    chisq was accidentally this bad.  NOTE:  Chisq is a lot like the
	    residual sum of squares, except that it takes into account the
	    variance.*/
static double gammq(double a, double x) {
  double gamser = 0.0,gammcf=0.0,gln;

  if(x < (a+1.0)) {
    gser(&gamser,a,x,&gln);
    return(1.0-gamser);
   } else {
    gcf(&gammcf,a,x,&gln);
    return(gammcf);
    }
 }


#define ITMAX 100      /* NOTE: ITMAX and EPS definitions required for gser () */
#define EPS 3.0e-7     /*  and gcf () */

/* FUNCTION gser */
static void gser(double *gamser, double a, double x, double *gln) {
 int n;
 double sum,del,ap;

 *gln=gammln(a);

 if(x<=0.0) {
    *gamser=0.0;
     return;
   }

 ap=a;
 del=sum=1.0/a;
 for(n=1;n<=ITMAX;n++) {
   ap += 1.0;
   del *= x/ap;
   sum += del;
   if (fabs(del) < fabs(sum)*EPS) {
      *gamser=sum*exp(-x+a*log(x)-(*gln));
      return;
      }
   }
}

/* FUNCTION gcf */
static void gcf(double *gammcf, double a, double x, double *gln) {
   int n;
   double gold=0.0, g,fac=1.0,b1=1.0;
   double b0=0.0,anf,ana,an,a1,a0=1.0;

   *gln=gammln(a);
   a1=x;
   for(n=1;n<=ITMAX;n++) {
     an=(double) n;
     ana=an-a;
     a0=(a1+a0*ana)*fac;
     b0=(b1+b0*ana)*fac;
     anf=an*fac;
     a1=x*a0+anf*a1;
     b1=x*b0+anf*b1;
     if (fabs(a1) > .00000001) {
       fac=1.0/a1;
       g=b1*fac;
       if(fabs((g-gold)/g) < EPS) {
	 *gammcf=exp(-x+a*log(x)-(*gln))*g;
	 return;
         }
       gold=g;
       }
     }
  }
# endif


/* FUNCTION gammln */
static double gammln(double xx) {
 double x,tmp,ser;
 static double cof[6]={76.18009173,-86.50532033,24.01409822,
			-1.231739516,0.120858003e-2,-0.536382e-5};
 int j;
 x=xx-1.0;
 tmp=x+5.5;
 tmp -= (x+0.5)*log(tmp);
 ser=1.0;
 for(j=0;j<=5;j++) {
   x += 1.0;
   ser += cof[j]/x;
   }
 return(-tmp+log(2.50662827465*ser));
}


/* FUNCTION init_dmatrix */
void init_dmatrix(double **mat, int rows, int cols) {
 int i,j;
 for (i=0;i<rows;i++) for (j=0;j<cols;j++) mat[i][j] = 0.;
 }


/* FUNCTION dmatrix */
double **dmatrix(int rows, int cols) {
 int i;
 double **m;

 if(( m = (double **)malloc((size_t) rows*sizeof(double *)))==NULL )
    Exit("Allocation error in dmatrix (row)", "dmatrix");
 for(i=0;i<rows;i++) {
    if( ( m[i]=(double *)malloc((size_t) cols*sizeof(double)) )==NULL)
       Exit("Allocation error in dmatrix (col)", "dmatrix");
    }
 return(m);
 }

/* FUNCTION free_dmatrix */
void free_dmatrix(double **m, int rows /*, int cols */) {
 int i;
 for(i=rows-1;i>=0;i--) free((char*) m[i]);
 free((char*) m);
 }

#if NON_PARAMETRIC == 1
/* FUNCTION kruskal_wallis */
/* FUNCTION anova */
	 /* Calculates Kruskal-Wallis nonparametric ANOVA test */
   	 /* Pass matrix: rows are factors, columns are replicates. */
double *anova(double **data, int rows, int cols) {

/* NOTE: "data" MUST be created using a pointer to an array of pointers
   (with dmatrix).  Also, it must be made of doubles.  The return value
   is the p value for the test.

   THE ROWS OF "DATA" REPRESENT THE ANOVA CLASSES, AND THE COLUMNS
   REPRESENT THE REPLICATES WITHIN EACH CLASS.

 The KW test is very powerful for biological data.  If data are distributed
 perfectly normally (i.e. gaussian), AND the variances of the individual
 ANOVA groups (in this case the rows of the rankdata matrix), then the KW
 test is only 5% less powerful than parametric (i.e. regular) ANOVA.  However,
 whenever data are not normally distributed and/or variances are not equal
 (as is almost always the case with bio data), the KW test is likely to
 become more powerful, since it makes no assumptions about distribution and
 variance equality.  In general, it is  preferable to ANOVA for biological data.

 NOTE: I use a chisquare probability here, which works well when we have
 at least 5 groups or many replicates.  However, if we have less than 5
 groups and small replicates (say, less than 10), there will be some loss
 of power.

 See Zar, p. 177 for details on the KW test.

 NOTE: This routine doesn't make offset assumptions, since you include
 the actual array boundaries.

 Results are stored in a static vector of doubles called results.  The
 address of this vector is returned.  Values are as follows:

 results[0] : p value for the test
 results[1] : degrees of freedom
 results[2] : total samples (including missing)
 results[3] : missing samples
 results[4] : H value
 */


 static double results[5];
 double **rank_data;
 int N, n;
 int i, j, missing_count;
 double factor, r, h;
 double Q;

 /* First rank the data using the rank_dmatrix routine */

 rank_data = dmatrix(rows, cols);

 rank_dmatrix(data, rank_data, rows, cols);

 results[1] = (double) rows - 1;

 /* determine n */
 N = 0;
 missing_count = 0;

 for (i=0; i<rows; i++)
  for (j=0; j<cols; j++)
   if ( (int)rank_data[i][j] != FAIL ) N++;
   else missing_count++;

 /* fprintf("N = %d (%d missing), ", N, missing_count); */

 results[2] = (double)N; results[3] = (double)missing_count;

 if (N == 0)
   Exit("N is zero!", "Kruskal-Wallis");

 factor = 12.0 / ( (double)N * ( (double)N+1.0 ) );

 h = 0.0;

 for (i=0; i<rows; i++) {
   r = 0.0;
   n = 0;
   for (j=0; j<cols; j++)
    if ( (int)rank_data[i][j] != FAIL) {
      r += rank_data[i][j];
      n++;
      }

   if (n == 0)
     Exit("n is zero!","Kruskal-Wallis");

   h += ( pow(r,2.0)/(double)n );
   }

 h *= factor;
 h -= 3.0*((double)N+1.0);

 Q = chiprob(h, rows - 1);

 results[4] = h;
 results[0] = Q;

 free_dmatrix(rank_data, rows /*, cols*/);
 return(results);
 }

/* FUNCTION chiprob */
	 /* Calculates probability from a chi-squared distribution.  To use,
	    calculate your chisquared value.  Call this routine, sending over
	    chisq and deg. of freedom.  This function returns the probability
	    that your model is valid, but chisq accidentally ended up poor
	  */
static double chiprob(double chisq, int dof) {

 return(gammq((double)dof/2.0,chisq/2.0));
 }


#endif

#if NON_PARAMETRIC == 0

/* FUNCTION betai */
     /* Incomplete beta distribution, from Numerical Recipes.  Used
	to calculate t and F distribution probabilities, according
	to transformation equations in my subroutines 'tprob()' and 'fprob()'.
	Uses betacf() (also from Numerical Recipes) to make intermediate
	calculations. */
static double betai (double a, double b, double x) {
 double bt;

 if (x < 0.0 || x > 1.0) Exit("Bad x in betai", "betai");
 if (fabs(x) > 0.000001 || (fabs(x-1) > 0.00001)) bt=0.0;
 else
  bt=exp(gammln(a+b)-gammln(a)-gammln(b)+a*log(x)+b*log(1.0-x));
 if (x < (a+1.0)/(a+b+2.0)) return(bt*betacf(a,b,x)/a);
 else return(1.0-bt*betacf(b,a,1.0-x)/b);
 }

#define MAXIT 100
#define EPS 3.0e-7
#define FPMIN 1.0e-30

/* FUNCTION betacf */
	 /* used by betai() */
static double betacf (double a, double b, double x) {
 int m, m2;
 double aa,c,d,del,h,qab,qam,qap;

 qab=a+b;
 qap=a+1.0;
 qam=a-1.0;
 c=1.0;
 d=1.0-qab*x/qap;
 if (fabs(d) < FPMIN) d=FPMIN;
 d=1.0/d;
 h=d;
 for (m=1;m<=MAXIT;m++) {
   m2=2*m;
   aa=m*(b-m)*x/((qam+m2)*(a+m2));
   d=1.0+aa*d;
   if (fabs(d) < FPMIN) d=FPMIN;
   c=1.0+aa/c;
   if (fabs(c) < FPMIN) c=FPMIN;
   d=1.0/d;
   h *= d*c;
   aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2));
   d=1.0+aa*d;
   if (fabs(d) < FPMIN) d=FPMIN;
   c=1.0+aa/c;
   if (fabs(c) < FPMIN) c=FPMIN;
   d=1.0/d;
   del=d*c;
   h *= del;
   if (fabs(del-1.0) < EPS) break;
   }
 if (m > MAXIT) Exit("\na or b too big, or MAXIT too small", "betacf");
 return(h);
 }


/* FUNCTION fprob */
    /* Calculates probability from an f distribution.  To use this routine,
    calculate your f value (for means comparison, or regression, or
    whatever).  Call this routine, sending over the f, and also the deg. of
    freedom for the numerator (model_dof) and denominator (error_dof) of the
    F ratio equation.  Usually, you should think of numerator as 'model'
    and denominator as 'error' or 'residual'.  This function returns the
    ONE-TAILED probability that F ended up as great as it is by accident.
   */
static double fprob(double f, int model_dof, int error_dof, int tails) {

 double atv;

 atv = betai( (double)error_dof/2.0,  (double)model_dof/2.0,
	      (double)error_dof/( (double)error_dof+(double)model_dof*f ) );

 return(atv*(double)tails);
 }

/* FUNCTION anova */
   	 /* Pass matrix: rows are factors, columns are replicates. */
	 /* Parametric anova.  Put your data in data[][]; data must be a
	    vector of pointers to pointers ( use dmatrix to create it).
	    Accepts missing data.  The results are stored in results.
	  */
double *anova (double **data, int rows, int cols) {
 static double results[5];
 int i,j;
 int N=0, n, miss=0;
 double C, total_sos, groups_sos, error_sos;
 double sumX=0.0, sumX2=0.0, sum_group1=0.0, sum_group2=0.0;
 int total_df, groups_df, error_df;
 double F;

 for (i=0;i<rows;i++)
  for (j=0;j<cols;j++)
    if ((int)data[i][j] == FAIL) {
      miss++;
      continue;
     } else
      N++;

 total_df = N - 1;
 groups_df = rows - 1;
 error_df = N - rows;

 for (i=0;i<rows;i++)
  for (j=0;j<cols;j++)
    if ((int)data[i][j] == FAIL) continue;
    else sumX += data[i][j];

 for (i=0;i<rows;i++)
  for (j=0;j<cols;j++)
    if ((int)data[i][j] == FAIL) continue;
    else sumX2 += data[i][j]*data[i][j];

 for (i=0;i<rows;i++) {
   sum_group1 = 0.0;
   n = 0;
   for (j=0;j<cols;j++)
    if ((int)data[i][j] == FAIL) continue;
    else {
      sum_group1 += data[i][j];
      n++;
      }

   sum_group2 += (sum_group1*sum_group1)/(double)n;
   }

 C = (sumX*sumX)/(double)N;

 total_sos = sumX2 - C;

 groups_sos = sum_group2 - C;

 error_sos = total_sos - groups_sos;

 F = (groups_sos/(double)groups_df)/(error_sos/(double)error_df);

 results[1] = (double)error_df;
 results[2] = (double)N + (double)miss;
 results[3] = (double)miss;
 results[4] = F;
 results[0] = fprob(F,groups_df,error_df,1);

 return(results);
 }
#endif
