/* $Log: feas_cv.c,v $
 * Revision 1.7  1996/07/10  15:41:48  jracine
 * Code compiles on numerous platforms, appears to run well.
 *
 * Revision 1.1  1995/05/01  13:34:27  jracine
 * Initial revision
 * */

/* static char rcsid[] = "$Id: feas_cv.c,v 1.7 1996/07/10 15:41:48 jracine Exp jracine $" */ 

/*

  Program for the computationally efficient h-block cross-validation algorithm
  by J. Racine, (C) 1996.

  This code is not optimized for speed. It simply demonstrates
  how an O(T^2) algorithm can be converted into
  an O(T) one. If you wanted to implement this for commercial
  real-time applications, the matrix routines and implementation
  could be tweaked a fair bit (e.g. taking advantage of symmetric
  nature of various matrices etc.)

  This code uses modified versions of the matrix routines
  coded by Patrick Ko Shu Pui from `Mathematical Methods
  in the Physical Sciences', John Wiley and Sons, 2nd Ed.,
  1983, Chap 3. I am indebted to him for his code.

  INSTRUCTIONS FOR USE:

  COMPILING - the code is written in ANSI C. This code has been
  compiled using gcc 2.7.2, MS C++ for NT 4.0, and SUNOS's cc.
  Makefiles are included for gcc and MS C++ for NT 4.0.

  To enable debugging, define DEBUG  in the makefile.
  To enable timing with the system clock (seconds),
  enable SECS_TIME in the makefile, while for milliseconds
  using gcc, enable MILLI_TIME

  DATA FILES - this program expects a disk file named data.dat which
  contains K+1 columns, the first being the dependent variable Y and the
  next K being the matrix of regressors, X. The file is expected to be
  Tx(K+1), that is, T observations (T lines long) by K+1 columns.

  AUXILIARY FILES -

  num_reg.dat - one line containing the number of regressors, K.
  num_obs.dat - one line containing the number of observations, T.
  h_block.dat - one line containing h - the size of the removed
  data set will be 2h+1.

  If you have any questions regarding this code, please feel
  free to contact me - jracine@nonlin.bsn.usf.edu

*/

#include <float.h>
#include <malloc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include "matrix.h"

#ifdef SUNOS
/*
 * Stand-in for difftime(), compiled only when SUNOS is defined
 * (presumably because that platform's C library does not supply one --
 * confirm against the target's headers).
 *
 * Returns the number of seconds from t0 to t1 as an unsigned int.  When
 * t1 is earlier than t0 the magnitude is negated in unsigned arithmetic,
 * i.e. it wraps modulo UINT_MAX+1, so a caller that converts the result
 * to int sees a negative count on the usual two's-complement targets.
 *
 * Note that, unlike the standard function, this one does not return a
 * double.
 */
unsigned int difftime(time_t t1, time_t t0);
unsigned int difftime(time_t t1, time_t t0)
{
  /* Earlier revisions first subtracted time(NULL) from each argument.
     The two calls could straddle a clock tick and skew the result by a
     second, and the rebasing cancels out of the difference anyway, so
     the arguments are now compared directly. */
  return (t0 <= t1 ? (unsigned int) (t1-t0) : -(unsigned int)(t0-t1));
}
#endif

int main()
{
  MATRIX  X;
  MATRIX  XT;
  MATRIX  XTX;
  MATRIX  XTXINV;
  MATRIX  Y;
  MATRIX  XTY;
  MATRIX  IDENT;
  MATRIX  X_SUB;
  MATRIX  X_SUBT;
  MATRIX  Y_ROW;
  MATRIX  Y_SUB;
  MATRIX  X_ROW;

  MATRIX  TMP_DATA;

  /* Some temporary dummies which are reused */

  MATRIX MAT_DUM_11;
  MATRIX MAT_DUM_REGREG;
  MATRIX MAT_DUM_REGREG_A;
  MATRIX MAT_DUM_REGREG_B;
  MATRIX MAT_DUM_REGREG_C;
  MATRIX MAT_DUM_REG2H1;
  MATRIX MAT_DUM_2H1REG;
  MATRIX MAT_DUM_2H12H1;
  MATRIX MAT_DUM_REG1;
  MATRIX MAT_DUM_REG1_A;
  MATRIX MAT_DUM_REG1_B;

  FILE  *fp;
  int i, j;

  int h;
  int num_ob;
  int num_reg;

  double cv_eff = 0.0;
  double tmp;

  time_t t1;
  time_t t2;

#ifdef MILLI_TIME
	uclock_t time_1;
	uclock_t time_2;
#endif

#ifdef SECS_TIME
    t1 = time(&t1);
#endif

  /* Get data on number of observations, regressors, and h from files */

  if (!(fp = fopen( "h_block.dat", "r" )))
    {
    fprintf( stderr, "file `h_block.dat' cannot be opened\n" );
    exit (0);
    }

  fscanf(fp,"%d",&h);

  fclose(fp);

  if (!(fp = fopen( "num_obs.dat", "r" )))
    {
    fprintf( stderr, "file `num_obs.dat' cannot be opened\n" );
    exit (0);
    }

  fscanf(fp,"%d",&num_ob);

  fclose(fp);

  if (!(fp = fopen( "num_reg.dat", "r" )))
    {
    fprintf( stderr, "file `num_reg.dat' cannot be opened\n" );
    exit (0);
    }

  fscanf(fp,"%d",&num_reg);

  fclose(fp);

  if(num_reg <= 0)
    {
    fprintf( stderr, "Invalid number of regressors\n" );
    exit (0);
    }

  if(num_ob <= num_reg)
    {
    fprintf( stderr, "Invalid degrees of freedom\n" );
    exit (0);
    }

  if(num_ob <=0)
    {
    fprintf( stderr, "Invalid number of observations\n" );
    exit (0);
    }

  if(((h < 0) || h >=(int) (float)  num_ob/(float) 2))
    {
    fprintf( stderr, "Invalid size of h block\n" );
    exit (0);
    }

#ifdef BANNER
    printf("                      ** Feasible h-block cross validation ** \n");
    printf("                      **   Copyright (C) 1996, J. Racine   ** \n\n");
    printf("  Number of observations: %d\n  Number of regressors: %d\n",num_ob, num_reg);
    printf("  Size of h-block: %d\n  Size of removed block: %d\n\n", h, 2*h+1);
#endif

  /* Create all matrices */

  X = mat_creat( num_ob, num_reg, UNDEFINED );
  XT = mat_creat( num_reg, num_ob, UNDEFINED );
  XTX = mat_creat( num_reg, num_reg, UNDEFINED );
  XTXINV = mat_creat( num_reg, num_reg, UNDEFINED );
  Y = mat_creat( num_ob, 1, UNDEFINED );
  XTY = mat_creat( num_reg, 1, UNDEFINED );
  X_SUB = mat_creat( 2*h+1, num_reg, UNDEFINED);
  X_SUBT = mat_creat( num_reg, 2*h+1, UNDEFINED);
  X_ROW = mat_creat( 1, num_reg, UNDEFINED);
  Y_ROW = mat_creat( 1, 1, UNDEFINED);
  Y_SUB = mat_creat( 2*h+1, 1, UNDEFINED);
  IDENT = mat_creat( 2*h+1, 2*h+1, UNIT_MATRIX);

  /* Create extra temporary dummies */

  MAT_DUM_11 = mat_creat( 1, 1, UNDEFINED);
  MAT_DUM_REGREG = mat_creat( num_reg, num_reg, UNDEFINED);
  MAT_DUM_REGREG_A = mat_creat( num_reg, num_reg, UNDEFINED);
  MAT_DUM_REGREG_B = mat_creat( num_reg, num_reg, UNDEFINED);
  MAT_DUM_REGREG_C = mat_creat( num_reg, num_reg, UNDEFINED);
  MAT_DUM_REG2H1 = mat_creat( num_reg, 2*h+1, UNDEFINED);
  MAT_DUM_2H1REG = mat_creat( 2*h+1, num_reg, UNDEFINED);
  MAT_DUM_REG1 = mat_creat( num_reg, 1, UNDEFINED);
  MAT_DUM_REG1_A = mat_creat( num_reg, 1, UNDEFINED);
  MAT_DUM_REG1_B = mat_creat( num_reg, 1, UNDEFINED);
  MAT_DUM_2H12H1 = mat_creat( 2*h+1, 2*h+1, UNDEFINED);

  /* First order of business: read in the data and store in the
     matrices X and Y.  */

  TMP_DATA = mat_creat( num_ob, num_reg+1, UNDEFINED );

  if (!(fp = fopen( "data.dat", "r" )))
    {
    fprintf( stderr, "file `data.dat' cannot be opened\n" );
    exit (0);
    }

  fgetmat(TMP_DATA, fp );

  fclose(fp);

  for(i=0;i<num_ob;i++) {
    Y[i][0]=TMP_DATA[i][0];
    for(j=0;j <num_reg; j++) {
      X[i][j]=TMP_DATA[i][j+1];
    }
  }

  mat_free(TMP_DATA); /* Simply temporary */

#ifdef DEBUG
    printf( "|- Matrix X -|\n");
    mat_dumpf( X, "%g " );
    printf( "|- Matrix Y -|\n");
    mat_dumpf( Y, "%g " );
#endif

  /* Calculate necessary matrices which do not change with each iteration */

#ifdef MILLI_TIME
		time_1 = uclock();
#endif

  XT = mat_tran( X, XT );
  XTX = mat_mul( XT, X, XTX );
  XTXINV = mat_inv( XTX, XTXINV );
  XTY = mat_mul( XT, Y, XTY );

#ifdef DEBUG
    printf( "|- Matrix XTX -|\n");
    mat_dumpf( XTX, "%g " );
    printf( "|- Matrix XTXINV -|\n");
    mat_dumpf( XTXINV, "%g " );
    printf( "|- Matrix XTY -|\n");
    mat_dumpf( XTY, "%g " );
    printf( "|- Matrix BETA -|\n");
    mat_dumpf( mat_mul(XTXINV,XTY,MAT_DUM_REG1), "%g " );
#endif

#ifdef MILLI_TIME
		time_2 = uclock();
	  printf("\n  Feasible algorithm overhead: %.3f seconds", (float) (time_2-time_1)/(float) UCLOCKS_PER_SEC);
#endif


  /* We now calculate the cross-validation function */
  /* We break this up into three parts to save on
     a large number of memory mallocs() and frees */

  /* Observations for which h < i < num_ob - h */

  for(i=h;i < num_ob-h; i++) {

#ifdef DEBUG
    printf("i=%d\n", i);
#endif

    X_ROW = mat_sub_mat ( X, i, i, X_ROW, 0, num_reg-1); /* Pointer only... */

    X_SUB = mat_sub_mat ( X, i-h, i+h, X_SUB, 0, num_reg-1); /* Pointer only...*/
    X_SUBT = mat_tran( X_SUB, X_SUBT ); /* Copy... */

    Y_ROW = mat_sub_mat ( Y, i, i, Y_ROW, 0, 0);
    Y_SUB = mat_sub_mat ( Y, i-h, i+h, Y_SUB, 0, 0);

    tmp = mat_sub(Y_ROW,
            mat_mul(X_ROW,
              mat_mul(
                mat_add(XTXINV,
                  mat_mul(
                    mat_mul(XTXINV,
                      mat_mul(
                        mat_mul(X_SUBT,
                         mat_inv(
                           mat_sub(IDENT,
                              mat_mul(
                                mat_mul(X_SUB,XTXINV,MAT_DUM_2H1REG),
                                  X_SUBT,MAT_DUM_2H12H1)
                                ,MAT_DUM_2H12H1)
                             ,MAT_DUM_2H12H1)
                            ,MAT_DUM_REG2H1),
                          X_SUB,MAT_DUM_REGREG_A)
                         ,MAT_DUM_REGREG),
                        XTXINV,MAT_DUM_REGREG_B)
                      ,MAT_DUM_REGREG_C),
                    mat_sub(XTY,
                  mat_mul(X_SUBT,Y_SUB,MAT_DUM_REG1)
               ,MAT_DUM_REG1)
              ,MAT_DUM_REG1_A)
            ,MAT_DUM_11),
          MAT_DUM_11)[0][0];

#ifdef DEBUG
      printf("Finished computation %d\n", i);
      printf("%g\n", tmp*tmp);
#endif

    cv_eff += tmp*tmp;
  }

  mat_free( X_SUB );
  mat_free( X_SUBT );
  mat_free( Y_SUB );
  mat_free( IDENT );
  mat_free( MAT_DUM_REG2H1 );
  mat_free( MAT_DUM_2H1REG );
  mat_free( MAT_DUM_2H12H1 );

  /* Observations for which i < h */

  for(i=0, j=0;i < h; i++, j++) {

#ifdef DEBUG
    printf("i=%d\n", i);
#endif

    X_ROW = mat_sub_mat ( X, i, i, X_ROW, 0, num_reg-1); /* Pointer only... */
    Y_ROW = mat_sub_mat ( Y, i, i, Y_ROW, 0, 0);

    X_SUB = mat_creat( h+j+1, num_reg, UNDEFINED);
    X_SUBT = mat_creat( num_reg, h+j+1, UNDEFINED);
    Y_SUB = mat_creat( h+j+1, 1, UNDEFINED);
    IDENT = mat_creat( h+j+1, h+j+1, UNIT_MATRIX);

    MAT_DUM_REG2H1 = mat_creat( num_reg, h+j+1, UNDEFINED);
    MAT_DUM_2H1REG = mat_creat( h+j+1, num_reg, UNDEFINED);
    MAT_DUM_2H12H1 = mat_creat( h+j+1, h+j+1, UNDEFINED);

    X_SUB = mat_sub_mat ( X, i-j, i+h, X_SUB, 0, num_reg-1); /* Pointer only...*/
    X_SUBT = mat_tran( X_SUB, X_SUBT ); /* Copy... */
    Y_SUB = mat_sub_mat ( Y, i-j, i+h, Y_SUB, 0, 0);

    tmp = mat_sub(Y_ROW,
            mat_mul(X_ROW,
              mat_mul(
                mat_add(XTXINV,
                  mat_mul(
                    mat_mul(XTXINV,
                      mat_mul(
                        mat_mul(X_SUBT,
                         mat_inv(
                           mat_sub(IDENT,
                              mat_mul(
                                mat_mul(X_SUB,XTXINV,MAT_DUM_2H1REG),
                                  X_SUBT,MAT_DUM_2H12H1)
                                ,MAT_DUM_2H12H1)
                             ,MAT_DUM_2H12H1)
                            ,MAT_DUM_REG2H1),
                          X_SUB,MAT_DUM_REGREG_A)
                         ,MAT_DUM_REGREG),
                        XTXINV,MAT_DUM_REGREG_B)
                      ,MAT_DUM_REGREG_C),
                    mat_sub(XTY,
                  mat_mul(X_SUBT,Y_SUB,MAT_DUM_REG1)
               ,MAT_DUM_REG1)
              ,MAT_DUM_REG1_A)
            ,MAT_DUM_11),
          MAT_DUM_11)[0][0];

#ifdef DEBUG
      printf("Finished computation %d\n", i);
      printf("%g\n", tmp*tmp);
#endif

    mat_free( X_SUB );
    mat_free( X_SUBT );
    mat_free( Y_SUB );
    mat_free( IDENT );

    mat_free( MAT_DUM_REG2H1 );
    mat_free( MAT_DUM_2H1REG );
    mat_free( MAT_DUM_2H12H1 );

    cv_eff += tmp*tmp;
  }

  /* Observations for which i > num_ob - h */

  for(i=num_ob-h, j=h-1;i < num_ob; i++, j--) {

#ifdef DEBUG
    printf("i=%d\n", i);
#endif

    X_ROW = mat_sub_mat ( X, i, i, X_ROW, 0, num_reg-1); /* Pointer only... */
    Y_ROW = mat_sub_mat ( Y, i, i, Y_ROW, 0, 0);

    X_SUB = mat_creat( h+j+1, num_reg, UNDEFINED);
    X_SUBT = mat_creat( num_reg, h+j+1, UNDEFINED);
    Y_SUB = mat_creat( h+j+1, 1, UNDEFINED);
    IDENT = mat_creat( h+j+1, h+j+1, UNIT_MATRIX);

    MAT_DUM_REG2H1 = mat_creat( num_reg, h+j+1, UNDEFINED);
    MAT_DUM_2H1REG = mat_creat( h+j+1, num_reg, UNDEFINED);
    MAT_DUM_2H12H1 = mat_creat( h+j+1, h+j+1, UNDEFINED);

    X_SUB = mat_sub_mat ( X, i-h, i+j, X_SUB, 0, num_reg-1); /* Pointer only...*/
    X_SUBT = mat_tran( X_SUB, X_SUBT ); /* Copy... */
    Y_SUB = mat_sub_mat ( Y, i-h, i+j, Y_SUB, 0, 0);

    tmp = mat_sub(Y_ROW,
            mat_mul(X_ROW,
              mat_mul(
                mat_add(XTXINV,
                  mat_mul(
                    mat_mul(XTXINV,
                      mat_mul(
                        mat_mul(X_SUBT,
                         mat_inv(
                           mat_sub(IDENT,
                              mat_mul(
                                mat_mul(X_SUB,XTXINV,MAT_DUM_2H1REG),
                                  X_SUBT,MAT_DUM_2H12H1)
                                ,MAT_DUM_2H12H1)
                             ,MAT_DUM_2H12H1)
                            ,MAT_DUM_REG2H1),
                          X_SUB,MAT_DUM_REGREG_A)
                         ,MAT_DUM_REGREG),
                        XTXINV,MAT_DUM_REGREG_B)
                      ,MAT_DUM_REGREG_C),
                    mat_sub(XTY,
                  mat_mul(X_SUBT,Y_SUB,MAT_DUM_REG1)
               ,MAT_DUM_REG1)
              ,MAT_DUM_REG1_A)
            ,MAT_DUM_11),
          MAT_DUM_11)[0][0];

#ifdef DEBUG
      printf("Finished computation %d\n", i);
      printf("%g\n", tmp*tmp);
#endif

    mat_free( X_SUB );
    mat_free( X_SUBT );
    mat_free( Y_SUB );
    mat_free( IDENT );

    mat_free( MAT_DUM_REG2H1 );
    mat_free( MAT_DUM_2H1REG );
    mat_free( MAT_DUM_2H12H1 );

    cv_eff += tmp*tmp;
  }

  cv_eff /= ((double)(num_ob));

#ifdef MILLI_TIME
  	time_2 = uclock();
	  printf("\n  Feasible execution time: %.3f seconds", (float) (time_2-time_1)/(float) UCLOCKS_PER_SEC);

    if (!(fp = fopen( "time.dat", "w" )))
      {
      fprintf( stderr, "file `time.dat' cannot be opened\n" );
      exit (0);
      }
  
    fprintf(fp,"%.3f\n", (float) (time_2-time_1)/(float) UCLOCKS_PER_SEC);
  
    fclose(fp);
#endif

  printf("\n\n  CV = %g\n", cv_eff);

  if (!(fp = fopen( "output.dat", "w" )))
    {
    fprintf( stderr, "file `output.dat' cannot be opened\n" );
    exit (0);
    }

  fprintf(fp,"%g\t%d\t%d\n", cv_eff, num_reg, h);

  fclose(fp);

  /* Free all allocated matrices */

  mat_free( X );
  mat_free( XT );
  mat_free( XTX );
  mat_free( XTXINV );
  mat_free( Y );
  mat_free( XTY );
  mat_free( X_ROW );
  mat_free( Y_ROW );

  mat_free( MAT_DUM_11 );
  mat_free( MAT_DUM_REGREG );
  mat_free( MAT_DUM_REGREG_A );
  mat_free( MAT_DUM_REGREG_B );
  mat_free( MAT_DUM_REGREG_C );
  mat_free( MAT_DUM_REG1 );
  mat_free( MAT_DUM_REG1_A );
  mat_free( MAT_DUM_REG1_B );

#ifdef SECS_TIME
    t2 = time(&t2);
    printf("\n  Elapsed time: %ld seconds\n", (int) difftime(t2,t1));
#endif

#ifdef MILLI_TIME
  	time_2 = uclock();
	  printf("\n  Elapsed time: %.3f seconds\n", (float) (time_2-time_1)/(float) UCLOCKS_PER_SEC);
#endif

#ifdef BANNER
  printf("\nProgram terminated normally.\n");
#endif

  return(0);

}
