/*
  This file contains functions for tracking the user's hand
  position given acceleration measurements.
*/


#include "tracker.h"
#include <math.h>
#include <stdio.h>
#include <strings.h>

static int calibrated = 0;



// linear coefficients for calculating tilt angle as function
// of ir marker position
static double ir_tilt_pol[2];


// Some linear algebra:
// takes dot product of 2 3-vectors
double dot3(double A[3], double B[3]);

// Multiplies two 3x3 matrices, C = A*B
void mult_3matrix(double A[3][3], double B[3][3], double C[3][3]);

// Inverts a 3x3 matrix B = inv(A)
void inv_3matrix(double A[3][3], double B[3][3]);

// Multiplies 3 matrix and 3-vector, c = A*b
void mult_3matrix_3vec(double A[3][3], double b[3], double c[3]);

// Evaluates polynomial at time t
double polyval6(double pos_poly[6][3], double t, int coordinate);
double polyval5(double vel_poly[5][3], double t, int coordinate);
double polyval4(double acc_poly[4][3], double t, int coordinate);

// takes starting index and ending index for polynomial
// and generates a vetor containing time values.
// returns the time value att index k
double gen_time(int indx0,int indx1,int k,double time[200]);

// Evaluates the fitness of a particular combination of
// start/end points and generates the best fitting polynomial
double fitness_function(int indx0,int indx1,int k,
			double acc_log[MAX_MOTION_LENGTH][3],
			double vel_poly[5][3],
			double pos_poly[6][3]);


// Struct for Kalman states that can be kept in the EKF
struct EKF_state{
  double X[4];
  double P[4][4];
};


/* This function returns the position given a corrected acceleration measure
*/
int acc2pos(double acc_x, // inputs: acceleration in world frame
	    double acc_y,
	    double acc_z,
	    
	    double *pos_x, // outputs: position in world frame
	    double *pos_y,
	    double *pos_z,

	    double *vel_x, // outputs: velocity in world frame
	    double *vel_y,
	    double *vel_z,

	    double *end_x, // outputs: motion enpoint in world frame
	    double *end_y,
	    double *end_z,

	    int reset){
  
  int i,j;
  double acc[3] = {acc_x*1.1,acc_y*1.1,acc_z*1.1}; // heuristic scaling
  //  double acc_tmp[3];

  int indx0_, indx1_;
  int indx0, indx1;
  //double t0_, t1_;
  //double t0, t1;

  double fitness = -100000;

  static int k = 0;
  static int in_motion = 0;
  static double vel[3] = {0.0, 0.0, 0.0};
  static double pos[3] = {0.0, 0.0, 0.0};
  static double end[3] = {0.0, 0.0, 0.0};
  static double vel_log[MAX_MOTION_LENGTH][3];
  static double acc_log[MAX_MOTION_LENGTH][3];
  static double acc_log_running[20][3];
  double acc_tan;

  //double acc_poly[4][3];
  double vel_poly[5][3];
  double pos_poly[6][3];
  
  double dt = 0.01; // hard coded for now. should perhaps be measured?

  // Calculate size of tangential acceleration:
  acc_tan = sqrt(acc_x*acc_x + acc_y*acc_y + acc_z*acc_z);

  /* 
     printf("(acc tan:%f)(in_mot:%d past_p:%d k:%d)\n",
 	 acc_tan,in_motion,past_peak,k);
  printf("[vel %5.3f %5.3f %5.3f]",
	 vel[0],vel[1],vel[2]);
  */


  if(reset){
    printf("\n\n RESETTING \n\n");
    k = 0;
    in_motion = 0;
    pos[0] = pos[1] = pos[2] = 0.0;
    vel[0] = vel[1] = vel[2] = 0.0;
    end[0] = end[1] = end[2] = 0.0;
    bzero(vel_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log_running,sizeof(double)*3*20);
    *pos_x = 0.0;
    *pos_y = 0.0;
    *pos_z = 0.0;
    
    *vel_x = 0.0;
    *vel_y = 0.0;
    *vel_z = 0.0;
    
    *end_x = 0.0;
    *end_y = 0.0;
    *end_z = 0.0;
    return 0;
  }

  // We keep a running log of the last 20 readings:
  printf("\n");
  for(i=0;i<19;i++){
    for(j=0;j<3;j++){
      acc_log_running[i][j] = acc_log_running[i+1][j];
      //printf("%5.2f ",acc_log_running[i][j]);
    }
    //printf("\n");
  }
  for(j=0;j<3;j++){
    acc_log_running[19][j] = acc[j];
    //printf("%5.2f ",acc_log_running[19][j]);
  }
    
 


  // if more than 4 samples in a row exceed threshold, we are in motion!
  // else return 0. Once we are in motion, we dont do this check.
  if(in_motion < 4){

    if(acc_tan > ACC_THRESHOLD*1.1){
      in_motion++;
    }else{
      in_motion = 0;
    }
    *pos_x = pos[0];
    *pos_y = pos[1];
    *pos_z = pos[2];

    *vel_x = 0.0;
    *vel_y = 0.0;
    *vel_z = 0.0;

    *end_x = pos[0];
    *end_y = pos[1];
    *end_z = pos[2];

    return 0;
  }

  // Start the logs - initialize with the running logs
  if(in_motion==4){
    k = 19;
    in_motion++;
    for(i=0;i<20;i++){
      for(j=0;j<3;j++){
	acc_log[i][j] = acc_log_running[i][j];
      }
    }
  }
  
  // log each measurement:
  k++;
  for(j=0;j<3;j++){
    acc_log[k][j] = acc[j];
  }
  
  // We find the best 3rd deg poly that fits observed acc
  // vary start and end point
  fitness = -10000000;
  for(indx0_=0;indx0_<20;indx0_++){
    for(indx1_ = indx0_+30;
	indx1_ < indx0_+100;
	indx1_+=2){
      double fitness_;
      fitness_ = fitness_function(indx0_,indx1_,k,acc_log,vel_poly,pos_poly);
      if(fitness_>fitness){
	indx0 = indx0_; 
	indx1 = indx1_; 
	fitness = fitness_;
      }
    }
  }
  fitness_function(indx0,indx1,k,acc_log,vel_poly,pos_poly);


  /*
  {
    int i_,j_;
    printf("\n acc_log:\n");
    for(i_=indx0;i_<=indx1;i_++){
      for(j_=0;j_<3;j_++){
	printf("%5.2f ",acc_log[i_][j_]);
      }
      printf("\n");
    }
    
    printf("\n pos_poly:\n");
    for(i_=0;i_<6;i_++){
      for(j_=0;j_<3;j_++){
	printf("%5.2f ",pos_poly[i_][j_]);
      }
      printf("\n");
    }
  }

  printf("ix0:%d  ix1:%d   k:%d\n",
	 indx0,indx1,k);
  */


  // Analytical integration gives 5th deg poly for pos!
  // and 4 deg poly for vel
  // However, we only use poly-given position if past halfpoint!
  // otherwise use raw integration from start of poly.
  // (Check value "half" off-line in MatLab)
  if(k>((indx1+indx0)/2)){
    //printf("poly!\n");

    *pos_x = pos[0] + polyval6(pos_poly,gen_time(indx0,indx1,k,NULL),0);
    *pos_y = pos[1] + polyval6(pos_poly,gen_time(indx0,indx1,k,NULL),1);
    *pos_z = pos[2] + polyval6(pos_poly,gen_time(indx0,indx1,k,NULL),2);

    *vel_x = polyval5(vel_poly,gen_time(indx0,indx1,k,NULL),0);
    *vel_y = polyval5(vel_poly,gen_time(indx0,indx1,k,NULL),1);
    *vel_z = polyval5(vel_poly,gen_time(indx0,indx1,k,NULL),2);

    *end_x = pos[0] + polyval6(pos_poly,gen_time(indx0,indx1,indx1,NULL),0);
    *end_y = pos[1] + polyval6(pos_poly,gen_time(indx0,indx1,indx1,NULL),1);
    *end_z = pos[2] + polyval6(pos_poly,gen_time(indx0,indx1,indx1,NULL),2);
  }else{
    double pos_tmp[3] = {0.0,0.0,0.0};
    double vel_tmp[3] = {0.0,0.0,0.0};
    int coordinate;
    //printf("int!\n");
    for(i=indx0;i<=k;i++){
      for(coordinate=0;coordinate<3;coordinate++){
	vel_tmp[coordinate] += acc_log[i][coordinate]*dt;
	pos_tmp[coordinate] += vel_tmp[coordinate]*dt;
      }
    }
    *end_x = *pos_x = pos_tmp[0] + pos[0];
    *end_y = *pos_y = pos_tmp[1] + pos[1];
    *end_z = *pos_z = pos_tmp[2] + pos[2];

    *vel_x = vel_tmp[0];
    *vel_y = vel_tmp[1];
    *vel_z = vel_tmp[2];
  }
  // If we are more than 4 samples beyond last point on poly, we are done!
  // reset and start over
  // (Check off-line if this is good strategy!)
  if(k>(indx1+4)){
    
    k = 0;
    in_motion = 0;
    bzero(vel_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    
    // update pos[0-3] to last value of motion!
    pos[0] = *pos_x;
    pos[1] = *pos_y;
    pos[2] = *pos_z;

    end[0] = pos[0];
    end[1] = pos[1];
    end[2] = pos[2];

  }
  
  return 0;
}

/* This function returns the position given a corrected acceleration measure
*/
int acc2pos2(double acc_x, // inputs: acceleration in world frame
	    double acc_y,
	    double acc_z,
	    
	    double *pos_x, // outputs: position in world frame
	    double *pos_y,
	    double *pos_z,

	    double *vel_x, // outputs: velocity in world frame
	    double *vel_y,
	    double *vel_z,

	    int reset){
  
  int i,j;
  double acc[3] = {acc_x*1.1,acc_y*1.1,acc_z*1.1}; // heuristic scaling

  //double acc_tmp[3];

  int indx0_, indx1_;
  int indx0, indx1;
  //double t0_, t1_;
  //double t0, t1;

  double fitness = -100000;

  static int k = 0;
  static int in_motion = 0;
  static double vel[3] = {0.0, 0.0, 0.0};
  static double pos[3] = {0.0, 0.0, 0.0};
  static double vel_log[MAX_MOTION_LENGTH][3];
  static double acc_log[MAX_MOTION_LENGTH][3];
  static double acc_log_running[20][3];
  double acc_tan;

  //double acc_poly[4][3];
  double vel_poly[5][3];
  double pos_poly[6][3];
  
  double dt = 0.01; // hard coded for now. should perhaps be measured?

  // Calculate size of tangential acceleration:
  acc_tan = sqrt(acc_x*acc_x + acc_y*acc_y + acc_z*acc_z);

  /* 
     printf("(acc tan:%f)(in_mot:%d past_p:%d k:%d)\n",
 	 acc_tan,in_motion,past_peak,k);
  printf("[vel %5.3f %5.3f %5.3f]",
	 vel[0],vel[1],vel[2]);
  */


  if(reset){
    printf("\n\n RESETTING \n\n");
    k = 0;
    in_motion = 0;
    pos[0] = pos[1] = pos[2] = 0.0;
    vel[0] = vel[1] = vel[2] = 0.0;
    bzero(vel_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log_running,sizeof(double)*3*20);
    return 0;
  }

  // We keep a running log of the last 20 readings:
  printf("\n");
  for(i=0;i<19;i++){
    for(j=0;j<3;j++){
      acc_log_running[i][j] = acc_log_running[i+1][j];
      //printf("%5.2f ",acc_log_running[i][j]);
    }
    //printf("\n");
  }
  for(j=0;j<3;j++){
    acc_log_running[19][j] = acc[j];
    //printf("%5.2f ",acc_log_running[19][j]);
  }
    
 


  // if more than 4 samples in a row exceed threshold, we are in motion!
  // else return 0. Once we are in motion, we dont do this check.
  if(in_motion < 4){

    if(acc_tan > ACC_THRESHOLD){
      in_motion++;
    }else{
      in_motion = 0;
    }
    *pos_x = pos[0];
    *pos_y = pos[1];
    *pos_z = pos[2];

    *vel_x = 0.0;
    *vel_y = 0.0;
    *vel_z = 0.0;

    return 0;
  }

  // Start the logs - initialize with the running logs
  if(in_motion==4){
    k = 19;
    in_motion++;
    for(i=0;i<20;i++){
      for(j=0;j<3;j++){
	acc_log[i][j] = acc_log_running[i][j];
      }
    }
  }
  
  // log each measurement:
  k++;
  for(j=0;j<3;j++){
    acc_log[k][j] = acc[j];
  }
  
  // We find the best 3rd deg poly that fits observed acc
  // vary start and end point
  fitness = -10000000;
  for(indx0_=0;indx0_<20;indx0_++){
    for(indx1_ = indx0_+30;
	indx1_ < indx0_+100;
	indx1_+=2){
      double fitness_;
      fitness_ = fitness_function(indx0_,indx1_,k,acc_log,vel_poly,pos_poly);
      if(fitness_>fitness){
	indx0 = indx0_; 
	indx1 = indx1_; 
	fitness = fitness_;
      }
    }
  }
  fitness_function(indx0,indx1,k,acc_log,vel_poly,pos_poly);


  /*
  {
    int i_,j_;
    printf("\n acc_log:\n");
    for(i_=indx0;i_<=indx1;i_++){
      for(j_=0;j_<3;j_++){
	printf("%5.2f ",acc_log[i_][j_]);
      }
      printf("\n");
    }
    
    printf("\n pos_poly:\n");
    for(i_=0;i_<6;i_++){
      for(j_=0;j_<3;j_++){
	printf("%5.2f ",pos_poly[i_][j_]);
      }
      printf("\n");
    }
  }

  printf("ix0:%d  ix1:%d   k:%d\n",
	 indx0,indx1,k);
  */


  // Analytical integration gives 5th deg poly for pos!
  // and 4 deg poly for vel
  // However, we only use poly-given position if past halfpoint!
  // otherwise use raw integration from start of poly.
  // (Check value "half" off-line in MatLab)
  if(k>((indx1+indx0)/2)){
    //printf("poly!\n");

    *pos_x = pos[0] + polyval6(pos_poly,gen_time(indx0,indx1,k,NULL),0);
    *pos_y = pos[1] + polyval6(pos_poly,gen_time(indx0,indx1,k,NULL),1);
    *pos_z = pos[2] + polyval6(pos_poly,gen_time(indx0,indx1,k,NULL),2);

    *vel_x = polyval5(vel_poly,gen_time(indx0,indx1,k,NULL),0);
    *vel_y = polyval5(vel_poly,gen_time(indx0,indx1,k,NULL),1);
    *vel_z = polyval5(vel_poly,gen_time(indx0,indx1,k,NULL),2);
  }else{
    double pos_tmp[3] = {0.0,0.0,0.0};
    double vel_tmp[3] = {0.0,0.0,0.0};
    int coordinate;
    //printf("int!\n");
    for(i=indx0;i<=k;i++){
      for(coordinate=0;coordinate<3;coordinate++){
	vel_tmp[coordinate] += acc_log[i][coordinate]*dt;
	pos_tmp[coordinate] += vel_tmp[coordinate]*dt;
      }
    }
    *pos_x = pos_tmp[0] + pos[0];
    *pos_y = pos_tmp[1] + pos[1];
    *pos_z = pos_tmp[2] + pos[2];

    *vel_x = vel_tmp[0];
    *vel_y = vel_tmp[1];
    *vel_z = vel_tmp[2];
  }
  // If we are beyond last point on poly, we are done!
  // reset and start over
  // (Check off-line if this is good strategy!)
  if(k>(indx1)){
    
    k = 0;
    in_motion = 0;
    bzero(vel_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    
    // update pos[0-3] to last value of motion!
    pos[0] = *pos_x;
    pos[1] = *pos_y;
    pos[2] = *pos_z;
  }
  
  return 0;
}

/* This function returns the position given a corrected acceleration measure
*/
int acc2pos_simple(double acc_x, // inputs: acceleration in world frame
	    double acc_y,
	    double acc_z,
	    
	    double *pos_x, // outputs: position in world frame
	    double *pos_y,
	    double *pos_z,
	    int reset){
  
  int i;
  double acc[3] = {acc_x,acc_y,acc_z};
  double acc_tmp[3];
  static int k = 0;
  static int past_peak = 0;
  static int in_motion = 0;
  static double vel[3] = {0.0, 0.0, 0.0};
  static double pos[3] = {0.0, 0.0, 0.0};
  static double vel_log[MAX_MOTION_LENGTH][3];
  static double acc_log[MAX_MOTION_LENGTH][3];
  double acc_tan;
  
  double dt = 0.01; // hard coded for now. should perhaps be measured?

  // do nothing
  //return -1;

  // Calculate size of tangential acceleration:
  acc_tan = sqrt(acc_x*acc_x + acc_y*acc_y + acc_z*acc_z);

  printf("(acc tan:%f)(in_mot:%d past_p:%d k:%d)\n",
	 acc_tan,in_motion,past_peak,k);
  printf("[vel %5.3f %5.3f %5.3f]",
	 vel[0],vel[1],vel[2]);

  if(reset){
    printf("\n\n RESETTING \n\n");
    k = 0;
    past_peak = 0;
    in_motion = 0;
    pos[0] = pos[1] = pos[2] = 0.0;
    vel[0] = vel[1] = vel[2] = 0.0;
    bzero(vel_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    bzero(acc_log,sizeof(double)*3*MAX_MOTION_LENGTH);
    return 0;
  }

  // We let 10 cycles (~100 ms) pass before assuming next motion
  if((past_peak)<0){
    past_peak++;
    for(i=0;i<3;i++){
      vel[i] = 0.0;
      k = 0;
    }
  }else{
    if(!in_motion){
      printf("Not in motion!");

      // is any acceleration detected at all?
      if(acc_tan > 2.0){
	in_motion = 1;
      }
    }
    if(in_motion){
      printf("In motion!");
      k++;

      printf("{k=%d}",k);

      for(i=0;i<3;i++){
	vel[i]+=acc[i]*dt;
      }

      // Check if motion peak is passed:
      if((k>=200 && !past_peak) || (!past_peak && dot3(acc,vel)<0)){
	past_peak = 1;
      }

      // If we are past the peak, we just mirror motion up to here
      if(past_peak){
	in_motion--;
	
	printf("(Return index: %d)",k-(2*past_peak)-1);
        for(i=0;i<3;i++){
          vel[i] = vel_log[k-(2*past_peak)-1][i];
	  acc_tmp[i] = acc_log[k-(2*past_peak)-1][i];
        }
	past_peak++;
      }else{
	for(i=0;i<3;i++){
	  acc_tmp[i] = acc[i];
	}
	in_motion++;
      }
      for(i=0;i<3;i++){
	vel_log[k][i] = vel[i];
	pos[i] += vel[i]*dt + acc_tmp[i]*dt*dt; 
      }

      if(!in_motion){
	past_peak = -20;
      }

    }
    
    
    
  }

  *pos_x = pos[0];
  *pos_y = pos[1];
  *pos_z = pos[2];

  return 0;
}


/* This function returns the acceleration corrected for gravity,
   in the global reference frame, when fed with acceleration
   measures in the controller frame and camera points.
   Returns 0 on success, and -1 if calibration is needed.
*/
int correct_acc(double acc_x, // raw acceleration data for
		double acc_y, // x, y, and z, corrected to 
		double acc_z, // m/s^2, but not for gravity
		
		int ir_x1, // The x and y positions for  
		int ir_y1, // IR marker 1 and 2. These should
		int ir_x2, // be in raw pixel values.
		int ir_y2,

		double *corrected_acc_x, // corrected outputs
		double *corrected_acc_y, 
		double *corrected_acc_z,
		
		double *pan_out,
		double *tilt_out,
		double *yaw_out){

  
  if(calibrated){
    double mid_p_s[2];
    double ir_tilt_r;
    double ir_pan_r;
    double ir_yaw_s;
    double ir_pan_s;
    double ir_tilt_s;

    double R = 0.05;
    double EKF_dummy1,EKF_dummy2,EKF_dummy3;

    // The rotational matrices used for correction
    double tilt_matrix[3][3];
    double pan_matrix[3][3];
    double yaw_matrix[3][3];

    // Vectors and matrices used in the final calculation
    double g[3] = {0.0, 0.0, 9.81};
    double acc[3] = {acc_x, acc_y, acc_z};
    double acc_corr[3];
    double acc_corr_yaw[3];
    double acc_corr_tilt[3];
    //    double temp_matrix1[3][3];
    //double temp_matrix2[3][3];
    //double inv_matrix[3][3];

    //printf("[correct_acc: x1:%d y1:%d x2:%d y2:%d]",ir_x1,ir_y1,ir_x2,ir_y2);

    // do nothing:
    //return -1;

    mid_p_s[0] = (ir_x1+ir_x2)/2.0 - 512; 
    mid_p_s[1] = (ir_y1+ir_y2)/2.0 - 384;

    // calculate yaw angle.
    ir_yaw_s = atan2((ir_y2-ir_y1),(ir_x2-ir_x1));

    // compensate raw tilt and pan measurements for yaw:
    ir_tilt_r = cos(ir_yaw_s)*mid_p_s[1] - sin(ir_yaw_s)*mid_p_s[0];
    ir_pan_r =  cos(ir_yaw_s)*mid_p_s[0] - sin(ir_yaw_s)*mid_p_s[1];

    // calculate pan and tilt angles (note: same polynomial for x and y)
    ir_tilt_s = ir_tilt_r*ir_tilt_pol[0] + ir_tilt_pol[1];
    ir_pan_s = (ir_pan_r*ir_tilt_pol[0] + ir_tilt_pol[1]);


    // Set outputs for pan, tilt, yaw
    *pan_out = ir_pan_s;
    *tilt_out = ir_tilt_s;
    *yaw_out = ir_yaw_s;

    // Keep EKF updated with good measurements when available:
    // Do not use filtered results here.
    Kalman_filter(ir_pan_s,ir_tilt_s,ir_yaw_s,
		  &EKF_dummy1,&EKF_dummy2,&EKF_dummy3,R*R);


    // Matlab version:
    /*
      tilt_matrix = [1 0               0;...
                     0 cos(ir_tilt_s)  sin(ir_tilt_s);...
                     0 -sin(ir_tilt_s) cos(ir_tilt_s)];
      
      pan_matrix = [cos(ir_pan_s) sin(ir_pan_s) 0;...
                   -sin(ir_pan_s) cos(ir_pan_s) 0;...
                    0              0              1];
      
      
      yaw_matrix = [cos(ir_yaw_s)  0 sin(ir_yaw_s);...
                    0              1 0
                    -sin(ir_yaw_s) 0 cos(ir_yaw_s)];
    */

    tilt_matrix[0][0] = 1; tilt_matrix[0][1] = 0;              tilt_matrix[0][2] = 0;
    tilt_matrix[1][0] = 0; tilt_matrix[1][1] = cos(ir_tilt_s); tilt_matrix[1][2] = -sin(ir_tilt_s);
    tilt_matrix[2][0] = 0; tilt_matrix[2][1] = sin(ir_tilt_s);tilt_matrix[2][2] = cos(ir_tilt_s);

    pan_matrix[0][0] = cos(ir_pan_s); pan_matrix[0][1] = -sin(ir_pan_s); pan_matrix[0][2] = 0;
    pan_matrix[1][0] = sin(ir_pan_s); pan_matrix[1][1] = cos(ir_pan_s); pan_matrix[1][2] = 0;
    pan_matrix[2][0] = 0;             pan_matrix[2][1] = 0;             pan_matrix[2][2] = 1;

    yaw_matrix[0][0] = cos(ir_yaw_s); yaw_matrix[0][1] = 0; yaw_matrix[0][2] = -sin(ir_yaw_s);
    yaw_matrix[1][0] = 0;             yaw_matrix[1][1] = 1; yaw_matrix[1][2] = 0;
    yaw_matrix[2][0] = sin(ir_yaw_s); yaw_matrix[2][1] = 0; yaw_matrix[2][2] = cos(ir_yaw_s);

    /*
    // Matlab code:
    g_tmp_s = (yaw_matrix*tilt_matrix*pan_matrix*[0; 0; 9.81])';
    acc_corr(k,:) = ((inv(yaw_matrix*tilt_matrix*pan_matrix)*acc(k,:)')-[0; 0; 9.81])';
    */

    //mult_3matrix(double A[3][3], double B[3][3], double C[3][3]); // C=A*B
    
    // Correct for yaw:
    mult_3matrix_3vec(yaw_matrix,acc,acc_corr_yaw);

    // Correct for tilt:
    mult_3matrix_3vec(tilt_matrix,acc_corr_yaw,acc_corr_tilt);

    // Correct for pan:
    mult_3matrix_3vec(pan_matrix,acc_corr_tilt,acc_corr);

    // set outputs for rotationally corrected acceleration, corrected for g:
    *corrected_acc_x = acc_corr[0]; // -g[0], but g[0]=0
    *corrected_acc_y = acc_corr[1]; // -g[1], but g[1]=0
    *corrected_acc_z = acc_corr[2]-g[2];

    return 0;
  }else{
    return -1;
  }

}


/* This function returns the acceleration corrected for gravity,
   in the global reference frame, when fed with acceleration
   measures in the controller frame.
   Returns 0 on success, and -1 if calibration is needed.
*/
int correct_acc_nocam(double acc_x, // raw acceleration data for
		      double acc_y, // x, y, and z, corrected to 
		      double acc_z, // m/s^2, but not for gravity
		
		      double *corrected_acc_x, // corrected outputs
		      double *corrected_acc_y, 
		      double *corrected_acc_z,
		
		      double pan_in, // set to last known pan value
		      double tilt_in,
		      double yaw_in,

		      double *pan_out,
		      double *tilt_out,
		      double *yaw_out){

  // Deviation from 1 g
  double deviation;
  
  static double g_yaw;
  static double g_pan;
  static double g_tilt;

  static double g_yaw_; //unfiltered
  static double g_pan_;
  static double g_tilt_;

  // The rotational matrices used for correction
  double tilt_matrix[3][3];
  double pan_matrix[3][3];
  double yaw_matrix[3][3];

  // Vectors and matrices used in the final calculation
  double g[3] = {0.0, 0.0, 9.81};
  double acc[3] = {acc_x, acc_y, acc_z};
  double acc_corr[3];
  double acc_corr_yaw[3];
  double acc_corr_tilt[3];

  double R = 4.0;

  // set unmeasurable value
  g_pan_ = pan_in;

  // Calculate deviation from 1 g
  deviation = fabs(9.81 - sqrt((acc_x*acc_x) + 
			       (acc_y*acc_y) + 
			       (acc_z*acc_z)));


  // If the controller is non-moving we update rotation
  if(deviation<0.4){
    g_yaw_ = atan2(acc_x,acc_z);
    g_tilt_ = asin(acc_y/9.81);
    R = 0.4;
  }else{
    g_yaw_ = yaw_in;
    g_tilt_ = tilt_in;
  }
  
  //  printf("Before filter: %6.3f %6.3f %6.3f\n",
  //	 g_pan_,g_tilt_,g_yaw_);
  Kalman_filter(g_pan_,g_tilt_,g_yaw_,
		&g_pan,&g_tilt,&g_yaw,R*R);
  //printf("After filter: %6.3f %6.3f %6.3f\n",
  //	 g_pan,g_tilt,g_yaw);

  // correct for rotation
  tilt_matrix[0][0] = 1; tilt_matrix[0][1] = 0;           tilt_matrix[0][2] = 0;
  tilt_matrix[1][0] = 0; tilt_matrix[1][1] = cos(g_tilt); tilt_matrix[1][2] = -sin(g_tilt);
  tilt_matrix[2][0] = 0; tilt_matrix[2][1] = sin(g_tilt); tilt_matrix[2][2] = cos(g_tilt);
  
  pan_matrix[0][0] = cos(g_pan); pan_matrix[0][1] = -sin(g_pan); pan_matrix[0][2] = 0;
  pan_matrix[1][0] = sin(g_pan); pan_matrix[1][1] = cos(g_pan);  pan_matrix[1][2] = 0;
  pan_matrix[2][0] = 0;          pan_matrix[2][1] = 0;           pan_matrix[2][2] = 1;
  
  yaw_matrix[0][0] = cos(g_yaw); yaw_matrix[0][1] = 0; yaw_matrix[0][2] = -sin(g_yaw);
  yaw_matrix[1][0] = 0;          yaw_matrix[1][1] = 1; yaw_matrix[1][2] = 0;
  yaw_matrix[2][0] = sin(g_yaw); yaw_matrix[2][1] = 0; yaw_matrix[2][2] = cos(g_yaw);
  
  // Correct for yaw:
  mult_3matrix_3vec(yaw_matrix,acc,acc_corr_yaw);
  
  // Correct for tilt:
  mult_3matrix_3vec(tilt_matrix,acc_corr_yaw,acc_corr_tilt);

  // Correct for pan:
  mult_3matrix_3vec(pan_matrix,acc_corr_tilt,acc_corr);

  // set outputs for rotationally corrected acceleration, corrected for g:
  *corrected_acc_x = acc_corr[0]; // -g[0], but g[0]=0
  *corrected_acc_y = acc_corr[1]; // -g[1], but g[1]=0
  *corrected_acc_z = acc_corr[2]-g[2];

  *pan_out  = g_pan;
  *tilt_out = g_tilt;
  *yaw_out  = g_yaw;

  return 0;
}



/* This function calibrates the ir camera to the accelerometers,
   returns 0 on calibration success. Should be called repeatedly
   until it succeeds. Returns positive number of remaining iterations
   until it is calibrated, or negative number to indicate useless
   input data.
*/
int calibrate(double acc_x, // raw acceleration data for
	      double acc_y, // x, y, and z, corrected to 
	      double acc_z, // m/s^2, but not for gravity

	      int ir_x1, // The x and y positions for  
	      int ir_y1, // IR marker 1 and 2. These should
	      int ir_x2, // be in raw pixel values.
	      int ir_y2){
  
  static int k = 0;

  static double g_logged[N_CALIBRATE][3];
  //  static double g_tmp[3];
  static double g_yaw[N_CALIBRATE];
  static double g_tilt[N_CALIBRATE];
  
  static double mid_p[N_CALIBRATE][2];
  static double ir_yaw[N_CALIBRATE];

  // Deviation from 1 g
  double deviation;

  if(calibrated){
    return 0;
  }

  deviation = fabs(9.81 - sqrt((acc_x*acc_x) + 
			      (acc_y*acc_y) + 
			      (acc_z*acc_z)));

  printf("[acc: (%f) %f %f %f, deviation: %f]\n",
	 sqrt((acc_x*acc_x) +
	      (acc_y*acc_y) +
	      (acc_z*acc_z)),
	 acc_x,acc_y,acc_z,deviation);


  // If the controller is moving we don't calibrate
  if(deviation>0.25){
    return -2;
  }

  // we center the camera points;
  ir_x1 -= 512;
  ir_x2 -= 512;

  ir_y1 -= 384;
  ir_y2 -= 384;

  // We collect points until we have enough
  if(k<(N_CALIBRATE-1)){

    // create log of all 
    g_logged[k][0] = acc_x;
    g_logged[k][1] = acc_y;
    g_logged[k][2] = acc_z;

    g_yaw[k] = atan2(acc_x,acc_z);
    g_tilt[k] = asin(acc_y/9.81);

    mid_p[k][0] = (ir_x1+ir_x2)/2.0;
    mid_p[k][1] = (ir_y1+ir_y2)/2.0;

    ir_yaw[k] = atan2(ir_y2-ir_y1,ir_x2-ir_x1);

    k++;

  // When we have enough points we calibrate using least squares
  }
  
  if(k==(N_CALIBRATE-1)){
    double ir_tilt_r[N_CALIBRATE];
    int i;
    double At_gtilt[2] = {0.0,0.0};
    double AtA[2][2] = {{0.0,0.0},{0.0,0.0}};
    double d;
    
    for(i=0;i<N_CALIBRATE;i++){
      // compensate tilt measure for yaw:
      ir_tilt_r[i] = cos(ir_yaw[i])*(mid_p[i][1]) -
	sin(ir_yaw[i])*mid_p[i][0];
    }
    
    // Next we calculate the linear function ir_tilt_pol by calculating
    // ir_tilt_pol = [ir_tilt_r' ones]\g_tilt = A\g_tilt = inv(A'A)A'g_tilt
    // we start by generatin A'A and A_g_tilt, 
    // and finally inverting and multiplying:
    for(i=0;i<N_CALIBRATE;i++){
      AtA[0][0] += ir_tilt_r[i]*ir_tilt_r[i];
      AtA[0][1] += ir_tilt_r[i];
      AtA[1][0] += ir_tilt_r[i];
      AtA[1][1] += 1;

      At_gtilt[0] += ir_tilt_r[i]*g_tilt[i];
      At_gtilt[1] += g_tilt[i];
    }
    // invert and multply:
    // (d = 1/(a11a22-a12a21), used in explicit matric inverse (Kramer)
    d = 1/(AtA[0][0]*AtA[1][1] - AtA[0][1]*AtA[1][0]);
    ir_tilt_pol[0] = AtA[1][1]*d*At_gtilt[0] - AtA[0][1]*d*At_gtilt[1]; 
    ir_tilt_pol[1] = AtA[0][0]*d*At_gtilt[1] - AtA[1][0]*d*At_gtilt[0]; 

    calibrated = 1;
    return 0;    
  }

  return N_CALIBRATE - k;
}


// Multiplies 3 matrix and 3-vector, c = A*b
void mult_3matrix_3vec(double A[3][3], double b[3], double c[3]){
  int i,k;
  for(i=0;i<3;i++){
    c[i] = 0;
    for(k=0;k<3;k++){
      c[i] += A[i][k]*b[k];
    }
  }
}

// Multiplies 4-vextor and scalar, c = a*b
void mult_4vec_scalar(double a[4], double b, double c[4]){
  int i;
  for(i=0;i<4;i++){
    c[i] = a[i]*b;
  }
}

// adds 2 4-vactors, c = a+b
void add_4vec(double a[4], double b[4], double c[4]){
  int i;
  for(i=0;i<4;i++){
    c[i] = a[i]+b[i];
  }
}

// Multiplies 4 matrix and 4-vector, c = A*b
void mult_4matrix_4vec(double A[4][4], double b[4], double c[4]){
  int i,k;
  for(i=0;i<4;i++){
    c[i] = 0;
    for(k=0;k<4;k++){
      c[i] += A[i][k]*b[k];
    }
  }
}



// Multiplies two 3x3 matrices, C = A*B
void mult_3matrix(double A[3][3], double B[3][3], double C[3][3]){
  int i,j,k;
  for(i=0;i<3;i++){
    for(j=0;j<3;j++){
      C[i][j] = 0;
      for(k=0;k<3;k++){
        C[i][j] += A[i][k]*B[k][j];
      }
    }
  }
}



// Multiplies two 4x4 matrices, C = A*B
void mult_4matrix(double A[4][4], double B[4][4], double C[4][4]){
  int i,j,k;
  for(i=0;i<4;i++){
    for(j=0;j<4;j++){
      C[i][j] = 0;
      for(k=0;k<4;k++){
        C[i][j] += A[i][k]*B[k][j];
      }
    }
  }
}



// Inverts a 3x3 matrix B = inv(A)
void inv_3matrix(double A[3][3], double B[3][3]){
  double C[3][3];
  double I[3][3];
  int _ri[3]; // row index
  int i,j,k;
  double _max_val=0.0;
  int _max_pos = 0;
  int _tmp;
  double _temp_div;
  double _temp_mul;

  for(i=0;i<3;i++){
    for(j=0;j<3;j++){
      C[i][j] =  (double)A[i][j];
      I[i][j] = (double)(i==j);
    }
    _ri[i]=i;
  }

  for(i=0;i<3;i++){

    // find largest element in column i
    _max_val = 0.0;
    for(j=i;j<3;j++){
      if(fabs(C[_ri[j]][i])>_max_val){
        _max_val = fabs(C[_ri[j]][i]);
        _max_pos = j;
      }
    }
    //switch rows:

    _tmp=_ri[i];
    _ri[i]=_ri[_max_pos];
    _ri[_max_pos]=_tmp;

    // normalize row i
    _temp_div = C[_ri[i]][i];
    for(j=0;j<3;j++){
      I[_ri[i]][j]=I[_ri[i]][j]/_temp_div;
      C[_ri[i]][j]=C[_ri[i]][j]/_temp_div;
    }

    // eliminate lower rows
    if(i<2){
      for(j=i+1; j<3; j++){
        _temp_mul =  C[_ri[j]][i];
        for(k=0; k<3; k++){
          I[_ri[j]][k] -= _temp_mul*I[_ri[i]][k];
          C[_ri[j]][k] -= _temp_mul*C[_ri[i]][k];
        }
      }
    }

  }
  for(i=0; i<3; i++){
    for(j=0; j<3; j++){
      B[i][j] = (double)I[_ri[i]][j];
    }
  }
}



// Inverts a 4x4 matrix B = inv(A)
void inv_4matrix(double A[4][4], double B[4][4]){
  double C[4][4];
  double I[4][4];
  int _ri[4]; // row index
  int i,j,k;
  double _max_val=0.0;
  int _max_pos = 0;
  int _tmp;
  double _temp_div;
  double _temp_mul;

  for(i=0;i<4;i++){
    for(j=0;j<4;j++){
      C[i][j] =  (double)A[i][j];
      I[i][j] = (double)(i==j);
    }
    _ri[i]=i;
  }

  for(i=0;i<4;i++){

    // find largest element in column i
    _max_val = 0.0;
    for(j=i;j<4;j++){
      if(fabs(C[_ri[j]][i])>_max_val){
        _max_val = fabs(C[_ri[j]][i]);
        _max_pos = j;
      }
    }
    //switch rows:

    _tmp=_ri[i];
    _ri[i]=_ri[_max_pos];
    _ri[_max_pos]=_tmp;

    // normalize row i
    _temp_div = C[_ri[i]][i];
    for(j=0;j<4;j++){
      I[_ri[i]][j]=I[_ri[i]][j]/_temp_div;
      C[_ri[i]][j]=C[_ri[i]][j]/_temp_div;
    }

    // eliminate lower rows
    if(i<2){
      for(j=i+1; j<4; j++){
        _temp_mul =  C[_ri[j]][i];
        for(k=0; k<4; k++){
          I[_ri[j]][k] -= _temp_mul*I[_ri[i]][k];
          C[_ri[j]][k] -= _temp_mul*C[_ri[i]][k];
        }
      }
    }

  }
  for(i=0; i<4; i++){
    for(j=0; j<4; j++){
      B[i][j] = (double)I[_ri[i]][j];
    }
  }
}





// takes dot product of 2 3-vectors
double dot3(double A[3], double B[3]){
  int i;
  double c = 0;
  for(i=0;i<3;i++){
    c += A[i]*B[i];
  }
  return c;
}



// takes dot product of 2 4-vectors
double dot4(double b[4], double a[4]){
  int i;
  double c = 0;
  for(i=0;i<4;i++){
    c += a[i]*b[i];
  }
  return c;
}


// takes outer product of 2 4-vectors (returns 4x4-matrix)
void outer4(double a[4], double b[4], double C[4][4]){
  int i,j;
  //  double c = 0;
  for(i=0;i<4;i++){
    for(j=0;j<4;j++){
      C[i][j] = a[i]*b[j];
    }
  }
}


double fitness_function(int indx0,int indx1,int k,
			double acc_log[MAX_MOTION_LENGTH][3],
			double vel_poly[5][3],
			double pos_poly[6][3]){
  double fitness = 0;
  double res;
  int coordinate;
  int i;
  double time[200];  
  double t;

  double A;
  double a[200];
  double y;
  double x;
  double t1,t0;

  double divider;
  double pos0;

  // Generate time vector
  gen_time(indx0,indx1,k,time);

  t0 = time[indx0];
  t1 = time[indx1];

  for(coordinate=0; coordinate<3; coordinate++){
    // Fit polynomial parameter for coordinate (least squares)
    A = y = 0;

    for(i=indx0;(i<=k && i<=indx1);i++){
      t = time[i];
      a[i] = ((20*t*t*t) -
	   (12*(5.0/2)*(t0 + t1)*t*t) +
	   (6*(5.0/3)*(t0*t0 + t1*t1 + 4*t0*t1)*t) -
	   (2*5*(t0*t0*t1 + t1*t1*t0)));
      A += a[i]*a[i];
      y += a[i]*acc_log[i][coordinate];
    }
    x = y/A;
    
    // Calculate polynomial factors from parameter
    pos_poly[0][coordinate] = x;
    pos_poly[1][coordinate] = 0.0; // symmetry! (-(5/2)*(t0 + t1)*x)
    pos_poly[2][coordinate] = (5.0/3)*(t0*t0 + t1*t1 + 4*t0*t1)*x;
    pos_poly[3][coordinate] = 0.0; // symmetry! -5*(t0^2*t1 + t1^2*t0)*x;
    pos_poly[4][coordinate] = -5*x*t0*t0*t0*t0 - 3*pos_poly[2][coordinate]*t0*t0;
    pos_poly[5][coordinate] = 0.0; // set below

    // select constant factor so that the start remains at 0,0,0:
    pos0 = polyval6(pos_poly,t0,coordinate);
    pos_poly[5][coordinate] = -pos0;

    // Calculate residual
    divider = 0.0;
    res = 0.0;
    for(i=indx0;(i<=k && i<=indx1);i++){
      t = time[i];    
      divider++;
      res += (a[i]*x-acc_log[i][coordinate]) * (a[i]*x-acc_log[i][coordinate]);
    }
    fitness -= res/divider;

    // Calculate poly in vel space
    vel_poly[0][coordinate] = 5*pos_poly[0][coordinate];
    vel_poly[1][coordinate] = 4*pos_poly[1][coordinate];
    vel_poly[2][coordinate] = 3*pos_poly[2][coordinate];
    vel_poly[3][coordinate] = 2*pos_poly[3][coordinate];
    vel_poly[4][coordinate] = pos_poly[4][coordinate];
  }

  return fitness;



}


// takes starting index and ending index for polynomial
// and generates a vetor containing time values.
// returns the time value att index k
double gen_time(int indx0,int indx1,int k,double time[200]){
  double t0;
  double t1;
  double tk;
  int i;
  t0 = ((indx0-indx1)*0.01)/2;
  t1 = ((indx1-indx0)*0.01)/2;
  tk = ((k-indx0)*0.01)+t0;
  if(tk>t1) tk=t1;   // points after the polynomial are the same
                     // as the last point on the poly
  
  // Don't generate time vector if NULL pointer
  if(time != NULL){
    for(i=0;i<200;i++){
      time[i] = t0 + (0.01*(i-indx0));
    }
  }
  return tk;
}


// Evaluates polynomial at time t
double polyval6(double pos_poly[6][3], double t, int coordinate){
  int i=0;
  double val = 0;
  double t_ = 1;
  for(i=5;i>=0;i--){
    val += t_*pos_poly[i][coordinate];
    t_ *= t;
  }
  return val;
}

// Evaluates polynomial at time t
double polyval5(double vel_poly[5][3], double t, int coordinate){
  int i=0;
  double val = 0;
  double t_ = 1;
  for(i=4;i>=0;i--){
    val += t_*vel_poly[i][coordinate];
    t_ *= t;
  }
  return val;
}

// Evaluates polynomial at time t
double polyval4(double acc_poly[4][3], double t, int coordinate){
  int i=0;
  double val = 0;
  double t_ = 1;
  for(i=3;i>=0;i--){
    val += t_*acc_poly[i][coordinate];
    t_ *= t;
  }
  return val;
}

int Kalman_filter(double pan_in,
		  double tilt_in,
		  double yaw_in,
		  double *pan_out,
		  double *tilt_out,
		  double *yaw_out,
		  double R){

  static int initialized=0;
  static struct EKF_state state[3];
  struct EKF_state state_[3];
  double input[3] = {pan_in,tilt_in,yaw_in};
  int i,j,k;
  static double A[4][4];
  static double At[4][4]; // A transpose
  static double Q[4][4];
  double Temp_44a[4][4];
  double Temp_4a[4];
  double Temp;
  double t=0.01;
  double H[4] = {1.0, 0.0, 0.0, 0.0};
  double K[4];


  // Set state to zero and P to diagonal 10's
  // set A to upper triang lin diff eq
  if(!initialized){
    for(i=0;i<3;i++){
      for(j=0;j<4;j++){
	state[i].X[j]=0.0;
	for(k=0;k<4;k++){
	A[j][k] = (k==j);
	At[j][k] = (k==j);
	Q[j][k] = (k==j && k==1)*0.1;
	state[i].P[j][k] = (j==k)*10;
	}
      }
    }
    A[0][1] = A[1][2] =  A[2][3] = t;
    A[0][2] = A[1][3] = t*t/2.0;
    A[0][3] = t*t*t/6.0;

    At[1][0] = At[2][1] =  At[3][2] = t;
    At[2][0] = At[3][1] = t*t/2.0;
    At[3][0] = t*t*t/6.0;

    initialized = 1;
  }

  


  for(k=0;k<3;k++){
    // update:
    //state_[k].X = A*state[k].X
    mult_4matrix_4vec(A, state[k].X, state_[k].X);

    //    for(i=0;i<4;i++){
    //  printf("X:%5.2f X_:%5.2f\n",state[k].X[i],state_[k].X[i]);
    //}

    //state_[k].P = A*state[k].P*A' + Q;
    mult_4matrix(A, state[k].P, Temp_44a);
    mult_4matrix(Temp_44a,At,state_[k].P);
    state_[k].P[1][1] += Q[1][1]; // REWRITE IF Q IS CHANGED!!
    
    // correct:
    /*
      K = P_*H'*inv(H*P_*H' + R);
    */
    mult_4matrix_4vec(state_[k].P,H,Temp_4a);
    Temp = dot4(H,Temp_4a);
    Temp += R;
    mult_4vec_scalar(H, 1.0/(Temp), Temp_4a);
    mult_4matrix_4vec(state_[k].P, Temp_4a, K);

    /*
      X = X_ + K*(z - H*X_);
    */
    Temp = dot4(H,state_[k].X);
    //    printf("HxX: %5.2f  z:%5.2f",Temp,input[k]);
    Temp = input[k] - Temp;
    mult_4vec_scalar(K, Temp,Temp_4a);
    add_4vec(state_[k].X,Temp_4a,state[k].X);

    /*
      P = (eye(M=4) - K*H)*P_;
    */
    outer4(K,H,Temp_44a);
    for(i=0;i<4;i++){
      for(j=0;j<4;j++){
	Temp_44a[i][j] = (i==j)-Temp_44a[i][j];
      }
    }
    mult_4matrix(Temp_44a,state_[k].P,state[k].P);

  }

  *pan_out = state[0].X[0];
  *tilt_out = state[1].X[0];
  *yaw_out = state[2].X[0];
  return 0;
}
