#include "std_includes.h"

#include "dco.hpp"
// dco/c++ mode instantiated over double; everything below is derived from it.
using DCO_AM = dco::ga1s<double>;
// Active scalar type of that mode (carries a value and a derivative component).
using DCO_A = DCO_AM::type;
// Tape type of that mode; created, interpreted and removed in driver().
using DCO_AM_TAPE = DCO_AM::tape_t;

template<typename AT, typename PT>
void f(AT& x, const vector<AT>& p, const vector<vector<PT>>& dW) { 
  int m=dW.size(), n=dW[0].size();
  AT s=0, x0=x; PT dt=1./n, t;
  for (int j=0;j<m;j++) {
    DCO_AM::jacobian_preaccumulator_t jp(dco::tape(x));
    t=0;
    jp.start();
    for (int i=0;i<n;i++) {
      x+=dt*p[i]*sin(x*t)+p[i]*cos(x*t)*sqrt(dt)*dW[j][i];
      t+=dt;
    }
    jp.register_output(x);
    jp.finish();
    s+=x; x=x0;
  }
  x=s/m;
}

vector<double> driver(double& xv, vector<double>& pv,
    const vector<vector<double>>& dW) {
  int n=dW[0].size();
  vector<double> g(n+1,0);
  DCO_A x0=xv;  
  vector<DCO_A> p(n); dco::value(p)=pv; 
  DCO_AM::global_tape=DCO_AM_TAPE::create();
  DCO_AM::global_tape->register_variable(x0);
  DCO_AM::global_tape->register_variable(p);
  DCO_A x=x0;  
  f(x,p,dW);
  DCO_AM::global_tape->register_output_variable(x);
  dco::derivative(x)=1;
  DCO_AM::global_tape->interpret_adjoint();
  g[0]=dco::derivative(x0);
  for (int i=0;i<n;i++) g[i+1]=dco::derivative(p[i]);
  DCO_AM_TAPE::remove(DCO_AM::global_tape);
  return g;
}  

/// Program entry point.
///
/// Expects two command-line arguments: the number of paths m and the number
/// of steps n per path, both positive integers. Draws an m-by-n matrix of
/// standard-normal samples from a default-seeded engine, calls driver() with
/// initial state 1 and all parameters equal to 1, and prints the resulting
/// derivatives.
///
/// @return 0 on success, 1 if the arguments are missing or not positive.
int main(int c, char* v[]) {
  // Checked at run time instead of with assert(): assert is compiled out
  // under NDEBUG, which would let v[1]/v[2] be read without validation.
  if (c!=3) {
    std::cerr << "usage: " << (c>0 ? v[0] : "program")
              << " <number_of_paths> <number_of_steps>" << endl;
    return 1;
  }
  const int m=atoi(v[1]), n=atoi(v[2]);
  // atoi yields 0 for non-numeric text; zero or negative sizes would make
  // driver() read dW[0] of an empty vector or request a huge allocation.
  if (m<=0 || n<=0) {
    std::cerr << "error: number of paths and number of steps must be positive integers" << endl;
    return 1;
  }

  const double x0=1;
  vector<double> p(n,1); 

  default_random_engine generator;
  normal_distribution<double> distribution(0.0,1.0);
  vector<vector<double>> dW(m,vector<double>(n,1));
  // Filled row by row so the sequence of random draws is unchanged.
  for (int i=0;i<m;i++)
    for (int j=0;j<n;j++)
      dW[i][j]=distribution(generator);

  double x=x0;
  vector<double> g=driver(x,p,dW);
  cout << "dx/dx0=" << g[0] << endl;
  for (int i=0;i<n;i++) 
    cout << "dx/dp[" << i << "]=" << g[i+1] << endl;
  return 0;
}
