/*
 * est_gibbs.cpp
 *
 */

#include <string.h>
#include <math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>

#include "gibbs.h"

extern double* vector;
extern int* docs_id;
extern int* rdocs_id;
extern double *v_t;
extern double *exp_term;
extern gsl_rng* glob_r;
extern double **Nkw_ave;
extern double *Nk_ave;

/*
 * Resample the topic of token l in document d, whose word id is w.
 *
 * The token's current topic is first taken out of the counts held in cts
 * (n[d][.], M[.] and m[.][w]).  For each of the model->K topics the weight
 *     (model->alpha + cts->n[d][k]) * mget(model->phi, k, w)
 * is written to the shared scratch buffer `vector`; the weights are then
 * divided by their sum and handed to next_discrete_normalised(), whose
 * return value becomes the new topic.  ass->topic_ass[d][l] and the three
 * count tables are updated for that topic.
 *
 * The corpus argument c is not used by this function.
 */
static void sample_topic_lda(int d, int l, int w, Assignment* ass, Cts* cts, Model* model,
		Corpus* c)
{
	const int n_topics = model->K;

	/* Remove the token's present assignment from every count. */
	const int prev = ass->topic_ass[d][l];
	cts->n[d][prev] -= 1;
	cts->M[prev]--;
	cts->m[prev][w]--;

	/* Unnormalised per-topic weights, accumulated in topic order. */
	double total = 0;
	for (int k = 0; k < n_topics; ++k) {
		const double weight = (double)(model->alpha + cts->n[d][k]) * mget(model->phi, k, w);
		vector[k] = weight;
		total += weight;
	}

	/* Scale so the weights sum to one before drawing. */
	for (int k = 0; k < n_topics; ++k) {
		vector[k] /= total;
	}
	const int drawn = next_discrete_normalised(vector, n_topics);

	/* Record the new assignment and put the token back into the counts. */
	ass->topic_ass[d][l] = drawn;
	cts->n[d][drawn] += 1;
	cts->M[drawn]++;
	cts->m[drawn][w]++;
}

/*
 * Run model->NUMLF update steps on the flattened parameter vector
 * model->theta (model->K * model->v entries) and its companion vector v_t.
 *
 * Every step performs, in this order:
 *   1. theta[i] += v_t[i] for all i;
 *   2. v_t[i] = (1 - model->ALPHA) * v_t[i] + a zero-mean Gaussian draw with
 *      standard deviation sqrt(2 * model->C * model->ETA);
 *   3. exp_term[k * v + w] =
 *        (gamma - theta[k * v + w]
 *         + ratio * (Nkw_ave[k][w] - Nk_ave[k] * phi(k, w))) * ETA,
 *      with ratio = c->ndocs / model->BATCHSIZE, followed by v_t += exp_term;
 *   4. model->ALPHA += mean(v_t[i]^2) - model->ETA.
 *
 * model->phi is only read here; it is not recomputed after theta changes.
 * NOTE(review): the update looks like a thermostat-style stochastic-gradient
 * sampler — confirm against the model description.
 *
 * cts and ass are not used by this function.
 */
static void sample_ECHMC(Model* model, Cts* cts, Assignment* ass, Corpus* c)
{
	const double batch_scale = ((double)c->ndocs) / model->BATCHSIZE;
	const int n_params = model->v * model->K;

	for (int step = 0; step < model->NUMLF; step++) {
		/* 1. Move theta along v_t. */
		for (int i = 0; i < n_params; i++) {
			assert(!gsl_isnan(vget(model->theta, i)));
			assert(!gsl_isnan(v_t[i]));
			vset(model->theta, i, vget(model->theta, i) + v_t[i]);
		}

		/* 2. Damp v_t and inject noise; one Gaussian draw per entry, in index order. */
		const double noise_sd = sqrt(2.0 * model->C * model->ETA);
		for (int i = 0; i < n_params; i++) {
			assert(gsl_finite(v_t[i]));
			const double noise = gsl_ran_gaussian(glob_r, noise_sd);
			assert(gsl_finite(noise));
			v_t[i] = noise + (1 - model->ALPHA) * v_t[i];
		}

		/*
		 * 3. Per-entry increment scaled by ETA.  Every slot of exp_term is
		 *    assigned below, so no separate zero-fill is needed.
		 */
		for (int k = 0; k < model->K; k++) {
			for (int w = 0; w < model->v; w++) {
				const int idx = k * model->v + w;
				const double drift = model->gamma - vget(model->theta, idx)
						+ batch_scale * (Nkw_ave[k][w] - Nk_ave[k] * mget(model->phi, k, w));
				exp_term[idx] = drift * model->ETA;
			}
		}
		for (int i = 0; i < n_params; i++) {
			v_t[i] += exp_term[i];
		}

		/* 4. Adjust ALPHA by the gap between the mean of v_t^2 and ETA. */
		double mean_sq = 0;
		for (int i = 0; i < n_params; i++) {
			mean_sq += v_t[i] / n_params * v_t[i];
		}
		model->ALPHA += mean_sq - model->ETA;
	}
}

/*
 * One mini-batch E-step followed by a parameter update.
 *
 *   - gsl_ran_choose() picks model->BATCHSIZE document ids out of the
 *     c->ndocs entries of docs_id and stores them in rdocs_id.
 *   - cts->M / cts->m and the running averages Nk_ave / Nkw_ave are cleared,
 *     then M and m are rebuilt from the current topic assignments of the
 *     chosen documents only.
 *   - Six sweeps of sample_topic_lda() are run over every token of the
 *     batch; after each of the last three sweeps the counts M and m are
 *     added to Nk_ave and Nkw_ave, which are finally divided by three.
 *   - sample_ECHMC() is then called with the same arguments.
 */
void stm_e_gibbs(Model* model, Cts* cts, Assignment* ass, Corpus* c)
{
	const int total_sweeps = 6;
	const int burnin_sweeps = 3;
	const int kept_sweeps = total_sweeps - burnin_sweeps;

	/* Choose this call's mini-batch of documents. */
	gsl_ran_choose(glob_r, rdocs_id, model->BATCHSIZE, docs_id, c->ndocs, sizeof(int));

	/* Start from empty topic/word counts and empty averages. */
	for (int k = 0; k < model->K; k++) {
		cts->M[k] = 0;
		Nk_ave[k] = 0;
		for (int w = 0; w < model->v; w++) {
			cts->m[k][w] = 0;
			Nkw_ave[k][w] = 0;
		}
	}

	/* Rebuild the counts from the batch documents' existing assignments. */
	for (int b = 0; b < model->BATCHSIZE; b++) {
		const int doc = rdocs_id[b];
		for (int pos = 0; pos < c->docs[doc].total; pos++) {
			const int topic = ass->topic_ass[doc][pos];
			cts->m[topic][c->docs[doc].words[pos]]++;
			cts->M[topic]++;
		}
	}

	for (int sweep = 0; sweep < total_sweeps; sweep++) {
		/* Resample every token of every batch document. */
		for (int b = 0; b < model->BATCHSIZE; b++) {
			const int doc = rdocs_id[b];
			for (int pos = 0; pos < c->docs[doc].total; pos++) {
				sample_topic_lda(doc, pos, c->docs[doc].words[pos], ass, cts, model, c);
			}
		}

		/* Sweeps before the burn-in threshold do not contribute to the averages. */
		if (sweep < burnin_sweeps) {
			continue;
		}
		for (int k = 0; k < model->K; k++) {
			Nk_ave[k] += cts->M[k];
			for (int w = 0; w < model->v; w++) {
				Nkw_ave[k][w] += cts->m[k][w];
			}
		}
	}

	/* Turn the accumulated sums into per-sweep averages. */
	for (int k = 0; k < model->K; k++) {
		Nk_ave[k] /= kept_sweeps;
		for (int w = 0; w < model->v; w++) {
			Nkw_ave[k][w] /= kept_sweeps;
		}
	}

	sample_ECHMC(model, cts, ass, c);
}
