/*
 * model.c
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <gsl/gsl_sf.h>
#include <gsl/gsl_math.h>
#include <unistd.h>

#include "model.h"
#include "util.h"

/*
 * File-scope pointer to an Assignment. new_assignment() stores the
 * assignment it has just allocated here before returning it.
 */
Assignment* ass_tmp;

/*
 * Allocate a model together with its GSL parameter storage.
 *
 * v:	size of vocabulary
 * K:	number of topics
 *
 * phi is allocated as a zeroed K x v matrix and theta as a zeroed vector
 * of length K * v. All other fields of the model are left unset; the
 * caller is expected to fill them in.
 *
 * Returns the new model, to be released with free_model(). If the struct
 * itself cannot be allocated an error is printed and the process exits.
 */
Model* new_model(int v, int K)
{
	Model* model = malloc(sizeof *model);

	/* Without this check a failed malloc is dereferenced just below. */
	if (model == NULL){
		fprintf(stderr, "new_model: out of memory\n");
		exit(EXIT_FAILURE);
	}
	model->v = v;
	model->K = K;

	model->phi = gsl_matrix_calloc(K, v);
	model->theta = gsl_vector_calloc(K * v);

	return model;
}
/*
 * Allocate the count tables used for statistics.
 *
 * K:	number of rows of m and length of M
 * v:	number of columns of m
 * c:	corpus; only c->ndocs is read
 *
 * Shapes of the tables in the returned struct:
 *	n	c->ndocs rows of K ints
 *	N	c->ndocs ints
 *	m	K rows of v ints
 *	M	K ints
 * (presumably per-document and per-topic counts and their totals --
 * TODO confirm against the sampler that updates them.)
 *
 * Every counter starts at zero. Release the result with free_cts() before
 * the corpus is freed. On allocation failure an error is printed and the
 * process exits.
 */
Cts* new_cts(int K, int v, Corpus* c)
{
	int j, k;
	Cts* cts = malloc(sizeof *cts);

	if (cts == NULL){
		goto oom;
	}

	/*
	 * calloc instead of malloc: counters must not start with
	 * indeterminate values. A NULL result only counts as a failure when a
	 * non-empty table was requested, since calloc(0, ...) may return NULL.
	 */
	cts->n = calloc(c->ndocs, sizeof(int*));
	cts->N = calloc(c->ndocs, sizeof(int));
	if ((cts->n == NULL || cts->N == NULL) && c->ndocs > 0){
		goto oom;
	}
	for (j = 0; j < c->ndocs; ++j){
		cts->n[j] = calloc(K, sizeof(int));
		if (cts->n[j] == NULL && K > 0){
			goto oom;
		}
	}

	cts->m = calloc(K, sizeof(int*));
	cts->M = calloc(K, sizeof(int));
	if ((cts->m == NULL || cts->M == NULL) && K > 0){
		goto oom;
	}
	for (k = 0; k < K; k++){
		cts->m[k] = calloc(v, sizeof(int));
		if (cts->m[k] == NULL && v > 0){
			goto oom;
		}
	}
	return cts;

oom:
	fprintf(stderr, "new_cts: out of memory\n");
	exit(EXIT_FAILURE);
}

/*
 * Allocate storage for the topic assignment of each word-token.
 *
 * c:	corpus; c->ndocs and c->docs[j].total are read
 *
 * topic_ass[j] has room for c->docs[j].total ints. The entries are NOT
 * given a value here; the caller must fill them in. The new assignment is
 * also stored in the file-scope pointer ass_tmp.
 *
 * Returns the new assignment, to be released with free_assignment()
 * before the corpus is freed. On allocation failure an error is printed
 * and the process exits.
 */
Assignment* new_assignment(Corpus* c)
{
	int j;
	Assignment* ass = malloc(sizeof *ass);

	if (ass == NULL){
		goto oom;
	}
	ass->topic_ass = malloc(sizeof(int*) * c->ndocs);
	if (ass->topic_ass == NULL && c->ndocs > 0){
		goto oom;
	}
	for (j = 0; j < c->ndocs; j++){
		ass->topic_ass[j] = malloc(sizeof(int) * c->docs[j].total);
		/* malloc(0) may legitimately return NULL for an empty document. */
		if (ass->topic_ass[j] == NULL && c->docs[j].total > 0){
			goto oom;
		}
	}
	ass_tmp = ass;
	return ass;

oom:
	fprintf(stderr, "new_assignment: out of memory\n");
	exit(EXIT_FAILURE);
}

/*
 * Create a model and record its settings.
 *
 * K:	number of topics
 * v:	size of vocabulary
 * alpha, gamma, ALPHA, ETA, C, NUMLF, BATCHSIZE:
 *	each is copied into the model field of the same name
 *
 * The model comes from new_model(v, K). Despite the name, no random values
 * are drawn in this function.
 *
 * Returns the new model.
 */
Model* random_init(int K, int v, double alpha, double gamma,
		double ALPHA, double ETA, double C, int NUMLF, int BATCHSIZE)
{
	Model* m = new_model(v, K);

	/* integer settings */
	m->NUMLF = NUMLF;
	m->BATCHSIZE = BATCHSIZE;

	/* real-valued settings */
	m->alpha = alpha;
	m->gamma = gamma;
	m->ALPHA = ALPHA;
	m->ETA = ETA;
	m->C = C;

	return m;
}

/*
 * Read a model from the files written by save_model().
 *
 * file:	directory holding model_other.txt, model_phi.txt and
 *		model_theta.txt
 *
 * model_other.txt is parsed as one "model->NAME: value" line per field, in
 * the order v, K, alpha, gamma, ALPHA, ETA, C, NUMLF, BATCHSIZE. The model
 * is then built with random_init() and its phi and theta are handed to
 * scanf_matrix() / scanf_vector() together with the matching file name.
 *
 * If model_other.txt cannot be opened a message is printed and the process
 * exits with status 0. A field that fails to parse trips an assert.
 *
 * Returns the loaded model.
 */
Model* read_model(char* file)
{
	char str[BUFSIZ];
	FILE* fileptr;
	Model* model;
	int v, K, tmp, NUMLF, BATCHSIZE;
	double alp, gamma, ALPHA, ETA, C;

	/* snprintf: an over-long directory name must not overrun str. */
	snprintf(str, sizeof str, "%s/model_other.txt", file);
	fileptr = fopen(str, "r");
	if (!fileptr){
		printf("File %s/model_other.txt does not exist\n", file);
		exit(0);
	}

	tmp = fscanf(fileptr, "model->v: %d\n", &v);
	assert(tmp == 1);
	/*
	 * The format has to match the "model->K: %d" line that save_model()
	 * writes; a bare " %d" stops at the 'm' and matches nothing.
	 */
	tmp = fscanf(fileptr, "model->K: %d\n", &K);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->alpha: %lf\n", &alp);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->gamma: %lf\n", &gamma);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->ALPHA: %lf\n", &ALPHA);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->ETA: %lf\n", &ETA);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->C: %lf\n", &C);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->NUMLF: %d\n", &NUMLF);
	assert(tmp == 1);
	tmp = fscanf(fileptr, "model->BATCHSIZE: %d\n", &BATCHSIZE);
	assert(tmp == 1);
	/* All scalar fields are read; release the stream before going on. */
	fclose(fileptr);

	model = random_init(K, v, alp, gamma, ALPHA, ETA, C, NUMLF, BATCHSIZE);

	snprintf(str, sizeof str, "%s/model_phi.txt", file);
	scanf_matrix(str, model->phi);

	snprintf(str, sizeof str, "%s/model_theta.txt", file);
	scanf_vector(str, model->theta);

	return model;
}
/*
 * Save a model into a directory.
 *
 * model:	source of the scalar fields and of theta
 * mphi:	matrix written as model_phi.txt (note: this argument is
 *		saved, not model->phi)
 * file:	existing directory that receives the output files
 *
 * Writes three files:
 *	model_other.txt	one "model->NAME: value" line per scalar field
 *	model_phi.txt	mphi, via save_matrix()
 *	model_theta.txt	model->theta, via save_vector()
 *
 * If model_other.txt cannot be opened for writing an error is printed and
 * the process exits.
 */
void save_model(Model* model, gsl_matrix* mphi, char* file)
{
	char str[BUFSIZ];
	FILE* fileptr;

	/* snprintf: an over-long directory name must not overrun str. */
	snprintf(str, sizeof str, "%s/model_other.txt", file);
	fileptr = fopen(str, "w");
	if (!fileptr){
		fprintf(stderr, "Cannot open file %s for writing\n", str);
		exit(EXIT_FAILURE);
	}
	fprintf(fileptr, "model->v: %d\n", model->v);
	fprintf(fileptr, "model->K: %d\n", model->K);
	fprintf(fileptr, "model->alpha: %lf\n", model->alpha);
	fprintf(fileptr, "model->gamma: %lf\n", model->gamma);
	fprintf(fileptr, "model->ALPHA: %lf\n", model->ALPHA);
	fprintf(fileptr, "model->ETA: %lf\n", model->ETA);
	fprintf(fileptr, "model->C: %lf\n", model->C);
	fprintf(fileptr, "model->NUMLF: %d\n", model->NUMLF);
	fprintf(fileptr, "model->BATCHSIZE: %d\n", model->BATCHSIZE);
	/* Close so the data is flushed and the stream is not leaked. */
	fclose(fileptr);

	snprintf(str, sizeof str, "%s/model_phi.txt", file);
	save_matrix(str, mphi);

	snprintf(str, sizeof str, "%s/model_theta.txt", file);
	save_vector(str, model->theta);
}


/*
 * Read the topic assignment of every word-token from <file>/z.txt.
 *
 * c:		corpus; c->ndocs and c->docs[j].total give the number of
 *		integers expected for each document
 * file:	directory containing z.txt
 *
 * The assignment is allocated with new_assignment() and filled with
 * whitespace-separated integers, document by document.
 *
 * If z.txt cannot be opened a message is printed and the process exits
 * with status 0. A missing or malformed integer trips an assert.
 *
 * Returns the filled assignment.
 */
Assignment* read_topic_assignmnet(Corpus* c, char* file)
{
	Assignment* ass = new_assignment(c);
	FILE* fileptr;
	int j, l, tmp;
	char str[BUFSIZ];

	printf("Reading topic assignment ......\n");
	/* snprintf: an over-long directory name must not overrun str. */
	snprintf(str, sizeof str, "%s/z.txt", file);
	fileptr = fopen(str, "r");
	if(!fileptr){
		printf("Cannot open file %s\n", str);
		exit(0);
	}
	for(j = 0; j < c->ndocs; j++){
		for(l = 0; l < c->docs[j].total; ++l){
			/*
			 * The trailing blank in the format, and the leading
			 * whitespace skip of %d, already consume the line
			 * breaks, so no separate newline read is needed.
			 */
			tmp = fscanf(fileptr, "%d ", &ass->topic_ass[j][l]);
			assert(tmp == 1);
		}
	}
	fclose(fileptr);

	printf("Finished!!!\n");
	return ass;
}

/*
 * Write the topic assignment of every word-token to <file>/z.txt.
 *
 * c:		corpus; c->ndocs and c->docs[j].total give the extent of
 *		each document
 * ass:		assignment to write
 * file:	existing directory that receives z.txt
 *
 * One line is written per document, each value followed by a blank.
 *
 * If z.txt cannot be opened for writing an error is printed and the
 * process exits.
 */
void save_topic_assignmnet(Corpus* c, Assignment* ass, char* file)
{
	FILE* fileptr;
	int j, l;
	char str[BUFSIZ];

	printf("Saving topic assignment ......\n");
	/* snprintf: an over-long directory name must not overrun str. */
	snprintf(str, sizeof str, "%s/z.txt", file);
	fileptr = fopen(str, "w");
	/* Without this check a failed fopen hands NULL to fprintf. */
	if (!fileptr){
		fprintf(stderr, "Cannot open file %s for writing\n", str);
		exit(EXIT_FAILURE);
	}
	for(j = 0; j < c->ndocs; j++){
		for(l = 0; l < c->docs[j].total; ++l){
			fprintf(fileptr, "%d ", ass->topic_ass[j][l]);
		}
		fprintf(fileptr, "\n");
	}
	fclose(fileptr);
	printf("Finished!!!\n");
}

/*
 * Write the leading words of a range of topics to a text file.
 *
 * num_words:	how many words to list per topic; capped at the
 *		vocabulary size
 * begin_k:	first row of M to print
 * end_k:	last row of M to print (inclusive)
 * M:		matrix with one row per topic and one column per word id
 * cts:		unused
 * v:		vocabulary; v->size and v->word_map are read
 * file:	path of the output file
 *
 * For every topic a header "------topic_<k - begin_k>------" is written,
 * followed by "word(value) " entries on one line. Row k of M is copied
 * and handed to sort(); the entries of the result are then read as word
 * ids (NOTE(review): this relies on sort() leaving word ids, best first,
 * in the vector -- confirm against its definition).
 *
 * If the output file cannot be opened an error is printed and the process
 * exits.
 */
void print_top_words(int num_words, int begin_k, int end_k, gsl_matrix* M, Cts* cts, vocabulary* v, char* file)
{
	FILE* fileptr;
	int k, i, j;
	int word_id;
	gsl_vector* vector;
	int V;

	(void) cts;
	V = v->size;

	fileptr = fopen(file, "w");
	/* Without this check a failed fopen hands NULL to fprintf. */
	if (!fileptr){
		fprintf(stderr, "Cannot open file %s for writing\n", file);
		exit(EXIT_FAILURE);
	}
	/* The work vector holds only V entries; never index past its end. */
	if (num_words > V){
		num_words = V;
	}
	vector = gsl_vector_alloc(V);
	for (k = begin_k; k <= end_k; k++){
		fprintf(fileptr, "------topic_%d------\n", k - begin_k);
		gsl_matrix_get_row(vector, M, k);
		sort(vector);
		for (i = 0; i < num_words; i++){
			word_id = (int) vget(vector, i);
			/* Map the id back to its string; stop at the first match. */
			for (j = 0; j < V; j++){
				if (v->word_map[j].id == word_id) {
					fprintf(fileptr, "%s(%lf) ", v->word_map[j].word_str, mget(M, k, word_id));
					break;
				}
			}
		}
		fprintf(fileptr, "\n");
	}
	fclose(fileptr);
	gsl_vector_free(vector);
}

/*
 * Release a topic assignment.
 *
 * ass:	assignment to free
 * c:	corpus the assignment belongs to; c->ndocs gives the number of
 *	per-document arrays
 *
 * Note: the topic assignment must be freed before the corpus, because the
 * corpus is still read here.
 */
void free_assignment(Assignment* ass, Corpus* c)
{
	int doc = 0;

	/* per-document arrays first, then the table, then the struct */
	while (doc < c->ndocs){
		free(ass->topic_ass[doc]);
		doc++;
	}
	free(ass->topic_ass);
	free(ass);
}
/*
 * Release the count tables.
 *
 * cts:	count tables to free
 * c:	corpus; c->ndocs gives the number of rows of cts->n
 * k:	number of rows of cts->m
 *
 * Note: the count tables must be freed before the corpus, because the
 * corpus is still read here.
 */
void free_cts(Cts* cts, Corpus* c, int k)
{
	/* flat arrays */
	free(cts->N);
	free(cts->M);

	/* rows of n, then n itself */
	for (int doc = 0; doc < c->ndocs; doc++){
		free(cts->n[doc]);
	}
	free(cts->n);

	/* rows of m, then m itself */
	for (int row = 0; row < k; row++){
		free(cts->m[row]);
	}
	free(cts->m);

	free(cts);
}
/*
 * Release a model: its phi matrix, its theta vector, then the struct.
 *
 * model:	model to free; must not be used afterwards
 */
void free_model(Model* model)
{
	gsl_matrix_free(model->phi);
	gsl_vector_free(model->theta);

	free(model);
}

/*
 * Release everything: count tables, model, topic assignment, corpus and
 * vocabulary, then print a confirmation line.
 *
 * The count tables and the assignment are released while the corpus is
 * still alive, and model->K is read before the model is released.
 */
void free_all(Corpus* c, Model* model, Cts* cts, Assignment* ass, vocabulary* v)
{
	const int n_topics = model->K;

	free_cts(cts, c, n_topics);
	free_model(model);

	free_assignment(ass, c);
	free_corpus(c);

	free_vocabulary(v);
	printf("--- All space is freed!!!\n");
}

