function [index_truth,weight,ini_truth] = CRH_weather(dataset,iti)
%% function description
% ----------------------------------------------------------------------
% Function for calculating the CRH results
% Qi Li
%
% Alternates between (a) scoring every source by its distance to the
% current truth estimate and turning that score into a weight, and
% (b) re-estimating the truth of every entry by weighted voting
% (categorical entries) or weighted median (numerical entries).
%
% Input:
%          dataset--the dataset to be calculated on. A cell array with
%                   3 columns
%                   The 1st column is entry id index (numeric)
%                   The 2nd column is observation value, stored as text.
%                       Text that str2double can parse is treated as a
%                       numerical observation, anything else (including
%                       the literal text 'NaN') as a categorical one.
%                   The 3rd column is source index (numeric). Ids do not
%                       have to be contiguous or start at 1.
%
%          iti--maximum number of iteration
%
% Output:
%          index_truth--estimated truth for each entry. It contains 2 columns
%                       The 1st column is entry id index
%                       The 2nd column is estimated truth value
%
%          weight--estimated weight for each source. It contains 2 columns
%                  The 1st column is source id index
%                  The 2nd column is source weight
%
%          ini_truth--fusion results from voting/median. It has the same
%                     format as index_truth
%
% Notes:
%   * The type of an entry (categorical/numerical) is decided from the
%     first observation of that entry after sorting.
%   * Numerical truths are updated with the helper
%     weightedMedian(values,weights), which is not defined in this block
%     and must be on the path.
%   * A source whose score is exactly 0 while another source has a
%     positive score receives weight -log(0)+0.00001 = Inf.
%   * An empty dataset returns empty outputs (0-by-2).
% ----------------------------------------------------------------------

%% Prepare data

% nothing to fuse: return empty results instead of failing on indexing
if isempty(dataset)
    index_truth=cell(0,2);
    ini_truth=cell(0,2);
    weight=zeros(0,2);
    return;
end

% sort dataset based on entry id index so that the observations of one
% entry are contiguous (sort is stable, so already sorted data is unchanged)
[~,indexofsort]=sort([dataset{:,1}]);
dataset=dataset(indexofsort,:);

% calculate some statistics about the data
% number of facts
nof=size(dataset,1);

% get the entry list: entry_ia(k) is the first row of the k-th entry,
% entry_ic maps every row to its entry position
[~,entry_ia,entry_ic]=unique([dataset{:,1}]','stable');
entry_ia=entry_ia(:);
entry_ic=entry_ic(:);

% get the source list: source_ic maps every row to the position of its
% source in list_source
[list_source,~,source_ic]=unique([dataset{:,3}]');
list_source=list_source(:);
source_ic=source_ic(:);

% number of sources
nos=numel(list_source);

% number of entries
noe=numel(entry_ia);

% sentinel so that entry_ia(k):(entry_ia(k+1)-1) is the row range of entry k
entry_ia(noe+1)=nof+1;

% parse every observation once; rows that do not parse are categorical.
% str2double is used for both the type test and the value so the two can
% never disagree, and no observation text is ever evaluated as code.
obs_text=dataset(:,2);
num_value=str2double(obs_text);
num_value=num_value(:);
is_cat=isnan(num_value);

%% initialization
% initial weight
weight=1/nos*ones(nos,1);
weight_matrix=weight(source_ic);

ini_truth=cell(noe,2);

% preallocated for every entry (value is only used for numerical entries)
% so that indexing by entry_ic is always valid
standerror=ones(noe,1);

% calculate initial truth entry by entry ----------------------------------
for i=1:noe
    rows=entry_ia(i):(entry_ia(i+1)-1);
    ini_truth{i,1}=dataset{entry_ia(i),1};

    if is_cat(entry_ia(i)) % categorical data

        % unique values of this entry and the summed weight of the
        % sources that claimed each of them
        [temp_list,~,value_ic]=unique(obs_text(rows),'stable');
        wv=accumarray(value_ic(:),weight_matrix(rows));

        % get the voting result (first value wins a tie)
        [~,I]=max(wv);
        ini_truth{i,2}=temp_list{I};

    else % numerical data

        tempvalue=num_value(rows);

        % calculate median for this entry
        ini_truth{i,2}=median(tempvalue);

        % standard error for this entry; the "+0.1" term prevents it
        % from being 0
        standerror(i)=std(tempvalue)+0.1;
    end
end
% -------------------------------------------------------------------------

index_truth=ini_truth;
truth_matrix=ini_truth(entry_ic,2);% truth corresponding to each row
std_matrix=standerror(entry_ic);% standard error corresponding to each row

% number of categorical / continuous observations per source; these do
% not depend on the truth estimate, so they are computed once
cat_count=accumarray(source_ic,double(is_cat),[nos 1]);
con_count=accumarray(source_ic,double(~is_cat),[nos 1]);
has_cat=cat_count>0;
has_con=con_count>0;

%% CRH iteration
for it=1:iti

    % initial distance for each source
    score1=zeros(nos,1); % 0/1 loss on categorical observations
    score2=zeros(nos,1); % normalized absolute distance on numerical ones

    % update weight--------------------------------------------------------
    for j=1:nof
        s=source_ic(j); % position of the source, not its raw id

        if is_cat(j)
            % using 0/1 loss
            score1(s)=score1(s)+(~strcmp(truth_matrix{j},obs_text{j}));
        else
            % use normalized absolute distance
            score2(s)=score2(s)+abs((num_value(j)-truth_matrix{j})/std_matrix(j));
        end
    end

    % distance divided by count; a source without observations of a
    % type keeps a distance of 0 for that type instead of 0/0 = NaN
    score1(has_cat)=score1(has_cat)./cat_count(has_cat);
    score2(has_con)=score2(has_con)./con_count(has_con);

    % normalize each type across sources; skipped when the total is 0
    % (type absent, or every source matches the truth) to avoid NaN
    total1=sum(score1);
    if total1>0
        score1=score1/total1;
    end
    total2=sum(score2);
    if total2>0
        score2=score2/total2;
    end

    % sum up the distance for categorical and continuous data
    score=score1+score2;

    % calculate weight for sources
    norm_score=max(score);
    if norm_score>0
        w=score/norm_score;
    else
        % all sources are at distance 0: they are indistinguishable
        w=ones(nos,1);
    end
    % NOTE: w==0 for a single source yields an Inf weight here
    weight=-log(w)+0.00001;
    weight_matrix=weight(source_ic);% weight corresponding to each row

    % update truth---------------------------------------------------------
    for j=1:noe
        rows=entry_ia(j):(entry_ia(j+1)-1);

        if is_cat(entry_ia(j))
            % weighted voting, same procedure as for the initial truth
            [temp_list,~,value_ic]=unique(obs_text(rows),'stable');
            wv=accumarray(value_ic(:),weight_matrix(rows));

            [~,I]=max(wv);
            index_truth{j,2}=temp_list{I};
        else
            % update truth by weighted median
            index_truth{j,2}=weightedMedian(num_value(rows),weight_matrix(rows));
        end
    end

    truth_matrix=index_truth(entry_ic,2);
    % ---------------------------------------------------------------------
end

weight=[list_source,weight];

