% MODEGEN  Recode haplotype strings into a numeric mode table.
%
% Input:
%   haplocells - cell array of haplotype strings, e.g. 'cgtcangcttac';
%                one cell per haplotype, one character per SNP position.
%                All strings must be at least as long as the first one.
%   ctables    - cell array with one contingency table per SNP position.
%                Row 1 of each table holds the two allele character codes,
%                rows 2:end hold the per-pattern counts of each allele:
%                      a    g
%                      4    6
%                      5    5
%                      3    2     (with missing)
%
% Output:
%   modetable  - numel(haplocells) x snplength matrix where, per position,
%                  1   = haplotype carries the modal (most counted) allele
%                  2   = haplotype carries the other allele
%                  avg = haplotype is missing ('n'); avg is the mean of the
%                        1/2 codes of the non-missing haplotypes, using the
%                        table's total count as denominator.
%
% Character codes: a-97, c-99, g-103, n-110, t-116
%
% Modified:
% Date: 5/5/2006
function modetable = modegen(haplocells, ctables)
    MISSING_CODE = 110;             % double('n'), the missing-value marker

    sm = length(haplocells);        % number of haplotypes in raw data
    if sm == 0
        % Nothing to recode; avoid indexing haplocells{1} on empty input.
        modetable = zeros(0, 0);
        return;
    end
    sn = length(haplocells{1});     % number of SNP positions per haplotype

    if length(ctables) < sn
        error('modegen:ctablesTooShort', ...
              'Expected %d contingency tables, got %d.', sn, length(ctables));
    end

    % Convert the haplotype strings into a matrix of character codes.
    snpmat = zeros(sm, sn);
    for i = 1:sm
        snpmat(i, :) = double(reshape(haplocells{i}(1:sn), 1, sn));
    end

    modetable = snpmat;
    for j = 1:sn
        ctable = ctables{j};

        % Column totals of the count rows. The explicit dimension keeps this
        % a 1x2 vector even when the table has only a single count row.
        rowsum = sum(ctable(2:end, :), 1);
        total  = sum(rowsum);

        % Modal allele: the one with the larger count (ties -> first column).
        if rowsum(1) >= rowsum(2)
            modes = ctable(1, 1);
        else
            modes = ctable(1, 2);
        end

        col       = snpmat(:, j);
        isMode    = (col == modes);
        isMissing = (col == MISSING_CODE);

        coded = 2 * ones(sm, 1);
        coded(isMode) = 1;

        % Missing entries are imputed with the average code of the observed
        % ones. This is done whenever missing values are present, rather
        % than relying on a fixed sample size to detect "no missing".
        % Note: if total is 0 the imputed value is NaN (0/0).
        if any(isMissing)
            tot12 = nnz(isMode) + 2 * nnz(~isMode & ~isMissing);
            coded(isMissing) = tot12 / total;
        end

        modetable(:, j) = coded;
    end
end