/***
 *  $Id$
 **
 *  File: sequence_compare.hpp
 *  Created: May 03, 2012
 *
 *  Author: Jaroslaw Zola <jaroslaw.zola@hush.com>
 *  Copyright (c) 2012-2013 Jaroslaw Zola
 *  Distributed under the Boost Software License, Version 1.0.
 *
 *  Boost Software License - Version 1.0 - August 17th, 2003
 *
 *  Permission is hereby granted, free of charge, to any person or organization
 *  obtaining a copy of the software and accompanying documentation covered by
 *  this license (the "Software") to use, reproduce, display, distribute,
 *  execute, and transmit the Software, and to prepare derivative works of the
 *  Software, and to permit third-parties to whom the Software is furnished to
 *  do so, all subject to the following:
 *
 *  The copyright notices in the Software and this entire statement, including
 *  the above license grant, this restriction and the following disclaimer,
 *  must be included in all copies of the Software, in whole or in part, and
 *  all derivative works of the Software, unless such copies or derivative
 *  works are solely in the form of machine-executable object code generated by
 *  a source language processor.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 *  SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 *  FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 *  DEALINGS IN THE SOFTWARE.
 */

#ifndef SEQUENCE_COMPARE_HPP
#define SEQUENCE_COMPARE_HPP

#include <algorithm>
#include <cmath>
#include <cstring>
#include <fstream>
#include <functional>
#include <limits>
#include <map>
#include <string>
#include <vector>

#include <boost/tuple/tuple.hpp>


namespace bio {

  namespace detail {

    // this code comes from jaz
    // Merge-style scan over two ranges: counts the positions at which neither
    // element precedes the other according to pred. This equals the size of
    // the intersection when both ranges are ordered by pred (assumed, the
    // function does not check it).
    template <typename Iter1, typename Iter2, typename Pred>
    int intersection_size(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2, Pred pred) {
        int common = 0;

        for (; (first1 != last1) && (first2 != last2); ) {
            if (pred(*first1, *first2)) {
                // element of the first range is smaller, skip it
                ++first1;
                continue;
            }

            if (pred(*first2, *first1)) {
                // element of the second range is smaller, skip it
                ++first2;
                continue;
            }

            // equivalent elements: count them once and advance both ranges
            ++first1;
            ++first2;
            ++common;
        } // for

        return common;
    } // intersection_size


    // Walks two ranges of (key, count) pairs in parallel and sums squared
    // count differences: a key seen in only one range contributes its count
    // squared, a key seen in both contributes the squared difference.
    // The walk advances on the smaller key, so both ranges are presumably
    // expected to be ordered by key.
    // NOTE(review): the scan stops as soon as either range is exhausted, so
    // entries left over in the other range are not added to the sum.
    template <typename Iter1, typename Iter2>
    int count_distance(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2) {
        int total = 0;

        for (; (first1 != last1) && (first2 != last2); ) {
            const bool first_is_smaller = (first1->first < first2->first);
            const bool second_is_smaller = !first_is_smaller && (first2->first < first1->first);

            if (first_is_smaller) {
                total += (first1->second * first1->second);
                ++first1;
            } else if (second_is_smaller) {
                total += (first2->second * first2->second);
                ++first2;
            } else {
                const int diff = (first1->second - first2->second);
                total += diff * diff;
                ++first1;
                ++first2;
            }
        } // for

        return total;
    } // count_distance


    // Stores all k-long substrings (k-mers) of s in S, sorted ascending.
    // S is resized to the number of k-mers, i.e. |s| - k + 1. When no k-mer
    // exists (k is not positive, or k is longer than s) S is left empty.
    template <typename Sequence> void general_kmer_index(const std::string& s, int k, Sequence& S) {
        const int l = static_cast<int>(s.size());

        // guard: a negative count must never reach resize()
        const int end = ((k > 0) && (k <= l)) ? (l - k + 1) : 0;

        S.resize(end);

        for (int i = 0; i < end; ++i) {
            S[i] = std::string(s.begin() + i, s.begin() + i + k);
        }

        std::sort(S.begin(), S.end());
    } // general_kmer_index


    // Counts occurrences of every k-long substring (k-mer) of s.
    // S is emptied first and then maps each k-mer to its number of occurrences.
    template <typename Map> void general_kmer_count(const std::string& s, int k, Map& S) {
        S.clear();

        const int len = s.size();
        const int last = len - k; // start position of the final k-mer

        for (int pos = 0; pos <= last; ++pos) {
            std::string::const_iterator from = s.begin() + pos;
            S[std::string(from, from + k)]++;
        }
    } // general_kmer_count


    // Character lookup table shared by the DNA k-mer functors below.
    class dna_digit {
    public:
        // Every character maps to 0 (this includes 'a'/'A'), except
        // c/C -> 1, g/G -> 2 and t/T/u/U -> 3.
        dna_digit() {
            std::memset(digit_, 0, sizeof(digit_));

            const unsigned char lower[] = { 'c', 'g', 't', 'u' };
            const unsigned char upper[] = { 'C', 'G', 'T', 'U' };
            const char code[] = { 1, 2, 3, 3 };

            for (int i = 0; i < 4; ++i) {
                digit_[lower[i]] = code[i];
                digit_[upper[i]] = code[i];
            }
        } // dna_digit

    protected:
        // code of each character, indexed by character value
        char digit_[256];

    }; // dna_digit


    // Functor listing the k-mers of a DNA string as integers: every k-mer is
    // encoded with 2 bits per character (codes taken from dna_digit), the
    // first character of the k-mer being the most significant digit.
    class dna_kmer_index : public dna_digit {
    public:
        dna_kmer_index() : dna_digit() { }

        // Fills S with the codes of all k-mers of s, sorted ascending.
        // S is resized to |s| - k + 1; when no k-mer exists (k < 1 or
        // k > |s|) S is left empty.
        // The code is kept in an unsigned long long; NOTE(review): with
        // 2 bits per character this presumably limits k to 32 on platforms
        // where that type has 64 bits.
        template <typename Sequence>
        void operator()(const std::string& s, int k, Sequence& S) {
            const int l = static_cast<int>(s.size());

            // guard: never read s[k - 1] past the end or resize to a negative size
            if ((k < 1) || (l < k)) {
                S.resize(0);
                return;
            }

            const int end = l - k + 1;

            S.resize(end);

            // first kmer
            unsigned long long int v = code(s[k - 1]);
            for (int i = 0; i < k - 1; ++i) {
                v += code(s[i]) * (1ULL << ((k - i - 1) << 1));
            }

            S[0] = v;

            // and then all other: drop the leading digit (weight b),
            // shift by one digit and append the new character
            const unsigned long long int b = 1ULL << ((k - 1) << 1);

            for (int i = 1; i < end; ++i) {
                v = (v - b * code(s[i - 1])) * 4 + code(s[i + k - 1]);
                S[i] = v;
            }

            std::sort(S.begin(), S.end());
        } // operator()

    private:
        // table lookup through unsigned char, so that characters with
        // a negative char value cannot index in front of digit_
        unsigned long long int code(char c) const {
            return digit_[static_cast<unsigned char>(c)];
        } // code

    }; // class dna_kmer_index


    // Functor counting the k-mers of a DNA string: every k-mer is encoded
    // with 2 bits per character (codes taken from dna_digit), the first
    // character of the k-mer being the most significant digit.
    class dna_kmer_count : public dna_digit {
    public:
        dna_kmer_count() : dna_digit() { }

        // Stores in S the number of occurrences of every k-mer code of s.
        // S is emptied first (as general_kmer_count does); it stays empty
        // when no k-mer exists (k < 1 or k > |s|).
        // The code is kept in an unsigned long long; NOTE(review): with
        // 2 bits per character this presumably limits k to 32 on platforms
        // where that type has 64 bits.
        template <typename Map>
        void operator()(const std::string& s, int k, Map& S) {
            S.clear();

            const int l = static_cast<int>(s.size());

            // guard: never read s[k - 1] past the end
            if ((k < 1) || (l < k)) return;

            const int end = l - k + 1;

            // first kmer
            unsigned long long int v = code(s[k - 1]);

            for (int i = 0; i < k - 1; ++i) {
                v += code(s[i]) * (1ULL << ((k - i - 1) << 1));
            }

            S[v] = 1;

            // and then all other: drop the leading digit (weight b),
            // shift by one digit and append the new character
            const unsigned long long int b = 1ULL << ((k - 1) << 1);

            for (int i = 1; i < end; ++i) {
                v = (v - b * code(s[i - 1])) * 4 + code(s[i + k - 1]);
                S[v]++;
            }
        } // operator()

    private:
        // table lookup through unsigned char, so that characters with
        // a negative char value cannot index in front of digit_
        unsigned long long int code(char c) const {
            return digit_[static_cast<unsigned char>(c)];
        } // code

    }; // class dna_kmer_count

  } // namespace detail



  /** Class: sequence_compare
   *
   *  A general interface for sequence_compare algorithms.
   *  All algorithms in this library support this interface.
   *  Some, e.g. local_alignment, provide additional methods.
   */
  template <typename Derived> struct sequence_compare {
      // Static (CRTP) dispatch: the call is forwarded to operator()
      // of the Derived class this object is cast to.
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          Derived& self = *static_cast<Derived*>(this);
          return self(s0, s1);
      }
  }; // struct sequence_compare



  /** Class: scoring_matrix
   *
   *  Functor encapsulating a scoring_matrix functionality.
   */
  class scoring_matrix {
  public:
      /** Constructor: scoring_matrix
       *
       *  Creates an empty matrix. operator() must not be called
       *  on such an object, as there are no scores to return.
       */
      scoring_matrix() : sz_(0), matrix_(0) { std::memset(sigma_, 0, sizeof(sigma_)); }

      /** Constructor: scoring_matrix
       *
       *  Parameter:
       *  sigma -  Map of the alphabet used by the matrix: sigma[c] is the
       *           row/column index of character c.
       *  matrix - Row-wise stored substitution matrix. It is treated as square,
       *           its dimension is the square root of the number of elements.
       */
      scoring_matrix(unsigned char sigma[256], const std::vector<signed char>& matrix)
          : sz_(static_cast<int>(std::sqrt(static_cast<double>(matrix.size())))), matrix_(matrix) {
          std::memcpy(sigma_, sigma, sizeof(sigma_));
      } // scoring_matrix

      /** Function: operator()
       *
       *  Returns:
       *  Substitution score between a and b.
       */
      int operator()(char a, char b) const {
          // index through unsigned char: plain char may be signed, and a negative
          // value would address memory in front of sigma_
          const int row = sigma_[static_cast<unsigned char>(a)];
          const int col = sigma_[static_cast<unsigned char>(b)];
          return matrix_[row * sz_ + col];
      } // operator()

  private:
      int sz_;                          // matrix dimension
      unsigned char sigma_[256];        // character -> row/column index
      std::vector<signed char> matrix_; // scores, row-wise

  }; // scoring_matrix


  /** Function: make_dummy_sm
   *
   *  Parameter:
   *  m - match score.
   *  s - substitution score.
   *
   *  Returns:
   *  A scoring matrix only with match and substitution scores,
   *  for any alphabet.
   */
  inline scoring_matrix make_dummy_sm(int m, int s) {
      const int N = 256; // one row and one column per character value

      unsigned char sigma[N];
      std::vector<signed char> matrix(N * N, s);

      for (int c = 0; c < N; ++c) {
          sigma[c] = c;          // identity alphabet map
          matrix[c * N + c] = m; // diagonal holds the match score
      }

      return scoring_matrix(sigma, matrix);
  } // make_dummy_sm

  /** Function: make_dna_sm
   *
   *  Parameter:
   *  m - match score.
   *  s - substitution score.
   *
   *  Returns:
   *  A scoring matrix only with match and substitution scores,
   *  for the DNA 4-letter alphabet.
   */
  inline scoring_matrix make_dna_sm(int m, int s) {
      const int N = 5; // A, C, G, T and one shared index for everything else

      // every character maps to the last index unless it is listed below
      unsigned char sigma[256];
      for (int i = 0; i < 256; ++i) sigma[i] = N - 1;

      sigma['a'] = sigma['A'] = 0;
      sigma['c'] = sigma['C'] = 1;
      sigma['g'] = sigma['G'] = 2;
      sigma['t'] = sigma['T'] = 3;

      // substitution score everywhere, match score on the diagonal
      // (this includes the diagonal entry of the shared last index)
      std::vector<signed char> matrix(N * N, s);
      for (int i = 0; i < N; ++i) matrix[(N * i) + i] = m;

      return scoring_matrix(sigma, matrix);
  } // make_dna_sm

  /** Function: read_file_sm
   *
   *  Read scoring matrix from a file in the NCBI format.
   *  No alphabet conversion is applied, i.e. lower and upper
   *  case characters are considered different.
   *
   *  Parameter:
   *  name - name of a file.
   *  sub - scoring matrix to store the output.
   *
   *  Returns:
   *  true on success, false otherwise.
   */
  inline bool read_file_sm(const std::string& name, scoring_matrix& sub) {
      std::ifstream f(name.c_str());
      if (!f) return false;

      // skip comments: stop at the first line that does not start with '#'
      std::string buf;
      bool has_header = false;

      while (std::getline(f, buf)) {
          if (buf.empty() || (buf[0] != '#')) {
              has_header = true;
              break;
          }
      } // while

      // only comments in the file, or an empty line where the header should be
      if (!has_header || buf.empty()) return false;

      // parse column header: one character per column, blanks are dropped
      // ('\r' is dropped as well, so files with CRLF line ends are accepted)
      std::string head;

      for (std::size_t p = 0; p < buf.size(); ++p) {
          const char c = buf[p];
          if ((c != ' ') && (c != '\t') && (c != '\r')) head.push_back(c);
      }

      // header must end with the '*' column, and the column
      // indices must fit into the unsigned char alphabet map
      if (head.empty() || (head.size() > 256)) return false;
      if (head[head.size() - 1] != '*') return false;

      const int len = static_cast<int>(head.size());

      std::vector<signed char> matrix(len * len, 0);

      // characters missing in the header share the index of the last ('*') column
      unsigned char sigma[256];

      for (int i = 0; i < 256; ++i) sigma[i] = len - 1;
      for (int i = 0; i < len - 1; ++i) sigma[static_cast<unsigned char>(head[i])] = i;

      // read matrix: each row starts with its character,
      // which must follow the order of the header
      for (int i = 0; i < len; ++i) {
          char id = 0;

          f >> id;
          if (!f || (id != head[i])) return false;

          const int pos0 = sigma[static_cast<unsigned char>(head[i])];

          for (int j = 0; j < len; ++j) {
              int val = 0;

              f >> val;
              if (!f) return false;

              const int pos1 = sigma[static_cast<unsigned char>(head[j])];
              matrix[pos0 * len + pos1] = static_cast<signed char>(val);
          } // for j
      } // for i

      sub = scoring_matrix(sigma, matrix);
      return true;
  } // read_file_sm



  /** Class: local_alignment
   *
   *  Functor implementing memory-efficient local pairwise sequence alignment
   *  with affine gap penalty.
   */
  class local_alignment : public sequence_compare<local_alignment> {
  public:
      /** Constructor: local_alignment
       *
       *  Parameter:
       *  m - Match score (some positive number).
       *  s - Substitution penalty (usually negative number).
       *  g - Gap opening penalty (negative number).
       *  h - Gap extension penalty (negative number).
       */
      explicit local_alignment(int m = 0, int s = 0, int g = 0, int h = 0)
          : has_path_(false), ps0_(0), ps1_(0), sub_(make_dummy_sm(m, s)), g_(g), h_(h) { }

      /** Constructor: local_alignment
       *
       *  Parameter:
       *  sm - Substitution matrix.
       *  g -  Gap opening penalty (negative number).
       *  h -  Gap extension penalty (negative number).
       */
      local_alignment(const scoring_matrix& sm, int g, int h)
          : has_path_(false), ps0_(0), ps1_(0), sub_(sm), g_(g), h_(h) { }

      /** Function: operator()
       *
       *  Compute alignment between s0 and s1. Only the first
       *  highest scoring alignment is discovered.
       *
       *  Returns:
       *  3-tuple (alignment score, alignment length, number of matches).
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          int n = s0.size() + 1;
          int m = s1.size() + 1;

          // S(i, j) = max{ I(i, j), D(i, j), S(i - 1, j - 1) + d(i,j), 0 }
          // D(i, j) = max{ D(i, j - 1), S(i, j - 1) + g } + h
          // I(i, j) = max{ I(i - 1, j), S(i - 1, j) + g } + h

          // only one row of S and I is stored, the complete
          // n x m table is kept for the backtracking moves only
          S_.assign(m, 0);
          I_.assign(m, 0);
          track_.assign(n * m, NOPE);

          int pos = 0;

          // Sij is the diagonal predecessor S(i - 1, j - 1)
          int Sij = 0;

          // we keep track of max
          // the first highest value is kept
          int mi = 0;
          int mj = 0;
          int me = 0;

          for (int i = 1; i < n; ++i) {
              // Si and Di are S(i, j - 1) and D(i, j - 1) when the cell is entered
              int Si = 0;
              int Di = 0;

              pos = i * m;

              for (int j = 1; j < m; ++j) {
                  pos++;

                  Di = std::max(Di, Si + g_) + h_;
                  I_[j] = std::max(I_[j], S_[j] + g_) + h_;

                  Si = Sij + sub_(s0[i - 1], s1[j - 1]);

                  if ((Si < 0) && (Di < 0) && (I_[j] < 0)) {
                      // max is 0
                      Si = 0;
                      track_[pos] = NOPE;
                  } else {
                      // default: max in Si
                      track_[pos] = DIAG;

                      if (Di < I_[j]) {
                          if (Si < I_[j]) {
                              // max in I_[j]
                              Si = I_[j];
                              track_[pos] = TOP;
                          }
                      } else {
                          if (Si < Di) {
                              // max in Di
                              Si = Di;
                              track_[pos] = LEFT;
                          }
                      }
                  } // if

                  if (me < Si) {
                      me = Si;
                      mi = i;
                      mj = j;
                  }

                  // S_[j] still holds S(i - 1, j): it is the diagonal
                  // predecessor of the next cell in this row
                  Sij = S_[j];
                  S_[j] = Si;

              } // for j

              Sij = 0;
          } // for i

          // backtrack from the best cell until a cell with no move is reached
          int i = mi;
          int j = mj;

          int match = 0;
          int length = 0;

          // path_ is collected back to front, path() reverses it on demand
          has_path_ = false;
          path_.clear();

          while (track_[i * m + j] != NOPE) {
              switch (track_[i * m + j]) {
                case NOPE:
                    // excluded by the loop condition
                    break;

                case TOP:
                    --i;
                    path_.push_back('D');
                    break;

                case LEFT:
                    --j;
                    path_.push_back('I');
                    break;

                case DIAG:
                    --i;
                    --j;

                    if (s0[i] == s1[j]) match++;
                    path_.push_back('M');

                    break;
              } // switch

              length++;
          } // while

          ps0_ = i;
          ps1_ = j;

          return boost::make_tuple(me, length, match);
      } // operator()

      /** Function: path
       *
       *  Return the edit path of the last computed alignment.
       *  The path is empty if no alignment has been computed yet.
       *
       *  Returns:
       *  Edit path in the basic CIGAR format.
       */
      std::string path() {
          if (!has_path_) {
              std::reverse(path_.begin(), path_.end());
              has_path_ = true;
          }
          return path_;
      } // path

      /** Function: position
       *
       *  Return the starting position of the last computed alignment.
       *  The position is (0, 0) if no alignment has been computed yet.
       *
       *  Returns:
       *  A pair where the first element is a position in s0,
       *  and the second is a position in s1.
       */
      std::pair<int, int> position() const {
          return std::make_pair(ps0_, ps1_);
      } // position

  private:
      enum Move { NOPE = 0, TOP = 1, LEFT = 2, DIAG = 3 };

      // true once path_ has been reversed into the left-to-right order
      bool has_path_;
      std::string path_;
      std::vector<Move> track_;

      int ps0_;
      int ps1_;

      std::vector<int> S_;
      std::vector<int> I_;

      scoring_matrix sub_;

      int g_;
      int h_;

  }; // class local_alignment


  /** Class: global_alignment
   *
   *  Functor implementing memory-efficient global pairwise sequence alignment
   *  with affine gap penalty.
   */
  class global_alignment : public sequence_compare<global_alignment> {
  public:
      /** Constructor: global_alignment
       *
       *  Parameter:
       *  m - Match score (some positive number).
       *  s - Substitution penalty (usually negative number).
       *  g - Gap opening penalty (negative number).
       *  h - Gap extension penalty (negative number).
       */
      explicit global_alignment(int m = 0, int s = 0, int g = 0, int h = 0)
          : has_path_(false), sub_(make_dummy_sm(m, s)), g_(g), h_(h) { }

      /** Constructor: global_alignment
       *
       *  Parameter:
       *  sm - Substitution matrix.
       *  g -  Gap opening penalty (negative number).
       *  h -  Gap extension penalty (negative number).
       */
      global_alignment(const scoring_matrix& sm, int g, int h)
          : has_path_(false), sub_(sm), g_(g), h_(h) { }

      /** Function: operator()
       *
       *  Compute alignment between s0 and s1.
       *
       *  Returns:
       *  3-tuple (alignment score, alignment length without terminal gaps, number of matches).
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          int n = s0.size() + 1;
          int m = s1.size() + 1;

          // S(i, j) = max{ I(i, j), D(i, j), S(i - 1, j - 1) + d(i,j) }
          // D(i, j) = max{ D(i, j - 1), S(i, j - 1) + g } + h
          // I(i, j) = max{ I(i - 1, j), S(i - 1, j) + g } + h

          // only one row of S and I is stored, the complete
          // n x m table is kept for the backtracking moves only
          S_.assign(m, 0);
          I_.assign(m, 0);
          track_.assign(n * m, TOP);

          // first row: a single gap of length j
          for (int j = 1; j < m; ++j) {
              track_[j] = LEFT;
              S_[j] = I_[j] = g_ + j * h_;
          }

          int pos = 0;

          // Sij is the diagonal predecessor S(i - 1, j - 1)
          int Sij = 0;

          for (int i = 1; i < n; ++i) {
              // first column: a single gap of length i
              int Si = g_ + i * h_;
              int Di = g_ + i * h_;

              pos = i * m;
              track_[pos] = TOP;

              for (int j = 1; j < m; ++j) {
                  pos++;

                  Di = std::max(Di, Si + g_) + h_;
                  I_[j] = std::max(I_[j], S_[j] + g_) + h_;

                  Si = Sij + sub_(s0[i - 1], s1[j - 1]);

                  // default: max in Si
                  track_[pos] = DIAG;

                  if (Di < I_[j]) {
                      if (Si < I_[j]) {
                          // max in I_[j]
                          Si = I_[j];
                          track_[pos] = TOP;
                      }
                  } else {
                      if (Si < Di) {
                          // max in Di
                          Si = Di;
                          track_[pos] = LEFT;
                      }
                  } // if

                  // S_[j] still holds S(i - 1, j): it is the diagonal
                  // predecessor of the next cell in this row
                  Sij = S_[j];
                  S_[j] = Si;

              } // for j

              // S(i, 0), the diagonal predecessor of the first cell in the next row
              Sij = g_ + i * h_;

          } // for i

          // backtrack
          int i = n - 1;
          int j = m - 1;

          int match = 0;
          int length = 0;

          // backtracking runs from the end of the alignment:
          // sgap counts the gap columns seen since the last 'M' column (at the
          // end of the loop these are the gaps at the start of the alignment),
          // egap stores the gap columns seen before the first 'M' column
          // (the gaps at the end of the alignment)
          bool has_gap = false;
          int sgap = 0;
          int egap = 0;

          // path_ is collected back to front, path() reverses it on demand
          has_path_ = false;
          path_.clear();

          while ((i > 0) || (j > 0)) {
              switch (track_[i * m + j]) {
                case TOP:
                    --i;
                    sgap++;
                    path_.push_back('D');
                    break;

                case LEFT:
                    --j;
                    sgap++;
                    path_.push_back('I');
                    break;

                case DIAG:
                    --i;
                    --j;

                    if (s0[i] == s1[j]) match++;
                    path_.push_back('M');

                    if (!has_gap) {
                        has_gap = true;
                        egap = sgap;
                    }

                    sgap = 0;
                    break;
              } // switch

              length++;
          } // while

          return boost::make_tuple(S_.back(), length - sgap - egap, match);
      } // operator()

      /** Function: path
       *
       *  Return the edit path of the last computed alignment.
       *  The path is empty if no alignment has been computed yet.
       *
       *  Returns:
       *  Edit path in the basic CIGAR format.
       */
      std::string path() {
          if (!has_path_) {
              std::reverse(path_.begin(), path_.end());
              has_path_ = true;
          }
          return path_;
      } // path

  private:
      enum Move { TOP = 0, LEFT = 1, DIAG = 2 };

      // true once path_ has been reversed into the left-to-right order
      bool has_path_;
      std::string path_;
      std::vector<Move> track_;

      std::vector<int> S_;
      std::vector<int> I_;

      scoring_matrix sub_;

      int g_;
      int h_;

  }; // class global_alignment


  /** Class: banded_global_alignment
   *
   *  Functor implementing memory-efficient banded global pairwise sequence alignment
   *  with affine gap penalty. The algorithm assumes that gaps no longer than b
   *  will occur in the alignment. The band is adjusted if b <= abs(|s0| - |s1|),
   *  so that the final cell of the dynamic programming table is always inside the band.
   */
  class banded_global_alignment : public sequence_compare<banded_global_alignment> {
  public:
      /** Constructor: banded_global_alignment
       *
       *  Parameter:
       *  m - Match score (some positive number).
       *  s - Substitution penalty (usually negative number).
       *  g - Gap opening penalty (negative number).
       *  h - Gap extension penalty (negative number).
       *  b - Expected band size. If 0, the band is derived from the length of
       *      the longer sequence in each call.
       */
      explicit banded_global_alignment(int m = 0, int s = 0, int g = 0, int h = 0, int b = 0)
          : has_path_(false), sub_(make_dummy_sm(m, s)), g_(g), h_(h), band_(b), lband_(b) { }

      /** Constructor: banded_global_alignment
       *
       *  Parameter:
       *  sm - Substitution matrix.
       *  g -  Gap opening penalty (negative number).
       *  h -  Gap extension penalty (negative number).
       *  b -  Expected band size. If 0, the band is derived from the length of
       *       the longer sequence in each call.
       */
      banded_global_alignment(const scoring_matrix& sm, int g, int h, int b)
          : has_path_(false), sub_(sm), g_(g), h_(h), band_(b), lband_(b) { }

      /** Function: operator()
       *
       *  Compute alignment between s0 and s1.
       *
       *  Returns:
       *  3-tuple (alignment score, alignment length without terminal gaps, number of matches).
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          // score of the cells outside of the band
          const int INF = 99999999;

          int n = s0.size() + 1;
          int m = s1.size() + 1;

          // adjust band
          lband_ = band_;

          if (lband_ == 0) lband_ = std::max(10, std::max(n, m) / 10);

          int b = std::max(n, m);
          if (b < 2 * lband_ + 1) lband_ = (b - 1) >> 1;

          // row i covers the columns [i - lband_, i + lband_ - 1], hence the band
          // must be strictly wider than the length difference to reach the cell
          // (n - 1, m - 1); this also keeps the band at 1 or more
          b = std::abs(n - m);
          if (lband_ <= b) lband_ = b + 1;

          // S(i, j) = max{ I(i, j), D(i, j), S(i - 1, j - 1) + d(i,j) }
          // D(i, j) = max{ D(i, j - 1), S(i, j - 1) + g } + h
          // I(i, j) = max{ I(i - 1, j), S(i - 1, j) + g } + h

          // TODO: we can optimize S_, I_ and track_ to store only required elements
          S_.assign(m, -INF);
          I_.assign(m, -INF);

          S_[0] = I_[0] = 0;

          track_.assign(n * m, TOP);

          int beg = 0;
          int end = std::min(m, lband_);

          // first row, inside of the band: a single gap of length j
          for (int j = 1; j < end; ++j) {
              track_[j] = LEFT;
              S_[j] = I_[j] = g_ + j * h_;
          }

          // Sij is the diagonal predecessor S(i - 1, j - 1)
          int Sij = 0;

          for (int i = 1; i < n; ++i) {
              // first column, inside of the band: a single gap of length i
              int Si = (i < lband_) ? g_ + i * h_ : -INF;
              int Di = (i < lband_) ? g_ + i * h_ : -INF;

              track_[i * m] = TOP;

              beg = std::max(1, i - lband_);
              end = std::min(m, i + lband_);

              for (int j = beg; j < end; ++j) {
                  Di = std::max(Di, Si + g_) + h_;
                  I_[j] = std::max(I_[j], S_[j] + g_) + h_;

                  Si = Sij + sub_(s0[i - 1], s1[j - 1]);

                  // default: max in Si
                  track_[i * m + j] = DIAG;

                  if (Di < I_[j]) {
                      if (Si < I_[j]) {
                          // max in I_[j]
                          Si = I_[j];
                          track_[i * m + j] = TOP;
                      }
                  } else {
                      if (Si < Di) {
                          // max in Di
                          Si = Di;
                          track_[i * m + j] = LEFT;
                      }
                  } // if

                  // S_[j] still holds the value of the previous row: it is
                  // the diagonal predecessor of the next cell in this row
                  Sij = S_[j];
                  S_[j] = Si;

              } // for j

              Sij = (i < lband_) ? g_ + i * h_ : -INF;

          } // for i

          // backtrack (the band adjustment above makes both min() below
          // evaluate to m - 1, i.e. to the last column)
          int i = n - 1;
          int j = std::min(m - 1, i + lband_ - 1);

          int match = 0;
          int length = 0;

          // backtracking runs from the end of the alignment:
          // sgap counts the gap columns seen since the last 'M' column (at the
          // end of the loop these are the gaps at the start of the alignment),
          // egap stores the gap columns seen before the first 'M' column
          // (the gaps at the end of the alignment)
          bool has_gap = false;
          int sgap = 0;
          int egap = 0;

          // path_ is collected back to front, path() reverses it on demand
          has_path_ = false;
          path_.clear();

          while ((i > 0) || (j > 0)) {
              switch (track_[i * m + j]) {
                case TOP:
                    --i;
                    sgap++;
                    path_.push_back('D');
                    break;

                case LEFT:
                    --j;
                    sgap++;
                    path_.push_back('I');
                    break;

                case DIAG:
                    --i;
                    --j;

                    if (s0[i] == s1[j]) match++;
                    path_.push_back('M');

                    if (!has_gap) {
                        has_gap = true;
                        egap = sgap;
                    }

                    sgap = 0;
                    break;
              } // switch

              length++;
          } // while

          return boost::make_tuple(S_[std::min(n - 1 + lband_ - 1, m - 1)], length - sgap - egap, match);
      } // operator()

      /** Function: path
       *
       *  Return the edit path of the last computed alignment.
       *  The path is empty if no alignment has been computed yet.
       *
       *  Returns:
       *  Edit path in the basic CIGAR format.
       */
      std::string path() {
          if (!has_path_) {
              std::reverse(path_.begin(), path_.end());
              has_path_ = true;
          }
          return path_;
      } // path

      /** Function: band
       *
       *  Return the band size used to compute the last alignment.
       */
      int band() const { return lband_; }

  private:
      enum Move { TOP = 0, LEFT = 1, DIAG = 2 };

      // true once path_ has been reversed into the left-to-right order
      bool has_path_;
      std::string path_;
      std::vector<Move> track_;

      std::vector<int> S_;
      std::vector<int> I_;

      scoring_matrix sub_;

      int g_;
      int h_;

      int band_;  // band size requested by the user
      int lband_; // band size used in the last call

  }; // class banded_global_alignment


  /** Class: semi_global_alignment
   *
   *  Functor implementing memory-efficient semi-global pairwise sequence
   *  alignment with affine gap penalty.
   */
  class semi_global_alignment : public sequence_compare<semi_global_alignment> {
  public:
      /** Constructor: semi_global_alignment
       *
       *  Parameter:
       *  m - Match score (some positive number).
       *  s - Substitution penalty (usually negative number).
       *  g - Gap opening penalty (negative number).
       *  h - Gap extension penalty (negative number).
       */
      explicit semi_global_alignment(int m = 0, int s = 0, int g = 0, int h = 0)
          : has_path_(false), sub_(make_dummy_sm(m, s)), g_(g), h_(h) { }

      /** Constructor: semi_global_alignment
       *
       *  Parameter:
       *  sm - Substitution matrix.
       *  g -  Gap opening penalty (negative number).
       *  h -  Gap extension penalty (negative number).
       */
      semi_global_alignment(const scoring_matrix& sm, int g, int h)
          : has_path_(false), sub_(sm), g_(g), h_(h) { }

      /** Function: operator()
       *
       *  Compute alignment between s0 and s1.
       *
       *  Returns:
       *  3-tuple (alignment score, alignment length without terminal gaps, number of matches).
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          int n = s0.size() + 1;
          int m = s1.size() + 1;

          // S(i, j) = max{ I(i, j), D(i, j), S(i - 1, j - 1) + d(i,j) }
          // D(i, j) = max{ D(i, j - 1), S(i, j - 1) + g } + h
          // I(i, j) = max{ I(i - 1, j), S(i - 1, j) + g } + h

          // only one row of S and I is stored, the complete
          // n x m table is kept for the backtracking moves only
          S_.assign(m, 0);
          I_.assign(m, 0);
          track_.assign(n * m, TOP);

          for (int j = 1; j < m; ++j) {
              // S_[j] not set to get free cost
              I_[j] = g_ + j * h_;
              track_[j] = LEFT;
          }

          // max val in the last column
          int mi = 0;
          int me = 0;

          int pos = 0;

          // Sij is the diagonal predecessor S(i - 1, j - 1)
          int Sij = 0;

          for (int i = 1; i < n; ++i) {
              int Si = 0;
              int Di = g_ + i * h_;

              pos = i * m;
              track_[pos] = TOP;

              for (int j = 1; j < m; ++j) {
                  pos++;

                  Di = std::max(Di, Si + g_) + h_;
                  I_[j] = std::max(I_[j], S_[j] + g_) + h_;

                  Si = Sij + sub_(s0[i - 1], s1[j - 1]);

                  // default: max in Si
                  track_[pos] = DIAG;

                  if (Di < I_[j]) {
                      if (Si < I_[j]) {
                          // max in I_[j]
                          Si = I_[j];
                          track_[pos] = TOP;
                      }
                  } else {
                      if (Si < Di) {
                          // max in Di
                          Si = Di;
                          track_[pos] = LEFT;
                      }
                  } // if

                  // S_[j] still holds S(i - 1, j): it is the diagonal
                  // predecessor of the next cell in this row
                  Sij = S_[j];
                  S_[j] = Si;

              } // for j

              // after the row is done Sij is the last column value of the
              // previous row, S(i - 1, m - 1), hence mi is one row ahead of
              // the maximum; the last row is inspected via S_ below
              if (me < Sij) {
                  me = Sij;
                  mi = i;
              }

              Sij = 0;
          } // for i

          // backtrack
          int i = std::max(0, mi - 1);

          // best cell in the last row; for an empty s1 there is no column
          // past 0, and searching from S_.begin() + 1 would give the
          // out-of-range index 1
          int j = 0;
          if (m > 1) j = std::max_element(S_.begin() + 1, S_.end()) - S_.begin();

          int score = 0;

          int match = 0;
          int length = 0;

          // has_gap starts as true, so egap is never assigned and stays 0:
          // the gaps past the best cell are stored in path_ directly below
          // and are never added to length
          bool has_gap = true;
          int sgap = 0;
          int egap = 0;

          // path_ is collected back to front, path() reverses it on demand
          has_path_ = false;
          path_ = "";

          if (me <= S_[j]) {
              // alignment ends in the last row: the rest of s1 is unaligned
              score = S_[j];
              i = n - 1;
              path_ = std::string((m - 1) - j, 'I');
          } else {
              // alignment ends in the last column: the rest of s0 is unaligned
              score = me;
              j = m - 1;
              path_ = std::string((n - 1) - i, 'D');
          }

          while ((i > 0) || (j > 0)) {
              switch (track_[i * m + j]) {
                case TOP:
                    --i;
                    sgap++;
                    path_.push_back('D');
                    break;

                case LEFT:
                    --j;
                    sgap++;
                    path_.push_back('I');
                    break;

                case DIAG:
                    --i;
                    --j;

                    if (s0[i] == s1[j]) match++;
                    path_.push_back('M');

                    if (!has_gap) {
                        has_gap = true;
                        egap = sgap;
                    }

                    sgap = 0;
                    break;
              } // switch

              length++;
          } // while

          return boost::make_tuple(score, length - sgap - egap, match);
      } // operator()

      /** Function: path
       *
       *  Return the edit path of the last computed alignment.
       *  The path is empty if no alignment has been computed yet.
       *
       *  Returns:
       *  Edit path in the basic CIGAR format.
       */
      std::string path() {
          if (!has_path_) {
              std::reverse(path_.begin(), path_.end());
              has_path_ = true;
          }
          return path_;
      } // path

  private:
      enum Move { TOP = 0, LEFT = 1, DIAG = 2 };

      // true once path_ has been reversed into the left-to-right order
      bool has_path_;
      std::string path_;
      std::vector<Move> track_;

      std::vector<int> S_;
      std::vector<int> I_;

      scoring_matrix sub_;

      int g_;
      int h_;

  }; // class semi_global_alignment


  /** Class: free_global_alignment
   *
   *  Functor implementing global pairwise sequence alignment with cost-free
   *  end-gaps and affine gap penalty. Cost-free end-gaps are allowed only in
   *  one sequence. Scores are kept in two rows of |s1| + 1 cells, while the
   *  traceback matrix takes (|s0| + 1) * (|s1| + 1) cells.
   */
  class free_global_alignment : public sequence_compare<free_global_alignment> {
  public:
      /** Constructor: free_global_alignment
       *
       *  Parameter:
       *  m - Match score (some positive number).
       *  s - Substitution penalty (usually negative number).
       *  g - Gap opening penalty (negative number).
       *  h - Gap extension penalty (negative number).
       */
      explicit free_global_alignment(int m = 0, int s = 0, int g = 0, int h = 0)
          : has_path_(true), sub_(make_dummy_sm(m, s)), g_(g), h_(h) { }

      /** Constructor: free_global_alignment
       *
       *  Parameter:
       *  sm - Substitution matrix.
       *  g -  Gap opening penalty (negative number).
       *  h -  Gap extension penalty (negative number).
       */
      free_global_alignment(const scoring_matrix& sm, int g, int h)
          : has_path_(true), sub_(sm), g_(g), h_(h) { }

      /** Function: operator()
       *
       *  Compute alignment between s0 and s1. s0 is assumed to be a shorter
       *  query sequence in which end-gaps are free (i.e. s0 is contained in s1).
       *  Either sequence may be empty: if s1 is empty every residue of s0 is
       *  aligned with a gap, and the reported alignment length is 0.
       *
       *  Returns:
       *  3-tuple (alignment score, alignment length without terminal gaps, number of matches).
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          int n = s0.size() + 1;
          int m = s1.size() + 1;

          // S(i, j) = max{ I(i, j), D(i, j), S(i - 1, j - 1) + d(i,j) }
          // D(i, j) = max{ D(i, j - 1), S(i, j - 1) + g } + h
          // I(i, j) = max{ I(i - 1, j), S(i - 1, j) + g } + h

          // row 0 of S is all zeros: gaps in front of s0 are free
          S_.assign(m, 0);
          I_.assign(m, 0);

          // cell count is computed in size_t, n * m may not fit in int
          track_.assign(static_cast<std::size_t>(n) * m, TOP);

          for (int j = 1; j < m; ++j) {
              I_[j] = g_ + j * h_;
              track_[j] = LEFT;
          }

          // S(i - 1, j - 1), initially S(0, 0)
          int Sij = 0;

          for (int i = 1; i < n; ++i) {
              // column 0: i residues of s0 aligned with a single gap
              int Si = g_ + i * h_;
              int Di = g_ + i * h_;

              std::size_t pos = static_cast<std::size_t>(i) * m;
              track_[pos] = TOP;

              for (int j = 1; j < m; ++j) {
                  ++pos;

                  Di = std::max(Di, Si + g_) + h_;
                  I_[j] = std::max(I_[j], S_[j] + g_) + h_;

                  Si = Sij + sub_(s0[i - 1], s1[j - 1]);

                  // default: max in Si
                  track_[pos] = DIAG;

                  if (Di < I_[j]) {
                      if (Si < I_[j]) {
                          // max in I_[j]
                          Si = I_[j];
                          track_[pos] = TOP;
                      }
                  } else {
                      if (Si < Di) {
                          // max in Di
                          Si = Di;
                          track_[pos] = LEFT;
                      }
                  } // if

                  // keep S(i - 1, j) as the diagonal for column j + 1
                  Sij = S_[j];
                  S_[j] = Si;

              } // for j

              // S(i, 0) is the diagonal for the first cell of the next row
              Sij = g_ + i * h_;
          } // for i

          // backtrack
          int i = n - 1;
          int j = 0;

          int score = 0;

          if (m > 1) {
              // best cell of the last row, the rest of s1 is a free end-gap
              j = std::max_element(S_.begin() + 1, S_.end()) - S_.begin();
              score = S_[j];
          } else if (n > 1) {
              // s1 is empty: there is no cell to search (max_element on an
              // empty range would index past S_), all of s0 faces one gap
              score = g_ + (n - 1) * h_;
          }

          int match = 0;

          // terminal gap at the end of the alignment
          const int egap = (m - 1) - j;
          int length = egap;

          // run of gap moves seen since the last DIAG move, after the loop
          // this is the terminal gap at the start of the alignment
          int sgap = 0;

          has_path_ = false;
          path_ = std::string(egap, 'I');

          while ((i > 0) || (j > 0)) {
              switch (track_[static_cast<std::size_t>(i) * m + j]) {
                case TOP:
                    --i;
                    sgap++;
                    path_.push_back('D');
                    break;

                case LEFT:
                    --j;
                    sgap++;
                    path_.push_back('I');
                    break;

                case DIAG:
                    --i;
                    --j;

                    if (s0[i] == s1[j]) match++;
                    path_.push_back('M');

                    sgap = 0;
                    break;
              } // switch

              length++;
          } // while

          return boost::make_tuple(score, length - sgap - egap, match);
      } // operator()

      /** Function: path
       *
       *  Return the edit path of the last computed alignment.
       *  The path is empty if no alignment has been computed yet.
       *
       *  Returns:
       *  Edit path in the basic CIGAR format.
       */
      std::string path() {
          // path_ is built back to front during backtracking,
          // it is reversed once, on the first request
          if (!has_path_) {
              std::reverse(path_.begin(), path_.end());
              has_path_ = true;
          }
          return path_;
      } // path

  private:
      // traceback moves: TOP consumes s0 only, LEFT consumes s1 only,
      // DIAG consumes one residue of each sequence
      enum Move { TOP = 0, LEFT = 1, DIAG = 2 };

      // true when path_ is already stored front to back
      bool has_path_;
      std::string path_;

      // traceback matrix stored row by row, cell (i, j) is at i * m + j
      std::vector<Move> track_;

      // current row of S and I from the recurrence in operator()
      std::vector<int> S_;
      std::vector<int> I_;

      scoring_matrix sub_;

      int g_;
      int h_;

  }; // class free_global_alignment


  /** Function: print_alignment
   *
   *  Write an alignment of s0 and s1 as three rows separated by newlines:
   *  s0 with gaps, a marker row with '|' under identical residues, and
   *  s1 with gaps. No newline is written after the last row.
   *
   *  Parameter:
   *  os -   Output stream.
   *  s0 -   First sequence.
   *  s1 -   Second sequence.
   *  path - Edit path, 'M' consumes a residue of both sequences,
   *         'D' consumes s0 only and 'I' consumes s1 only.
   *  pos -  Positions in s0 (first) and s1 (second) at which the path starts.
   *
   *  Returns:
   *  The stream os.
   */
  inline std::ostream& print_alignment(std::ostream& os,
                                       const std::string& s0,
                                       const std::string& s1,
                                       const std::string& path,
                                       const std::pair<int, int>& pos = std::make_pair(0, 0)) {
      const int steps = path.size();

      // top row: s0, with '-' wherever only s1 advances
      int a = pos.first;

      for (int k = 0; k < steps; ++k) {
          os << ((path[k] == 'I') ? '-' : s0[a++]);
      }

      os << "\n";

      // middle row: '|' marks columns holding identical residues
      a = pos.first;
      int b = pos.second;

      for (int k = 0; k < steps; ++k) {
          const char op = path[k];

          if (op == 'M') {
              os << ((s0[a] == s1[b]) ? '|' : ' ');
              ++a;
              ++b;
          } else if (op == 'D') {
              ++a;
              os << ' ';
          } else if (op == 'I') {
              ++b;
              os << ' ';
          }
      } // for k

      os << "\n";

      // bottom row: s1, with '-' wherever only s0 advances
      b = pos.second;

      for (int k = 0; k < steps; ++k) {
          os << ((path[k] == 'D') ? '-' : s1[b++]);
      }

      return os;
  } // print_alignment



  /** Class: d2
   *
   *  Functor to compute the d2 distance.
   */
  class d2 : public sequence_compare<d2> {
  public:
      /** Constructor: d2
       *
       *  Parameter:
       *  k - kmer length.
       *  isdna - assume that input sequences are DNA/RNA.
       */
      explicit d2(int k = 0, bool isdna = true)
          : k_(k), isdna_(isdna), has_s0_(false) { }

      /** Function: operator()
       *
       *  Compute d2 score between s0 and s1. The kmer counts of s0 are
       *  stored whenever s0 is long enough, even if s1 is rejected, so that
       *  the unary version of this operator always refers to this s0.
       *
       *  Returns:
       *  3-tuple (d2 score, number of unique kmers in s0, number of unique kmers in s1),
       *  or (-1, -1, -1) if any of the sequences is shorter than k.
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          has_s0_ = !too_short_(s0);
          if (!has_s0_) return boost::make_tuple(-1, -1, -1);

          // count s0 before looking at s1, otherwise a short s1 would leave
          // the counts of some older s0 in place for the unary operator
          if (isdna_) dC_(s0, k_, dcount0_);
          else detail::general_kmer_count(s0, k_, count0_);

          return (*this)(s1);
      } // operator()

      /** Function: operator()
       *
       *  Compute d2 score between s0 and s1, where s0 is a sequence
       *  from the previous call of the binary version of this operator.
       *
       *  Returns:
       *  3-tuple (d2 score, number of unique kmers in s0, number of unique kmers in s1),
       *  or (-1, -1, -1) if s1 is shorter than k, or if the last binary call
       *  did not get a usable s0 (including the case of no binary call at all).
       */
      boost::tuple<int, int, int> operator()(const std::string& s1) {
          if (!has_s0_ || too_short_(s1)) return boost::make_tuple(-1, -1, -1);

          if (isdna_) {
              dC_(s1, k_, dcount1_);
              int S = detail::count_distance(dcount0_.begin(), dcount0_.end(),
                                             dcount1_.begin(), dcount1_.end());
              return boost::make_tuple(S, dcount0_.size(), dcount1_.size());
          }

          detail::general_kmer_count(s1, k_, count1_);
          int S = detail::count_distance(count0_.begin(), count0_.end(),
                                         count1_.begin(), count1_.end());
          return boost::make_tuple(S, count0_.size(), count1_.size());
      } // operator()

  private:
      // true if s holds no kmer of length k_, negative k_ rejects everything
      bool too_short_(const std::string& s) const {
          return (k_ < 0) || (s.size() < static_cast<std::string::size_type>(k_));
      } // too_short_

      int k_;
      bool isdna_;

      // true when the counts of s0 come from the most recent binary call
      bool has_s0_;

      // kmer counts of s0 and s1 used when isdna_ is set
      std::map<unsigned long long int, int> dcount0_;
      std::map<unsigned long long int, int> dcount1_;

      // kmer counts of s0 and s1 used otherwise
      std::map<std::string, int> count0_;
      std::map<std::string, int> count1_;

      detail::dna_kmer_count dC_;

  }; // class d2


  /** Class: kmer_fraction
   *
   *  Functor to compute the number of shared kmers between two sequences.
   *  It can be used to compute e.g. kmer fraction similarity, defined as
   *  the Jaccard index between kmer spectra of sequences.
   */
  class kmer_fraction : public sequence_compare<kmer_fraction> {
  public:
      /** Constructor: kmer_fraction
       *
       *  Parameter:
       *  k - kmer length.
       *  isdna - assume that input sequences are DNA/RNA.
       */
      explicit kmer_fraction(int k = 0, bool isdna = true)
          : k_(k), isdna_(isdna), has_s0_(false) { }

      /** Function: operator()
       *
       *  Compute kmer score between s0 and s1. The kmer index of s0 is
       *  stored whenever s0 is long enough, even if s1 is rejected, so that
       *  the unary version of this operator always refers to this s0.
       *
       *  Returns:
       *  3-tuple (number of common kmers, number of kmers in s0, number of kmers in s1),
       *  or (-1, -1, -1) if any of the sequences is shorter than k.
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          has_s0_ = !(s0.size() < k_);
          if (!has_s0_) return boost::make_tuple(-1, -1, -1);

          // index s0 before looking at s1, otherwise a short s1 would leave
          // the index of some older s0 in place for the unary operator
          if (isdna_) dI_(s0, k_, dindex0_);
          else detail::general_kmer_index(s0, k_, index0_);

          return (*this)(s1);
      } // operator()

      /** Function: operator()
       *
       *  Compute kmer score between s0 and s1, where s0 is a sequence
       *  from the previous call of the binary version of this operator.
       *
       *  Returns:
       *  3-tuple (number of common kmers, number of kmers in s0, number of kmers in s1),
       *  or (-1, -1, -1) if s1 is shorter than k, or if the last binary call
       *  did not get a usable s0 (including the case of no binary call at all).
       */
      boost::tuple<int, int, int> operator()(const std::string& s1) {
          if (!has_s0_ || (s1.size() < k_)) return boost::make_tuple(-1, -1, -1);

          if (isdna_) {
              dI_(s1, k_, dindex1_);
              int S = detail::intersection_size(dindex0_.begin(), dindex0_.end(),
                                                dindex1_.begin(), dindex1_.end(),
                                                std::less<unsigned long long int>());
              return boost::make_tuple(S, dindex0_.size(), dindex1_.size());
          }

          detail::general_kmer_index(s1, k_, index1_);
          int S = detail::intersection_size(index0_.begin(), index0_.end(),
                                            index1_.begin(), index1_.end(),
                                            std::less<std::string>());
          return boost::make_tuple(S, index0_.size(), index1_.size());
      } // operator()

  private:
      unsigned int k_;
      bool isdna_;

      // true when the index of s0 comes from the most recent binary call
      bool has_s0_;

      // kmer indices of s0 and s1 used when isdna_ is set
      std::vector<unsigned long long int> dindex0_;
      std::vector<unsigned long long int> dindex1_;

      // kmer indices of s0 and s1 used otherwise
      std::vector<std::string> index0_;
      std::vector<std::string> index1_;

      detail::dna_kmer_index dI_;

  }; // class kmer_fraction


  /** Class: spaced_seeds_fraction
   *
   *  Functor to compute the number of spaced seeds shared by two sequences.
   *  Every substring as long as the seed is masked with the seed pattern
   *  before the two sets of substrings are compared.
   */
  class spaced_seeds_fraction : public sequence_compare<spaced_seeds_fraction> {
  public:
      /** Constructor: spaced_seeds_fraction
       *
       *  Parameter:
       *  sseed - Seed pattern. Positions holding '0' are ignored during
       *          comparison, all other positions must agree.
       */
      explicit spaced_seeds_fraction(const std::string& sseed) : sseed_(sseed) { }

      /** Function: operator()
       *
       *  Compute the number of masked substrings common to s0 and s1.
       *
       *  Returns:
       *  3-tuple (number of common seeds, number of seeds in s0, number of seeds in s1),
       *  or (-1, -1, -1) if any of the sequences is shorter than the seed.
       */
      boost::tuple<int, int, int> operator()(const std::string& s0, const std::string& s1) {
          // same convention as in d2 and kmer_fraction: a sequence with no
          // complete seed is reported instead of being indexed and masked
          if ((s0.size() < sseed_.size()) || (s1.size() < sseed_.size())) {
              return boost::make_tuple(-1, -1, -1);
          }

          int k = sseed_.size();

          detail::general_kmer_index(s0, k, index0_);
          detail::general_kmer_index(s1, k, index1_);

          mask_(index0_);
          mask_(index1_);

          // masking rewrites the entries, so both lists are sorted here
          // before they are handed to intersection_size
          std::sort(index0_.begin(), index0_.end());
          std::sort(index1_.begin(), index1_.end());

          int S = detail::intersection_size(index0_.begin(), index0_.end(),
                                            index1_.begin(), index1_.end(),
                                            std::less<std::string>());

          return boost::make_tuple(S, index0_.size(), index1_.size());
      } // operator()

  private:
      // overwrite with '*' each position of each entry where the seed has '0'
      void mask_(std::vector<std::string>& index) const {
          const std::string::size_type k = sseed_.size();

          for (std::vector<std::string>::size_type i = 0; i < index.size(); ++i) {
              for (std::string::size_type j = 0; j < k; ++j) {
                  if (sseed_[j] == '0') index[i][j] = '*';
              }
          }
      } // mask_

      std::string sseed_;

      // substrings of s0 and s1 from the last call, masked and sorted
      std::vector<std::string> index0_;
      std::vector<std::string> index1_;

  }; // spaced_seeds_fraction

} // namespace bio

#endif // SEQUENCE_COMPARE_HPP
