/***
 *  $Id$
 **
 *  File: fastx_iterator.hpp
 *  Created: Apr 05, 2012
 *
 *  Author: Jaroslaw Zola <jaroslaw.zola@hush.com>
 *  Copyright (c) 2012 Jaroslaw Zola
 *  Distributed under the Boost Software License.
 *
 *  Boost Software License - Version 1.0 - August 17th, 2003
 *
 *  Permission is hereby granted, free of charge, to any person or organization
 *  obtaining a copy of the software and accompanying documentation covered by
 *  this license (the "Software") to use, reproduce, display, distribute,
 *  execute, and transmit the Software, and to prepare derivative works of the
 *  Software, and to permit third-parties to whom the Software is furnished to
 *  do so, all subject to the following:
 *
 *  The copyright notices in the Software and this entire statement, including
 *  the above license grant, this restriction and the following disclaimer,
 *  must be included in all copies of the Software, in whole or in part, and
 *  all derivative works of the Software, unless such copies or derivative
 *  works are solely in the form of machine-executable object code generated by
 *  a source language processor.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 *  SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 *  FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 *  DEALINGS IN THE SOFTWARE.
 */

#ifndef FASTX_ITERATOR_HPP
#define FASTX_ITERATOR_HPP

#include <istream>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>


namespace bio {

  // Default record maker for FASTA input: packs two strings into a
  // std::pair, keeping the argument order (first = s1, second = s2).
  struct fasta_sequence_type {
      typedef std::pair<std::string, std::string> value_type;

      // Returns a pair holding copies of s1 and s2.
      value_type operator()(const std::string& s1, const std::string& s2) const {
          return std::make_pair(s1, s2);
      } // operator()
  }; // struct fasta_sequence_type


  // Default record maker for FASTQ input: packs three strings into a
  // std::tuple, keeping the argument order (s1, s2, s3).
  struct fastq_sequence_type {
      typedef std::tuple<std::string, std::string, std::string> value_type;

      // Returns a tuple holding copies of s1, s2 and s3.
      value_type operator()(const std::string& s1, const std::string& s2, const std::string& s3) const {
          return value_type(s1, s2, s3);
      } // operator()
  }; // struct fastq_sequence_type


  /**
   *  Input iterator that reads FASTA records from a std::istream.
   *
   *  A record starts at a line whose first character is '>' (the rest of
   *  that line is the sequence name) and extends up to the next such line
   *  or the end of the stream. Sequence lines are concatenated; empty lines
   *  and lines starting with ';' are ignored, and one trailing '\r' is
   *  removed from the name and from every sequence line. The functor T
   *  turns the (name, sequence) strings into T::value_type.
   *
   *  A default-constructed iterator serves as the end iterator. Only
   *  a pointer to the stream is stored, so the stream must outlive the
   *  iterator.
   */
  template <typename T = fasta_sequence_type>
  class fasta_input_iterator : public std::iterator<std::input_iterator_tag, typename T::value_type> {
  public:
      typedef char char_type;
      typedef std::char_traits<char_type> traits_type;
      typedef std::basic_istream<char_type, traits_type> istream_type;

      typedef typename T::value_type value_type;


      /** Creates the end iterator. */
      fasta_input_iterator() : state_(false), is_(0), pos_(0), offset_(0), tellg_(0) { }

      /**
       *  Attaches to `is`, skips everything in front of the first header
       *  line and reads the first record. If the stream is not readable or
       *  contains no header line, the iterator compares equal to end.
       */
      fasta_input_iterator(istream_type& is) : state_(false), is_(&is), pos_(0), offset_(0), tellg_(0) {
          state_ = (is_ && *is_) ? true : false;
          if (state_ == true) {
              // look for the first header line
              while (*is_) {
                  buf_ = "";

                  std::getline(*is_, buf_);
                  offset_ += buf_.size() + 1;

                  if ((buf_.empty() == false) && (buf_[0] == '>')) break;
              }

              read_record_();

              // read_record_() advances pos_ when it stores a record; the
              // first record has index 0, and pos_ must stay 0 as well
              // when no record was found (no unsigned wrap-around)
              pos_ = 0;
          } // if
      } // fasta_input_iterator


      /** Zero-based index of the current record. */
      unsigned int pos() const { return pos_; }

      /**
       *  Offset of the header line of the current record. It is computed by
       *  counting the characters of every line read plus one terminator
       *  per line, starting from 0 at the stream position at which the
       *  iterator was constructed (the stream itself is not queried).
       */
      unsigned long int tellg() const { return tellg_; }


      /** Current record, as produced by T. */
      const value_type& operator*() const { return value_; }

      const value_type* operator->() const { return &(operator*()); }

      /** Reads the next record; the iterator becomes end if there is none. */
      fasta_input_iterator& operator++() {
          read_record_();
          return *this;
      } // operator++

      /** Post-increment: returns a copy made before reading the next record. */
      fasta_input_iterator operator++(int) {
          fasta_input_iterator tmp = *this;
          read_record_();
          return tmp;
      } // operator++


  private:
      /**
       *  Parses one record. On entry buf_ is expected to hold the (untrimmed)
       *  header line of that record; on exit it holds the header line of
       *  the following record, or the stream has failed.
       */
      void read_record_() {
          state_ = (is_ && *is_) ? true : false;

          // A record can only start from a header line kept in buf_. This
          // holds whenever the stream is still good, unless the stream was
          // reset from outside; in that case report end instead of
          // indexing into an empty or unrelated line.
          if ((state_ == true) && (buf_.empty() || (buf_[0] != '>'))) state_ = false;

          if (state_ == true) {
              // buf_ is still untrimmed here, so its size is the on-stream
              // length of the header line
              tellg_ = offset_ - buf_.size() - 1;

              // trim sequence name
              if (buf_.back() == '\r') buf_.pop_back();

              s1_ = (buf_.c_str() + 1);
              s2_ = "";

              // get sequence
              do {
                  buf_ = "";

                  std::getline(*is_, buf_);
                  offset_ += buf_.size() + 1;

                  if (buf_.empty() == false) {
                      // header of the next record: keep it untrimmed in
                      // buf_ for the next call
                      if (buf_[0] == '>') break;

                      // lines starting with ';' are skipped
                      if (buf_[0] != ';') {
                          if (buf_.back() == '\r') buf_.pop_back();
                          s2_ += buf_;
                      }
                  }
              }
              while (*is_);

              value_ = make_(s1_, s2_);
              pos_++;
          } // if
      } // read_record_

      bool state_;               // true while the iterator holds a record
      istream_type* is_;         // source stream (not owned), 0 for end

      unsigned int pos_;         // index of the current record
      unsigned long int offset_; // characters consumed so far (see tellg())
      unsigned long int tellg_;  // offset of the current header line

      std::string s1_;           // name of the current record
      std::string s2_;           // sequence of the current record
      std::string buf_;          // last line read (header of the next record)

      T make_;
      value_type value_;

      // Two iterators are equal when both are at end, or when both hold
      // a record and are attached to the same stream.
      friend bool operator==(const fasta_input_iterator& lhs, const fasta_input_iterator& rhs) {
          return ((lhs.state_ == rhs.state_) && (!lhs.state_ || (lhs.is_ == rhs.is_)));
      } // operator==

      friend bool operator!=(const fasta_input_iterator& lhs, const fasta_input_iterator& rhs) {
          return !(lhs == rhs);
      } // operator!=

  }; // class fasta_input_iterator


  /**
   *  Input iterator that reads FASTQ records from a std::istream.
   *
   *  A record is a header line starting with '@' (the rest of the line is
   *  the name), sequence lines up to a line starting with '+', and score
   *  lines. Score lines are collected for as long as they fit within the
   *  length of the sequence, which is how a score line starting with '@'
   *  is told apart from the next header. Empty lines are ignored and one
   *  trailing '\r' is removed from every line. The functor T turns the
   *  (name, sequence, scores) strings into T::value_type.
   *
   *  When a record is malformed the iterator becomes equal to end and the
   *  state of the stream is cleared, so the caller can tell a parse error
   *  from a regular end of input by testing eof() on the stream.
   *
   *  A default-constructed iterator serves as the end iterator. Only
   *  a pointer to the stream is stored, so the stream must outlive the
   *  iterator.
   */
  template <typename T = fastq_sequence_type>
  class fastq_input_iterator : public std::iterator<std::input_iterator_tag, typename T::value_type> {
  public:
      typedef char char_type;
      typedef std::char_traits<char_type> traits_type;
      typedef std::basic_istream<char_type, traits_type> istream_type;

      typedef typename T::value_type value_type;


      /** Creates the end iterator. */
      fastq_input_iterator() : state_(false), is_(0), pos_(0), offset_(0), tellg_(0) { }

      /**
       *  Attaches to `is`, skips everything in front of the first header
       *  line and reads the first record. If the stream is not readable,
       *  contains no header line, or the first record is malformed, the
       *  iterator compares equal to end.
       */
      fastq_input_iterator(istream_type& is) : state_(false), is_(&is), pos_(0), offset_(0), tellg_(0) {
          state_ = (is_ && *is_) ? true : false;
          if (state_ == true) {
              // look for the first header line
              while (*is_) {
                  buf_ = "";

                  std::getline(*is_, buf_);
                  offset_ += buf_.size() + 1;

                  if ((buf_.empty() == false) && (buf_[0] == '@')) break;
              }

              read_record_();

              // read_record_() advances pos_ when it stores a record; the
              // first record has index 0, and pos_ must stay 0 as well
              // when no record was read (no unsigned wrap-around)
              pos_ = 0;
          } // if
      } // fastq_input_iterator


      /** Zero-based index of the current record. */
      unsigned int pos() const { return pos_; }

      /**
       *  Offset of the header line of the current record. It is computed by
       *  counting the characters of every line read plus one terminator
       *  per line, starting from 0 at the stream position at which the
       *  iterator was constructed (the stream itself is not queried).
       */
      unsigned long int tellg() const { return tellg_; }


      /** Current record, as produced by T. */
      const value_type& operator*() const { return value_; }

      const value_type* operator->() const { return &(operator*()); }

      /** Reads the next record; the iterator becomes end if there is none. */
      fastq_input_iterator& operator++() {
          read_record_();
          return *this;
      } // operator++

      /** Post-increment: returns a copy made before reading the next record. */
      fastq_input_iterator operator++(int) {
          fastq_input_iterator tmp = *this;
          read_record_();
          return tmp;
      } // operator++


  private:
      /**
       *  Parses one record. On entry buf_ is expected to hold the (untrimmed)
       *  header line of that record; on exit it holds the untrimmed header
       *  line of the following record, unless the input ended or an error
       *  was detected.
       */
      void read_record_() {
          state_ = (is_ && *is_) ? true : false;

          // A record can only start from a header line kept in buf_. The
          // stream is cleared after a parse error, so a further increment
          // could otherwise get here with an empty or unrelated line;
          // report end instead of parsing it.
          if ((state_ == true) && (buf_.empty() || (buf_[0] != '@'))) state_ = false;

          if (state_ == true) {
              // buf_ is still untrimmed here, so its size is the on-stream
              // length of the header line
              tellg_ = offset_ - buf_.size() - 1;

              // trim sequence name
              if (buf_.back() == '\r') buf_.pop_back();

              s1_ = (buf_.c_str() + 1);
              s2_ = "";
              s3_ = "";

              // get sequence
              do {
                  buf_ = "";

                  std::getline(*is_, buf_);
                  offset_ += buf_.size() + 1;

                  if (buf_.empty() == false) {
                      if (buf_[0] == '+') break;
                      if (buf_.back() == '\r') buf_.pop_back();
                      s2_ += buf_;
                  }
              }
              while (*is_);

              // get scores
              do {
                  buf_ = "";

                  std::getline(*is_, buf_);
                  offset_ += buf_.size() + 1;

                  if (buf_.empty() == false) {
                      // Length without the trailing '\r'. buf_ itself is
                      // left untouched: if this line turns out to be the
                      // next header, tellg_ must be derived from its full
                      // on-stream length.
                      std::string::size_type len = buf_.size();
                      if (buf_[len - 1] == '\r') --len;

                      if (s3_.size() + len <= s2_.size()) s3_.append(buf_, 0, len);
                      else {
                          // the line does not fit into the scores, so it
                          // has to be the header of the next record
                          if (buf_[0] != '@') {
                              state_ = false;
                              // we clear stream so that we can
                              // test for eof() outside
                              is_->clear();
                          }
                          break;
                      }
                  }
              }
              while (*is_);

              // NOTE(review): if the scores are complete but the line that
              // follows is not a header, the record is still stored below
              // even though the iterator has been marked as ended above.
              if (s2_.size() != s3_.size()) {
                  state_ = false;
                  // we clear stream so that we can
                  // test for eof() outside
                  is_->clear();
              } else {
                  value_ = make_(s1_, s2_, s3_);
                  pos_++;
              }

          } // if
      } // read_record_

      bool state_;               // true while the iterator holds a record
      istream_type* is_;         // source stream (not owned), 0 for end

      unsigned int pos_;         // index of the current record
      unsigned long int offset_; // characters consumed so far (see tellg())
      unsigned long int tellg_;  // offset of the current header line

      T make_;
      value_type value_;

      std::string s1_;           // name of the current record
      std::string s2_;           // sequence of the current record
      std::string s3_;           // scores of the current record
      std::string buf_;          // last line read (header of the next record)

      // Two iterators are equal when both are at end, or when both hold
      // a record and are attached to the same stream.
      friend bool operator==(const fastq_input_iterator& lhs, const fastq_input_iterator& rhs) {
          return ((lhs.state_ == rhs.state_) && (!lhs.state_ || (lhs.is_ == rhs.is_)));
      } // operator==

      friend bool operator!=(const fastq_input_iterator& lhs, const fastq_input_iterator& rhs) {
          return !(lhs == rhs);
      } // operator!=

  }; // class fastq_input_iterator

}; // namespace bio

#endif // FASTX_ITERATOR_HPP
