package edu.buffalo.cse.jive.app.dblp.sampler;

import java.io.IOException;
import java.util.Locale;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import edu.buffalo.cse.jive.app.dblp.model.Entity.TypeEntity;
import edu.buffalo.cse.jive.app.dblp.model.Entity.TypeField;
import edu.buffalo.cse.jive.app.dblp.model.Entity.TypeTitleContents;

/**
 * SAX handler that re-serializes a random sample of the entity elements it sees and hands each
 * sampled entity to an {@link XMLPersister}.
 *
 * <p>
 * An element whose name matches a {@code TypeEntity} constant starts an entity; it is kept when
 * {@code Math.random() <= sampleRate}. Elements whose names match a {@code TypeField} or
 * {@code TypeTitleContents} constant are copied into the current entity, if any. The
 * {@code dblp} root element is skipped, and any other element name raises a
 * {@link SAXException}.
 *
 * <p>
 * The character content of {@code ee} and {@code url} fields is wrapped in a CDATA section.
 */
public class XMLSampler extends DefaultHandler {

  /** Serialized text of the entity being sampled, or {@code null} when none is being collected. */
  private StringBuilder entity = null;
  private final XMLPersister persister;
  private final double sampleRate;
  /** An {@code ee}/{@code url} field was started but its CDATA section has not been opened yet. */
  private boolean cdataPending = false;
  /** A CDATA section was written to {@link #entity} and still has to be closed. */
  private boolean cdataOpen = false;

  /**
   * @param persister
   *          receiver of every sampled entity
   * @param sampleRate
   *          threshold compared against {@code Math.random()}; an entity is kept when the random
   *          value is less than or equal to it
   */
  public XMLSampler(final XMLPersister persister, final double sampleRate) {

    this.sampleRate = sampleRate;
    this.persister = persister;
  }

  /**
   * Appends the escaped character data to the current entity, opening the CDATA section first
   * when the enclosing field requested one. Ignored when no entity is being collected.
   */
  @Override
  public void characters(final char[] ch, final int start, final int length) throws SAXException {

    if (entity == null) {
      return;
    }
    if (cdataPending) {
      entity.append("<![CDATA[");
      cdataPending = false;
      cdataOpen = true;
    }
    // NOTE(review): text inside the CDATA section is still escaped, exactly as before; a reader
    // of the output will see the entity references literally. Escaping '>' also keeps "]]>"
    // from appearing inside the section. Confirm this is the intended output format.
    entity.append(escape(new String(ch, start, length)));
  }

  /**
   * Replaces {@code &}, {@code <}, {@code >} and {@code "} with their XML entity references.
   */
  private String escape(final String value) {

    final StringBuilder result = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
      final char ch = value.charAt(i);
      if (ch == '&') {
        result.append("&amp;");
      }
      else if (ch == '<') {
        result.append("&lt;");
      }
      else if (ch == '>') {
        result.append("&gt;");
      }
      else if (ch == '"') {
        result.append("&quot;");
      }
      else {
        result.append(ch);
      }
    }
    return result.toString();
  }

  /** Appends every attribute as {@code name="escaped value"} to the current entity. */
  private void appendAttributes(final Attributes atts) {

    for (int i = 0; i < atts.getLength(); i++) {
      entity.append(' ').append(atts.getLocalName(i)).append("=\"")
          .append(escape(atts.getValue(i))).append('"');
    }
  }

  /**
   * Terminates the CDATA section if one was actually opened and clears any pending request, so
   * that an opening marker is never left dangling and a closing marker is never written alone.
   */
  private void closeCdata() {

    if (cdataOpen) {
      entity.append("]]>");
      cdataOpen = false;
    }
    cdataPending = false;
  }

  /** Returns whether the lower-cased tag name is a {@code TypeEntity} constant. */
  private boolean checkEntityType(final String tagName) {

    boolean result = true;
    try {
      // Locale.ROOT keeps the lookup independent of the JVM's default locale
      TypeEntity.valueOf(tagName.toLowerCase(Locale.ROOT));
    }
    catch (final IllegalArgumentException e) {
      result = false;
    }
    return result;
  }

  /** Returns whether the lower-cased tag name is a {@code TypeField} constant. */
  private boolean checkFieldType(final String tagName) {

    boolean result = true;
    try {
      TypeField.valueOf(tagName.toLowerCase(Locale.ROOT));
    }
    catch (final IllegalArgumentException e) {
      result = false;
    }
    return result;
  }

  /** Returns whether the lower-cased tag name is a {@code TypeTitleContents} constant. */
  private boolean checkTitleContentsType(final String tagName) {

    boolean result = true;
    try {
      TypeTitleContents.valueOf(tagName.toLowerCase(Locale.ROOT));
    }
    catch (final IllegalArgumentException e) {
      result = false;
    }
    return result;
  }

  /**
   * Closes the current field or entity. When an entity ends, its serialized text is passed to
   * the persister and collection stops.
   *
   * @throws SAXException
   *           if the tag is neither the root, an entity, a field nor a title-contents element,
   *           or if persisting the finished entity fails
   */
  @Override
  public void endElement(final String uri, final String localName, final String qName)
      throws SAXException {

    // root element
    if (entity == null && "dblp".equalsIgnoreCase(localName)) {
      return;
    }
    // end tag of an entity: finish and persist whatever was collected
    if (checkEntityType(localName)) {
      if (entity != null) {
        closeCdata();
        entity.append("</").append(localName).append(">\n");
        final String xml = entity.toString();
        // reset before persisting so the handler state is clean even if persist fails
        entity = null;
        try {
          persister.persist(xml);
        }
        catch (final IOException e) {
          // surface the failure to the parser instead of silently dropping the entity
          throw new SAXException(String.format("Failed to persist entity: %s", localName), e);
        }
      }
      return;
    }
    // otherwise, it is the end tag of the current field
    if (checkFieldType(localName) || checkTitleContentsType(localName)) {
      if (entity != null) {
        closeCdata();
        entity.append("</").append(localName).append(">\n");
      }
      return;
    }
    throw new SAXException(String.format("Unexpected end tag: %s", localName));
  }

  /**
   * Opens a new entity (with probability governed by the sample rate) or a field of the entity
   * currently being collected.
   *
   * @throws SAXException
   *           if the tag is neither the root, an entity, a field nor a title-contents element
   */
  @Override
  public void startElement(final String uri, final String localName, final String name,
      final Attributes atts) throws SAXException {

    if (entity == null && "dblp".equalsIgnoreCase(localName)) {
      return;
    }
    // start tag of a new entity
    if (checkEntityType(localName)) {
      // include the element with some probability
      if (entity == null && Math.random() <= sampleRate) {
        entity = new StringBuilder();
        cdataPending = false;
        cdataOpen = false;
        entity.append('<').append(localName);
        appendAttributes(atts);
        entity.append(">\n");
      }
      return;
    }
    // if this is the start tag of a new field
    if (checkFieldType(localName) || checkTitleContentsType(localName)) {
      if (entity != null) {
        // a tag nested inside an ee/url field must not end up inside an unterminated CDATA
        closeCdata();
        entity.append('<').append(localName);
        appendAttributes(atts);
        entity.append('>');
        cdataPending = localName.equals(TypeField.ee.name())
            || localName.equals(TypeField.url.name());
      }
      return;
    }
    throw new SAXException(String.format("Unexpected start tag: %s", localName));
  }
}