package edu.buffalo.cse.jive.app.dblp.model;

import java.util.*;

import org.xml.sax.Attributes;

/**
 * In-memory model of a single DBLP record (article, book, inproceedings, ...).
 *
 * <p>
 * An entity is created from the attributes of its opening XML element (which supply the
 * {@code key} and {@code mdate} values) and is then populated one field at a time through
 * {@link #updateProperty(TypeField, String)}. Author, editor and publisher names are not stored
 * on the entity itself; they are replaced by integer indexes into class-wide lookup tables that
 * are shared by every entity.
 *
 * <p>
 * The lookup tables and the entity counter are plain static state without any synchronization.
 */
public class Entity {

  /** The kinds of record an entity can represent. */
  public enum TypeEntity {

    article,
    book,
    incollection,
    inproceedings,
    mastersthesis,
    phdthesis,
    proceedings,
    www
  }

  /** Names of the markup elements enumerated for title contents. */
  public enum TypeTitleContents {

    i,
    ref,
    sub,
    sup,
    tt
  }

  /** The fields that {@link #updateProperty(TypeField, String)} knows how to store. */
  public enum TypeField {

    address,
    author,
    booktitle,
    cdrom,
    chapter,
    cite,
    crossref,
    editor,
    ee,
    isbn,
    journal,
    month,
    note,
    number,
    pages,
    publisher,
    school,
    series,
    title,
    url,
    volume,
    year
  }

  // 790K authors
  private static final Map<String, Integer> uniqueAuthors = new HashMap<String, Integer>(1250000,
      .8f);
  // 15K editors
  private static final Map<String, Integer> uniqueEditors = new HashMap<String, Integer>(25000,
      .7f);
  // 500 publishers
  private static final Map<String, Integer> uniquePublishers = new HashMap<String, Integer>(1000);

  // number of entities created so far; also the id handed to the most recent entity
  private static int entityCounter = 0;

  /**
   * Returns the shared author lookup table (escaped author name to index).
   *
   * @return the live, mutable map; it is not a copy
   */
  public static Map<String, Integer> getLookupAuthors() {

    return uniqueAuthors;
  }

  /**
   * Returns the shared editor lookup table (escaped editor name to index).
   *
   * @return the live, mutable map; it is not a copy
   */
  public static Map<String, Integer> getLookupEditors() {

    return uniqueEditors;
  }

  /**
   * Returns the shared publisher lookup table (escaped publisher name to index).
   *
   * @return the live, mutable map; it is not a copy
   */
  public static Map<String, Integer> getLookupPublishers() {

    return uniquePublishers;
  }

  private String address = "";
  // 1:N set of author indexes from uniqueAuthors
  private final Set<Integer> authors = new HashSet<Integer>();
  private String booktitle = "";
  // 1:N cdroms
  private final Set<String> cdroms = new HashSet<String>();
  private String chapter = "";
  private final Set<String> cites = new HashSet<String>();
  // the crossref field in the inproceedings record contains the key of the
  // proceedings record
  private String crossref = "";
  // 1:N set of editors of proceedings
  private final Set<Integer> editors = new HashSet<Integer>();
  // up to 2 electronic entries
  private final Set<String> ees = new HashSet<String>();
  private int id = -1;
  private final Set<String> isbns = new HashSet<String>();
  private String journal = "";
  // mandatory, unique
  private String key = "";
  // mandatory modification date in ISO 8601 date format (YYYY-MM-DD)
  private String mdate = "";
  private String month = "";
  // 1:N notes
  private final Set<String> notes = new HashSet<String>();
  private String number = "";
  // optional, the preferred form is 'from-to'
  private String pages = "";
  // 1:N set of publisher indexes from uniquePublishers
  private final Set<Integer> publishers = new HashSet<Integer>();
  private String school = "";
  private String series = "";
  // mandatory
  private String title = "";
  private TypeEntity type = null;

  // up to 2 url entries
  private final Set<String> urls = new HashSet<String>();

  private String volume = "";

  // optional, four digit year when present
  private String year = "";

  /**
   * Creates an entity of the given type and assigns it the next sequential id.
   *
   * @param type
   *          the kind of record
   * @param attrs
   *          attributes of the record element; the values of {@code key} and {@code mdate} are
   *          read from it
   */
  public Entity(TypeEntity type, Attributes attrs) {

    setType(type);
    setKey(attrs.getValue("key"));
    setMdate(attrs.getValue("mdate"));
    setId(++entityCounter);
  }

  /** Adds a cdrom entry to this entity. */
  protected void addCdrom(String cdrom) {

    cdroms.add(cdrom);
  }

  /** Adds a citation entry to this entity. */
  public void addCite(String cite) {

    cites.add(cite);
  }

  /** Adds an electronic edition entry to this entity. */
  protected void addEe(String ee) {

    ees.add(ee);
  }

  /** Adds an ISBN to this entity. */
  protected void addIsbn(String isbn) {

    isbns.add(isbn);
  }

  /** Adds a note to this entity. */
  protected void addNote(String note) {

    notes.add(note);
  }

  /** Adds a url entry to this entity. */
  protected void addUrl(String url) {

    urls.add(url);
  }

  public String getAddress() {

    return address;
  }

  /** Returns the live set of author indexes (see {@link #getLookupAuthors()}). */
  public Set<Integer> getAuthors() {

    return authors;
  }

  public String getBooktitle() {

    return booktitle;
  }

  /** Returns the live set of cdrom entries. */
  public Set<String> getCdroms() {

    return cdroms;
  }

  public String getChapter() {

    return chapter;
  }

  /** Returns the live set of citation entries. */
  public Set<String> getCites() {

    return cites;
  }

  public String getCrossref() {

    return crossref;
  }

  /** Returns the live set of editor indexes (see {@link #getLookupEditors()}). */
  public Set<Integer> getEditors() {

    return editors;
  }

  /** Returns the live set of electronic edition entries. */
  public Set<String> getEes() {

    return ees;
  }

  /** Returns the sequential id assigned when this entity was constructed. */
  public int getId() {

    return id;
  }

  /** Returns the live set of ISBNs. */
  public Set<String> getIsbns() {

    return isbns;
  }

  public String getJournal() {

    return journal;
  }

  public String getKey() {

    return key;
  }

  public String getMdate() {

    return mdate;
  }

  public String getMonth() {

    return month;
  }

  /** Returns the live set of notes. */
  public Set<String> getNotes() {

    return notes;
  }

  public String getNumber() {

    return number;
  }

  public String getPages() {

    return pages;
  }

  /** Returns the live set of publisher indexes (see {@link #getLookupPublishers()}). */
  public Set<Integer> getPublishers() {

    return publishers;
  }

  public String getSchool() {

    return school;
  }

  public String getSeries() {

    return series;
  }

  public String getTitle() {

    return title;
  }

  public TypeEntity getType() {

    return type;
  }

  /** Returns the live set of url entries. */
  public Set<String> getUrls() {

    return urls;
  }

  public String getVolume() {

    return volume;
  }

  public String getYear() {

    return year;
  }

  /**
   * Returns the index registered for {@code key} in {@code source}, registering it first when it
   * is not yet present. New indexes are {@code source.size() + 1}, so they start at 1 and grow
   * by one per distinct key as long as nothing is removed from the map.
   */
  private static Integer lookup(String key, Map<String, Integer> source) {

    Integer result = source.get(key);
    if (result == null) {
      result = source.size() + 1;
      source.put(key, result);
    }
    return result;
  }

  /**
   * Returns the index of {@code author} in the shared author table, adding it if necessary.
   *
   * @param author
   *          the author name, used verbatim as the table key
   * @return the index associated with the name
   */
  public Integer lookupAuthor(String author) {

    return lookup(author, uniqueAuthors);
  }

  /**
   * Returns the index of {@code editor} in the shared editor table, adding it if necessary.
   *
   * @param editor
   *          the editor name, used verbatim as the table key
   * @return the index associated with the name
   */
  public Integer lookupEditor(String editor) {

    return lookup(editor, uniqueEditors);
  }

  /**
   * Returns the index of {@code publisher} in the shared publisher table, adding it if
   * necessary.
   *
   * @param publisher
   *          the publisher name, used verbatim as the table key
   * @return the index associated with the name
   */
  public Integer lookupPublisher(String publisher) {

    return lookup(publisher, uniquePublishers);
  }

  protected void setAddress(String address) {

    this.address = address;
  }

  protected void setBooktitle(String booktitle) {

    this.booktitle = booktitle;
  }

  protected void setChapter(String chapter) {

    this.chapter = chapter;
  }

  public void setCrossref(String crossref) {

    this.crossref = crossref;
  }

  private void setId(int id) {

    this.id = id;
  }

  protected void setJournal(String journal) {

    this.journal = journal;
  }

  /** Sets the record key; with assertions enabled an empty key is rejected. */
  protected void setKey(String key) {

    // compare by content: a reference comparison against "" misses empty strings built at runtime
    assert !"".equals(key);
    this.key = key;
  }

  /** Sets the modification date; with assertions enabled an empty value is rejected. */
  protected void setMdate(String mdate) {

    assert !"".equals(mdate);
    this.mdate = mdate;
  }

  protected void setMonth(String month) {

    this.month = month;
  }

  protected void setNumber(String number) {

    this.number = number;
  }

  protected void setPages(String pages) {

    this.pages = pages;
  }

  protected void setSchool(String school) {

    this.school = school;
  }

  protected void setSeries(String series) {

    this.series = series;
  }

  /** Sets the title; with assertions enabled an empty title is rejected. */
  protected void setTitle(String title) {

    assert !"".equals(title);
    this.title = title;
  }

  protected void setType(TypeEntity type) {

    this.type = type;
  }

  protected void setVolume(String volume) {

    this.volume = volume;
  }

  protected void setYear(String year) {

    this.year = year;
  }

  /** Reports on standard error that a single-valued field was supplied more than once. */
  private void updateError(TypeField fieldType) {

    System.err.println(String.format("duplicate field %s for %s entity (key = %s).", fieldType,
        getType(), getKey()));
  }

  /**
   * Reports a duplicate when a single-valued field already holds something other than the empty
   * string. The comparison is by content, so an empty string produced at runtime still counts as
   * "not set".
   */
  private void reportIfAlreadySet(TypeField fieldType, String current) {

    if (!"".equals(current)) {
      updateError(fieldType);
    }
  }

  /**
   * Rewrites quote and backslash characters in a raw field value: every single quote is doubled,
   * a backslash followed by a quote is then collapsed to a quote, and any backslash that is still
   * left is replaced by a question mark. The three steps run in exactly this order.
   * NOTE(review): this looks like escaping for SQL string literals - confirm against the
   * consumer of these values.
   */
  private static String escape(String raw) {

    return raw.replace("'", "''").replace("\\'", "'").replace("\\", "?");
  }

  /**
   * Stores one field value on this entity.
   *
   * <p>
   * The value is escaped first. Multi-valued fields (author, cdrom, cite, editor, ee, isbn, note,
   * publisher, url) accumulate values; author, editor and publisher values are converted to
   * indexes in the shared lookup tables. Single-valued fields are overwritten, and a message is
   * written to standard error when the field already held a non-empty value.
   *
   * @param fieldType
   *          the field to update
   * @param fieldValue
   *          the raw value; the literal {@code "..."} and {@code null} are ignored
   */
  public void updateProperty(TypeField fieldType, String fieldValue) {

    // "..." is a placeholder and null carries no value: nothing to record in either case
    if (fieldValue == null || "...".equals(fieldValue)) {
      return;
    }
    String value = escape(fieldValue);
    switch (fieldType) {

      case address:
        reportIfAlreadySet(fieldType, getAddress());
        setAddress(value);
        break;
      case author:
        authors.add(lookupAuthor(value));
        break;
      case booktitle:
        reportIfAlreadySet(fieldType, getBooktitle());
        setBooktitle(value);
        break;
      case cdrom:
        addCdrom(value);
        break;
      case chapter:
        reportIfAlreadySet(fieldType, getChapter());
        setChapter(value);
        break;
      case cite:
        addCite(value);
        break;
      case crossref:
        reportIfAlreadySet(fieldType, getCrossref());
        setCrossref(value);
        break;
      case editor:
        editors.add(lookupEditor(value));
        break;
      case ee:
        addEe(value);
        break;
      case isbn:
        addIsbn(value);
        break;
      case journal:
        reportIfAlreadySet(fieldType, getJournal());
        setJournal(value);
        break;
      case month:
        reportIfAlreadySet(fieldType, getMonth());
        setMonth(value);
        break;
      case note:
        addNote(value);
        break;
      case number:
        reportIfAlreadySet(fieldType, getNumber());
        setNumber(value);
        break;
      case pages:
        reportIfAlreadySet(fieldType, getPages());
        setPages(value);
        break;
      case publisher:
        publishers.add(lookupPublisher(value));
        break;
      case school:
        reportIfAlreadySet(fieldType, getSchool());
        setSchool(value);
        break;
      case series:
        reportIfAlreadySet(fieldType, getSeries());
        setSeries(value);
        break;
      case title:
        reportIfAlreadySet(fieldType, getTitle());
        setTitle(value);
        break;
      case url:
        addUrl(value);
        break;
      case volume:
        reportIfAlreadySet(fieldType, getVolume());
        setVolume(value);
        break;
      case year:
        reportIfAlreadySet(fieldType, getYear());
        setYear(value);
        break;
    }
  }
}
