/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie.machinereading.domains.ace;

import edu.stanford.nlp.ie.machinereading.GenericDataSetReader;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceCharSeq;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceDocument;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntity;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEntityMention;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceEventMention;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceRelationMention;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceRelationMentionArgument;
import edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceToken;
import edu.stanford.nlp.ie.machinereading.structure.AnnotationUtils;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.EventMention;
import edu.stanford.nlp.ie.machinereading.structure.ExtractionObject;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;

/**
 * Dataset reader that parses ACE annotation files through {@link AceDocument} and converts
 * their entity, relation and event mentions into the machine-reading structures
 * ({@link EntityMention}, {@link RelationMention}, {@link EventMention}) attached to
 * sentence-level {@link Annotation}s.
 *
 * <p>While reading, the instance accumulates several per-type counters (entities, relations,
 * events, adjacent mentions, relations between names, mention types). Within this class the
 * counters are only written; {@link #printCounter(Counter, String)} can render one of them to
 * the logger but is not invoked from any code path here.
 */
public class AceReader extends GenericDataSetReader {
    /** File-name suffix of ACE annotation files; stripped to obtain the document prefix. */
    private static final String APF_SUFFIX = ".apf.xml";

    private static final Redwood.RedwoodChannels log = Redwood.channels(AceReader.class);

    /** Compile-time verbosity switch; not referenced by any code path in this class. */
    private static final boolean VERBOSE = false;

    /** Number of converted entity mentions, keyed by entity type. */
    private final Counter<String> entityCounts = new ClassicCounter<String>();
    /** Number of same-type mention pairs whose heads touch, keyed by entity type. */
    private final Counter<String> adjacentEntityMentions = new ClassicCounter<String>();
    /** Number of converted relation mentions, keyed by relation type. */
    private final Counter<String> relationCounts = new ClassicCounter<String>();
    /** Number of relations whose two arguments are both "NAM" mentions, keyed by "type.subtype". */
    private final Counter<String> nameRelationCounts = new ClassicCounter<String>();
    /** Number of converted event mentions, keyed by event type. */
    private final Counter<String> eventCounts = new ClassicCounter<String>();
    /** Number of entity mentions, keyed by mention type. */
    private final Counter<String> mentionTypeCounts = new ClassicCounter<String>();
    /** ACE corpus version selector; only the value "ACE2004" changes how documents are parsed. */
    private final String aceVersion;

    /** Creates a reader with no processor, preprocessing enabled, for version "ACE2005". */
    public AceReader() {
        this(null, true);
    }

    /**
     * Creates a reader for version "ACE2005".
     *
     * @param processor pipeline handed to the superclass
     * @param preprocess preprocessing flag handed to the superclass
     */
    public AceReader(StanfordCoreNLP processor, boolean preprocess) {
        this(processor, preprocess, "ACE2005");
    }

    /**
     * Creates a reader for the given ACE version. The logger level is initialised to
     * {@link Level#SEVERE}, so INFO-level progress messages are suppressed unless the level is
     * changed afterwards.
     *
     * @param processor pipeline handed to the superclass
     * @param preprocess preprocessing flag handed to the superclass
     * @param version ACE version name; "ACE2004" selects the version-specific parser, any other
     *     value (including {@code null}) selects the default parser
     */
    public AceReader(StanfordCoreNLP processor, boolean preprocess, String version) {
        super(processor, preprocess, false, true);
        this.logger = Logger.getLogger(AceReader.class.getName());
        this.logger.setLevel(Level.SEVERE);
        this.aceVersion = version;
    }

    /**
     * Reads one ACE document or a directory of ACE documents.
     *
     * <p>For a directory, every file returned by {@code IOUtils.iterFilesRecursive} for the
     * {@code .apf.xml} extension is read, except files whose name ends in
     * {@code .UPC1.apf.xml}. Otherwise {@code path} itself is read as a single document.
     * After all sentences are attached to the corpus, the adjacent-mention, name-relation and
     * mention-type counters are updated for each sentence.
     *
     * @param path file or directory to read
     * @return a corpus annotation holding all sentences that were read
     * @throws IOException propagated from document parsing
     * @throws SAXException propagated from document parsing
     * @throws ParserConfigurationException propagated from document parsing
     */
    @Override
    public Annotation read(String path) throws IOException, SAXException, ParserConfigurationException {
        List<CoreMap> allSentences = new ArrayList<CoreMap>();
        File basePath = new File(path);
        assert basePath.exists();
        Annotation corpus = new Annotation("");
        if (basePath.isDirectory()) {
            for (File aceFile : IOUtils.iterFilesRecursive(basePath, ".apf.xml")) {
                if (aceFile.getName().endsWith(".UPC1.apf.xml")) {
                    continue;
                }
                allSentences.addAll(this.readDocument(aceFile, corpus));
            }
        } else {
            allSentences.addAll(this.readDocument(basePath, corpus));
        }
        AnnotationUtils.addSentences(corpus, allSentences);
        for (CoreMap sent : allSentences) {
            this.countAdjacentMentions(sent);
            this.countNameRelations(sent);
            this.countMentionTypes(sent);
        }
        return corpus;
    }

    /** Increments {@link #mentionTypeCounts} once per entity mention in the sentence. */
    private void countMentionTypes(CoreMap sent) {
        List<EntityMention> mentions = sent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (mentions == null) {
            return;
        }
        for (EntityMention m : mentions) {
            this.mentionTypeCounts.incrementCount(m.getMentionType());
        }
    }

    /**
     * Increments {@link #nameRelationCounts} (key "type.subtype") for each relation mention in
     * the sentence that has exactly two entity arguments, both of mention type "NAM".
     */
    private void countNameRelations(CoreMap sent) {
        List<RelationMention> mentions = sent.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
        if (mentions == null) {
            return;
        }
        for (RelationMention m : mentions) {
            List<EntityMention> args = m.getEntityMentionArgs();
            boolean bothNames = args.size() == 2
                && args.get(0).getMentionType().equals("NAM")
                && args.get(1).getMentionType().equals("NAM");
            if (bothNames) {
                this.nameRelationCounts.incrementCount(m.getType() + "." + m.getSubType());
            }
        }
    }

    /**
     * Increments {@link #adjacentEntityMentions} for every ordered pair of distinct mentions
     * of the same type where the first mention's head ends at the token index where the
     * second mention's head starts.
     */
    private void countAdjacentMentions(CoreMap sent) {
        List<EntityMention> mentions = sent.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
        if (mentions == null) {
            return;
        }
        for (EntityMention m1 : mentions) {
            for (EntityMention m2 : mentions) {
                boolean adjacentSameType = m1 != m2
                    && m1.getHeadTokenEnd() == m2.getHeadTokenStart()
                    && m1.getType().equals(m2.getType());
                if (adjacentSameType) {
                    this.adjacentEntityMentions.incrementCount(m1.getType());
                }
            }
        }
    }

    /**
     * Logs, at INFO level, one line per key of the counter with its count.
     *
     * @param c counter to print
     * @param h heading placed before " counts:"
     */
    private void printCounter(Counter<String> c, String h) {
        StringBuilder b = new StringBuilder();
        b.append(h).append(" counts:\n");
        for (String k : c.keySet()) {
            b.append("\t").append(k).append(": ").append(c.getCount(k)).append("\n");
        }
        this.logger.info(b.toString());
    }

    /**
     * Reads the document identified by an annotation file.
     *
     * <p>Only a trailing {@code .apf.xml} is removed from the absolute path, so directory names
     * that happen to contain that text are left intact.
     *
     * @param file annotation file of the document
     * @param corpus corpus annotation, forwarded unchanged
     * @return the sentences read from the document
     */
    private List<CoreMap> readDocument(File file, Annotation corpus) throws IOException, SAXException, ParserConfigurationException {
        String absolutePath = file.getAbsolutePath();
        String aceFilename = absolutePath.endsWith(APF_SUFFIX)
            ? absolutePath.substring(0, absolutePath.length() - APF_SUFFIX.length())
            : absolutePath;
        return this.readDocument(aceFilename, corpus);
    }

    /**
     * Parses the document with the given prefix and converts each of its sentences.
     *
     * <p>A sentence consisting of a single token that starts with "&lt;" and ends with "&gt;"
     * is skipped, but its token still advances the running token offset. Converted entity
     * mentions are remembered by id across the whole document so that relation and event
     * arguments can be resolved.
     *
     * @param prefix document path without the annotation-file suffix
     * @param corpus corpus annotation (not used by this method)
     * @return one annotation per retained sentence, in document order
     */
    private List<CoreMap> readDocument(String prefix, Annotation corpus) throws IOException, SAXException, ParserConfigurationException {
        // Evaluated once so message strings (including mention toString()) are only built
        // when INFO logging is actually enabled.
        final boolean logInfo = this.logger.isLoggable(Level.INFO);
        if (logInfo) {
            this.logger.info("Reading document: " + prefix);
        }
        List<CoreMap> results = new ArrayList<CoreMap>();
        // Constant-first comparison keeps a null version from throwing here.
        AceDocument aceDocument = "ACE2004".equals(this.aceVersion)
            ? AceDocument.parseDocument(prefix, false, this.aceVersion)
            : AceDocument.parseDocument(prefix, false);
        String docId = aceDocument.getId();
        Map<String, EntityMention> entityMentionMap = Generics.newHashMap();
        // Index of the current sentence's first token, counted over the whole document.
        int tokenOffset = 0;
        for (int sentenceIndex = 0; sentenceIndex < aceDocument.getSentenceCount(); ++sentenceIndex) {
            List<AceToken> tokens = aceDocument.getSentence(sentenceIndex);
            List<CoreLabel> words = new ArrayList<CoreLabel>();
            StringBuilder textContent = new StringBuilder();
            for (int i = 0; i < tokens.size(); ++i) {
                AceToken token = tokens.get(i);
                CoreLabel l = new CoreLabel();
                l.setWord(token.getLiteral());
                l.set(CoreAnnotations.ValueAnnotation.class, l.word());
                l.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, token.getByteStart());
                l.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, token.getByteEnd());
                words.add(l);
                if (i > 0) {
                    textContent.append(" ");
                }
                textContent.append(token.getLiteral());
            }
            if (isSingleMarkupToken(words)) {
                tokenOffset += tokens.size();
                continue;
            }
            Annotation sentence = new Annotation(textContent.toString());
            sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
            sentence.set(CoreAnnotations.TokensAnnotation.class, words);
            if (logInfo) {
                this.logger.info("Reading sentence: \"" + textContent + "\"");
            }
            List<AceEntityMention> entityMentions = aceDocument.getEntityMentions(sentenceIndex);
            List<AceRelationMention> relationMentions = aceDocument.getRelationMentions(sentenceIndex);
            List<AceEventMention> eventMentions = aceDocument.getEventMentions(sentenceIndex);

            for (AceEntityMention aceEntityMention : entityMentions) {
                String corefID = findCorefId(aceDocument, aceEntityMention);
                EntityMention convertedEntity = this.convertAceEntityMention(aceEntityMention, docId, sentence, tokenOffset, corefID);
                this.entityCounts.incrementCount(convertedEntity.getType());
                if (logInfo) {
                    this.logger.info("CONVERTED MENTION HEAD SPAN: " + convertedEntity.getHead());
                    this.logger.info("CONVERTED ENTITY MENTION: " + convertedEntity);
                }
                AnnotationUtils.addEntityMention(sentence, convertedEntity);
                entityMentionMap.put(aceEntityMention.getId(), convertedEntity);
            }

            for (AceRelationMention aceRelationMention : relationMentions) {
                RelationMention convertedRelation = this.convertAceRelationMention(aceRelationMention, docId, sentence, entityMentionMap);
                if (convertedRelation == null) {
                    continue;
                }
                this.relationCounts.incrementCount(convertedRelation.getType());
                if (logInfo) {
                    this.logger.info("CONVERTED RELATION MENTION: " + convertedRelation);
                }
                AnnotationUtils.addRelationMention(sentence, convertedRelation);
            }

            for (AceEventMention aceEventMention : eventMentions) {
                EventMention convertedEvent = this.convertAceEventMention(aceEventMention, docId, sentence, entityMentionMap, tokenOffset);
                if (convertedEvent == null) {
                    continue;
                }
                this.eventCounts.incrementCount(convertedEvent.getType());
                if (logInfo) {
                    this.logger.info("CONVERTED EVENT MENTION: " + convertedEvent);
                }
                AnnotationUtils.addEventMention(sentence, convertedEvent);
            }

            results.add(sentence);
            tokenOffset += tokens.size();
        }
        return results;
    }

    /** Returns true when the sentence is exactly one token of the form {@code <...>}. */
    private static boolean isSingleMarkupToken(List<CoreLabel> words) {
        if (words.size() != 1) {
            return false;
        }
        String word = words.get(0).word();
        return word.startsWith("<") && word.endsWith(">");
    }

    /**
     * Returns the id of the first entity (in key-set iteration order) whose mention list
     * contains the given mention, or the empty string when no entity contains it.
     */
    private static String findCorefId(AceDocument aceDocument, AceEntityMention mention) {
        for (String entityID : aceDocument.getKeySetEntities()) {
            AceEntity entity = aceDocument.getEntity(entityID);
            if (entity.getMentions().contains(mention)) {
                return entityID;
            }
        }
        return "";
    }

    /**
     * Converts an ACE event mention. The event span runs from the smallest extent start to the
     * largest extent end among its resolved arguments; the anchor span is shifted by
     * {@code tokenOffset} to become sentence-relative.
     *
     * @param aceEventMention the mention to convert
     * @param docId document id (not used by this method)
     * @param sentence sentence the mention belongs to
     * @param entityMap converted entity mentions keyed by ACE mention id
     * @param tokenOffset document-level index of the sentence's first token
     * @return the converted mention, or {@code null} if any argument is missing from
     *     {@code entityMap}
     */
    private EventMention convertAceEventMention(AceEventMention aceEventMention, String docId, CoreMap sentence, Map<String, EntityMention> entityMap, int tokenOffset) {
        List<String> roles = new ArrayList<String>(aceEventMention.getRoles());
        List<ExtractionObject> convertedArgs = new ArrayList<ExtractionObject>();
        int left = Integer.MAX_VALUE;
        int right = Integer.MIN_VALUE;
        for (String role : roles) {
            AceEntityMention arg = aceEventMention.getArg(role);
            ExtractionObject o = entityMap.get(arg.getId());
            if (o == null) {
                this.logger.severe("READER ERROR: Failed to find event argument with id " + arg.getId());
                this.logger.severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
                return null;
            }
            convertedArgs.add(o);
            left = Math.min(left, o.getExtentTokenStart());
            right = Math.max(right, o.getExtentTokenEnd());
        }
        AceCharSeq anchor = aceEventMention.getAnchor();
        Span anchorSpan = new Span(anchor.getTokenStart() - tokenOffset, anchor.getTokenEnd() + 1 - tokenOffset);
        ExtractionObject anchorObject = new ExtractionObject(aceEventMention.getId() + "-anchor", sentence, anchorSpan, "ANCHOR", null);
        return new EventMention(aceEventMention.getId(), sentence, new Span(left, right), aceEventMention.getParent().getType(), aceEventMention.getParent().getSubtype(), anchorObject, convertedArgs, roles);
    }

    /**
     * Converts an ACE relation mention. The relation span runs from the smallest extent start
     * to the largest extent end among its arguments.
     *
     * @param aceRelationMention the mention to convert
     * @param docId document id (not used by this method)
     * @param sentence sentence the mention belongs to
     * @param entityMap converted entity mentions keyed by ACE mention id
     * @return the converted mention, or {@code null} if an argument is missing from
     *     {@code entityMap} or the argument roles are not exactly "arg-1" followed by "arg-2"
     *     (compared case-insensitively)
     */
    private RelationMention convertAceRelationMention(AceRelationMention aceRelationMention, String docId, CoreMap sentence, Map<String, EntityMention> entityMap) {
        List<ExtractionObject> convertedArgs = new ArrayList<ExtractionObject>();
        List<String> argNames = new ArrayList<String>();
        int left = Integer.MAX_VALUE;
        int right = Integer.MIN_VALUE;
        for (AceRelationMentionArgument arg : aceRelationMention.getArgs()) {
            ExtractionObject o = entityMap.get(arg.getContent().getId());
            if (o == null) {
                this.logger.severe("READER ERROR: Failed to find relation argument with id " + arg.getContent().getId());
                this.logger.severe("This happens because a few relation mentions illegally span multiple sentences. Will ignore this mention.");
                return null;
            }
            convertedArgs.add(o);
            argNames.add(arg.getRole());
            left = Math.min(left, o.getExtentTokenStart());
            right = Math.max(right, o.getExtentTokenEnd());
        }
        boolean validArgs = argNames.size() == 2
            && argNames.get(0).equalsIgnoreCase("arg-1")
            && argNames.get(1).equalsIgnoreCase("arg-2");
        if (!validArgs) {
            this.logger.severe("READER ERROR: Invalid succession of arguments in relation mention: " + argNames);
            this.logger.severe("ACE relations must have two arguments. Will ignore this mention.");
            return null;
        }
        return new RelationMention(aceRelationMention.getId(), sentence, new Span(left, right), aceRelationMention.getParent().getType(), aceRelationMention.getParent().getSubtype(), convertedArgs, null);
    }

    /**
     * Converts an ACE entity mention into a sentence-relative {@link EntityMention}.
     *
     * <p>Extent and head token positions are shifted by {@code tokenOffset}; ends become
     * exclusive. Positions that fall outside the sentence are logged at SEVERE and clamped to
     * the sentence bounds, and the head span is then clipped to lie within the extent.
     *
     * @param entityMention the mention to convert
     * @param docId document id, used only in error messages
     * @param sentence sentence the mention belongs to; its tokens annotation must be set
     * @param tokenOffset document-level index of the sentence's first token
     * @return the converted mention
     */
    private EntityMention convertAceEntityMention(AceEntityMention entityMention, String docId, CoreMap sentence, int tokenOffset) {
        AceCharSeq ext = entityMention.getExtent();
        AceCharSeq head = entityMention.getHead();
        int extStart = ext.getTokenStart() - tokenOffset;
        int extEnd = ext.getTokenEnd() - tokenOffset + 1;
        if (extStart < 0) {
            this.logger.severe("READER ERROR: Invalid extent start " + extStart + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
            extStart = 0;
        }
        // The sentence is not modified in this method, so one typed lookup serves every bound check.
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        final int tokenCount = sentenceTokens.size();
        if (extEnd > tokenCount) {
            this.logger.severe("READER ERROR: Invalid extent end " + extEnd + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity extent.");
            extEnd = tokenCount;
        }
        int headStart = head.getTokenStart() - tokenOffset;
        int headEnd = head.getTokenEnd() - tokenOffset + 1;
        if (headStart < 0) {
            this.logger.severe("READER ERROR: Invalid head start " + headStart + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
            headStart = 0;
        }
        if (headEnd > tokenCount) {
            this.logger.severe("READER ERROR: Invalid head end " + headEnd + " for entity mention " + entityMention.getId() + " in document " + docId + " in sentence " + sentence);
            this.logger.severe("This may happen due to incorrect EOS detection. Adjusting entity head span.");
            headEnd = tokenCount;
        }
        // Keep the head inside the (possibly clamped) extent.
        if (headStart < extStart) {
            headStart = extStart;
        }
        if (headEnd > extEnd) {
            headEnd = extEnd;
        }
        assert headStart < headEnd;
        return new EntityMention(entityMention.getId(), sentence, new Span(extStart, extEnd), new Span(headStart, headEnd), entityMention.getParent().getType(), entityMention.getParent().getSubtype(), entityMention.getLdctype());
    }

    /**
     * Converts an ACE entity mention and records its coreference id on the result.
     *
     * @param corefID id stored on the converted mention via {@code setCorefID}
     * @see #convertAceEntityMention(AceEntityMention, String, CoreMap, int)
     */
    private EntityMention convertAceEntityMention(AceEntityMention entityMention, String docId, CoreMap sentence, int tokenOffset, String corefID) {
        EntityMention converted = this.convertAceEntityMention(entityMention, docId, sentence, tokenOffset);
        converted.setCorefID(corefID);
        return converted;
    }

    /**
     * Command-line entry point: builds a pipeline from the argument properties, enables
     * INFO logging, and parses the corpus at the fixed path below.
     *
     * @param args command-line arguments, converted to properties for the pipeline
     * @throws IOException propagated from parsing
     */
    public static void main(String[] args) throws IOException {
        Properties props = StringUtils.argsToProperties(args);
        AceReader r = new AceReader(new StanfordCoreNLP(props, false), false);
        r.setLoggerLevel(Level.INFO);
        r.parse("/u/scr/nlp/data/ACE2005/");
        log.info("done");
    }
}

