/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.ComplexNodePattern;
import edu.stanford.nlp.ling.tokensregex.CoreMapNodePattern;
import edu.stanford.nlp.ling.tokensregex.Env;
import edu.stanford.nlp.ling.tokensregex.EnvLookup;
import edu.stanford.nlp.ling.tokensregex.MultiPatternMatcher;
import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
import edu.stanford.nlp.ling.tokensregex.SequencePattern;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.ling.tokensregex.matcher.TrieMap;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

public class TokensRegexNERAnnotator
implements Annotator {
    /** Logging channel used by every method of this annotator. */
    protected static final Redwood.RedwoodChannels logger = Redwood.channels(TokensRegexNERAnnotator.class);
    // Names of the predefined (non-annotation) columns that may appear in a mapping file header.
    protected static final String PATTERN_FIELD = "pattern";
    protected static final String OVERWRITE_FIELD = "overwrite";
    protected static final String PRIORITY_FIELD = "priority";
    protected static final String WEIGHT_FIELD = "weight";
    protected static final String GROUP_FIELD = "group";
    /** Header fields that are handled specially; any other header field is resolved to an annotation class. */
    protected static final Set<String> predefinedHeaderFields = CollectionUtils.asSet("pattern", "overwrite", "priority", "weight", "group");
    /** Column order assumed when the {@code mapping.header} property is not set. */
    protected static final String defaultHeader = "pattern,ner,overwrite,priority,group";
    /** Annotator-wide case setting, read from the {@code ignorecase} property. */
    private final boolean ignoreCase;
    /** Per-mapping-file case setting, indexed by mapping file number. */
    private final List<Boolean> ignoreCaseList;
    /** Lines of the common-words file; a match whose text is in this set is never annotated. */
    private final Set<String> commonWords;
    /** Unmodifiable list of all entries read from the mapping files. */
    private final List<Entry> entries;
    /** Maps each compiled pattern (by identity) back to the entry it was built from. */
    private final Map<SequencePattern<CoreMap>, Entry> patternToEntry;
    /** Matcher over the compiled patterns of all entries. */
    private final MultiPatternMatcher<CoreMap> multiPatternMatcher;
    /** Annotation key classes written on matched tokens; position j pairs with {@code entry.types[j]}. */
    private final List<Class> annotationFields;
    /** Background symbols, {@code null}, and every type produced by an entry. */
    private final Set<String> myLabels;
    /** Annotator-wide POS filter compiled from {@code validpospattern}; {@code null} when the property is unset or empty. */
    private final Pattern validPosPattern;
    /** Per-mapping-file POS filters, indexed by mapping file number; elements may be {@code null}. */
    private final List<Pattern> validPosPatternList;
    /** Per-mapping-file header fields, indexed by mapping file number. */
    private final List<String[]> headerList;
    /** When set, extra informational messages are logged. */
    private final boolean verbose;
    /** Records which mapping file (by index) each entry was read from. */
    private final Map<Entry, Integer> entryToMappingFileNumber;
    /** Entry types that do not overwrite an existing NER label by default. */
    private final Set<String> noDefaultOverwriteLabels;
    /** How the POS filters are applied to a matched span. */
    private final PosMatchType posMatchType;
    public static final PosMatchType DEFAULT_POS_MATCH_TYPE = PosMatchType.MATCH_AT_LEAST_ONE_TOKEN;
    /** Default value of the {@code backgroundSymbol} property. */
    public static final String DEFAULT_BACKGROUND_SYMBOL = "O,MISC";
    // Descriptions of the supported properties.
    // NOTE(review): some defaults listed here differ from the ones the (String, Properties) constructor
    // actually applies (e.g. "mapping" and "noDefaultOverwriteLabels") -- confirm which is authoritative.
    public static PropertiesUtils.Property[] SUPPORTED_PROPERTIES = new PropertiesUtils.Property[]{new PropertiesUtils.Property("mapping", "edu/stanford/nlp/models/kbp/english/gazetteers/regexner_caseless.tab", "List of mapping files to use, separated by commas or semi-colons."), new PropertiesUtils.Property("mapping.header", "pattern,ner,overwrite,priority,group", "Comma separated list specifying order of fields in the mapping file"), new PropertiesUtils.Property("mapping.field.<fieldname>", "", "Class mapping for annotation fields other than ner"), new PropertiesUtils.Property("commonWords", "", "Comma separated list of files for common words to not annotate (in case your mapping isn't very clean)"), new PropertiesUtils.Property("ignorecase", "false", "Whether to ignore case or not when matching patterns."), new PropertiesUtils.Property("validpospattern", "", "Regular expression pattern for matching POS tags."), new PropertiesUtils.Property("posmatchtype", DEFAULT_POS_MATCH_TYPE.name(), "How should 'validpospattern' be used to match the POS of the tokens."), new PropertiesUtils.Property("noDefaultOverwriteLabels", "", "Comma separated list of output types for which default NER labels are not overwritten.\n For these types, only if the matched expression has NER type matching the\n specified overwriteableType for the regex will the NER type be overwritten."), new PropertiesUtils.Property("backgroundSymbol", "O,MISC", "Comma separated list of NER labels to always replace."), new PropertiesUtils.Property("verbose", "false", "")};
    // Delimiter patterns; each also swallows whitespace around the delimiter.
    private static final Pattern COMMA_DELIMITERS_PATTERN = Pattern.compile("\\s*,\\s*");
    private static final Pattern SEMICOLON_DELIMITERS_PATTERN = Pattern.compile("\\s*;\\s*");
    private static final Pattern EQUALS_DELIMITERS_PATTERN = Pattern.compile("\\s*=\\s*");
    /** Matches an optionally negative integer or decimal number. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("-?[0-9]+(?:\\.[0-9]+)?");

    /**
     * Creates an annotator for the given mapping with case-sensitive matching
     * and no POS pattern.
     *
     * @param mapping value stored under the {@code mapping} property
     */
    public TokensRegexNERAnnotator(String mapping) {
        this(mapping, false);
    }

    /**
     * Creates an annotator for the given mapping with no POS pattern.
     *
     * @param mapping    value stored under the {@code mapping} property
     * @param ignoreCase value stored under the {@code ignorecase} property
     */
    public TokensRegexNERAnnotator(String mapping, boolean ignoreCase) {
        this(mapping, ignoreCase, null);
    }

    /**
     * Creates an annotator named {@code "tokenregexner"} whose properties are
     * built from the given arguments.
     *
     * @param mapping       value stored under the {@code mapping} property
     * @param ignoreCase    value stored under the {@code ignorecase} property
     * @param validPosRegex value stored under the {@code validpospattern} property;
     *                      the property is left unset when this is {@code null}
     */
    public TokensRegexNERAnnotator(String mapping, boolean ignoreCase, String validPosRegex) {
        this("tokenregexner", TokensRegexNERAnnotator.getProperties("tokenregexner", mapping, ignoreCase, validPosRegex));
    }

    /**
     * Builds the property set consumed by the (name, properties) constructor.
     *
     * @param name          annotator name; when non-empty, every key is prefixed with {@code name + '.'}
     * @param mapping       value for the {@code mapping} key
     * @param ignoreCase    value for the {@code ignorecase} key
     * @param validPosRegex value for the {@code validpospattern} key, or {@code null} to omit the key
     * @return a new {@link Properties} holding the keys described above
     */
    private static Properties getProperties(String name, String mapping, boolean ignoreCase, String validPosRegex) {
        final String keyPrefix;
        if (StringUtils.isNullOrEmpty(name)) {
            keyPrefix = "";
        } else {
            keyPrefix = name + '.';
        }

        Properties result = new Properties();
        result.setProperty(keyPrefix + "mapping", mapping);
        result.setProperty(keyPrefix + "ignorecase", Boolean.toString(ignoreCase));
        // The POS pattern is optional; leave the key absent rather than storing null.
        if (validPosRegex != null) {
            result.setProperty(keyPrefix + "validpospattern", validPosRegex);
        }
        return result;
    }

    /**
     * Creates an annotator configured from {@code properties}.
     *
     * <p>Every property key is looked up as {@code name + '.' + key} (or just
     * {@code key} when {@code name} is null or empty). The constructor reads the
     * mapping files, compiles one pattern per entry and computes the set of NER
     * labels this annotator considers its own.
     *
     * @param name       annotator name used as property prefix and in log/error messages
     * @param properties configuration; see {@link #SUPPORTED_PROPERTIES} for the recognised keys
     * @throws RuntimeIOException    if the common words file cannot be read
     * @throws IllegalStateException if {@code mapping.header} is {@code true} but a mapping
     *                               file specifies a different {@code header} option
     */
    public TokensRegexNERAnnotator(String name, Properties properties) {
        String prefix = !StringUtils.isNullOrEmpty(name) ? name + '.' : "";
        String backgroundSymbol = properties.getProperty(prefix + "backgroundSymbol", DEFAULT_BACKGROUND_SYMBOL);
        String[] backgroundSymbols = COMMA_DELIMITERS_PATTERN.split(backgroundSymbol);
        String mappingFiles = properties.getProperty(prefix + "mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/english/gazetteers/regexner_caseless.tab;edu/stanford/nlp/models/kbp/english/gazetteers/regexner_cased.tab");
        String[] mappings = TokensRegexNERAnnotator.processListMappingFiles(mappingFiles);
        String validPosRegex = properties.getProperty(prefix + "validpospattern");
        this.posMatchType = PosMatchType.valueOf(properties.getProperty(prefix + "posmatchtype", DEFAULT_POS_MATCH_TYPE.name()));

        // Load the optional common-words list: one word per line, stored verbatim.
        String commonWordsFile = properties.getProperty(prefix + "commonWords");
        this.commonWords = new HashSet<String>();
        if (commonWordsFile != null) {
            try (BufferedReader reader = IOUtils.readerFromString(commonWordsFile)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    this.commonWords.add(line);
                }
            } catch (IOException ex) {
                throw new RuntimeIOException("TokensRegexNERAnnotator " + name + ": Error opening the common words file: " + commonWordsFile, ex);
            }
        }

        // The header is either "true" (each file supplies its own header) or an explicit column list.
        String headerProp = properties.getProperty(prefix + "mapping.header", defaultHeader);
        boolean readHeaderFromFile = headerProp.equalsIgnoreCase("true");
        String[] annotationFieldnames;
        String[] headerFields = null;
        if (readHeaderFromFile) {
            annotationFieldnames = StringUtils.EMPTY_STRING_ARRAY;
            this.annotationFields = new ArrayList<Class>();
            // Compile once instead of once per mapping file.
            Pattern headerTruePattern = Pattern.compile("header\\s*=\\s*true");
            for (int i = 0; i < mappings.length; ++i) {
                String mappingLine = mappings[i];
                if (!mappingLine.contains("header")) {
                    // No per-file header option: force header=true for this file.
                    mappings[i] = "header=true, " + mappingLine;
                } else if (!headerTruePattern.matcher(mappingLine.toLowerCase()).find()) {
                    throw new IllegalStateException("The annotator header property is set to true, but a different option has been provided for mapping file: " + mappingLine);
                }
            }
        } else {
            headerFields = COMMA_DELIMITERS_PATTERN.split(headerProp);
            List<String> fieldNames = new ArrayList<String>();
            List<Class> fieldClasses = new ArrayList<Class>();
            for (String field : headerFields) {
                if (predefinedHeaderFields.contains(field)) {
                    continue;
                }
                // Resolve the column name to an annotation class, falling back to an explicit
                // "mapping.field.<name>" class name from the properties.
                Class fieldClass = EnvLookup.lookupAnnotationKeyWithClassname(null, field);
                if (fieldClass == null) {
                    String classname = properties.getProperty(prefix + "mapping.field." + field);
                    fieldClass = EnvLookup.lookupAnnotationKeyWithClassname(null, classname);
                }
                if (fieldClass != null) {
                    fieldNames.add(field);
                    fieldClasses.add(fieldClass);
                } else {
                    logger.warn(name + ": Unknown field: " + field + " cannot find suitable annotation class");
                }
            }
            annotationFieldnames = fieldNames.toArray(new String[fieldNames.size()]);
            this.annotationFields = fieldClasses;
        }

        String noDefaultOverwriteLabelsProp = properties.getProperty(prefix + "noDefaultOverwriteLabels", "CITY");
        this.noDefaultOverwriteLabels = Collections.unmodifiableSet(CollectionUtils.asSet(COMMA_DELIMITERS_PATTERN.split(noDefaultOverwriteLabelsProp)));
        this.ignoreCase = PropertiesUtils.getBool(properties, prefix + "ignorecase", false);
        this.verbose = PropertiesUtils.getBool(properties, prefix + "verbose", false);
        this.validPosPattern = !StringUtils.isNullOrEmpty(validPosRegex) ? Pattern.compile(validPosRegex) : null;
        this.validPosPatternList = new ArrayList<Pattern>();
        this.ignoreCaseList = new ArrayList<Boolean>();
        this.headerList = new ArrayList<String[]>();
        this.entryToMappingFileNumber = new HashMap<Entry, Integer>();

        // Per-file options fill the three per-file lists; then the entries themselves are read.
        annotationFieldnames = TokensRegexNERAnnotator.processPerFileOptions(name, mappings, this.ignoreCaseList, this.validPosPatternList, this.headerList, this.ignoreCase, this.validPosPattern, headerFields, annotationFieldnames, this.annotationFields);
        this.entries = Collections.unmodifiableList(TokensRegexNERAnnotator.readEntries(name, this.noDefaultOverwriteLabels, this.ignoreCaseList, this.headerList, this.entryToMappingFileNumber, this.verbose, annotationFieldnames, mappings));

        // Patterns are keyed by identity when mapping a match back to its entry.
        Map<SequencePattern<CoreMap>, Entry> patternIndex = new IdentityHashMap<SequencePattern<CoreMap>, Entry>();
        this.multiPatternMatcher = this.createPatternMatcher(patternIndex);
        this.patternToEntry = Collections.unmodifiableMap(patternIndex);

        // Labels this annotator may overwrite: background symbols, "no label" (null),
        // and every type produced by one of its own entries.
        Set<String> labels = Generics.newHashSet();
        Collections.addAll(labels, backgroundSymbols);
        labels.add(null);
        for (Entry entry : this.entries) {
            Collections.addAll(labels, entry.types);
        }
        this.myLabels = Collections.unmodifiableSet(labels);
    }

    /**
     * Applies the mapping entries to {@code annotation}.
     *
     * <p>When the annotation has sentences, each sentence's token list is processed
     * separately; otherwise the annotation's own token list is processed.
     *
     * @param annotation the document (or token container) to annotate in place
     * @throws RuntimeException if the annotation has neither sentences nor tokens
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            logger.info("Adding TokensRegexNER annotations ... ");
        }

        // Typed lookups: a raw List here would make the for-each element type Object.
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            for (CoreMap sentence : sentences) {
                List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                this.annotateMatched(tokens);
            }
        } else {
            List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
            if (tokens == null) {
                throw new RuntimeException("Unable to find sentences or tokens in " + annotation);
            }
            this.annotateMatched(tokens);
        }

        if (this.verbose) {
            logger.info("done.");
        }
    }

    /**
     * Compiles one {@link TokenSequencePattern} per entry and bundles them into a
     * multi-pattern matcher.
     *
     * @param patternToEntry output map; receives a (compiled pattern, entry) pair for every entry
     * @return a matcher over all compiled patterns
     * @throws RuntimeException if an entry's annotate group is negative or larger than the
     *                          number of groups in its compiled pattern
     */
    private MultiPatternMatcher<CoreMap> createPatternMatcher(Map<SequencePattern<CoreMap>, Entry> patternToEntry) {
        ArrayList<TokenSequencePattern> compiled = new ArrayList<TokenSequencePattern>(this.entries.size());
        for (Entry entry : this.entries) {
            // Options are stored per mapping file, so first find the file this entry came from.
            int fileNumber = this.entryToMappingFileNumber.get(entry);
            boolean caseInsensitive = this.ignoreCaseList.get(fileNumber);

            // CASE_INSENSITIVE | UNICODE_CASE == 66; the same numeric value is used for both flag sets.
            int patternFlags = caseInsensitive ? (Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : 0;
            int stringMatchFlags = patternFlags;
            Env env = TokenSequencePattern.getNewEnv();
            env.setDefaultStringPatternFlags(patternFlags);
            env.setDefaultStringMatchFlags(stringMatchFlags);

            // A per-token POS constraint is only attached in MATCH_ALL_TOKENS mode.
            ComplexNodePattern.StringAnnotationRegexPattern posTagPattern = null;
            Pattern filePosPattern = this.validPosPatternList.get(fileNumber);
            if (filePosPattern != null && PosMatchType.MATCH_ALL_TOKENS.equals((Object)this.posMatchType)) {
                posTagPattern = new ComplexNodePattern.StringAnnotationRegexPattern(filePosPattern);
            }

            TokenSequencePattern pattern;
            if (entry.tokensRegex == null) {
                // Plain entry: one node pattern per whitespace-separated regex.
                List<SequencePattern.PatternExpr> nodePatterns = new ArrayList<SequencePattern.PatternExpr>(entry.regex.length);
                for (String tokenRegex : entry.regex) {
                    CoreMapNodePattern node = CoreMapNodePattern.valueOf(tokenRegex, patternFlags);
                    if (posTagPattern != null) {
                        node.add(CoreAnnotations.PartOfSpeechAnnotation.class, posTagPattern);
                    }
                    nodePatterns.add(new SequencePattern.NodePatternExpr(node));
                }
                if (nodePatterns.size() == 1) {
                    nodePatterns = Collections.singletonList(nodePatterns.get(0));
                }
                pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(nodePatterns));
            } else {
                // Entry written as a full TokensRegex expression.
                pattern = TokenSequencePattern.compile(env, entry.tokensRegex);
            }

            boolean groupInRange = entry.annotateGroup >= 0 && entry.annotateGroup <= pattern.getTotalGroups();
            if (!groupInRange) {
                throw new RuntimeException("Invalid match group for entry " + entry);
            }
            pattern.setPriority(entry.priority);
            pattern.setWeight(entry.weight);
            compiled.add(pattern);
            patternToEntry.put(pattern, entry);
        }
        return TokenSequencePattern.getMultiPatternMatcher(compiled);
    }

    /**
     * Finds all non-overlapping matches in {@code tokens} and writes the matching
     * entry's types onto the tokens of the annotated group.
     *
     * <p>A match is skipped when its text is a common word, when the POS check
     * fails, or when the existing NER labels must not be overwritten.
     *
     * @param tokens tokens to match against; annotated in place
     */
    private void annotateMatched(List<CoreLabel> tokens) {
        // The matcher is a MultiPatternMatcher<CoreMap>, so its results are typed on CoreMap.
        List<SequenceMatchResult<CoreMap>> matched = this.multiPatternMatcher.findNonOverlapping(tokens);
        for (SequenceMatchResult<CoreMap> m : matched) {
            Entry entry = this.patternToEntry.get(m.pattern());
            int g = entry.annotateGroup;
            int start = m.start(g);
            int end = m.end(g);
            String str = m.group(g);

            if (this.commonWords.contains(str)) {
                if (this.verbose) {
                    logger.info("Not annotating (common word) '" + str + "': " + StringUtils.joinFields(m.groupNodes(g), CoreAnnotations.NamedEntityTagAnnotation.class) + " with " + entry.getTypeDescription() + ", sentence is '" + StringUtils.joinWords(tokens, " ") + "'");
                }
                continue;
            }

            // The NER check only runs when the POS check passed.
            boolean overwriteOriginalNer = this.checkPosTags(tokens, start, end)
                    && this.checkOrigNerTags(entry, tokens, start, end);
            if (!overwriteOriginalNer) {
                if (this.verbose) {
                    logger.info("Not annotating  '" + m.group(g) + "': " + StringUtils.joinFields(m.groupNodes(g), CoreAnnotations.NamedEntityTagAnnotation.class) + " with " + entry.getTypeDescription() + ", sentence is '" + StringUtils.joinWords(tokens, " ") + "'");
                }
                continue;
            }

            // Write every annotation field of the entry onto each token of the group.
            for (int i = start; i < end; ++i) {
                CoreLabel token = tokens.get(i);
                for (int j = 0; j < this.annotationFields.size(); ++j) {
                    token.set(this.annotationFields.get(j), entry.types[j]);
                }
            }
        }
    }

    /**
     * Decides whether the span {@code [start, end)} passes the POS filter.
     *
     * <ul>
     *   <li>With no POS pattern configured (annotator-wide or per file) every span passes.</li>
     *   <li>{@code MATCH_ONE_TOKEN_PHRASE_ONLY}: spans longer than one token pass; a
     *       single-token span is checked like {@code MATCH_AT_LEAST_ONE_TOKEN}.</li>
     *   <li>{@code MATCH_AT_LEAST_ONE_TOKEN}: passes when some token in the span has a POS
     *       tag fully matched by the annotator-wide pattern or any per-file pattern.</li>
     *   <li>{@code MATCH_ALL_TOKENS} and any other value: passes here (in that mode the
     *       per-token constraint is attached when the patterns are compiled).</li>
     * </ul>
     *
     * @param tokens the token list the match was found in (not just the matched span)
     * @param start  index of the first token of the span
     * @param end    index one past the last token of the span
     * @return {@code true} if the span may be annotated as far as POS tags are concerned
     */
    private boolean checkPosTags(List<CoreLabel> tokens, int start, int end) {
        boolean hasPosFilter = this.validPosPattern != null
                || TokensRegexNERAnnotator.atLeastOneValidPosPattern(this.validPosPatternList);
        if (!hasPosFilter) {
            return true;
        }
        switch (this.posMatchType) {
            case MATCH_ONE_TOKEN_PHRASE_ONLY: {
                // Use the length of the matched span, not of the whole token list:
                // callers pass the full sentence, so tokens.size() would exempt
                // nearly every match from the check.
                if (end - start > 1) {
                    return true;
                }
                // single-token phrase: fall through to the per-token check
            }
            case MATCH_AT_LEAST_ONE_TOKEN: {
                for (int i = start; i < end; ++i) {
                    String pos = (String)tokens.get(i).get(CoreAnnotations.PartOfSpeechAnnotation.class);
                    if (pos == null) {
                        continue;
                    }
                    if (this.validPosPattern != null && this.validPosPattern.matcher(pos).matches()) {
                        return true;
                    }
                    for (Pattern pattern : this.validPosPatternList) {
                        if (pattern != null && pattern.matcher(pos).matches()) {
                            return true;
                        }
                    }
                }
                return false;
            }
            case MATCH_ALL_TOKENS:
            default: {
                return true;
            }
        }
    }

    /**
     * Tells whether the token's current NER label is {@code LOCATION} or {@code GPE}.
     *
     * @param token token to inspect
     * @return {@code true} for either of the two labels, {@code false} otherwise (including no label)
     */
    private static boolean isLocationOrGpe(CoreLabel token) {
        if ("LOCATION".equals(token.ner())) {
            return true;
        }
        return "GPE".equals(token.ner());
    }

    /**
     * Decides whether the existing NER labels on {@code [start, end)} may be overwritten
     * by {@code entry}.
     *
     * <p>Rules, in order:
     * <ol>
     *   <li>If every token in the span is labelled {@code LOCATION} or {@code GPE}: overwrite.</li>
     *   <li>If the span cuts through a longer run of a label that is not one of this
     *       annotator's labels (the run continues before {@code start} or after
     *       {@code end}): do not overwrite.</li>
     *   <li>If the first token has no label: overwrite.</li>
     *   <li>If the span carries more than one distinct label: overwrite.</li>
     *   <li>Otherwise (one consistent label): overwrite if the entry lists that label as
     *       overwritable; else, unless one of the entry's types is in
     *       {@code noDefaultOverwriteLabels}, overwrite iff the label is one of this
     *       annotator's labels.</li>
     * </ol>
     *
     * @param entry  the matched entry
     * @param tokens the token list the match was found in
     * @param start  index of the first token of the span
     * @param end    index one past the last token of the span
     * @return {@code true} if the span's labels may be replaced
     */
    private boolean checkOrigNerTags(Entry entry, List<CoreLabel> tokens, int start, int end) {
        boolean specialCasePass = true;
        for (int i = start; i < end; ++i) {
            if (!TokensRegexNERAnnotator.isLocationOrGpe(tokens.get(i))) {
                specialCasePass = false;
                break;
            }
        }
        if (specialCasePass) {
            return true;
        }

        // Both boundaries start at "no overlap" so they are defined even when the scans below are skipped.
        int prevNerEndIndex = start - 1;
        int nextNerStartIndex = end;
        String startNer = tokens.get(start).ner();
        String endNer = tokens.get(end - 1).ner();

        // Walk left while the preceding tokens carry the same foreign label as the first token.
        if (startNer != null && !this.myLabels.contains(startNer)) {
            while (prevNerEndIndex >= 0) {
                String ner = tokens.get(prevNerEndIndex).ner();
                if (ner == null || !ner.equals(startNer)) {
                    break;
                }
                --prevNerEndIndex;
            }
        }
        // Walk right while the following tokens carry the same foreign label as the last token.
        if (endNer != null && !this.myLabels.contains(endNer)) {
            while (nextNerStartIndex < tokens.size()) {
                String ner = tokens.get(nextNerStartIndex).ner();
                if (ner == null || !ner.equals(endNer)) {
                    break;
                }
                ++nextNerStartIndex;
            }
        }

        if (prevNerEndIndex != start - 1 || nextNerStartIndex != end) {
            // The span cuts across an already recognised entity: leave it alone.
            return false;
        }
        if (startNer == null) {
            return true;
        }
        // Mixed labels inside the span: overwrite.
        for (int i = start + 1; i < end; ++i) {
            if (!startNer.equals(tokens.get(i).ner())) {
                return true;
            }
        }
        // One consistent label across the span.
        if (entry.overwritableTypes.contains(startNer)) {
            return true;
        }
        if (TokensRegexNERAnnotator.hasNoOverwritableType(this.noDefaultOverwriteLabels, entry.types)) {
            return false;
        }
        return this.myLabels.contains(startNer);
    }

    /**
     * Reads the entries of all mapping files, in order, into one list.
     *
     * @param annotatorName            name used in log and error messages
     * @param noDefaultOverwriteLabels passed through to the per-file reader
     * @param ignoreCaseList           per-file case setting, indexed by mapping file number
     * @param headerList               per-file header fields, indexed by mapping file number
     * @param entryToMappingFileNumber output map; passed through to the per-file reader
     * @param verbose                  passed through to the per-file reader
     * @param annotationFieldnames     names of the annotation columns
     * @param mappings                 mapping files, opened via {@code IOUtils.readerFromString}
     * @return the list of entries read from all files
     * @throws RuntimeIOException if a mapping file cannot be read
     */
    private static List<Entry> readEntries(String annotatorName, Set<String> noDefaultOverwriteLabels, List<Boolean> ignoreCaseList, List<String[]> headerList, Map<Entry, Integer> entryToMappingFileNumber, boolean verbose, String[] annotationFieldnames, String ... mappings) {
        ArrayList<Entry> collected = new ArrayList<Entry>();
        // Shared across files so duplicates are detected between files as well.
        TrieMap<String, Entry> seen = new TrieMap<String, Entry>();

        int fileNumber = 0;
        for (String mapping : mappings) {
            try (BufferedReader reader = IOUtils.readerFromString(mapping)) {
                TokensRegexNERAnnotator.readEntries(annotatorName, headerList.get(fileNumber), annotationFieldnames, collected, seen, mapping, reader, noDefaultOverwriteLabels, ignoreCaseList.get(fileNumber), fileNumber, entryToMappingFileNumber, verbose);
            } catch (IOException e) {
                throw new RuntimeIOException("Couldn't read TokensRegexNER from " + mapping, e);
            }
            fileNumber++;
        }

        // With a single file the per-file summary already says everything.
        if (mappings.length != 1) {
            logger.log(annotatorName + ": Read " + collected.size() + " unique entries from " + mappings.length + " files");
        }
        return collected;
    }

    /**
     * Maps each header field name to its column position.
     *
     * @param headerFields header field names in column order
     * @return a new map from field name to zero-based column index
     * @throws IllegalArgumentException if the same field name occurs twice
     */
    private static Map<String, Integer> getHeaderIndexMap(String[] headerFields) {
        Map<String, Integer> columnByName = new HashMap<String, Integer>();
        int column = 0;
        for (String field : headerFields) {
            // put() returns the previous index when the name was already present.
            Integer previous = columnByName.put(field, column);
            if (previous != null) {
                throw new IllegalArgumentException("Duplicate header field: " + field);
            }
            column++;
        }
        return columnByName;
    }

    /**
     * Looks up the column index of a header field.
     *
     * @param map  field name to column index
     * @param name field name to look up
     * @return the stored index, or {@code -1} when the map has no value for {@code name}
     */
    private static int getIndex(Map<String, Integer> map, String name) {
        Integer column = map.get(name);
        return column != null ? column.intValue() : -1;
    }

    /**
     * Reads the entries of one mapping file and appends them to {@code entries}.
     *
     * <p>Each line is split on tabs and interpreted according to {@code headerFields}.
     * A first line that equals the header is skipped. The pattern column is either a
     * TokensRegex expression wrapped in {@code "( "} ... {@code " )"} or a
     * whitespace-separated list of per-token regexes. Duplicate patterns (compared
     * case-insensitively when {@code ignoreCase} is set) are dropped unless the new
     * entry has a strictly higher priority than the one seen before.
     *
     * @param annotatorName            name used in log and error messages
     * @param headerFields             column names of this file, in column order
     * @param annotationFieldnames     names of the annotation columns; each must be in the header
     * @param entries                  output list the new entries are appended to
     * @param seenRegexes              patterns seen so far, shared across files; updated in place
     * @param mappingFilename          file name used in log and error messages
     * @param mapping                  reader positioned at the start of the file
     * @param noDefaultOverwriteLabels types that do not overwrite by default (used for a warning only)
     * @param ignoreCase               whether duplicate detection lower-cases the pattern
     * @param mappingFileIndex         index recorded for every entry read from this file
     * @param entryToMappingFileNumber output map from entry to {@code mappingFileIndex}
     * @param verbose                  whether ignored duplicates with a different type are logged
     * @return {@code entries}
     * @throws IOException              if reading from {@code mapping} fails
     * @throws IllegalArgumentException on a bad header, a wrong column count, or an
     *                                  unparsable priority, weight or group
     */
    private static List<Entry> readEntries(String annotatorName, String[] headerFields, String[] annotationFieldnames, List<Entry> entries, TrieMap<String, Entry> seenRegexes, String mappingFilename, BufferedReader mapping, Set<String> noDefaultOverwriteLabels, boolean ignoreCase, Integer mappingFileIndex, Map<Entry, Integer> entryToMappingFileNumber, boolean verbose) throws IOException {
        int origEntriesSize = entries.size();
        int isTokensRegex = 0;
        int lineCount = 0;

        // Resolve the column position of every field we care about.
        Map<String, Integer> headerIndexMap = TokensRegexNERAnnotator.getHeaderIndexMap(headerFields);
        int iPattern = TokensRegexNERAnnotator.getIndex(headerIndexMap, PATTERN_FIELD);
        if (iPattern < 0) {
            throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Header does not contain 'pattern': " + StringUtils.join(headerFields));
        }
        int iOverwrite = TokensRegexNERAnnotator.getIndex(headerIndexMap, OVERWRITE_FIELD);
        int iPriority = TokensRegexNERAnnotator.getIndex(headerIndexMap, PRIORITY_FIELD);
        int iWeight = TokensRegexNERAnnotator.getIndex(headerIndexMap, WEIGHT_FIELD);
        int iGroup = TokensRegexNERAnnotator.getIndex(headerIndexMap, GROUP_FIELD);
        int[] annotationCols = new int[annotationFieldnames.length];
        int iLastAnnotationField = -1;
        for (int i = 0; i < annotationFieldnames.length; ++i) {
            annotationCols[i] = TokensRegexNERAnnotator.getIndex(headerIndexMap, annotationFieldnames[i]);
            if (annotationCols[i] < 0) {
                throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Header does not contain annotation field '" + annotationFieldnames[i] + "': " + StringUtils.join(headerFields));
            }
            if (annotationCols[i] > iLastAnnotationField) {
                iLastAnnotationField = annotationCols[i];
            }
        }

        // A line needs at least the pattern and all annotation columns, and at most one column per header field.
        int minLength = Math.max(iPattern, iLastAnnotationField) + 1;
        int maxLength = headerFields.length;

        String line;
        while ((line = mapping.readLine()) != null) {
            ++lineCount;
            String[] split = line.split("\t");

            // Skip a first line that merely repeats the header.
            if (lineCount == 1 && Arrays.equals(split, headerFields)) {
                continue;
            }

            if (split.length < minLength || split.length > maxLength) {
                String err2 = "many";
                String expect = "<= " + maxLength;
                String extra = "";
                if (split.length < minLength) {
                    err2 = "few";
                    expect = ">= " + minLength;
                    if (split.length == 1) {
                        extra = "Maybe the problem is that you are using spaces not tabs? ";
                    }
                }
                throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Line " + lineCount + " of provided mapping file has too " + err2 + " tab-separated columns (" + split.length + " expecting " + expect + "). " + extra + "Line: " + line);
            }

            // Pattern column: "( ... )" is a TokensRegex expression, anything else a list of token regexes.
            String regex = split[iPattern].trim();
            String tokensRegex = null;
            String[] regexes = null;
            if (regex.startsWith("( ") && regex.endsWith(" )")) {
                tokensRegex = regex.substring(1, regex.length() - 1).trim();
            } else {
                regexes = regex.split("\\s+");
            }

            // Key used for duplicate detection; lower-cased when matching ignores case.
            String[] key = regexes != null ? regexes : new String[] {tokensRegex};
            if (ignoreCase) {
                String[] norm = new String[key.length];
                for (int i = 0; i < key.length; ++i) {
                    norm[i] = key[i].toLowerCase();
                }
                key = norm;
            }

            String[] types = new String[annotationCols.length];
            for (int i = 0; i < annotationCols.length; ++i) {
                types[i] = split[annotationCols[i]].trim();
            }

            // Optional columns: only read when the header declares them and the line is long enough.
            Set<String> overwritableTypes;
            if (iOverwrite >= 0 && split.length > iOverwrite) {
                String overwriteColumn = split[iOverwrite].trim();
                if (NUMBER_PATTERN.matcher(overwriteColumn).matches()) {
                    logger.warn("Number in types column for " + Arrays.toString(key) + " is probably priority: " + split[iOverwrite]);
                }
                String[] tempOTs = COMMA_DELIMITERS_PATTERN.split(overwriteColumn);
                if (tempOTs.length == 0) {
                    overwritableTypes = Collections.emptySet();
                } else if (tempOTs.length == 1) {
                    overwritableTypes = Collections.singleton(tempOTs[0]);
                } else {
                    overwritableTypes = new HashSet<String>(Arrays.asList(tempOTs));
                }
            } else {
                overwritableTypes = Collections.emptySet();
            }

            double priority = 0.0;
            if (iPriority >= 0 && split.length > iPriority) {
                try {
                    priority = Double.parseDouble(split[iPriority].trim());
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Invalid priority in line " + lineCount + " in regexner file " + mappingFilename + ": \"" + line + "\"!", e);
                }
            }

            double weight = 0.0;
            if (iWeight >= 0 && split.length > iWeight) {
                try {
                    weight = Double.parseDouble(split[iWeight].trim());
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Invalid weight in line " + lineCount + " in regexner file " + mappingFilename + ": \"" + line + "\"!", e);
                }
            }

            int annotateGroup = 0;
            if (iGroup >= 0 && split.length > iGroup) {
                String context = split[iGroup].trim();
                try {
                    annotateGroup = Integer.parseInt(context);
                } catch (NumberFormatException e) {
                    throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Invalid group in line " + lineCount + " in regexner file " + mappingFilename + ": \"" + line + "\"!", e);
                }
            }

            // An annotation column holding "A,B" keeps only "A" (a leading comma is left untouched).
            for (int i = 0; i < types.length; ++i) {
                String type = types[i];
                int commaPos = type.indexOf(',');
                if (commaPos <= 0) {
                    continue;
                }
                String newType = type.substring(0, commaPos).trim();
                logger.warn(annotatorName + ": Entry has multiple types for " + annotationFieldnames[i] + ": " + line + ".  Taking type to be " + newType);
                types[i] = newType;
            }

            Entry entry = new Entry(tokensRegex, regexes, types, overwritableTypes, priority, weight, annotateGroup);

            // Duplicate pattern: keep the new entry only if its priority is strictly higher.
            if (seenRegexes.containsKey(Arrays.asList(key))) {
                Entry oldEntry = seenRegexes.get(key);
                if (priority > oldEntry.priority) {
                    logger.warn(annotatorName + ": Replacing duplicate entry (higher priority): old=" + oldEntry + ", new=" + entry);
                } else {
                    String oldTypeDesc = oldEntry.getTypeDescription();
                    String newTypeDesc = entry.getTypeDescription();
                    if (!oldTypeDesc.equals(newTypeDesc) && verbose) {
                        // Report the pattern column itself, which is not necessarily column 0.
                        logger.warn(annotatorName + ": Ignoring duplicate entry: " + split[iPattern] + ", old type = " + oldTypeDesc + ", new type = " + newTypeDesc);
                    }
                    continue;
                }
            }

            if (entry.overwritableTypes.isEmpty() && TokensRegexNERAnnotator.hasNoOverwritableType(noDefaultOverwriteLabels, entry.types)) {
                logger.warn(annotatorName + ": Entry doesn't have overwriteable types " + entry + ", but entry type is in noDefaultOverwriteLabels");
            }

            entries.add(entry);
            entryToMappingFileNumber.put(entry, mappingFileIndex);
            seenRegexes.put(key, entry);
            if (entry.tokensRegex != null) {
                ++isTokensRegex;
            }
        }

        logger.log(annotatorName + ": Read " + (entries.size() - origEntriesSize) + " unique entries out of " + lineCount + " from " + mappingFilename + ", " + isTokensRegex + " TokensRegex patterns.");
        return entries;
    }

    /**
     * Tells whether any of {@code types} is listed in {@code noDefaultOverwriteLabels}.
     *
     * @param noDefaultOverwriteLabels labels that do not overwrite by default
     * @param types                    types to test
     * @return {@code true} as soon as one type is found in the set, {@code false} if none is
     */
    private static boolean hasNoOverwritableType(Set<String> noDefaultOverwriteLabels, String[] types) {
        boolean found = false;
        for (int i = 0; i < types.length && !found; ++i) {
            found = noDefaultOverwriteLabels.contains(types[i]);
        }
        return found;
    }

    /**
     * Splits the {@code mapping} property into one string per mapping file.
     *
     * <p>Commas separate files only when the value contains no semicolon; in every
     * other case the value is split on semicolons.
     *
     * @param mappingFiles raw value of the {@code mapping} property
     * @return the individual mapping file specifications
     */
    private static String[] processListMappingFiles(String mappingFiles) {
        boolean hasSemicolon = mappingFiles.contains(";");
        boolean hasComma = mappingFiles.contains(",");
        Pattern delimiter = (hasComma && !hasSemicolon) ? COMMA_DELIMITERS_PATTERN : SEMICOLON_DELIMITERS_PATTERN;
        return delimiter.split(mappingFiles);
    }

    /**
     * Parses the per-file options embedded in each mapping specification and records the
     * effective settings for every mapping file.
     *
     * <p>Each element of {@code mappings} is trimmed and split with
     * {@code COMMA_DELIMITERS_PATTERN}. The last piece is taken as the file path and every
     * preceding piece must be an {@code option=value} pair. When at least one option is present,
     * the element in {@code mappings} is replaced by the bare file path. Recognized option names
     * (matched case-insensitively, independent of the default locale) are:
     * <ul>
     *   <li>{@code ignorecase} - parsed with {@link Boolean#parseBoolean(String)} and appended to
     *       {@code ignoreCaseList};</li>
     *   <li>{@code validpospattern} - compiled and appended to {@code validPosPatternList}; an
     *       empty value appends the default {@code validPosPattern};</li>
     *   <li>{@code header} - a whitespace-separated list of header fields, or the single word
     *       {@code true} to read the tab-separated header from the first line of the mapping
     *       file; the result is appended to {@code headerList}.</li>
     * </ul>
     * Unrecognized option names are ignored. For every option that was not given for a mapping,
     * the corresponding default ({@code ignoreCase}, {@code validPosPattern},
     * {@code headerFields}) is appended to its list.
     *
     * <p>Header fields that are neither in {@code predefinedHeaderFields} nor already in
     * {@code annotationFieldnames} are resolved with
     * {@code EnvLookup.lookupAnnotationKeyWithClassname}; the resolved class is appended to
     * {@code annotationFields} and the field name to a grown copy of
     * {@code annotationFieldnames}.
     *
     * @param annotatorName name used in error messages
     * @param mappings mapping specifications; modified in place as described above
     * @param ignoreCaseList receives the ignore-case setting(s) per mapping
     * @param validPosPatternList receives the valid-POS pattern(s) per mapping
     * @param headerList receives the header(s) per mapping
     * @param ignoreCase default ignore-case setting
     * @param validPosPattern default valid-POS pattern
     * @param headerFields default header
     * @param annotationFieldnames annotation field names known so far; not modified
     * @param annotationFields receives the classes of newly discovered annotation fields
     * @return {@code annotationFieldnames}, or an extended copy if new fields were discovered
     * @throws IllegalArgumentException if an option is not of the form {@code option=value}, or
     *         if {@code header=true} is given for a mapping file that has no first line
     * @throws java.io.UncheckedIOException if {@code header=true} is given and the mapping file
     *         cannot be read
     * @throws RuntimeException if a header field cannot be resolved to an annotation class
     */
    private static String[] processPerFileOptions(String annotatorName, String[] mappings, List<Boolean> ignoreCaseList, List<Pattern> validPosPatternList, List<String[]> headerList, boolean ignoreCase, Pattern validPosPattern, String[] headerFields, String[] annotationFieldnames, List<Class> annotationFields) {
        for (int index = 0; index < mappings.length; ++index) {
            boolean ignoreCaseSet = false;
            boolean validPosPatternSet = false;
            boolean headerSet = false;
            String mappingSpec = mappings[index].trim();
            String[] allOptions = COMMA_DELIMITERS_PATTERN.split(mappingSpec);
            int numOptions = allOptions.length;
            // The file path is always the last comma-separated piece; options precede it.
            String filePath = allOptions[numOptions - 1];
            for (int i = 0; i < numOptions - 1; ++i) {
                String[] optionAndValue = EQUALS_DELIMITERS_PATTERN.split(allOptions[i].trim());
                if (optionAndValue.length != 2) {
                    throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Incorrectly specified options for mapping file " + mappingSpec);
                }
                String optionValue = optionAndValue[1].trim();
                // Locale.ROOT keeps option matching stable under locales with special casing
                // rules (e.g. the Turkish dotless i would break "IGNORECASE").
                switch (optionAndValue[0].trim().toLowerCase(java.util.Locale.ROOT)) {
                    case "ignorecase": {
                        ignoreCaseList.add(Boolean.parseBoolean(optionValue));
                        ignoreCaseSet = true;
                        break;
                    }
                    case "validpospattern": {
                        if (!StringUtils.isNullOrEmpty(optionValue)) {
                            validPosPatternList.add(Pattern.compile(optionValue));
                        } else {
                            validPosPatternList.add(validPosPattern);
                        }
                        validPosPatternSet = true;
                        break;
                    }
                    case "header": {
                        String[] headerItems = optionValue.split("\\s+");
                        headerSet = true;
                        if (headerItems.length == 1 && headerItems[0].equalsIgnoreCase("true")) {
                            // header=true: the real header is the first line of the mapping file.
                            try (BufferedReader br = IOUtils.readerFromString(filePath)) {
                                String headerLine = br.readLine();
                                if (headerLine == null) {
                                    throw new IllegalArgumentException("TokensRegexNERAnnotator " + annotatorName + " ERROR: header=true specified but mapping file " + filePath + " is empty");
                                }
                                headerItems = headerLine.split("\\t");
                            } catch (IOException e) {
                                // Fail here with the cause attached instead of continuing with
                                // the literal "true" as the header.
                                throw new java.io.UncheckedIOException("TokensRegexNERAnnotator " + annotatorName + " ERROR: Cannot read header from mapping file " + filePath, e);
                            }
                        }
                        headerList.add(headerItems);
                        for (String field : headerItems) {
                            // annotationFieldnames may be replaced below, so re-wrap it each time.
                            if (predefinedHeaderFields.contains(field) || Arrays.asList(annotationFieldnames).contains(field)) {
                                continue;
                            }
                            Class fieldClass = EnvLookup.lookupAnnotationKeyWithClassname(null, field);
                            if (fieldClass == null) {
                                throw new RuntimeException("Not recognized annotation class field \"" + field + "\" in header for mapping file " + filePath);
                            }
                            annotationFields.add(fieldClass);
                            annotationFieldnames = Arrays.copyOf(annotationFieldnames, annotationFieldnames.length + 1);
                            annotationFieldnames[annotationFieldnames.length - 1] = field;
                        }
                        break;
                    }
                    default: {
                        // Unrecognized option names are ignored.
                        break;
                    }
                }
            }
            if (numOptions > 1) {
                // Strip the options so that only the file path remains for later loading.
                mappings[index] = filePath;
            }
            if (!ignoreCaseSet) {
                ignoreCaseList.add(ignoreCase);
            }
            if (!validPosPatternSet) {
                validPosPatternList.add(validPosPattern);
            }
            if (!headerSet) {
                headerList.add(headerFields);
            }
        }
        return annotationFieldnames;
    }

    /**
     * Reports whether the list holds at least one non-null pattern.
     *
     * @param validPosPatternList the patterns to inspect; elements may be {@code null}
     * @return {@code true} if any element is non-null, {@code false} if the list is empty or
     *         contains only {@code null} elements
     */
    private static boolean atLeastOneValidPosPattern(List<Pattern> validPosPatternList) {
        for (Pattern candidate : validPosPatternList) {
            if (candidate != null) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the annotation classes this annotator declares as prerequisites.
     *
     * @return an unmodifiable set containing the text, tokens, character-offset-begin,
     *         character-offset-end and sentences annotation classes
     */
    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.unmodifiableSet(new ArraySet<Class>(Arrays.asList(CoreAnnotations.TextAnnotation.class, CoreAnnotations.TokensAnnotation.class, CoreAnnotations.CharacterOffsetBeginAnnotation.class, CoreAnnotations.CharacterOffsetEndAnnotation.class, CoreAnnotations.SentencesAnnotation.class)));
    }

    /**
     * Returns the annotation classes this annotator declares as satisfied.
     *
     * @return an unmodifiable set wrapping a new {@code ArraySet} built from this instance's
     *         {@code annotationFields}
     */
    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.unmodifiableSet(new ArraySet<Class>(this.annotationFields));
    }

    /**
     * Value holder for a single mapping entry: the pattern (either a TokensRegex string or a
     * token-by-token regex array), the types it assigns, and its matching parameters.
     * All fields are final and assigned once in the constructor.
     */
    private static class Entry {
        /** TokensRegex pattern text, or {@code null} when the entry is described by {@link #regex}. */
        public final String tokensRegex;
        /** Regex pieces for the entry; stored by reference, not copied. */
        public final String[] regex;
        /** Interned copies of the type strings passed to the constructor. */
        public final String[] types;
        /** Types this entry may overwrite; stored by reference, not copied. */
        public final Set<String> overwritableTypes;
        public final double priority;
        public final double weight;
        public final int annotateGroup;

        /**
         * Creates an entry. The {@code types} array is copied element by element with each
         * string interned; all other arguments are stored as given.
         */
        public Entry(String tokensRegex, String[] regex, String[] types, Set<String> overwritableTypes, double priority, double weight, int annotateGroup) {
            this.tokensRegex = tokensRegex;
            this.regex = regex;
            String[] internedTypes = new String[types.length];
            int position = 0;
            for (String type : types) {
                internedTypes[position++] = type.intern();
            }
            this.types = internedTypes;
            this.overwritableTypes = overwritableTypes;
            this.priority = priority;
            this.weight = weight;
            this.annotateGroup = annotateGroup;
        }

        /**
         * @return the types rendered with {@link Arrays#toString(Object[])}
         */
        public String getTypeDescription() {
            return Arrays.toString(this.types);
        }

        /**
         * Renders the pattern (the TokensRegex text when present, otherwise the joined regex
         * pieces), the joined types, the overwritable types and the priority.
         */
        @Override
        public String toString() {
            String patternText = this.tokensRegex != null ? this.tokensRegex : StringUtils.join(this.regex);
            StringBuilder text = new StringBuilder("Entry{");
            text.append(patternText);
            text.append(' ');
            text.append(StringUtils.join(this.types));
            text.append(' ');
            text.append(this.overwritableTypes);
            text.append(" prio:");
            text.append(this.priority);
            text.append('}');
            return text.toString();
        }
    }

    /**
     * Modes for matching against part-of-speech tags, as named by the constants: all tokens,
     * at least one token, or one-token phrases only.
     * NOTE(review): the exact semantics are defined where these constants are consumed - confirm there.
     */
    enum PosMatchType {
        MATCH_ALL_TOKENS,
        MATCH_AT_LEAST_ONE_TOKEN,
        MATCH_ONE_TOKEN_PHRASE_ONLY
    }
}

