/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.ruta.textruler.core;

import java.io.File;
import java.io.FilenameFilter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.textruler.core.CasCache;
import org.apache.uima.ruta.textruler.core.TextRulerExample;
import org.apache.uima.ruta.textruler.core.TextRulerExampleDocument;
import org.apache.uima.ruta.textruler.core.TextRulerTarget;
import org.apache.uima.ruta.textruler.core.TextRulerToolkit;

/**
 * A collection of {@link TextRulerExampleDocument}s that share one {@link CasCache}.
 *
 * <p>The set is built either from all {@code *.xmi} files of a folder or from an explicit list
 * of file names. It offers bulk operations on the contained documents (example creation and
 * clearing, example collection, token count statistics) and can be partitioned into subsets.
 */
public class TextRulerExampleDocumentSet {
    /** The documents of this set, in the order in which they were added. */
    protected List<TextRulerExampleDocument> documents;
    /** The CAS cache handed to every document of this set. */
    protected CasCache casCache;

    /**
     * Creates a set containing one document for every file in {@code xmiFolderName} whose name
     * ends with {@code ".xmi"}.
     *
     * <p>If the folder cannot be listed (it does not exist, is not a directory, or an I/O error
     * occurs), a message is logged and the set stays empty.
     *
     * @param xmiFolderName path of the folder that holds the XMI files
     * @param casCache the cache passed on to every created document
     */
    public TextRulerExampleDocumentSet(String xmiFolderName, CasCache casCache) {
        this.documents = new ArrayList<TextRulerExampleDocument>();
        this.casCache = casCache;
        File trainingFolder = new File(xmiFolderName);
        File[] files = trainingFolder.listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".xmi");
            }
        });
        // File.listFiles returns null (not an empty array) when the path is not a directory
        // or an I/O error occurs; iterating over it would throw a NullPointerException.
        if (files == null) {
            TextRulerToolkit.log("could not list XMI files in folder: " + xmiFolderName);
            return;
        }
        for (File file : files) {
            TextRulerToolkit.log("found document XMI file: " + file.getName());
            this.documents.add(new TextRulerExampleDocument(file.getAbsolutePath(), casCache));
        }
    }

    /**
     * Creates a set containing one document per given file name, in array order.
     *
     * @param inputXmiFiles file names of the documents to include
     * @param casCache the cache passed on to every created document
     */
    protected TextRulerExampleDocumentSet(String[] inputXmiFiles, CasCache casCache) {
        this.casCache = casCache;
        this.documents = new ArrayList<TextRulerExampleDocument>();
        for (String fileName : inputXmiFiles) {
            this.documents.add(new TextRulerExampleDocument(fileName, casCache));
        }
    }

    /**
     * Asks every document to create its examples for {@code target}. Documents are visited in
     * the order given by {@link #getSortedDocumentsInCacheOptimizedOrder()}.
     *
     * @param target the target to create examples for
     */
    public void createExamplesForTarget(TextRulerTarget target) {
        for (TextRulerExampleDocument doc : this.getSortedDocumentsInCacheOptimizedOrder()) {
            doc.createExamplesForTarget(target);
        }
    }

    /** Calls {@code clearCurrentExamples()} on every document of this set. */
    public void clearCurrentExamples() {
        for (TextRulerExampleDocument doc : this.documents) {
            doc.clearCurrentExamples();
        }
    }

    /**
     * @return the CASes currently held by the underlying cache
     */
    public Collection<CAS> getCachedCASes() {
        return this.casCache.getCachedCASes();
    }

    /**
     * @param key the cache key to look up
     * @return whether the underlying cache contains an element for {@code key}
     */
    public boolean casCacheContainsKey(String key) {
        return this.casCache.containsElementWithKey(key);
    }

    /**
     * @return the positive and negative examples of all documents
     */
    public List<TextRulerExample> getAllExamples() {
        return this.getAllExamples(false);
    }

    /**
     * @return the positive examples of all documents
     */
    public List<TextRulerExample> getAllPositiveExamples() {
        return this.getAllExamples(true);
    }

    /**
     * Collects the examples of all documents into a new list. Per document, the positive
     * examples come first, followed by the negative ones unless {@code onlyPositives} is set.
     *
     * @param onlyPositives if {@code true}, negative examples are left out
     * @return a newly created list with the collected examples
     */
    public List<TextRulerExample> getAllExamples(boolean onlyPositives) {
        ArrayList<TextRulerExample> result = new ArrayList<TextRulerExample>();
        for (TextRulerExampleDocument doc : this.documents) {
            result.addAll(doc.getPositiveExamples());
            if (!onlyPositives) {
                result.addAll(doc.getNegativeExamples());
            }
        }
        return result;
    }

    /**
     * @return the internal document list itself (not a copy)
     */
    public List<TextRulerExampleDocument> getDocuments() {
        return this.documents;
    }

    /**
     * Orders the given documents so that those whose CAS file name is currently a key of the
     * CAS cache come first, followed by the remaining ones. Within both groups the iteration
     * order of {@code documents} is kept.
     *
     * @param documents the documents to order
     * @return an array of length {@code documents.size()} holding the ordered documents
     */
    public TextRulerExampleDocument[] getSortedDocumentsInCacheOptimizedOrder(Collection<TextRulerExampleDocument> documents) {
        HashSet<TextRulerExampleDocument> docsLeft = new HashSet<TextRulerExampleDocument>(documents);
        TextRulerExampleDocument[] sortedDocs = new TextRulerExampleDocument[documents.size()];
        int i = 0;
        for (TextRulerExampleDocument doc : documents) {
            if (this.casCacheContainsKey(doc.getCasFileName())) {
                docsLeft.remove(doc);
                sortedDocs[i++] = doc;
            }
        }
        // Append the uncached documents by walking the input again instead of iterating the
        // hash set, so the result order is deterministic. remove() returns true only for
        // documents still pending, so each remaining document is appended exactly once.
        for (TextRulerExampleDocument doc : documents) {
            if (docsLeft.remove(doc)) {
                sortedDocs[i++] = doc;
            }
        }
        return sortedDocs;
    }

    /**
     * @return the documents of this set, ordered as described in
     *         {@link #getSortedDocumentsInCacheOptimizedOrder(Collection)}
     */
    public TextRulerExampleDocument[] getSortedDocumentsInCacheOptimizedOrder() {
        return this.getSortedDocumentsInCacheOptimizedOrder(this.documents);
    }

    /**
     * Builds a histogram of token counts over all annotations extracted for {@code slotName}
     * from all documents. For each such annotation, the number of
     * {@code org.apache.uima.ruta.type.ANY} annotations within its bounds is determined.
     *
     * @param slotName the slot name whose annotations are measured
     * @param filterSet the filter set that is combined with the slot name for the token lookup
     * @return a list in which index {@code n} holds the number of slot annotations that span
     *         exactly {@code n} tokens; its size is the largest observed token count plus one
     */
    public List<Integer> getTokenCountHistogrammForSlotName(String slotName, Set<String> filterSet) {
        HashMap<Integer, Integer> countsByLength = new HashMap<Integer, Integer>();
        int maxLen = 0;
        for (TextRulerExampleDocument doc : this.getSortedDocumentsInCacheOptimizedOrder(this.documents)) {
            CAS aCas = doc.getCAS();
            List<AnnotationFS> slots = TextRulerToolkit.extractAnnotationsForSlotName(aCas, slotName);
            TypeSystem ts = aCas.getTypeSystem();
            for (AnnotationFS a : slots) {
                List<AnnotationFS> slotTokens = TextRulerToolkit.getAnnotationsWithinBounds(aCas, a.getBegin(), a.getEnd(), TextRulerToolkit.getFilterSetWithSlotName(slotName, filterSet), ts.getType("org.apache.uima.ruta.type.ANY"));
                int len = slotTokens.size();
                if (len > maxLen) {
                    maxLen = len;
                }
                Integer key = Integer.valueOf(len);
                Integer current = countsByLength.get(key);
                // A histogram counts occurrences: each annotation adds one to its bucket
                // (not its token length).
                countsByLength.put(key, current == null ? 1 : current + 1);
            }
        }
        ArrayList<Integer> resultList = new ArrayList<Integer>(maxLen + 1);
        for (int i = 0; i <= maxLen; ++i) {
            Integer count = countsByLength.get(i);
            resultList.add(count == null ? 0 : count);
        }
        return resultList;
    }

    /**
     * @param key the cache key of the requested CAS
     * @return whatever the underlying cache returns for {@code key}
     */
    public CAS getCAS(String key) {
        return this.casCache.getCAS(key);
    }

    /**
     * @return the number of documents in this set
     */
    public int size() {
        return this.documents.size();
    }

    /**
     * @param fileName the CAS file name to search for
     * @return the first document whose CAS file name equals {@code fileName}, or {@code null}
     *         if there is none
     */
    public TextRulerExampleDocument getDocumentForFileName(String fileName) {
        for (TextRulerExampleDocument doc : this.documents) {
            if (doc.getCasFileName().equals(fileName)) {
                return doc;
            }
        }
        return null;
    }

    /**
     * Splits this set into consecutive subsets whose sizes follow the given percentages.
     *
     * <p>Every partition except the last gets its rounded share of the documents (at least
     * one); the last partition gets all documents that are left. The subsets share this set's
     * CAS cache.
     *
     * @param percentages the share of each partition; no entry may be zero and the entries
     *        must add up to 100
     * @return the subsets in the order of {@code percentages}, or {@code null} (after logging
     *         the reason) if the percentages are invalid or there are too few documents to
     *         give every partition at least one
     */
    public List<TextRulerExampleDocumentSet> partitionIntoSubsets(int[] percentages) {
        ArrayList<TextRulerExampleDocumentSet> result = new ArrayList<TextRulerExampleDocumentSet>();
        int sum = 0;
        for (int p : percentages) {
            if (p == 0) {
                TextRulerToolkit.log("[TextRulerExampleDocumentSet.partitionIntoSubsets] a percentage must not be zero!");
                return null;
            }
            sum += p;
        }
        if (sum != 100) {
            TextRulerToolkit.log("[TextRulerExampleDocumentSet.partitionIntoSubsets] percentages has to be 100 in total!");
            return null;
        }
        int total = this.size();
        int rest = total;
        int docIndex = 0;
        for (int i = 0; i < percentages.length; ++i) {
            int partSize;
            if (i != percentages.length - 1) {
                partSize = Math.round((float)(percentages[i] * total) / 100.0f);
                if (partSize == 0) {
                    partSize = 1;
                }
            } else {
                // The last partition absorbs the rounding differences of the previous ones.
                partSize = rest;
            }
            // A partition must be non-empty and must not claim more documents than are left;
            // otherwise the copy loop below would read past the end of the document list.
            if (partSize <= 0 || partSize > rest) {
                TextRulerToolkit.log("[TextRulerExampleDocumentSet.partitionIntoSubsets] a percentage must not be zero! too few example documents for your partition?");
                return null;
            }
            String[] fileNames = new String[partSize];
            for (int doc = 0; doc < partSize; ++doc) {
                fileNames[doc] = this.documents.get(doc + docIndex).getCasFileName();
            }
            docIndex += partSize;
            result.add(new TextRulerExampleDocumentSet(fileNames, this.casCache));
            rest -= partSize;
        }
        return result;
    }
}

