/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * An in-memory map from words to sets of translations, loaded from a
 * dictionary text file.
 *
 * <p>Each line of the dictionary file is matched against a regular expression;
 * capture group 1 supplies the word and capture group 2 supplies a group of
 * translations that is split on a delimiter regex. Words are lower-cased and
 * trimmed before they are stored or looked up. When the map is created in
 * "normalized" mode, a fixed list of punctuation/annotation patterns is also
 * stripped from words and translations while loading.
 *
 * <p>The constant names suggest the default file is a CEDict
 * Chinese-English dictionary; that is an assumption from naming, not
 * something this class verifies.
 */
public class ChineseEnglishWordMap
implements Serializable {
    private static final long serialVersionUID = 7655332268578049993L;
    private static final String defaultPath = "cedict_ts.u8";
    private static final String defaultPath2 = "/u/nlp/data/chinese-english-dictionary/cedict_ts.u8";
    private static final String ENV_VARIABLE = "CEDICT";
    private static final String defaultPattern = "[^ ]+ ([^ ]+)[^/]+/(.+)/";
    private static final String defaultDelimiter = "[/;]";
    private static final String defaultCharset = "UTF-8";
    private static final String[] punctuations = new String[]{"\uff08.*?\uff09", "\\(.*?\\)", "<.*?>", "[\u2033\u20dd\u25cb\u25ef\u2039\u2329\u27e8\u203a\u232a\u27e9\u00ab\u27ea\u00bb\u27eb\u2308\u230b\u27e6\u27e7\u3030\uff5e\u201c\u2036\u201d\u2033\u2307\u301c\u3012\u29c4\u300a\u300b\u3000]", "^to "};
    /** The {@link #punctuations} regexes, compiled once instead of on every normalize call. */
    private static final Pattern[] punctuationPatterns = compilePunctuations();
    private static final boolean DEBUG = false;

    /** Word (lower-cased, trimmed) to its translations. */
    private final Map<String, Set<String>> map = new HashMap<String, Set<String>>(10000);
    /** Whether words and translations are stripped of the punctuation patterns while loading. */
    private boolean normalized = false;

    /** Compiles every entry of {@link #punctuations}, preserving order. */
    private static Pattern[] compilePunctuations() {
        Pattern[] compiled = new Pattern[punctuations.length];
        for (int i = 0; i < punctuations.length; ++i) {
            compiled[i] = Pattern.compile(punctuations[i]);
        }
        return compiled;
    }

    /**
     * Returns the shared instance, which is built lazily with the no-argument
     * constructor the first time this method is called.
     *
     * @return the singleton map
     */
    public static ChineseEnglishWordMap getInstance() {
        return SingletonHolder.INSTANCE;
    }

    /** Converts a word to the form used for map keys: lower-cased, then trimmed. */
    private static String canonicalKey(String key) {
        return key.toLowerCase().trim();
    }

    /**
     * Tests whether the map has an entry for the given word.
     *
     * @param key the word; it is lower-cased and trimmed before the lookup
     * @return {@code true} if an entry exists
     */
    public boolean containsKey(String key) {
        return this.map.containsKey(canonicalKey(key));
    }

    /**
     * Returns every translation stored for the given word.
     *
     * @param key the word; it is lower-cased and trimmed before the lookup
     * @return the stored set itself (not a copy), or {@code null} if the word is unknown
     */
    public Set<String> getAllTranslations(String key) {
        return this.map.get(canonicalKey(key));
    }

    /**
     * Returns the first translation stored for the given word.
     *
     * @param key the word; it is lower-cased and trimmed before the lookup
     * @return the first translation in the stored set's iteration order, or
     *         {@code null} if the word is unknown or has no translations
     */
    public String getFirstTranslation(String key) {
        Set<String> strings = this.map.get(canonicalKey(key));
        // An empty set can be stored through addMap; report it as "no translation"
        // rather than failing with NoSuchElementException.
        if (strings == null || strings.isEmpty()) {
            return null;
        }
        return strings.iterator().next();
    }

    /**
     * Loads a dictionary file using the default line pattern, delimiter and
     * charset (UTF-8), adding its entries to this map.
     *
     * @param dictPath path of the dictionary file
     * @throws RuntimeException if the file cannot be read
     */
    public void readCEDict(String dictPath) {
        this.readCEDict(dictPath, defaultPattern, defaultDelimiter, defaultCharset);
    }

    /**
     * Strips every punctuation pattern from the string and trims it, but only
     * when this map is in normalized mode; otherwise returns the input unchanged.
     */
    private String normalize(String t) {
        if (!this.normalized) {
            return t;
        }
        for (Pattern punc : punctuationPatterns) {
            t = punc.matcher(t).replaceAll("");
        }
        return t.trim();
    }

    /**
     * Normalizes each element of the set and drops elements that become empty,
     * but only when this map is in normalized mode; otherwise returns the input
     * set itself. The result keeps the iteration order of the input so that the
     * "first" translation stays the first one listed in the dictionary.
     */
    private Set<String> normalize(Set<String> trans) {
        if (!this.normalized) {
            return trans;
        }
        Set<String> set = new LinkedHashSet<String>();
        for (String t : trans) {
            t = this.normalize(t);
            if (!t.equals("")) {
                set.add(t);
            }
        }
        return set;
    }

    /**
     * Loads a dictionary file, adding its entries to this map.
     *
     * <p>Every line that fully matches {@code pattern} contributes one entry:
     * group 1 is the word, group 2 is split on {@code delimiter} to give the
     * translations. Lines that do not match are ignored. If the word is
     * already present, the new non-empty translations are merged into the
     * existing set; otherwise a new entry is stored, provided both the word
     * and its translation set are non-empty.
     *
     * @param dictPath  path of the dictionary file
     * @param pattern   regex a line must match; needs at least two capture groups
     * @param delimiter regex used to split the translation group
     * @param charset   name of the charset used to decode the file
     * @throws RuntimeException if an {@link IOException} occurs while opening or reading the file
     */
    public void readCEDict(String dictPath, String pattern, String delimiter, String charset) {
        InputStream in = null;
        try {
            in = new FileInputStream(dictPath);
            BufferedReader infile = new BufferedReader(new InputStreamReader(in, charset));
            Pattern p = Pattern.compile(pattern);
            for (String line = infile.readLine(); line != null; line = infile.readLine()) {
                Matcher m = p.matcher(line);
                if (!m.matches()) {
                    continue;
                }
                // Look up under the same (normalized) key that entries are stored
                // under, so repeated headwords merge instead of overwriting.
                String word = this.normalize(canonicalKey(m.group(1)));
                String[] trans = m.group(2).split(delimiter);
                Set<String> existing = this.map.get(word);
                if (existing != null) {
                    for (String t : trans) {
                        t = this.normalize(t);
                        if (!t.equals("")) {
                            existing.add(t);
                        }
                    }
                } else {
                    Set<String> normSet = this.normalize(new LinkedHashSet<String>(Arrays.asList(trans)));
                    if (!word.equals("") && normSet.size() > 0) {
                        this.map.put(word, normSet);
                    }
                }
            }
        }
        catch (IOException e) {
            throw new RuntimeException("IOException reading CEDict from file " + dictPath, e);
        }
        finally {
            // Always release the file handle, including when reading or parsing fails.
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ignored) {
                    // The stream was only read from; a failure to close loses no data.
                }
            }
        }
    }

    /**
     * Creates a map from the first readable dictionary among: the default
     * relative path, the default absolute path, and the path named by the
     * {@code CEDICT} environment variable.
     *
     * @throws RuntimeException if none of the candidates is readable
     */
    public ChineseEnglishWordMap() {
        this.readCEDict(findDefaultDictionary());
    }

    /** Returns the first readable default dictionary path, or throws if there is none. */
    private static String findDefaultDictionary() {
        if (new File(defaultPath).canRead()) {
            return defaultPath;
        }
        if (new File(defaultPath2).canRead()) {
            return defaultPath2;
        }
        // getenv returns null when the variable is unset; File rejects a null path.
        String path = System.getenv(ENV_VARIABLE);
        if (path != null && new File(path).canRead()) {
            return path;
        }
        throw new RuntimeException("ChineseEnglishWordMap cannot find dictionary");
    }

    /**
     * Creates a non-normalized map from the given dictionary file using the
     * default pattern, delimiter and charset.
     *
     * @param dictPath path of the dictionary file
     */
    public ChineseEnglishWordMap(String dictPath) {
        this.readCEDict(dictPath);
    }

    /**
     * Creates a map from the given dictionary file using the default pattern,
     * delimiter and charset.
     *
     * @param dictPath   path of the dictionary file
     * @param normalized whether to strip the punctuation patterns while loading
     */
    public ChineseEnglishWordMap(String dictPath, boolean normalized) {
        this.normalized = normalized;
        this.readCEDict(dictPath);
    }

    /**
     * Creates a non-normalized map from the given dictionary file.
     *
     * @param dictPath  path of the dictionary file
     * @param pattern   regex a line must match; group 1 is the word, group 2 the translations
     * @param delimiter regex used to split the translation group
     * @param charset   name of the charset used to decode the file
     */
    public ChineseEnglishWordMap(String dictPath, String pattern, String delimiter, String charset) {
        this.readCEDict(dictPath, pattern, delimiter, charset);
    }

    /**
     * Creates a map from the given dictionary file.
     *
     * @param dictPath   path of the dictionary file
     * @param pattern    regex a line must match; group 1 is the word, group 2 the translations
     * @param delimiter  regex used to split the translation group
     * @param charset    name of the charset used to decode the file
     * @param normalized whether to strip the punctuation patterns while loading
     */
    public ChineseEnglishWordMap(String dictPath, String pattern, String delimiter, String charset, boolean normalized) {
        this.normalized = normalized;
        this.readCEDict(dictPath, pattern, delimiter, charset);
    }

    /**
     * Tests whether every character of the string is a digit according to
     * {@link Character#isDigit(char)}. The empty string yields {@code true}.
     */
    private static boolean isDigits(String in) {
        int len = in.length();
        for (int i = 0; i < len; ++i) {
            if (!Character.isDigit(in.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds a new map from each translation to the set of words that list it.
     * The result is independent of this map's internal state.
     *
     * @return translation to words, each word set in insertion order
     */
    public Map<String, Set<String>> getReverseMap() {
        Set<Map.Entry<String, Set<String>>> entries = this.map.entrySet();
        Map<String, Set<String>> rMap = new HashMap<String, Set<String>>(entries.size());
        for (Map.Entry<String, Set<String>> me : entries) {
            String k = me.getKey();
            for (String trans : me.getValue()) {
                Set<String> entry = rMap.get(trans);
                if (entry == null) {
                    entry = new LinkedHashSet<String>(6);
                    rMap.put(trans, entry);
                }
                entry.add(k);
            }
        }
        return rMap;
    }

    /**
     * Merges another word-to-translations map into this one. Keys and
     * translations are used exactly as given (no lower-casing, trimming or
     * normalization). Unknown keys get a copy of the supplied set; known keys
     * gain any translations they do not yet have.
     *
     * @param addM the entries to merge in
     * @return the number of translations that were newly added
     */
    public int addMap(Map<String, Set<String>> addM) {
        int newTrans = 0;
        for (Map.Entry<String, Set<String>> me : addM.entrySet()) {
            String k = me.getKey();
            Set<String> addList = me.getValue();
            Set<String> origList = this.map.get(k);
            if (origList == null) {
                this.map.put(k, new LinkedHashSet<String>(addList));
                newTrans += addList.size();
                continue;
            }
            for (String toAdd : addList) {
                // Set.add reports whether the element was actually new.
                if (origList.add(toAdd)) {
                    ++newTrans;
                }
            }
        }
        return newTrans;
    }

    /** Returns the string form of the underlying map. */
    @Override
    public String toString() {
        return this.map.toString();
    }

    /**
     * Returns the number of words in the map.
     *
     * @return the entry count
     */
    public int size() {
        return this.map.size();
    }

    /**
     * Command-line driver: reads whitespace-separated words from an input
     * file and prints, one word per line, the word followed by its
     * translation(s), {@code [NUMBER]} for all-digit words, the word itself
     * for punctuation, or {@code [UNK]} for unknown words. A coverage summary
     * is written to standard error at the end.
     *
     * <p>Usage: {@code ChineseEnglishWordMap [-all] [-dictPath path] [-encoding enc_string] inputFile}
     *
     * @param args command-line arguments as described above
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        HashMap<String, Integer> flagsToNumArgs = new HashMap<String, Integer>();
        flagsToNumArgs.put("-dictPath", 1);
        flagsToNumArgs.put("-encoding", 1);
        Map<String, String[]> argMap = StringUtils.argsToMap(args, flagsToNumArgs);
        String[] otherArgs = argMap.get(null);
        if (otherArgs == null || otherArgs.length < 1) {
            System.err.println("usage: ChineseEnglishWordMap [-all] [-dictPath path] [-encoding enc_string] inputFile");
            System.exit(1);
        }
        String filename = otherArgs[0];
        boolean allTranslations = argMap.containsKey("-all");
        String charset = defaultCharset;
        if (argMap.containsKey("-encoding")) {
            charset = argMap.get("-encoding")[0];
        }
        int totalWords = 0;
        int coveredWords = 0;
        InputStream in = new FileInputStream(filename);
        try {
            BufferedReader r = new BufferedReader(new InputStreamReader(in, charset));
            ChineseTreebankLanguagePack tlp = new ChineseTreebankLanguagePack();
            String[] dpString = argMap.get("-dictPath");
            ChineseEnglishWordMap cewm = dpString == null ? new ChineseEnglishWordMap() : new ChineseEnglishWordMap(dpString[0]);
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, charset), true);
            for (String line = r.readLine(); line != null; line = r.readLine()) {
                for (String word : line.split("\\s", 1000)) {
                    // NOTE(review): empty tokens are counted in totalWords before being
                    // skipped; kept as-is to preserve the reported statistics.
                    ++totalWords;
                    if (word.length() == 0) {
                        continue;
                    }
                    pw.print(StringUtils.pad(word + ':', 8));
                    if (tlp.isPunctuationWord(word)) {
                        // Punctuation is echoed but excluded from the word count.
                        --totalWords;
                        pw.print(word);
                    } else if (ChineseEnglishWordMap.isDigits(word)) {
                        pw.print(word + " [NUMBER]");
                    } else if (cewm.containsKey(word)) {
                        ++coveredWords;
                        if (allTranslations) {
                            boolean first = true;
                            for (String s : cewm.getAllTranslations(word)) {
                                pw.print((first ? "" : "|") + s);
                                first = false;
                            }
                        } else {
                            pw.print(cewm.getFirstTranslation(word));
                        }
                    } else {
                        pw.print("[UNK]");
                    }
                    pw.println();
                }
                pw.println();
            }
            pw.flush();
        }
        finally {
            // Release the input file even when translation fails part-way.
            in.close();
        }
        System.err.print("Finished translating " + totalWords + " words (");
        System.err.println(coveredWords + " were in dictionary).");
    }

    /** Holder whose class initialization creates the singleton on first access. */
    private static class SingletonHolder {
        private static final ChineseEnglishWordMap INSTANCE = new ChineseEnglishWordMap();

        private SingletonHolder() {
        }
    }
}

