/*
 * Decompiled with CFR 0.152.
 */
package marytts.language.hi;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.exceptions.MaryConfigurationException;
import marytts.fst.FSTLookup;
import marytts.language.hi.phonemiser.HindiLTS;
import marytts.modules.InternalModule;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.server.MaryProperties;
import marytts.util.MaryRuntimeUtils;
import marytts.util.dom.MaryDomUtils;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.NodeIterator;

/**
 * Phonemiser module for Hindi: fills in the {@code ph} attribute of {@code t}
 * (token) elements, using an optional user dictionary first and the
 * {@link HindiLTS} letter-to-sound component as fallback.
 */
public class JPhonemiser
extends InternalModule {
    /** User dictionary: graphemes mapped to a list of {@code "transcription"} or {@code "transcription|pos"} entries; may be null. */
    protected Map<String, List<String>> userdict;
    /** Not assigned or read anywhere in this class. */
    protected FSTLookup lexicon;
    /** Letter-to-sound component used when the user dictionary has no entry. */
    protected HindiLTS lts;
    /** Allophone set used to validate user dictionary transcriptions. */
    protected AllophoneSet allophoneSet;

    /**
     * Creates a phonemiser whose property names are derived from a common prefix
     * ({@code allophoneset}, {@code userdict} and {@code utf8toit3map} are appended).
     *
     * @param propertyPrefix prefix of the configuration property names
     * @throws IOException if reading a configured resource fails
     * @throws MaryConfigurationException if required configuration is missing
     */
    public JPhonemiser(String propertyPrefix) throws IOException, MaryConfigurationException {
        this("JPhonemiser", MaryDataType.PARTSOFSPEECH, MaryDataType.PHONEMES, propertyPrefix + "allophoneset", propertyPrefix + "userdict", propertyPrefix + "utf8toit3map");
    }

    /**
     * Creates a phonemiser from explicitly named properties.
     *
     * @param componentName name of this module
     * @param inputType data type consumed by this module
     * @param outputType data type produced by this module
     * @param allophonesProperty property naming the allophone set
     * @param userdictProperty property naming the (optional) user dictionary file
     * @param utf8toit3mapProperty property naming the stream handed to {@link HindiLTS}
     * @throws IOException if the user dictionary cannot be read
     * @throws MaryConfigurationException if required configuration is missing
     */
    public JPhonemiser(String componentName, MaryDataType inputType, MaryDataType outputType, String allophonesProperty, String userdictProperty, String utf8toit3mapProperty) throws IOException, MaryConfigurationException {
        super(componentName, inputType, outputType, MaryRuntimeUtils.needAllophoneSet(allophonesProperty).getLocale());
        this.allophoneSet = MaryRuntimeUtils.needAllophoneSet(allophonesProperty);
        String userdictFilename = MaryProperties.getFilename(userdictProperty);
        if (userdictFilename != null) {
            if (new File(userdictFilename).exists()) {
                this.userdict = this.readLexicon(userdictFilename);
            } else {
                this.logger.info("User dictionary '" + userdictFilename + "' for locale '" + this.getLocale() + "' does not exist. Ignoring.");
            }
        }
        // NOTE(review): the stream is not closed here; confirm whether HindiLTS
        // consumes/closes it itself before adding a close().
        InputStream utf8toit3mapStream = MaryProperties.needStream(utf8toit3mapProperty);
        this.lts = new HindiLTS(utf8toit3mapStream);
    }

    /**
     * Phonemises every {@code t} element of the document that has no {@code ph}
     * attribute yet, or whose {@code ph} attribute contains a {@code *} placeholder.
     * The text to transcribe is the {@code sounds_like} attribute if present,
     * otherwise the token text. It is split at spaces and hyphens; the parts'
     * transcriptions are joined with {@code " - "}, and in every part after the
     * first, {@code '} is replaced by {@code ,}.
     *
     * @param d input data holding the document to annotate
     * @return new data of this module's output type wrapping the same (modified) document
     * @throws Exception if phonemisation fails
     */
    public MaryData process(MaryData d) throws Exception {
        Document doc = d.getDocument();
        NodeIterator it = MaryDomUtils.createNodeIterator((Document)doc, (Node)doc, new String[]{"t"});
        Element t;
        while ((t = (Element)it.nextNode()) != null) {
            // A ph attribute without '*' is already complete: leave it alone.
            if (t.hasAttribute("ph") && !t.getAttribute("ph").contains("*")) {
                continue;
            }
            String text = t.hasAttribute("sounds_like") ? t.getAttribute("sounds_like") : MaryDomUtils.tokenText((Element)t);
            if (text == null || text.isEmpty()) {
                continue;
            }
            String pos = t.hasAttribute("pos") ? t.getAttribute("pos") : null;
            // Null-safe comparison: tokens without a pos attribute must not crash.
            if ("$PUNCT".equals(pos)) {
                continue;
            }
            StringBuilder ph = new StringBuilder();
            String g2pMethod = null;
            StringTokenizer st = new StringTokenizer(text, " -");
            while (st.hasMoreTokens()) {
                String graph = st.nextToken();
                StringBuilder helper = new StringBuilder();
                String phon = this.phonemise(graph, pos, helper);
                if (phon == null) {
                    // No transcription available for this part; skip it rather
                    // than appending the text "null" or dereferencing null.
                    continue;
                }
                if (ph.length() == 0) {
                    // The method reported for the token is that of its first part.
                    g2pMethod = helper.toString();
                    ph.append(phon);
                } else {
                    ph.append(" - ");
                    ph.append(phon.replace('\'', ','));
                }
            }
            if (ph.length() == 0) {
                continue;
            }
            this.setPh(t, ph.toString());
            t.setAttribute("g2p_method", g2pMethod);
        }
        MaryData result = new MaryData(this.outputType(), d.getLocale());
        result.setDocument(doc);
        return result;
    }

    /**
     * Transcribes a single word: user dictionary first, then letter-to-sound rules.
     *
     * @param text the graphemic form to transcribe
     * @param pos part-of-speech tag used for user dictionary disambiguation; may be null
     * @param g2pMethod receives {@code "userdict"} or {@code "rules"} depending on the source of the result
     * @return the transcription, or null if neither source produced one
     * @throws IOException declared for callers; propagated from the lookup components
     */
    public String phonemise(String text, String pos, StringBuilder g2pMethod) throws IOException {
        String result = this.userdictLookup(text, pos);
        if (result != null) {
            g2pMethod.append("userdict");
            return result;
        }
        result = this.lts.phonemise(text);
        if (result != null) {
            g2pMethod.append("rules");
            return result;
        }
        return null;
    }

    /**
     * Looks up a word in the user dictionary. The word is tried as given, then
     * lower-cased, then with its first character upper-cased. If several entries
     * exist, the first one whose part-of-speech list contains {@code pos} wins;
     * if none matches, the last entry's transcription is returned.
     *
     * @param text the graphemic form to look up
     * @param pos part-of-speech tag to match; may be null
     * @return the transcription, or null if there is no user dictionary or no entry
     */
    public String userdictLookup(String text, String pos) {
        if (this.userdict == null || text == null || text.length() == 0) {
            return null;
        }
        List<String> entries = this.userdict.get(text);
        if (entries == null) {
            text = text.toLowerCase(this.getLocale());
            entries = this.userdict.get(text);
        }
        if (entries == null) {
            text = text.substring(0, 1).toUpperCase(this.getLocale()) + text.substring(1);
            entries = this.userdict.get(text);
        }
        if (entries == null) {
            return null;
        }
        String transcr = null;
        for (String entry : entries) {
            String[] parts = entry.split("\\|");
            transcr = parts[0];
            if (parts.length <= 1 || pos == null) {
                continue;
            }
            // Tokenise only the POS part; tokenising the whole entry would leave
            // the first tag glued to the transcription by '|' and never match.
            StringTokenizer tokenizer = new StringTokenizer(parts[1]);
            while (tokenizer.hasMoreTokens()) {
                if (pos.equals(tokenizer.nextToken())) {
                    return transcr;
                }
            }
        }
        // No POS match: fall back to the last entry.
        return transcr;
    }

    /**
     * Reads a UTF-8 lexicon file with lines of the form
     * {@code graphemes | transcription [| pos]}. Blank lines and lines starting
     * with {@code #} are ignored; lines without a transcription field are logged
     * and skipped. A transcription that cannot be split into allophones is
     * logged as a warning but still stored.
     *
     * @param lexiconFilename path of the lexicon file
     * @return map from graphemes to {@code "transcription"} or {@code "transcription|pos"} entries
     * @throws IOException if the file cannot be opened or read
     */
    protected Map<String, List<String>> readLexicon(String lexiconFilename) throws IOException {
        Map<String, List<String>> fLexicon = new HashMap<String, List<String>>();
        // try-with-resources guarantees the file is closed even when reading fails.
        try (BufferedReader lexiconFile = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(lexiconFilename), "UTF-8"))) {
            String line;
            while ((line = lexiconFile.readLine()) != null) {
                if (line.trim().equals("") || line.startsWith("#")) {
                    continue;
                }
                String[] lineParts = line.split("\\s*\\|\\s*");
                if (lineParts.length < 2) {
                    // Malformed line (no '|' separated transcription): skip instead of crashing.
                    this.logger.warn("Lexicon '" + lexiconFilename + "': ignoring malformed line '" + line + "'");
                    continue;
                }
                String graphStr = lineParts[0];
                String phonStr = lineParts[1];
                try {
                    this.allophoneSet.splitIntoAllophones(phonStr);
                }
                catch (RuntimeException re) {
                    this.logger.warn("Lexicon '" + lexiconFilename + "': invalid entry for '" + graphStr + "'", (Throwable)re);
                }
                String phonPosStr = phonStr;
                if (lineParts.length > 2) {
                    String pos = lineParts[2];
                    if (!pos.trim().equals("")) {
                        phonPosStr = phonPosStr + "|" + pos;
                    }
                }
                List<String> transcriptions = fLexicon.get(graphStr);
                if (transcriptions == null) {
                    transcriptions = new ArrayList<String>();
                    fLexicon.put(graphStr, transcriptions);
                }
                transcriptions.add(phonPosStr);
            }
        }
        return fLexicon;
    }

    /**
     * Stores a transcription on a {@code t} element. If the element already has
     * a {@code ph} attribute, its first {@code *} is replaced by {@code ph}
     * (the attribute is left unchanged if it contains no {@code *}); otherwise
     * the attribute is created.
     *
     * @param t the token element to annotate
     * @param ph the transcription, inserted literally
     * @throws DOMException if {@code t} is not a {@code t} element
     */
    protected void setPh(Element t, String ph) {
        if (!t.getTagName().equals("t")) {
            throw new DOMException(15, "Only t elements allowed, received " + t.getTagName() + ".");
        }
        if (t.hasAttribute("ph")) {
            String prevPh = t.getAttribute("ph");
            // Literal splice rather than String.replaceFirst: the latter treats
            // '$' and '\' in the replacement as group references/escapes.
            int star = prevPh.indexOf('*');
            String newPh = star < 0 ? prevPh : prevPh.substring(0, star) + ph + prevPh.substring(star + 1);
            t.setAttribute("ph", newPh);
        } else {
            t.setAttribute("ph", ph);
        }
    }
}

