// Author:   Gloumouth1
// Project:  http://gloumouth1.free.fr/Marabout/java
// Data:     https://fr.wiktionary.org
// License:  https://creativecommons.org/licenses/by-sa/4.0/deed.fr

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import org.jgrapht.Graph;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * SAX handler that reads {@code title} and {@code text} elements (the file
 * header points at Wiktionary dumps), extracts the first pronunciation written
 * in the target language from each text, and records it in a graph.
 *
 * <p>For every accepted pronunciation, the hash codes of its first and last
 * syllables are added as vertices and the current title is added as the edge
 * linking them.
 *
 * <p>Two dictionary layouts are supported:
 * <ul>
 * <li>{@code "FR"}: {@code == {{langue|fr}} == ... {{pron|bɔ̃.ʒuʁ|fr}}}</li>
 * <li>{@code "EN"}: {@code ==English== ... {{IPA|en|/xxxxxx/}}}</li>
 * </ul>
 */
public class MyContentHandler implements ContentHandler {

        /** Titles excluded because they were badly phonetized on fr.wiktionary.org. */
        private static final List<String> WORDS_TO_EXCLUDE = Arrays.asList(
                        "trigle à ailes bleues", "nikoumouk", "langues amamis",
                        "runomancie", "runomancies", "bois de Lima", "cache-épouti", "être l’affaire de",
                        "AD perso", "Westmount");

        /** Characters separating syllables in a pronunciation; compiled once instead of per call. */
        private static final Pattern SYLLABLE_SEPARATORS = Pattern.compile("[ .ˈˌː]+");

        /** Marker opening a language section in the "FR" layout. */
        private static final String FR_LANGUAGE_TAG = "== {{langue|";

        /** Heading marker used by the "EN" layout. */
        private static final String EN_HEADING_MARK = "==";

        /** A progress dot is printed every time this many elements have been started. */
        private static final int PROGRESS_INTERVAL = 1000000;

        private int counter = 0;
        private final StringBuilder content = new StringBuilder();
        private String title;

        private final String targetLanguage;
        /** {@code true} for the "FR" layout, {@code false} for the "EN" layout. */
        private final boolean frenchDictionary;

        private final String tagStartPronunciations;
        private final String tagEndPronunciations;

        private final Graph<Integer, String> graph;
        private final int syllablesMin;
        private final int syllablesMax;

        /**
         * Creates a handler for one dictionary layout and one target language.
         *
         * @param dictionaryLanguage layout of the parsed dictionary: {@code "FR"} or {@code "EN"}
         * @param targetLanguage     language code a pronunciation must belong to (compared with
         *                           the value returned by {@link #getReadLanguage(String)})
         * @param syllables_min      minimum number of syllables accepted (inclusive)
         * @param syllables_max      maximum number of syllables accepted (inclusive)
         * @param graph              graph receiving the syllable vertices and title edges
         * @throws IllegalArgumentException if {@code dictionaryLanguage} is neither
         *                                  {@code "FR"} nor {@code "EN"}
         */
        public MyContentHandler(String dictionaryLanguage, String targetLanguage,
                        int syllables_min, int syllables_max, Graph<Integer, String> graph) {
                this.targetLanguage = targetLanguage;
                this.graph = graph;
                this.syllablesMin = syllables_min;
                this.syllablesMax = syllables_max;

                if ("FR".equals(dictionaryLanguage)) { // == {{langue|fr}} == ... {{pron|bɔ̃.ʒuʁ|fr}}
                        this.frenchDictionary = true;
                        this.tagStartPronunciations = "{{pron|";
                        this.tagEndPronunciations = "|";
                } else if ("EN".equals(dictionaryLanguage)) { // ==English==\n ... {{IPA|en|/xxxxxx/}}
                        this.frenchDictionary = false;
                        this.tagStartPronunciations = "{{IPA|" + targetLanguage + "|";
                        this.tagEndPronunciations = "}}";

                        // TODO known issue: pronunciations written with the {{pron|...}} template
                        // are not captured, e.g. {{pron|ni.ˈʒɛɹ.i.ən|en}} {{pron|ni.ˈʒɛə.ɹɪən|en}}
                } else {
                        // Fail fast with a clear message instead of a NullPointerException later on.
                        throw new IllegalArgumentException("Unsupported dictionary language: "
                                        + dictionaryLanguage + " (expected \"FR\" or \"EN\")");
                }
        }

        @Override
        public void setDocumentLocator(Locator locator) {
        }

        @Override
        public void startDocument() throws SAXException {
        }

        @Override
        public void endDocument() throws SAXException {
        }

        @Override
        public void startPrefixMapping(String prefix, String uri)
                        throws SAXException {
        }

        @Override
        public void endPrefixMapping(String prefix) throws SAXException {
        }

        /**
         * Prints a progress dot every {@link #PROGRESS_INTERVAL} elements (including the
         * very first one) and clears the text buffer so that it only holds the character
         * data received since the most recent start tag.
         */
        @Override
        public void startElement(String uri, String localName, String qName,
                        Attributes atts) throws SAXException {
                if (counter++ % PROGRESS_INTERVAL == 0) {
                        System.out.print(".");
                }
                content.setLength(0);
        }

        /**
         * Tells whether a title may be analysed. Titles that start or end with a hyphen,
         * and titles listed as badly phonetized on fr.wiktionary.org, are rejected.
         *
         * @param word the title to check; {@code null} is rejected
         * @return {@code true} if the title may be analysed
         */
        public boolean isAcceptable(String word) {
                if (word == null) {
                        return false; // no title has been read yet
                }
                return !(word.endsWith("-") || word.startsWith("-") || WORDS_TO_EXCLUDE.contains(word));
        }

        /**
         * Returns the language declared by the last language heading found in the text
         * that precedes a pronunciation.
         *
         * <p>In the "FR" layout this is the raw content of the last
         * {@code == {{langue|xx}}} marker. In the "EN" layout, headings closed by three or
         * more {@code =} are skipped backwards until a {@code ==Name==} heading is found;
         * only {@code English}, {@code French} and {@code Esperanto} are recognised.
         *
         * @param stringBefore the text located before the pronunciation
         * @return the language code, or an empty string when none could be determined
         */
        public String getReadLanguage(String stringBefore) {
                if (frenchDictionary) {
                        int languageStartPosition = stringBefore.lastIndexOf(FR_LANGUAGE_TAG);
                        if (languageStartPosition == -1) {
                                return "";
                        }
                        int nameStart = languageStartPosition + FR_LANGUAGE_TAG.length();
                        int languageEndPosition = stringBefore.indexOf("}}", nameStart);
                        if (languageEndPosition == -1) {
                                return "";
                        }
                        return stringBefore.substring(nameStart, languageEndPosition);
                }

                // "EN" layout: walk backwards over the heading markers without copying the text.
                // 'limit' is the exclusive end of the part of stringBefore still to be searched.
                int limit = stringBefore.length();
                while (true) {
                        int indexTagEnd = stringBefore.lastIndexOf(EN_HEADING_MARK, limit - 2);
                        if (indexTagEnd <= 0) {
                                return "";
                        }
                        if (stringBefore.charAt(indexTagEnd - 1) == '=') {
                                // Part of a run of three or more '=': not a language heading, keep looking.
                                limit = indexTagEnd - 1;
                                continue;
                        }
                        int indexTagStart = stringBefore.lastIndexOf(EN_HEADING_MARK, indexTagEnd - 3);
                        if (indexTagStart == -1) {
                                return ""; // closing marker without an opening one: not a heading
                        }
                        String readString = stringBefore
                                        .substring(indexTagStart + EN_HEADING_MARK.length(), indexTagEnd)
                                        .trim(); // tolerate "== English =="
                        if (readString.equals("English")) {
                                return "en";
                        } else if (readString.equals("French")) {
                                return "fr";
                        } else if (readString.equals("Esperanto")) {
                                return "eo";
                        }
                        return "";
                }
        }

        /**
         * Splits the content of a pronunciation template into individual pronunciations.
         *
         * <p>In the "FR" layout the content is returned unchanged as a single element. In
         * the "EN" layout the content looks like {@code /pron1/|/pron2/|/pron3/}: only the
         * parts enclosed in slashes are kept, without the slashes and without leading
         * stress marks; parts that end up empty are dropped.
         *
         * @param prononciationsString the text found between the pronunciation tags
         * @return the pronunciations, possibly an empty array
         */
        public String[] getPrononciationsArray(String prononciationsString) {
                if (frenchDictionary) { // in this case no decomposition
                        return new String[] { prononciationsString };
                }

                List<String> pronunciations = new ArrayList<String>();
                for (String part : prononciationsString.split("\\|")) {
                        boolean slashDelimited = part.length() >= 2
                                        && part.charAt(0) == '/'
                                        && part.charAt(part.length() - 1) == '/';
                        if (!slashDelimited) {
                                continue;
                        }
                        String pronunciation = part.substring(1, part.length() - 1).trim(); // remove the 2 /
                        while (pronunciation.length() >= 1
                                        && (pronunciation.charAt(0) == 'ˈ' || pronunciation.charAt(0) == 'ˌ')) {
                                pronunciation = pronunciation.substring(1).trim();
                        }
                        if (!pronunciation.equals("")) {
                                pronunciations.add(pronunciation);
                        }
                }
                return pronunciations.toArray(new String[0]);
        }

        /**
         * Looks for the first pronunciation template whose preceding language heading
         * matches the target language and records its pronunciations in the graph.
         * Templates belonging to other languages are skipped. Nothing happens when no
         * complete template is left.
         *
         * <p>The scan is a loop over an offset rather than a recursion on substrings, so
         * that texts containing many templates neither exhaust the stack nor get copied
         * over and over.
         *
         * @param wikiText the wiki text of one entry
         */
        public void analyseWikiText(String wikiText) {
                int tagStartLength = tagStartPronunciations.length();
                int searchFrom = 0;
                while (true) {
                        int pronunciationsStartPosition = wikiText.indexOf(tagStartPronunciations, searchFrom);
                        if (pronunciationsStartPosition == -1) {
                                return; // no pronunciation was found
                        }
                        int pronunciationsEndPosition = wikiText.indexOf(tagEndPronunciations,
                                        pronunciationsStartPosition + tagStartLength);
                        if (pronunciationsEndPosition == -1) {
                                return; // the template is not closed
                        }

                        // Only the text since the previously rejected template is inspected.
                        String stringBefore = wikiText.substring(searchFrom, pronunciationsStartPosition);
                        if (targetLanguage.equals(getReadLanguage(stringBefore).trim())) {
                                String prononciationsString = wikiText
                                                .substring(pronunciationsStartPosition + tagStartLength,
                                                                pronunciationsEndPosition)
                                                .trim();
                                addPronunciationsToGraph(prononciationsString);
                                return;
                        }

                        // the read language is not the target one, then continue to search
                        searchFrom = pronunciationsEndPosition;
                }
        }

        /**
         * Adds the first/last syllables of each pronunciation to the graph, linked by an
         * edge carrying the current title.
         */
        private void addPronunciationsToGraph(String prononciationsString) {
                for (String pronunciation : getPrononciationsArray(prononciationsString)) {
                        String[] syllables = SYLLABLE_SEPARATORS.split(pronunciation);

                        // A pronunciation made only of separators yields no syllable at all; it is
                        // handled like an out-of-range count so that syllables[0] is never read
                        // from an empty array.
                        // NOTE(review): processing stops at the first out-of-range pronunciation,
                        // so the remaining ones are ignored. Confirm this is intended rather than
                        // skipping only the offending pronunciation.
                        if (syllables.length == 0
                                        || syllables.length < syllablesMin
                                        || syllables.length > syllablesMax) {
                                return; // the syllables number is not in the accepted interval
                        }
                        if (syllables[0].length() == 0) {
                                // Diagnostic only: the entry is still added to the graph below.
                                System.out.println("prononciation première syllabe de " + this.title + " est vide !");
                        }
                        int firstSyllableHashcode = syllables[0].hashCode();
                        int lastSyllableHashcode = syllables[syllables.length - 1].hashCode();
                        graph.addVertex(firstSyllableHashcode);
                        graph.addVertex(lastSyllableHashcode);
                        graph.addEdge(firstSyllableHashcode, lastSyllableHashcode, this.title);
                }
        }

        /**
         * Stores the buffered text as the current title when a {@code title} element
         * ends, and analyses it as wiki text when a {@code text} element ends and the
         * current title is acceptable.
         */
        @Override
        public void endElement(String uri, String localName, String qName)
                        throws SAXException {
                String wikiText = content.toString();
                if ("title".equals(qName)) {
                        this.title = wikiText;
                } else if ("text".equals(qName)) {
                        if (isAcceptable(this.title)) {
                                analyseWikiText(wikiText);
                        }
                }
        }

        /** Appends the received character data to the buffer of the current element. */
        @Override
        public void characters(char[] ch, int start, int length)
                        throws SAXException {
                content.append(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length)
                        throws SAXException {
        }

        @Override
        public void processingInstruction(String target, String data)
                        throws SAXException {
        }

        @Override
        public void skippedEntity(String name) throws SAXException {
        }

}