/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.corpus.dictionary;

import com.hankcs.hanlp.corpus.dictionary.CommonDictionaryMaker;
import com.hankcs.hanlp.corpus.dictionary.DictionaryMaker;
import com.hankcs.hanlp.corpus.document.CorpusLoader;
import com.hankcs.hanlp.corpus.document.Document;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.corpus.util.CorpusUtil;
import com.hankcs.hanlp.corpus.util.Precompiler;
import com.hankcs.hanlp.utility.Predefine;
import com.hankcs.hanlp.utility.TextUtility;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeSet;

/**
 * Dictionary maker that feeds every word of a corpus into the inherited
 * {@code dictionaryMaker} and every adjacent word pair into the inherited
 * {@code nGramDictionaryMaker}.
 *
 * <p>Before counting, each sentence is passed through {@link Precompiler} and wrapped
 * with a begin sentinel and an end sentinel (see {@link #roleTag(List)}).
 */
public class NatureDictionaryMaker
extends CommonDictionaryMaker {
    /**
     * Creates a maker, passing {@code null} to the {@link CommonDictionaryMaker} constructor.
     */
    public NatureDictionaryMaker() {
        super(null);
    }

    /**
     * Adds every word to the unigram dictionary and every pair of consecutive words
     * (within one sentence) to the n-gram dictionary.
     *
     * @param sentenceList sentences to count; each inner list is one sentence
     */
    @Override
    protected void addToDictionary(List<List<IWord>> sentenceList) {
        Predefine.logger.info("\u5f00\u59cb\u5236\u4f5c\u8bcd\u5178");
        for (List<IWord> sentence : sentenceList) {
            // The pair chain restarts for every sentence: no pair spans two sentences.
            IWord previous = null;
            for (IWord current : sentence) {
                this.dictionaryMaker.add(current);
                if (previous != null) {
                    this.nGramDictionaryMaker.addPair(previous, current);
                }
                previous = current;
            }
        }
    }

    /**
     * Runs {@link Precompiler#compile} on every word and then brackets each sentence with
     * a begin sentinel (first element) and an end sentinel (last element).
     *
     * <p>The sentences are modified in place, so every inner list must be mutable.
     *
     * @param sentenceList sentences to tag; each inner list is modified in place
     */
    @Override
    protected void roleTag(List<List<IWord>> sentenceList) {
        Predefine.logger.info("\u5f00\u59cb\u6807\u6ce8");
        int processed = 0;
        for (List<IWord> sentence : sentenceList) {
            Predefine.logger.info((++processed) + " / " + sentenceList.size());
            for (IWord word : sentence) {
                // Return value is intentionally ignored, as in the original code;
                // presumably compile() rewrites the word in place -- TODO confirm.
                Precompiler.compile(word);
            }
            // Plain List operations instead of a raw (LinkedList) cast: same resulting
            // sentence, but no ClassCastException when the caller passes another List type.
            sentence.add(0, new Word("\u59cb##\u59cb", Nature.begin.toString()));
            sentence.add(new Word("\u672b##\u672b", Nature.end.toString()));
        }
    }

    /**
     * Walks the corpus under {@code inPath}, collects the words accepted by the filter
     * below into a {@link DictionaryMaker}, and saves the result as text.
     *
     * @param inPath  corpus location handed to {@link CorpusLoader#walk}
     * @param outPath target file for the dictionary; when {@code null} nothing is saved
     * @return the result of {@code saveTxtTo(outPath)}, or {@code false} when
     *         {@code outPath} is {@code null}
     */
    static boolean makeCoreDictionary(String inPath, String outPath) {
        final DictionaryMaker dictionaryMaker = new DictionaryMaker();
        CorpusLoader.walk(inPath, new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                for (List<Word> sentence : document.getSimpleSentenceList(true)) {
                    for (Word word : sentence) {
                        if (this.shouldInclude(word)) {
                            dictionaryMaker.add(word);
                        }
                    }
                }
            }

            /**
             * Decides whether a word goes into the core dictionary:
             * words labelled "m", "mq", "w" or "t" are kept only when their value is
             * entirely Chinese; words labelled "nr" are always dropped; everything else
             * is kept.
             */
            boolean shouldInclude(Word word) {
                String label = word.label;
                // NOTE(review): these look like numeral / quantifier / punctuation / time
                // tags -- confirm against the tag set in use.
                boolean chineseOnlyLabel = "m".equals(label)
                        || "mq".equals(label)
                        || "w".equals(label)
                        || "t".equals(label);
                if (chineseOnlyLabel) {
                    return TextUtility.isAllChinese(word.value);
                }
                return !"nr".equals(label);
            }
        });
        if (outPath == null) {
            return false;
        }
        return dictionaryMaker.saveTxtTo(outPath);
    }

    /**
     * Builds a dictionary from a corpus directory and saves it.
     *
     * @param args optional overrides: {@code args[0]} is the corpus directory and
     *             {@code args[1]} the output path; when omitted the original
     *             hard-coded locations are used
     */
    public static void main(String[] args) {
        String corpusPath = args.length > 0 ? args[0] : "D:\\JavaProjects\\CorpusToolBox\\data\\2014";
        String outputPath = args.length > 1 ? args[1] : "data/test/CoreNatureDictionary";
        // Must be final so the anonymous handler can capture it. A static method has no
        // enclosing instance, so "NatureDictionaryMaker.this" cannot be used here.
        final NatureDictionaryMaker dictionaryMaker = new NatureDictionaryMaker();
        CorpusLoader.walk(corpusPath, new CorpusLoader.Handler(){

            @Override
            public void handle(Document document) {
                // Each document is counted twice: once per getSimpleSentenceList flag value.
                dictionaryMaker.compute(CorpusUtil.convert2CompatibleList(document.getSimpleSentenceList(false)));
                dictionaryMaker.compute(CorpusUtil.convert2CompatibleList(document.getSimpleSentenceList(true)));
            }
        });
        dictionaryMaker.saveTxtTo(outputPath);
    }
}

