package com.zzsn.event.util;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.SentencesUtil;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;


/**
 * Static helpers that extract named entities (place names and person names) from free text.
 *
 * <p>Text is split into sentences with {@link SentencesUtil#toSentenceList(String)}, each
 * sentence is segmented with {@link HanLP#segment(String)}, and a term is kept when the string
 * form of its part-of-speech tag ({@code term.nature}) is one of the tags of interest.
 */
public class CompanyUtil {

    /** Part-of-speech tags whose terms are collected as person names. */
    private static final Set<String> PERSON_NATURES = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList("nr", "nrj", "nr2", "nrf")));

    /** Part-of-speech tags whose terms are collected as place names. */
    private static final Set<String> PLACE_NATURES = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList("ns", "nsf")));

    /**
     * Collects place names followed by person names found in {@code content}.
     *
     * <p>Each of the two groups is de-duplicated on its own; the groups are simply concatenated,
     * so a string that was tagged both as a place and as a person appears twice.
     *
     * @param content text to analyse; {@code null} or blank yields an empty list
     * @return a new mutable list: place names first, then person names
     */
    public static List<String> entityAll(String content) {
        List<String> result = new ArrayList<>();
        // Place names first, then person names: callers may rely on this order.
        result.addAll(entityAdrssRecognize(content));
        result.addAll(entityPersonRecognizeOld(content));
        return result;
    }

    /**
     * Extracts person names: terms tagged {@code nr}, {@code nrj}, {@code nr2} or {@code nrf}.
     *
     * @author victory (original version, 2016-06-08)
     * @param content text to analyse; {@code null} or blank yields an empty list
     * @return a new mutable list of distinct, trimmed names in order of first occurrence
     */
    public static List<String> entityPersonRecognizeOld(String content) {
        return recognizeByNature(content, PERSON_NATURES);
    }

    /**
     * Extracts place names: terms tagged {@code ns} or {@code nsf}.
     *
     * @author victory (original version, 2016-06-08)
     * @param content text to analyse; {@code null} or blank yields an empty list
     * @return a new mutable list of distinct, trimmed names in order of first occurrence
     */
    public static List<String> entityAdrssRecognize(String content) {
        return recognizeByNature(content, PLACE_NATURES);
    }

    /**
     * Segments {@code content} and keeps the words whose part-of-speech tag is in
     * {@code wantedNatures}.
     *
     * @param content       text to analyse; may be {@code null}
     * @param wantedNatures string forms of the part-of-speech tags to keep
     * @return a new mutable list of distinct, trimmed words in order of first occurrence
     */
    private static List<String> recognizeByNature(String content, Set<String> wantedNatures) {
        // Previously a null content skipped the replace step but was still handed to the
        // sentence splitter, which fails on null. Nothing can be extracted from blank text.
        if (content == null || content.trim().isEmpty()) {
            return new ArrayList<>();
        }

        // The vertical-bar-like CJK character is replaced by a space before sentence splitting,
        // as the original code did. A literal char replace avoids compiling a regex per call.
        String normalized = content.replace('丨', ' ');

        // LinkedHashSet de-duplicates in O(1) per word while keeping first-occurrence order.
        Set<String> unique = new LinkedHashSet<>();
        for (String sentence : SentencesUtil.toSentenceList(normalized)) {
            for (Term term : HanLP.segment(sentence)) {
                if (term.nature == null || term.word == null) {
                    continue;
                }
                // Punctuation (nature "w") is never in a wanted set, so it is skipped here too.
                if (!wantedNatures.contains(term.nature.toString())) {
                    continue;
                }
                // Trim before the emptiness check so whitespace-only terms are not kept as "".
                String word = term.word.trim();
                if (!word.isEmpty()) {
                    unique.add(word);
                }
            }
        }
        return new ArrayList<>(unique);
    }
}
