package com.zzsn.generation.segment;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.TraditionalChineseTokenizer;
import com.hankcs.hanlp.utility.SentencesUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * 分词工具类 创建人：李东亮 创建时间：2015-6-15 下午6:04:38 公司 ：郑州数能软件科技有限公司
 * 
 * @version 1.0
 * 
 */
public class SegmentWorkerUtil {
	/** SLF4J logger shared by the static helpers of this class. */
	private static final Logger Log = LoggerFactory.getLogger(SegmentWorkerUtil.class);
    
    /** Result-map key under which the segmentation methods store the list of words. */
    public static final String SEGMENT_WORD = "word";
    /** Result-map key under which the segmentation methods store the part-of-speech tags. */
    public static final String SEGMENT_NATURE = "nature";
    
    /** Result-map key for recognized person names (see {@code entityRecognizeForOLD}). */
    public static final String ENTITY_NAME = "name";
    /** Result-map key for recognized place names (see {@code entityRecognizeForOLD}). */
    public static final String ENTITY_PLACE = "place";
    /** Result-map key for recognized organizations (see {@code entityRecognizeForOLD}). */
    public static final String ENTITY_ORG = "org";
    /** Key for company entities. NOTE(review): not referenced in the part of the file reviewed here. */
    public static final String ENTITY_COMPANY = "company";

	/**
	 * 中文分词 创建人: 李东亮 创建时间: 2015-6-10 下午5:04:40
	 * 
	 * @version 1.0
	 * @param content
	 * @return
	 */
//	public static List<String> segment(String content) {
//
//		List<String> result = new ArrayList<String>();
//		TokenStream tokenStream = null;
//		try {
//			tokenStream = AnalyzerBuilder.getInstance().tokenStream("content",
//					new StringReader(content));
//		} catch (IOException e1) {
//			// TODO Auto-generated catch block
//			e1.printStackTrace();
//		}
//		tokenStream.addAttribute(CharTermAttribute.class);
//		CharTermAttribute charTermAttribute;
//		try {
//			tokenStream.reset();
//			while (tokenStream.incrementToken()) {
//				charTermAttribute = tokenStream
//						.getAttribute(CharTermAttribute.class);
//				result.add(charTermAttribute.toString());
//			}
//		} catch (IOException e) {
//			// TODO Auto-generated catch block
//			e.printStackTrace();
//		} finally {
//			try {
//				tokenStream.close();
//			} catch (IOException e) {
//				// TODO Auto-generated catch block
//				e.printStackTrace();
//			}
//		}
//		return result;
//	}

	/**
	 * 净化分词结果 创建人: 李东亮 创建时间: 2015-7-15 下午4:14:23
	 * 
	 * @version 1.0
	 * @param result
	 * @return
	 */
//	public static List<String> cleanResult(List<String> words) {
//		List<String> result = new ArrayList<String>();
//		String word;
//		for (Iterator<String> iterator = words.iterator(); iterator.hasNext();) {
//			word = iterator.next();
//			// 去除停用词
//			if (IDFHash.IsChiStopWord(word) || IDFHash.IsEngStopWord(word)) {
//				continue;
//			}
//			// ?转换英文错误词
//			result.add(word);
//		}
//		return result;
//	}

	/**
	 * Segments text with HanLP and returns the words together with their
	 * part-of-speech tags.
	 * <p>
	 * The content is split into sentences, each sentence is segmented with
	 * {@code HanLP.segment}, and terms tagged {@code Nature.w} (HanLP's
	 * punctuation tag) are left out. No stop-word filtering is applied.
	 * <p>
	 * Author: victory. Created: 2016-03-28 14:15:53.
	 *
	 * @version 1.0
	 * @param content text to segment
	 * @return map holding two parallel lists: the words under
	 *         {@link #SEGMENT_WORD} and their tags under {@link #SEGMENT_NATURE}
	 */
	public static HashMap<String, List<String>> segmentHanlp(String content) {
		List<String> words = new ArrayList<>();
		List<String> natures = new ArrayList<>();

		for (String sentence : SentencesUtil.toSentenceList(content)) {
			for (Term term : HanLP.segment(sentence)) {
				// Keep every term except punctuation; both lists stay index-aligned.
				if (!term.nature.equals(Nature.w)) {
					words.add(term.word);
					natures.add(term.nature.toString());
				}
			}
		}

		HashMap<String, List<String>> resultMap = new HashMap<>();
		resultMap.put(SegmentWorkerUtil.SEGMENT_WORD, words);
		resultMap.put(SegmentWorkerUtil.SEGMENT_NATURE, natures);
		return resultMap;
	}
	
	/**
	 * Segments Traditional Chinese text and returns the words together with
	 * their part-of-speech tags.
	 * <p>
	 * The content is split into sentences and each sentence is segmented with
	 * {@code TraditionalChineseTokenizer.segment}. Terms tagged {@code Nature.w}
	 * (HanLP's punctuation tag) are left out. No stop-word filtering is applied.
	 * <p>
	 * Author: victory. Created: 2016-03-28 14:15:53.
	 *
	 * @version 1.0
	 * @param content Traditional Chinese text to segment
	 * @return map holding two parallel lists: the words under
	 *         {@link #SEGMENT_WORD} and their tags under {@link #SEGMENT_NATURE}
	 */
	public static HashMap<String, List<String>> segmentHanlpForTw(String content) {

		HashMap<String, List<String>> resultMap = new HashMap<String, List<String>>();
		// The text is tokenized sentence by sentence below; segmenting the whole
		// content up front (as an earlier revision did) only duplicated the work.
		List<String> sentenceList = SentencesUtil.toSentenceList(content);

		List<String> wordList = new ArrayList<String>();
		List<String> natureList = new ArrayList<String>();
		for (String sentence : sentenceList) {
			List<Term> termList = TraditionalChineseTokenizer.segment(sentence);
			for (Term term : termList) {
				// Skip punctuation so that only real tokens are reported.
				if (term.nature.equals(Nature.w)) {
					continue;
				}
				wordList.add(term.word);
				natureList.add(term.nature.toString());
			}
		}
		resultMap.put(SegmentWorkerUtil.SEGMENT_WORD, wordList);
		resultMap.put(SegmentWorkerUtil.SEGMENT_NATURE, natureList);
		return resultMap;
	}
    /**
     * Segments text with HanLP, runs the result through the stop-word
     * dictionary, and returns the remaining words with their part-of-speech tags.
     * <p>
     * Each sentence is segmented with {@code HanLP.segment}, the term list is
     * passed to {@code CoreStopWordDictionary.apply}, and any remaining term
     * tagged {@code Nature.w} (HanLP's punctuation tag) is dropped.
     * <p>
     * Author: victory. Created: 2016-03-28 14:15:53.
     *
     * @version 1.0
     * @param content text to segment
     * @return map holding two parallel lists: the words under
     *         {@link #SEGMENT_WORD} and their tags under {@link #SEGMENT_NATURE}
     */
    public static HashMap<String, List<String>> segmentCleanHanlp(String content) {
        List<String> keptWords = new ArrayList<>();
        List<String> keptNatures = new ArrayList<>();

        for (String sentence : SentencesUtil.toSentenceList(content)) {
            List<Term> terms = HanLP.segment(sentence);
            // Stop-word filtering happens on the term list before it is read.
            CoreStopWordDictionary.apply(terms);
            for (Term term : terms) {
                boolean isPunctuation = term.nature.equals(Nature.w);
                if (!isPunctuation) {
                    keptWords.add(term.word);
                    keptNatures.add(term.nature.toString());
                }
            }
        }

        HashMap<String, List<String>> resultMap = new HashMap<>();
        resultMap.put(SegmentWorkerUtil.SEGMENT_WORD, keptWords);
        resultMap.put(SegmentWorkerUtil.SEGMENT_NATURE, keptNatures);
        return resultMap;
    }
    
    /**
     * Extracts person names from a previously computed segmentation result.
     * <p>
     * The map is expected to contain two parallel lists, the words under
     * {@link #SEGMENT_WORD} and the part-of-speech tags under
     * {@link #SEGMENT_NATURE}. Every word whose tag is exactly {@code "nr"}
     * is reported.
     * <p>
     * A {@code null} map, or a map missing either list, yields an empty result.
     * If the two lists differ in length, only the positions present in both
     * are examined.
     * <p>
     * Author: victory. Created: 2016-06-08 16:27:24.
     *
     * @version 1.0
     * @param wordsMap segmentation result keyed by {@link #SEGMENT_WORD} and
     *                 {@link #SEGMENT_NATURE}; may be {@code null}
     * @return the words tagged {@code "nr"}, in input order; never {@code null}
     */
    public static List<String> nameRecognize(HashMap<String, List<String>> wordsMap) {
        List<String> names = new ArrayList<String>();
        if (wordsMap == null) {
            return names;
        }

        List<String> wordList = wordsMap.get(SegmentWorkerUtil.SEGMENT_WORD);
        List<String> natureList = wordsMap.get(SegmentWorkerUtil.SEGMENT_NATURE);
        if (wordList == null || natureList == null) {
            return names;
        }

        // Walk both lists in lock-step; stopping at the shorter one avoids an
        // IndexOutOfBoundsException on mismatched input.
        Iterator<String> wordIt = wordList.iterator();
        Iterator<String> natureIt = natureList.iterator();
        while (wordIt.hasNext() && natureIt.hasNext()) {
            String word = wordIt.next();
            // Constant-first equals tolerates null tags.
            if ("nr".equals(natureIt.next())) {
                names.add(word);
            }
        }
        return names;
    }
    /**
     * Extracts person names directly from raw text.
     * <p>
     * The content is split into sentences, each sentence is segmented with
     * {@code HanLP.segment} and passed through
     * {@code CoreStopWordDictionary.apply}. Terms tagged exactly
     * {@code Nature.nr} are reported; other name-related tags are not matched.
     * <p>
     * Author: victory. Created: 2016-06-08 16:29:54.
     *
     * @version 1.0
     * @param content text to analyze
     * @return the words tagged {@code Nature.nr}, in order of appearance
     */
    public static List<String> nameRecognize(String content) {
        List<String> names = new ArrayList<>();

        for (String sentence : SentencesUtil.toSentenceList(content)) {
            List<Term> terms = HanLP.segment(sentence);
            CoreStopWordDictionary.apply(terms);
            for (Term term : terms) {
                boolean isPunctuation = term.nature.equals(Nature.w);
                if (!isPunctuation && term.nature.equals(Nature.nr)) {
                    names.add(term.word);
                }
            }
        }
        return names;
    }
    
    /**
     * Extracts key phrases from the text, which this class treats as "events".
     * <p>
     * Delegates to {@code HanLP.extractPhrase} with a size argument of 5, so
     * the result comes from HanLP's phrase extractor and is not an exhaustive
     * event list.
     * <p>
     * Author: victory. Created: 2016-06-12 17:22:37.
     *
     * @version 1.0
     * @param content text to analyze
     * @return the phrases returned by {@code HanLP.extractPhrase(content, 5)}
     */
    public static List<String> eventRecognize(String content) {
        return HanLP.extractPhrase(content, 5);
    }
    
    /**
     * Extracts time expressions from raw text.
     * <p>
     * The content is split into sentences, each sentence is segmented with
     * {@code HanLP.segment} and passed through
     * {@code CoreStopWordDictionary.apply}. Terms tagged {@code Nature.t}
     * (HanLP's time-word tag) are reported.
     * <p>
     * Author: victory. Created: 2016-06-12 17:23:00.
     *
     * @version 1.0
     * @param content text to analyze
     * @return the words tagged {@code Nature.t}, in order of appearance
     */
    public static List<String> dateRecognize(String content) {
        List<String> sentenceList = SentencesUtil.toSentenceList(content);
        List<String> dates = new ArrayList<String>();

        for (String sentence : sentenceList) {
            List<Term> termList = HanLP.segment(sentence);
            CoreStopWordDictionary.apply(termList);
            for (Term term : termList) {
                // Match the time-word tag. Matching Nature.nr here would return
                // person names, which is what nameRecognize is for.
                if (term.nature.equals(Nature.t)) {
                    dates.add(term.word);
                }
            }
        }
        return dates;
    }
    
    /**
     * Recognizes named entities (people, places, organizations) from the
     * part-of-speech tags that HanLP assigns.
     * <p>
     * Blank sentences are skipped. Each remaining sentence is segmented with
     * {@code HanLP.segment} and passed through
     * {@code CoreStopWordDictionary.apply}. Terms are then bucketed by tag:
     * <ul>
     *   <li>{@code nr, nrj, nr2, nrf} - person names</li>
     *   <li>{@code ns, nsf} - place names</li>
     *   <li>{@code nt, ntc, ntcf, nto, ntch, nth} - organizations</li>
     * </ul>
     * Terms with any other tag are ignored.
     * <p>
     * Author: victory. Created: 2016-06-08 16:30:35.
     *
     * @version 1.0
     * @param content text to analyze
     * @return map with the person names under {@link #ENTITY_NAME}, the places
     *         under {@link #ENTITY_PLACE} and the organizations under
     *         {@link #ENTITY_ORG}; each list may be empty
     */
    public static HashMap<String, List<String>> entityRecognizeForOLD(String content) {
        List<String> names = new ArrayList<>();   // person names
        List<String> places = new ArrayList<>();  // place names
        List<String> orgs = new ArrayList<>();    // organizations

        for (String sentence : SentencesUtil.toSentenceList(content)) {
            if (sentence == null || sentence.trim().isEmpty()) {
                continue;
            }

            List<Term> terms = HanLP.segment(sentence);
            CoreStopWordDictionary.apply(terms);

            for (Term term : terms) {
                if (term.nature.equals(Nature.w)) {
                    continue;
                }

                // Tags that feed the same bucket share one branch.
                switch (term.nature.toString()) {
                    case "nr":
                    case "nrj":
                    case "nr2":
                    case "nrf":
                        names.add(term.word);
                        break;
                    case "ns":
                    case "nsf":
                        places.add(term.word);
                        break;
                    case "nt":
                    case "ntc":
                    case "ntcf":
                    case "nto":
                    case "ntch":
                    case "nth":
                        orgs.add(term.word);
                        break;
                    default:
                        break;
                }
            }
        }

        HashMap<String, List<String>> resultMap = new HashMap<>();
        resultMap.put(SegmentWorkerUtil.ENTITY_NAME, names);
        resultMap.put(SegmentWorkerUtil.ENTITY_PLACE, places);
        resultMap.put(SegmentWorkerUtil.ENTITY_ORG, orgs);
        return resultMap;
    }
    /**
     * 识别命名实体
     * 创建人:  victory  
     * 创建时间:  2016-6-8 下午4:30:35 
     * @version 1.0
     * @param content
     * @return
     */
//    public static HashMap<String, List<String>> entityRecognizeNew(String content) {
//        HashMap<String, List<String>> resultMap = new HashMap<String, List<String>>();
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//	     PerceptronLexicalAnalyzer analyzer = null;
//			try {
//				analyzer = new PerceptronLexicalAnalyzer("data/model/perceptron/pku199801/cws.bin",
//				         HanLP.Config.PerceptronPOSModelPath,
//				         HanLP.Config.PerceptronNERModelPath);
//			} catch (IOException e) {
//				// TODO Auto-generated catch block
//				e.printStackTrace();
//			}
//        /**
//         * 特征实体
//         */
//        List<String> names = new ArrayList<String>();  //人名
//        List<String> places = new ArrayList<String>(); //地域
//        List<String> orgs = new ArrayList<String>();   //组织机构
//        //根据词性获取 人名、地域、组织机构等
//        for (String sentence : sentenceList) {
////            Sentence termList = analyzer.analyze(sentence); //调用hanlp算法进行分词
//            List<Term> termList = HanLP.segment(sentence); //调用hanlp算法进行分词
////            CoreStopWordDictionary.apply(termList);
////            for (IWord word : termList)
////
////            {
////
////                if (word instanceof CompoundWord)
////                {
/////*                	if (((CompoundWord) word).getLabel().equals("nt")) {
////                		System.out.println("nt(组合):"+((CompoundWord) word).getValue());
////                	}  else if (((CompoundWord) word).getLabel().equals("ntc")) {
////                		System.out.println("ntc(组合):"+((CompoundWord) word).getValue());
////                	}  else if (((CompoundWord) word).getLabel().equals("ntcf")) {
////                		System.out.println("ntcf(组合):"+((CompoundWord) word).getValue());
////                	}  else if (((CompoundWord) word).getLabel().equals("nto")) {
////                		System.out.println("nto(组合):"+((CompoundWord) word).getValue());
////                	}  else if (((CompoundWord) word).getLabel().equals("ntch")) {
////                		System.out.println("ntch(组合):"+((CompoundWord) word).getValue());
////                	}  else if (((CompoundWord) word).getLabel().equals("nth")) {
////                		System.out.println("nth(组合):"+((CompoundWord) word).getValue());
////                	}*/
////                	if (((CompoundWord) word).getLabel().equals("nr")) {
////                		System.out.println("nr(组合):"+((CompoundWord) word).getValue());
////                	}  else if (((CompoundWord) word).getLabel().equals("ns")) {
////                		System.out.println("ns(组合):"+((CompoundWord) word).getValue());
////                	}
////
////                } else {
////                	if (word.getLabel().equals("nt")) {
////                		orgs.add(word.getValue());
////                		System.out.println("nt:"+word.getValue());
////                	} else if (word.getLabel().equals("ntc")) {
////                		orgs.add(word.getValue());
////                		System.out.println("ntc:"+word.getValue());
////                	} else if (word.getLabel().equals("ntcf")) {
////                		orgs.add(word.getValue());
////                		System.out.println("ntcf:"+word.getValue());
////                	} else if (word.getLabel().equals("nto")) {
////                		orgs.add(word.getValue());
////                		System.out.println("nto:"+word.getValue());
////                	} else if (word.getLabel().equals("ntch")) {
////                		orgs.add(word.getValue());
////                		System.out.println("ntch:"+word.getValue());
////                	} else if (word.getLabel().equals("nth")) {
////                		orgs.add(word.getValue());
////                		System.out.println("nth:"+word.getValue());
////                	}
////                	if (word.getLabel().equals("nr")) {
////                		names.add(word.getValue());
//////                		System.out.println("nr:"+word.getValue());
////                	} else if (word.getLabel().equals("nrj")) {
////                		names.add(word.getValue());
////                		System.out.println("nrj:"+word.getValue());
////                	} else if (word.getLabel().equals("nr2")) {
////                		names.add(word.getValue());
////                		System.out.println("nr2:"+word.getValue());
////                	} else if (word.getLabel().equals("nrf")) {
////                		names.add(word.getValue());
////                		System.out.println("nrf:"+word.getValue());
////                	}
////                	else if (word.getLabel().equals("ns")) {
////                		places.add(word.getValue());
////                		System.out.println("ns:"+word.getValue());
////                	} else if (word.getLabel().equals("nsf")) {
////                		places.add(word.getValue());
////                		System.out.println("nsf:"+word.getValue());
////                	}
////                }
////
////            }
//            for(Term term : termList) {
//                if (term.nature.equals(Nature.w)) {
//                    continue;
//                }
//
//                switch (term.nature.toString())
//                {
//                    case "nr":
//                        names.add(term.word);
//                        break;
//                    case "nrj":
//                        names.add(term.word);
//                        break;
//                    case "nr2":
//                        names.add(term.word);
//                        break;
//                    case "nrf":
//                        names.add(term.word);
//                        break;
//                    case "ns":
//                        places.add(term.word);
//                        break;
//                    case "nsf":
//                        places.add(term.word);
//                        break;
//                    case "nt":
//                        orgs.add(term.word);
//                        break;
//                    case "ntc":
//                        orgs.add(term.word);
//                        break;
//                    case "ntcf":
//                        orgs.add(term.word);
//                        break;
//                    case "nto" :
//                        orgs.add(term.word);
//                        break;
//                    case "ntch":
//                        orgs.add(term.word);
//                        break;
//                    case "nth":
//                        orgs.add(term.word);
//                        break;
//                }
//            }
//        }
//
//        resultMap.put(SegmentUtil.ENTITY_NAME, names);
//        resultMap.put(SegmentUtil.ENTITY_PLACE, places);
//        resultMap.put(SegmentUtil.ENTITY_ORG, orgs);
//        return resultMap;
//    }
//
//    public static Map<String,Integer> getVerbWordsInfoFromContent(String content){
//    	Map<String,Integer> result = new HashMap<String,Integer>();
//
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//        PerceptronLexicalAnalyzer analyzer = null;
//
//        try {
//			analyzer = new PerceptronLexicalAnalyzer("data/model/perceptron/pku199801/cws.bin",
//			         HanLP.Config.PerceptronPOSModelPath,
//			         HanLP.Config.PerceptronNERModelPath);
//
//			for (String sentence : sentenceList) {
//	            Sentence termList = analyzer.analyze(sentence); //调用hanlp算法进行分词
//                for (IWord word : termList)
//                {
//                	String la = word.getLabel();
//                	String va = word.getValue();
//                	if(
//                		la.equals("v")
//                		/*|| la.equals("vd")
//                		|| la.equals("vf")
//                		|| la.equals("vg")
//                		|| la.equals("vi")
//                		|| la.equals("vl")
//                		|| la.equals("vn")
//                		|| la.equals("vshi")
//                		|| la.equals("vx")
//                		|| la.equals("vyou")*/
//                			){
//                		Integer ct = result.get(va);
//                		if(null == ct || ct < 0){
//                			ct = 0;
//                		}
//                		ct = ct + 1;
//                		result.put(va, ct);
//                	}
//                }
//			}
//		}catch(Exception e){
//			Log.error(e.getMessage());
//		}
//    	return result;
//    }
//
//    public static List<String> getMeetingNameFromContent(String content){
//    	List<String> result = new ArrayList<String>();
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//        PerceptronLexicalAnalyzer analyzer = null;
//		try {
//			analyzer = new PerceptronLexicalAnalyzer("data/model/perceptron/pku199801/cws.bin",
//			         HanLP.Config.PerceptronPOSModelPath,
//			         HanLP.Config.PerceptronNERModelPath);
//
//			 for (String sentence : sentenceList) {
//
//		        	if(null != sentence && sentence.trim().length() > 0){
//		        		if(sentence.contains("会议")
//		        				|| sentence.contains("论坛")
//		        				|| sentence.contains("研讨会")
//		        				|| sentence.contains("座谈会")
//		        				|| sentence.contains("年会")
//		        				|| sentence.contains("发布会")
//		        				|| sentence.contains("交流会")
//		        				|| sentence.contains("组稿会")
//		        				|| sentence.contains("报告会")
//		        				|| sentence.contains("探讨会")
//		        				|| sentence.contains("讨论会")
//		        				){
//				            Sentence termList = analyzer.analyze(sentence); //调用hanlp算法进行分词
//		        			//含有“会议”关键词
//		        			Map<Integer,IWord> map = new HashMap<Integer,IWord>();
//		        			Integer index = 0;
//		        			String mtname = "";
//		                    for (IWord word : termList)
//		                    {
//		                    	String la = word.getLabel();
//		                    	String va = word.getValue();
//		                    	if(
//		                    		va.contains("会议")
//			        				|| va.contains("论坛")
//			        				|| va.contains("研讨会")
//			        				|| va.contains("座谈会")
//			        				|| va.contains("年会")
//			        				|| va.contains("发布会")
//			        				|| va.contains("交流会")
//			        				|| va.contains("组稿会")
//			        				|| va.contains("报告会")
//			        				|| va.contains("探讨会")
//			        				|| va.contains("讨论会")
//		        				){
//		                    		mtname = va;
//		                    		break;
//		                    	}
//		                    	map.put(index, word);
//		                    	index = index + 1;
//		                    }
//		                    if(index > 0){
//		                    	String origin = mtname;
////		                    		mtname = map.get(it).getValue()+"[/"+map.get(it).getLabel()+"]";//会议；最后一个收尾名词
//		                		for(Integer i= map.size()-1 ;i>=0;i--){
//		                			String la = map.get(i).getLabel();
//		                			if(la.equals("n")
//		                					|| la.equals("nt")
//		                					|| la.equals("ntc")
//		                					|| la.equals("ntcb")
//		                					|| la.equals("ntcf")
//		                					|| la.equals("nth")
//		                					|| la.equals("nto")
//		                					|| la.equals("ni")
//		                					|| la.equals("nic")
//		                					|| la.equals("nis")
//		                					|| la.equals("nit")
//		                					|| la.equals("nl")
//		                					|| la.equals("nm")
//		                					|| la.equals("m")
//		                					|| la.equals("c")
//		                					|| la.equals("v")
//		                					|| la.equals("vn")
//		                					|| la.equals("ns")
//		                					|| la.equals("nsf")
//		                					){
////		                				String va= map.get(i).getValue()+"[/"+la+"]";
//		                				String va= map.get(i).getValue() ;
//		                				mtname = va + mtname;
//		            				}else{
//		            					break;
//		            				}
//		                		}
//		                		if(mtname.length() > origin.length()){
//		                			String[] startWords= {"主持","召开","主持召开","应邀","参加","应邀参加","出席","举行","促进"};
//		                			String[] exwords = {"出席","召开","应邀","参加","掌管","书记","举行"};
//		                			String[] eqname = {"媒体发布会","专题会议","党委会议","党组织会议","组织和会议","新闻发布会","工作会议","经验交流会","分析会议","主持座谈会","办发布会","干部座谈会","国际论坛","情况发布会","集团会议","年度工作会议","宏观论坛"};
//		                			for(String start : startWords){
//		                				if(mtname.startsWith(start)){
//		                					mtname = mtname.substring(start.length());
//		                				}
//		                			}
//		                			for(String ex : exwords){
//		                				if(mtname.contains(ex)){
//		                					mtname = mtname.replaceAll(ex, "");
//		                				}
//		                			}
//		                			for(String eq : eqname){
//		                				if(mtname.equals(eq)){
//		                					mtname = "";
//		                				}
//		                			}
//		                			if(null != mtname && mtname.trim().length() > 3){
////		                				Log.info("++++++会议名称为========="+mtname);
//		                				if(null != result && !result.contains(mtname)){
//		                					result.add(mtname);
//		                					sentence = sentence.replaceAll(mtname, "");
//		                					List<String> res2 = getMeetingNameFromContent(sentence);
//		                					if(null != res2 && res2.size() > 0){
//		                						for(String st : res2){
//		                							result.add(st);
//		                						}
//		                					}
//		                				}
//		                			}
//		                		}
//		                    }
//		        		}
//		        	}
//
//			 }
//		} catch (IOException e) {
//			// TODO Auto-generated catch block
//			e.printStackTrace();
//		}
//
//    	return result;
//    }
//
//
//
//    public static List<String> getProjectNameFromContentNew(String content){
//    	List<String> result = new ArrayList<String>();
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//        PerceptronLexicalAnalyzer analyzer = null;
//		try {
//			analyzer = new PerceptronLexicalAnalyzer("data/model/perceptron/pku199801/cws.bin",
//			         HanLP.Config.PerceptronPOSModelPath,
//			         HanLP.Config.PerceptronNERModelPath);
//
//			 for (String sentence : sentenceList) {
//
//		        	if(null != sentence && sentence.trim().length() > 0){
//		        		if(sentence.contains("项目")
//		        				|| sentence.contains("合同")
//		        				|| sentence.contains("工程")
//		        				){
//				            Sentence termList = analyzer.analyze(sentence); //调用hanlp算法进行分词
//		        			//含有“会议”关键词
//		        			Map<Integer,IWord> map = new HashMap<Integer,IWord>();
//		        			Integer index = 0;
//		        			String mtname = "";
//		                    for (IWord word : termList)
//		                    {
//		                    	String la = word.getLabel();
//		                    	String va = word.getValue();
//		                    	if(
//		                    		va.contains("项目")
//			        				|| va.contains("合同")
//			        				|| va.contains("工程")
//		        				){
//		                    		mtname = va;
//		                    		break;
//		                    	}
//		                    	map.put(index, word);
//		                    	index = index + 1;
//		                    }
//		                    if(index > 0){
//		                    	String origin = mtname;
////		                    		mtname = map.get(it).getValue()+"[/"+map.get(it).getLabel()+"]";//会议；最后一个收尾名词
//		                		for(Integer i= map.size()-1 ;i>=0;i--){
//		                			String la = map.get(i).getLabel();
//		                			if(la.equals("n")
//		                					|| la.equals("nt")
//		                					|| la.equals("ntc")
//		                					|| la.equals("ntcb")
//		                					|| la.equals("ntcf")
//		                					|| la.equals("nth")
//		                					|| la.equals("nto")
//		                					|| la.equals("ni")
//		                					|| la.equals("nic")
//		                					|| la.equals("nis")
//		                					|| la.equals("nit")
//		                					|| la.equals("nl")
//		                					|| la.equals("nm")
//		                					|| la.equals("m")
//		                					|| la.equals("c")
//		                					|| la.equals("v")
//		                					|| la.equals("vn")
//		                					|| la.equals("ns")
//		                					|| la.equals("nsf")
//		                					){
////		                				String va= map.get(i).getValue()+"[/"+la+"]";
//		                				String va= map.get(i).getValue() ;
//		                				mtname = va + mtname;
//		            				}else{
//		            					break;
//		            				}
//		                		}
//		                		if(mtname.length() > origin.length()){
//		                			String[] startWords= {"签署","签订","承包","签约","中标"};
//		                			String[] exwords = {"签署","签订","承包","签约","中标"};
//		                			String[] eqname = {"媒体发布会","专题会议","党委会议","党组织会议","组织和会议","新闻发布会","工作会议","经验交流会","分析会议","主持座谈会","办发布会","干部座谈会","国际论坛","情况发布会","集团会议","年度工作会议","宏观论坛"};
//		                			for(String start : startWords){
//		                				if(mtname.startsWith(start)){
//		                					mtname = mtname.substring(start.length());
//		                				}
//		                			}
//		                			for(String ex : exwords){
//		                				if(mtname.contains(ex)){
//		                					mtname = mtname.replaceAll(ex, "");
//		                				}
//		                			}
//		                			for(String eq : eqname){
//		                				if(mtname.equals(eq)){
//		                					mtname = "";
//		                				}
//		                			}
//		                			if(null != mtname && mtname.trim().length() > 3){
////		                				Log.info("++++++会议名称为========="+mtname);
//		                				if(null != result && !result.contains(mtname)){
//		                					result.add(mtname);
//		                					sentence = sentence.replaceAll(mtname, "");
//		                					List<String> res2 = getMeetingNameFromContent(sentence);
//		                					if(null != res2 && res2.size() > 0){
//		                						for(String st : res2){
//		                							result.add(st);
//		                						}
//		                					}
//		                				}
//		                			}
//		                		}
//		                    }
//		        		}
//		        	}
//
//			 }
//		} catch (IOException e) {
//			// TODO Auto-generated catch block
//			e.printStackTrace();
//		}
//
//    	return result;
//    }

    
    
    /*
     * 获取信息中项目名称
     */
    /**
     * Extracts a project name from free text.
     *
     * <p>ASCII spaces are removed and the text is split into sentences with
     * {@code SentencesUtil.toSentenceList}. Sentences are scanned in order; a sentence is
     * used when it mentions 项目, 合同 or 工程 and matches {@code PROJECT_SIGNING_PATTERN}.
     * The text after the first applicable start word is taken as the name. Scanning stops
     * at the first name whose trimmed length exceeds three characters.
     *
     * @param content text to analyse; {@code null} yields an empty string instead of a
     *                {@code NullPointerException}
     * @return the extracted name; the whole matching sentence when no start word applies
     *         or the extracted part is shorter than six characters; {@code ""} when no
     *         sentence qualifies
     */
    public static String getProjectNameFromContent(String content){
        if (content == null) {
            return "";
        }
        List<String> sentenceList = SentencesUtil.toSentenceList(content.replace(" ", ""));
        String mtname = "";
        for (String sentence : sentenceList) {
            if (sentence == null || sentence.trim().isEmpty()) {
                continue;
            }
            // NOTE(review): the pattern also has 协议/授标 alternatives, but a sentence that
            // lacks all three keywords below never reaches it. Kept as in the original.
            boolean mentionsKeyword = sentence.contains("项目")
                    || sentence.contains("合同")
                    || sentence.contains("工程");
            if (!mentionsKeyword || !PROJECT_SIGNING_PATTERN.matcher(sentence).find()) {
                // Only a matching sentence may produce or alter the result; earlier code
                // kept re-processing the value left over from a previous sentence here.
                continue;
            }
            mtname = extractProjectNameAfterStartWord(sentence);
            Log.info("++++++项目名称为========={}", mtname);
            if (mtname.trim().length() > 3) {
                break;
            }
        }
        return mtname;
    }

    /** Verbs that introduce a project name, tried in this order. */
    private static final String[] PROJECT_START_WORDS = {"签署", "签订", "承包", "签约", "中标", "获得"};

    /** Signing/award phrasing that marks a sentence as naming a project; compiled once. */
    private static final Pattern PROJECT_SIGNING_PATTERN = Pattern
            .compile("签署[\\s\\S]+项目[\\s\\S]|签署[\\s\\S]+合同[\\s\\S]|签署[\\s\\S]+协议[\\s\\S]|签署[\\s\\S]+工程[\\s\\S]" +
                    "|签订[\\s\\S]+项目[\\s\\S]|签订[\\s\\S]+合同[\\s\\S]|签订[\\s\\S]+协议[\\s\\S]|签订[\\s\\S]+工程[\\s\\S]" +
                    "|承包[\\s\\S]+项目[\\s\\S]|承包[\\s\\S]+合同[\\s\\S]|承包[\\s\\S]+协议[\\s\\S]|承包[\\s\\S]+工程[\\s\\S]" +
                    "|签约[\\s\\S]+项目[\\s\\S]|签约[\\s\\S]+合同[\\s\\S]|签约[\\s\\S]+协议[\\s\\S]|签约[\\s\\S]+工程[\\s\\S]" +
                    "|中标[\\s\\S]+项目[\\s\\S]|中标[\\s\\S]+合同[\\s\\S]|中标[\\s\\S]+协议[\\s\\S]|中标[\\s\\S]+工程[\\s\\S]" +
                    "|获得[\\s\\S]+授标[\\s\\S]");

    /**
     * Returns the part of {@code sentence} that follows the first applicable start word,
     * or the whole sentence when no start word applies or the remainder is shorter than
     * six characters.
     */
    private static String extractProjectNameAfterStartWord(String sentence) {
        for (String start : PROJECT_START_WORDS) {
            int index = sentence.indexOf(start);
            int begin = index + start.length();
            // Same guard as before: at least two characters must follow the start word.
            if (index < 0 || begin >= sentence.length() - 1) {
                continue;
            }
            // Drop the final character only when it is not a letter or digit. The original
            // always dropped it, which cut a real character off unterminated sentences.
            // NOTE(review): presumably the splitter leaves the closing punctuation on the
            // sentence; confirm against HanLP's SentencesUtil.
            int end = sentence.length();
            if (!Character.isLetterOrDigit(sentence.charAt(end - 1))) {
                end--;
            }
            String name = sentence.substring(begin, end);
            if (name.startsWith("了") || name.startsWith("的")) {
                name = name.substring(1);
            }
            return name.length() < 6 ? sentence : name;
        }
        return sentence;
    }
    
    /*
     * 获取信息中合同金额
     */
//    public static String getProjectMoneyFromContent(String content){
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//        String moneystrResult = "";
//		String moneystr = "";
//		String danwwi = "";
//		String moneytype = "";
//		String[] moneytypeArr = {"人民币","美元","欧元"};
//		String[] moneyMatchArr = {"亿人民币","万人民币","美元","欧元","亿元","万元",
//				                  "多亿人民币","多万人民币","多万美元","多亿美元","多万欧元","多亿欧元","多亿元","多万元"+
//				                  "余亿人民币","余万人民币","余万美元","余亿美元","余万欧元","余亿欧元","余亿元","余万元"};
//		String[] moneybkArr = {"總金額","项目金额","总投资","合同总价","合同金额","合同约","工程总造价","金额","项目整体投资","项目总投资","总投资","项目合同额","项目总金额","合同额"};
//			 for (String sentence : sentenceList) {
//
//
//
//
//		        	if(null != sentence && sentence.trim().length() > 0){
//		        		Set<String> strSet = new HashSet<String>();
//		        		AcApply ac = new AcApply();
//		        		strSet = ac.findWordsInArray(moneybkArr, sentence);
//
//		        		if(strSet.size()>0 && (sentence.contains("人民币")
//		        				|| sentence.contains("美元")
//		        				|| sentence.contains("欧元")
//		        				|| sentence.contains("亿元")
//		        				|| sentence.contains("万元"))
//		        				){
//
//		        			List<Term> termList = HanLP.segment(sentence);
//
//							for (Term term : termList) {
//								if ("m".equals(term.nature.toString())) {
//									if (term.word.contains("诸多")) {
//										continue;
//									} else {
//										if (moneystrResult.trim().length()==0) {
//											moneystr = term.word;
//
//											for (String type : moneytypeArr) {
//												if (sentence.contains(type)) {
//													moneytype = type;
//													break;
//												}
//											}
//
//											for (String mon : moneyMatchArr) {
//												if (sentence.contains(moneystr+mon)) {
//													if ("亿元".equals(mon)||"万元".equals(mon)) {
//														moneystrResult = moneystr+mon+moneytype;
//													} else {
//														moneystrResult = moneystr+mon;
//													}
//													break;
//												}
//											}
//										}
//
//
//									}
//
//								}
//							}
////		        if (moneystr.trim().length()>0 && moneytype.trim().length()>0) {
////		        	if ("人民币".endsWith(moneytype)) {
////		        		moneystrResult = moneystr+danwwi+moneytype;
////		        	} else {
////		        		if (danwwi.trim().length()>0) {
////		        			moneystrResult = moneystr+danwwi;
////		        		} else {
////		        			moneystrResult = moneystr+moneytype;
////		        		}
////
////		        	}
////
////		        }
////							moneystrResult = moneystr;
//		        		}
//		        	}
//
//			 }
//
//    	return moneystrResult;
//    }
//
    
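    /*
     * Computes a match score for the content from keyword, date and organization hits
     * (the implementation below is currently commented out).
     */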
//    public static Integer getInfoFromContent(String title,String content,Map<String,String> map,String[] mainwords,Map<String,Integer> mapScore,String[] days,String[] orgArr){
//    	Integer result = 0;
//    	boolean flag = false;
//    	boolean kvalue = false;
//
///*    	String aa = "国务院国资委4月20日发布的数据显示，中央企业一季度实现营业收入6万亿元，同比下降11.8%，超过8成企业营业收入下滑；实现净利润1304亿元，同比下降58.8%。";
//    	content=aa;*/
//    	AcApply ac = new AcApply();
//    	Set<String> strSet = new HashSet<String>();
//
//    	//关键词匹配
//    	strSet = ac.findWordsInArray(mainwords, content);
//    	for (String str : strSet) {
//    		Integer score =mapScore.get(str);
//    		String value = map.get(str);
//    		if ("1".equals(value)) {
//    			flag = true;
//    			result += score;
////    			break;
//    		} else {
//    			String[] valueArr = value.split(",");
//    	    	Set<String> strSet1 = new HashSet<String>();
//    	    	strSet1 = ac.findWordsInArray(valueArr, content);
//    	    	if(strSet1.size()>0) {
//    	    		flag = true;
//    	    		result +=score;
////        			break;
//    	    	}
//    		}
//    	}
//
//    	//如果段落中有日期且在时间范围内，则加分
//    	strSet = ac.findWordsInArray(days, content);
//    	if (null!=strSet && strSet.size()>0) {
//    		result += 30;
//    	}
//    	strSet = ac.findWordsInArray(orgArr, title);
//    	if (null!=strSet && strSet.size()>0) {
//    		result += 80;
//    	}
//
//    	if (flag) {
//    	       List<String> sentenceList = SentencesUtil.toSentenceList(content);
//    			List<String> lists = new ArrayList<String>();
//    				 for (String sentence : sentenceList) {
//    			        	if(null != sentence && sentence.trim().length() > 0){
//
//			        			List<Term> termList = HanLP.segment(sentence);
//
//								for (Term term : termList) {
//									if ("m".equals(term.nature.toString())) {
//										if (term.word.contains("万") ||
//												term.word.contains("千")||
//												term.word.contains("一")||
//												term.word.contains("二")||
//												term.word.contains("两")||
//												term.word.contains("三")||
//												term.word.contains("四")||
//												term.word.contains("五")||
//												term.word.contains("六")||
//												term.word.contains("七")||
//												term.word.contains("八")||
//												term.word.contains("九")||
//												term.word.contains("百")||
//												term.word.contains("诸多")) {
//											continue;
//										}
//										String matchword1 = term.word+"年";
//										String matchword2 = term.word+"月";
//										String matchword3 = term.word+"日";
//										if (sentence.contains(matchword1) || sentence.contains(matchword2)|| sentence.contains(matchword3)) {
//											continue;
//										}
//										return result;
//									}
//								}
//    			        	}
//
//    				 }
//    	}
//
//    	return 0;
//
//    }
//
//
//	public static List<Sentence> extractAnalySentenceFromContent(String content){
//		List<Sentence> result = new ArrayList<Sentence>();
//
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//        PerceptronLexicalAnalyzer analyzer = null;
//
//        try {
//			analyzer = new PerceptronLexicalAnalyzer("data/model/perceptron/pku199801/cws.bin",
//			         HanLP.Config.PerceptronPOSModelPath,
//			         HanLP.Config.PerceptronNERModelPath);
//
//			for (String sentence : sentenceList) {
//	            Sentence termList = analyzer.analyze(sentence); //调用hanlp算法进行分词
//	            result.add(termList);
//			}
//		}catch(Exception e){
//			Log.error(e.getMessage());
//		}
//
//		return result;
//	}
//	public static Map<String,Map<String,Long>> extractVKeywordsSentenceFromContent(String content){
//		Map<String,Map<String,Long>> result = new HashMap<String,Map<String,Long>>();
//        List<String> sentenceList = SentencesUtil.toSentenceList(content);
//        PerceptronLexicalAnalyzer analyzer = null;
//
//        try {
//			analyzer = new PerceptronLexicalAnalyzer("data/model/perceptron/pku199801/cws.bin",
//			         HanLP.Config.PerceptronPOSModelPath,
//			         HanLP.Config.PerceptronNERModelPath);
//
//			for (String sentence : sentenceList) {
//	            Sentence termList = analyzer.analyze(sentence); //调用hanlp算法进行分词
//	            for(IWord word : termList){
//	            	String la = word.getLabel();
//	            	String va = word.getValue();
//	            	if(la.startsWith("v")){
//	            		Map<String,Long> wordsC = result.get(la);
//	            		if(null == wordsC || wordsC.size() == 0){
//	            			wordsC = new HashMap<String,Long>();
//	            		}
//	            		Long cc = wordsC.get(va);
//	            		if(null == cc || cc == 0L){
//	            			cc = 0L;
//	            		}
//	            		cc++;
//	            		wordsC.put(va, cc);
//	            		result.put(la, wordsC);
//	            	}
//	            }
//			}
//		}catch(Exception e){
//			Log.error(e.getMessage());
//		}
//
//		return result;
//	}
//
//
//
//	public static void main(String[] args) throws IOException {
////		String content="会议强调，要进一步推进国家药品集中采购试点、短缺药监测应对和医疗救助工作。一要完善集中采购制度，加强中标药品质量监管和供应保障，实现降价惠民。认真总结试点经验，及时全面推开。二要保障基本药物、急（抢）救等药品供应。完善监测预警机制，对临床必需、易短缺、替代性差等药品，采取强化储备、统一采购或定点生产等方式保供，防止急需、常用药品不合理涨价。三要在做好基本医疗保障的同时，进一步完善医疗救助制度，落实落细参保缴费资助、直接救助等措施，切实提高困难群众获得感。";
////
////	    HashMap<String, List<String>> resultMap = new HashMap<String, List<String>>();
////	    List<String> sentenceList = SentencesUtil.toSentenceList(content);
////
////	    //运用分词，来切分 分词结果 和词性
////        List<String> wordList = new ArrayList<String>();
////        List<String> natureList = new ArrayList<String>();
////	    for (String sentence : sentenceList) {
////	        List<Term> termList = HanLP.segment(sentence); //调用hanlp算法进行分词
////	        //CoreStopWordDictionary.apply(termList);
////	        for(Term term : termList) {
////	            if (term.nature.equals(Nature.w)) {
////	                continue;
////	            }
////	            wordList.add(term.word);
////	            System.out.print(term.word+"||");
////	            natureList.add(term.nature.toString());
////	        }
////	    }
////	    resultMap.put(SegmentWorkerUtil.SEGMENT_WORD, wordList);
////        resultMap.put(SegmentWorkerUtil.SEGMENT_NATURE, natureList);
//		String str = "美国国会预算办公室(CBO)下调今年GDP预期至萎缩5.9%，预计2020年上半年美国经济增长12.4%；预计美国失业率在第三季度将升至峰值超14%，2020年~2030年，年均失业率将达6.1%。美联储称，最近一周其所持总体资产规模降至7.00904万亿美元，所持美国国债达到4.213万亿美元。";
//
////		String aa = getInfoFromContent(str);
//
//		String mt = getProjectNameFromContent(str);
//        List<String> list = getProjectNameFromContentNew(str);
//        System.out.println(list.toString());
//        String money = getProjectMoneyFromContent("从各大业务板块来看，工程承包板块新签合同额2515.9亿元，同比增长6.30%。其中，铁路工程、公路工程、房建工程、机场码头及航道工程新签合同额分别同比增长27.32%、10.25%、36.47%、1678.49%，整体趋势向好。");
//        HashMap<String, List<String>> map =entityRecognizeForOLD(str);
//
//        System.out.println(mt);
//        System.out.println(money);
////        List<Term> termList = HanLP.segment("国务院国资委党委书记郝鹏调研航空工业导弹院"); //调用hanlp算法进行分词
////        //CoreStopWordDictionary.apply(termList);
////        for(Term term : termList) {
////            if (term.nature.equals(Nature.w)) {
////                continue;
////            }
////            System.out.print(term.word+term.nature.toString()+",");
////        }
////        System.out.println();
//
////		List<WordInfo> ad =	 HanLP.extractWords(xx, 5);
////		List<String> ad1 =	 HanLP.extractKeyword(xx, 5);
////		List<String> ad2 =	HanLP.extractPhrase(xx, 5);
////		NewWordDiscover discover = new NewWordDiscover(4, 0.0f, 0.5f, 200f, false);
//////		NewWordDiscover discover = new NewWordDiscover();
////		List<WordInfo> ad12 = discover.discover(xx, 5);
////	      String content="My mother always says that I am a naughty girl. When I go out with her, I always make some mistakes. Such as annoying others when they are talking, which is very impolite. My mother doesn’t punish me, instead, she tries to disturb me when I want to talk to her. It makes me realize how rude I am. My mother is so tolerant that she is a good mother. ";
//////	      SegmentWorkerUtil es=new SegmentWorkerUtil();
//////	        String result=es.englishSegment(content);
//////	        System.out.println(result);
////
//////	    String content = "德国马勒贝洱集团";
////	    content = content.toLowerCase();
////	//    List<String> sentenceList = SentencesUtil.toSentenceList(content);
//////	    List<Term> ad = HanLP.segment(content);
////	    System.out.println(ad);
//
//
////		List<Term> termList = HanLP.segment("毕业生");
////		System.out.println(termList.size());
////
////
////NewWordDiscover discover = new NewWordDiscover(5, 0.0f, 0.5f, 100f, true);
////
//////读取文件夹下所以文档并合并成一篇文档用于新词发现
////
////StringBuilder sbText = new StringBuilder();
////
////    File[] txtFiles = new File("E:\\dataword").listFiles();
////
////    int i = 0;
////
////    for (File file : txtFiles)
////
////    {
////
////        System.out.printf("[%d / %d] 读取 %s 中...\n", ++i, txtFiles.length, file.getName());
////
////        sbText.append(IOUtil.readTxt(file.getPath()));
////
////        if (i == 100) break;
////
////    }
////
////    System.out.printf("对长度为%d的语料进行分析中...\n", sbText.length());
////
////    List<WordInfo> wordInfoList = discover.discover(xx, 50);
////    List<String> ss = HanLP.extractPhrase(xx, 15);
////
////
////    //打印出发现的新词
////    System.out.println(ss);
////    for (WordInfo wordInfo : wordInfoList) {
////
////     System.out.println(wordInfo.text);
////}
//	}
}
