package com.zzsn.event.util;

import com.baomidou.mybatisplus.core.toolkit.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 *
 * Utility: a collection of miscellaneous helper functions.
 * ver:2014.04.03
 * ver:2014.03.26
 * ver: 2014.03.14
 *  ver: 2014.3.11
 * ver:2014.03.09
 * ver:2014.03.05
 * ver: 2014.03.04
 * ver: 2014.02.19
 *  ver: 2013.11.19
 * ver: 2013.10.19
 * ver: 2013.09.24
 * ver: 2013.09.20
 *
 */
@SuppressWarnings("deprecation")
public class Utility {
    // Timer control flag.
    public static int flg = 0;
    // Task execution status flag.
    public static int status_flg = 0;
    // Matches one character in the range U+4E00..U+9FA5 (CJK unified ideographs).
    static String regEx = "[\\u4e00-\\u9fa5]";
    static Pattern patChi = Pattern.compile(regEx);
    // Matches one ASCII letter or one character in U+4E00..U+9FA5.
    static String regExAll = "[a-zA-Z\\u4e00-\\u9fa5]";
    static Pattern patWord = Pattern.compile(regExAll);
    // Negation of regExAll: matches any character that is neither an ASCII letter nor in U+4E00..U+9FA5.
    static String regUnExAll = "[^a-zA-Z\\u4e00-\\u9fa5]";
    static Pattern patUnWord = Pattern.compile(regUnExAll);
    // Matches one ASCII digit, ASCII letter, or character in U+4E00..U+9FA5.
    public static Pattern patWordAndNum = Pattern.compile("[0-9a-zA-Z\\u4e00-\\u9fa5]");
    // Word -> stem lookup table; stays null until stemming() loads it from data/model_eng.txt.
    static HashMap<String, String> stemMap = null;

    // Matches a decimal numeric character reference with 4-5 digits (e.g. "&#8226;"); group 1 is the digits.
    static String regHTMLNumcode = "&#(\\d{4,5});";
    static Pattern patHTMLNumCode = Pattern.compile(regHTMLNumcode);

    // ---- Pre-compiled, case-insensitive patterns used by the HTML stripping helpers below. ----
    // Naming convention visible in this block: "xP" matches an opening tag, "xRP" the closing tag,
    // and "xRemoveP" an element together with its content.

    // Hidden <div> elements (inline "display:none") including their content, e.g.:
    //<div id="ctl00_PlaceHolderMain_ctl01_ctl05_label" style="display:none">Page Content</div>
    static Pattern divNoneP = Pattern.compile("(?s)<div[^>]*display:none[^>]*>.*?</div>", Pattern.CASE_INSENSITIVE);
    // Only the attribute-less "<div>" form is matched here.
    static Pattern divP = Pattern.compile("<div>", Pattern.CASE_INSENSITIVE);
    static Pattern divRP = Pattern.compile("</div>", Pattern.CASE_INSENSITIVE);
    // The three spellings of a line break: "<br />", "<br/>" and "<br>".
    static Pattern brP = Pattern.compile("<br />", Pattern.CASE_INSENSITIVE);
    static Pattern brP2 = Pattern.compile("<br/>", Pattern.CASE_INSENSITIVE);
    static Pattern br2P = Pattern.compile("<br>", Pattern.CASE_INSENSITIVE);
    // The non-breaking-space entity.
    static Pattern spaceP = Pattern.compile("&nbsp;", Pattern.CASE_INSENSITIVE);
    static Pattern strongP = Pattern.compile("<strong>", Pattern.CASE_INSENSITIVE);
    static Pattern strongRP = Pattern.compile("</strong>", Pattern.CASE_INSENSITIVE);
    // Only the attribute-less "<p>" form is matched here.
    static Pattern pP = Pattern.compile("<p>", Pattern.CASE_INSENSITIVE);
    static Pattern pRP = Pattern.compile("</p>", Pattern.CASE_INSENSITIVE);

    static Pattern centerP = Pattern.compile("<center[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern centerRP = Pattern.compile("</center>", Pattern.CASE_INSENSITIVE);

    // Any opening tag; group 1 captures the tag name so that attributes can be dropped.
    static Pattern removeAttrP = Pattern.compile("<([a-zA-Z0-9]+)[^>]*>", Pattern.CASE_INSENSITIVE);
    // Comment pairs of the shape "<!--...> ... <!...-->"
    // NOTE(review): this looks like IE conditional comments (<!--[if IE]> ... <![endif]-->) - confirm.
    static Pattern commentP = Pattern.compile("(?s)<!--[^>]*>.*?<![^>]*-->", Pattern.CASE_INSENSITIVE);
    // Form-related tags and embedded/auxiliary elements.
    static Pattern inputP = Pattern.compile("<input[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern formP = Pattern.compile("<form[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern formRP = Pattern.compile("</form>", Pattern.CASE_INSENSITIVE);
    static Pattern buttonP = Pattern.compile("(?s)<button[^>]*>.*?</button>", Pattern.CASE_INSENSITIVE);
    static Pattern iframeP = Pattern.compile("(?s)<iframe[^>]*>.*?</iframe>", Pattern.CASE_INSENSITIVE);
    static Pattern noscriptP = Pattern.compile("(?s)<noscript>.*?</noscript>", Pattern.CASE_INSENSITIVE);
    static Pattern objectP = Pattern.compile("(?s)<object[^>]*>.*?</object>", Pattern.CASE_INSENSITIVE);
    static Pattern linkP = Pattern.compile("(?s)<link[^>]*>", Pattern.CASE_INSENSITIVE);

    // Image tags. imgReplaceP / imgRevReplaceP capture the attribute text in group 1;
    // NOTE(review): "<_img" appears to be a temporary placeholder spelling of "<img" - confirm against callers.
    static Pattern imgReplaceP = Pattern.compile("<img([^>]*)>", Pattern.CASE_INSENSITIVE);
    static Pattern imgRevReplaceP = Pattern.compile("<_img([^>]*)>", Pattern.CASE_INSENSITIVE);
    static Pattern imgP = Pattern.compile("<img[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern imgRP = Pattern.compile("</img>", Pattern.CASE_INSENSITIVE);
    // Whole <a>...</a> and <legend>...</legend> elements, content included.
    public static Pattern aRemoveP = Pattern.compile("(?s)<a[^>]*>.*?</a>", Pattern.CASE_INSENSITIVE);
    static Pattern legendRemoveP = Pattern.compile("(?s)<legend[^>]*>.*?</legend>", Pattern.CASE_INSENSITIVE);

    // Individual opening/closing tags (content is left in place when these are removed).
    static Pattern aP = Pattern.compile("<a[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern aRP = Pattern.compile("</a>", Pattern.CASE_INSENSITIVE);
    static Pattern fontP = Pattern.compile("<font[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern fontRP = Pattern.compile("</font>", Pattern.CASE_INSENSITIVE);
    // Headings <h0>..<h9> (any single digit).
    static Pattern hP = Pattern.compile("<h\\d[^>]*>", Pattern.CASE_INSENSITIVE);
    static Pattern hRP = Pattern.compile("</h\\d>", Pattern.CASE_INSENSITIVE);
    static Pattern ulRP = Pattern.compile("</ul>", Pattern.CASE_INSENSITIVE);
    static Pattern liRP = Pattern.compile("</li>", Pattern.CASE_INSENSITIVE);
    static Pattern trRP = Pattern.compile("</tr>", Pattern.CASE_INSENSITIVE);
    static Pattern tdRP = Pattern.compile("</td>", Pattern.CASE_INSENSITIVE);

    // Whole form-control elements, content included.
    static Pattern textareaRemoveP = Pattern.compile("(?s)<textarea[^>]*>.*?</textarea>", Pattern.CASE_INSENSITIVE);
    static Pattern selectRemoveP = Pattern.compile("(?s)<select[^>]*>.*?</select>", Pattern.CASE_INSENSITIVE);
    static Pattern optionRemoveP = Pattern.compile("(?s)<option[^>]*>.*?</option>", Pattern.CASE_INSENSITIVE);
    static Pattern labelRemoveP = Pattern.compile("(?s)<label[^>]*>.*?</label>", Pattern.CASE_INSENSITIVE);


    // ---- Date recognition patterns; the numeric suffix pairs each pattern with the formatterN fields below. ----
    // digits-digits-digits, e.g. "2014-03-26" or "14-3-26".
    private static Pattern patDate0 = Pattern.compile("\\d+-\\d{1,2}-\\d+");
    // digits, then one of '-', whitespace, '/', 年, 月, 日, then "d{1,2}-digits".
    private static Pattern patDate1 = Pattern.compile("\\d+[-\\s/年月日]\\d{1,2}-\\d+", Pattern.CASE_INSENSITIVE);
    // "day Month year" with a capitalised word in the middle, e.g. "26 March 2014".
    private static Pattern patDate2 = Pattern.compile("\\d+\\s+[A-Z][a-z]+\\s+\\d+");
    // "Month day, year" where the month may be abbreviated with a dot, e.g. "Mar. 26, 2014".
    private static Pattern patDate3 = Pattern.compile("[A-Z][a-z\\.]+\\s+\\d{1,2},\\s+\\d+");
    // Chinese full date: digits 年 digits 月 digits 日.
    private static Pattern patDate4 = Pattern.compile("\\d+年\\d+月\\d+日");
    // Slash-separated date, e.g. "2014/03/26" or "26/03/2014".
    private static Pattern patDate5 = Pattern.compile("\\d+/\\d{1,2}/\\d+");
    // Dot-separated date, e.g. "2014.03.26".
    private static Pattern patDate6 = Pattern.compile("\\d+\\.\\d+\\.\\d+");
    // Month-day without a year, e.g. "03-26".
    private static Pattern patDate7 = Pattern.compile("\\d{1,2}-\\d{1,2}");
    // Chinese month-day without a year: digits 月 digits 日.
    private static Pattern patDate8 = Pattern.compile("\\d+月\\d+日");

    // ---- Formatters for the shapes above. ----
    // NOTE(review): SimpleDateFormat is documented by the JDK as not synchronized; these shared static
    // instances are only safe if all callers run on one thread or synchronize externally - confirm usage.
    private static SimpleDateFormat formatter0 = new SimpleDateFormat("yyyy-MM-dd");
    private static SimpleDateFormat formatter0_1 = new SimpleDateFormat("yy-MM-dd");
    private static SimpleDateFormat formatter2 = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH);
    private static SimpleDateFormat formatter3_1 = new SimpleDateFormat("MMM dd, yyyy", Locale.ENGLISH);
    private static SimpleDateFormat formatter3_2 = new SimpleDateFormat("MMM. dd, yyyy", Locale.ENGLISH);
    // The format string stops before the trailing 日 character.
    private static SimpleDateFormat formatter4 = new SimpleDateFormat("yyyy年MM月dd");
    private static SimpleDateFormat formatter5_1 = new SimpleDateFormat("yyyy/MM/dd");
    private static SimpleDateFormat formatter5_2 = new SimpleDateFormat("dd/MM/yyyy");
    private static SimpleDateFormat formatter5_4 = new SimpleDateFormat("yy/MM/dd");
    private static SimpleDateFormat formatter5_3 = new SimpleDateFormat("dd/MM/yy");
    private static SimpleDateFormat formatter6 = new SimpleDateFormat("yyyy.MM.dd");
    private static SimpleDateFormat formatter7 = new SimpleDateFormat("MM-dd");
    // The format string stops before the trailing 日 character.
    private static SimpleDateFormat formatter8 = new SimpleDateFormat("MM月dd");

    // Starts out null. NOTE(review): where it is assigned and read is not visible in this part of the file.
    private static Date thresholdDate = null;

    /** Matches a complete {@code <meta ...>} tag, in any letter case. */
    private static final Pattern META_TAG_PATTERN =
            Pattern.compile("<meta[^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Matches a {@code charset = value} assignment inside a meta tag. The value may be unquoted,
     * double-quoted or single-quoted; group 1 is the bare value (possibly empty).
     */
    private static final Pattern META_CHARSET_PATTERN =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s\"';>|]*)", Pattern.CASE_INSENSITIVE);

    /**
     * Determines the character encoding declared by an HTML document.
     *
     * <p>Scans the {@code <meta>} tags in order and returns the value of the first
     * {@code charset=...} assignment found, e.g. {@code <meta charset="utf-8">} or
     * {@code <meta http-equiv="Content-Type" content="text/html; charset=utf-8">}.
     * A meta tag that merely mentions the word "charset" without assigning a value is skipped
     * (the previous implementation threw {@link StringIndexOutOfBoundsException} on it).
     *
     * @param content the HTML text to inspect; may be {@code null}
     * @return the declared encoding name exactly as written in the page; {@code "GB2312"} when a
     *         charset assignment exists but its value is empty; {@code null} when {@code content}
     *         is {@code null} or declares no charset
     */
    public static String getWebEncodingByStr(String content) {
        if (content == null) {
            return null;
        }
        Matcher metaMatcher = META_TAG_PATTERN.matcher(content);
        while (metaMatcher.find()) {
            Matcher charsetMatcher = META_CHARSET_PATTERN.matcher(metaMatcher.group());
            if (!charsetMatcher.find()) {
                // This meta tag carries no charset assignment; keep looking.
                continue;
            }
            String encoding = charsetMatcher.group(1);
            // Historical fallback: an empty declaration is treated as GB2312.
            return encoding.isEmpty() ? "GB2312" : encoding;
        }
        return null;
    }


    /**
     * Legacy HTML-to-text conversion based on an ordered list of case-sensitive regex rules.
     *
     * <p>Block-level tags are turned into blank lines, a few inline tags and entities are
     * dropped or replaced, and finally every remaining tag is deleted. The rules are applied
     * strictly in the order listed because later rules see the output of earlier ones.
     *
     * @param src the HTML fragment; must not be {@code null}
     * @return the remaining text, trimmed
     */
    public static String RemoveHTMLCode_old(String src) {
        final String blankLine = "\n\n";
        final String nothing = "";
        // {regex, replacement} pairs, applied top to bottom.
        final String[][] rules = {
                {"<DIV>", blankLine},
                {"</DIV>", blankLine},
                {"<div>", nothing},
                {"</div>", blankLine},
                {"<BR>", blankLine},
                {"<br>", blankLine},
                {"<br />", blankLine},
                {"<BR />", blankLine},
                {"&nbsp;", " "},
                {"<DIV>", nothing},
                {"<div>", nothing},
                {"&#8226;", "??"},
                {"<STRONG>", nothing},
                {"</STRONG>", nothing},
                {"<strong>", nothing},
                {"</strong>", nothing},
                {"</p>", blankLine},
                {"</P>", blankLine},
                {"<P>", blankLine},
                {"<p>", blankLine},
                {"<a[^>]*>", nothing},
                {"<img[^>]*>", nothing},
                {"</a>", nothing},
                {"<font[^>]*>", nothing},
                {"</font>", nothing},
                {"<FONT[^>]*>", nothing},
                {"</FONT>", nothing},
                {"</h\\d>", blankLine},
                {"</H\\d>", blankLine},
                {"</ul>", blankLine},
                {"</UL>", blankLine},
                {"</li>", blankLine},
                {"</LI>", blankLine},
                {"</tr>", "\n"},
                {"</TR>", "\n"},
                // Whatever tag is still left is removed without a replacement.
                {"<[^>]*>", nothing}
        };

        String text = src;
        for (String[] rule : rules) {
            text = text.replaceAll(rule[0], rule[1]);
        }
        return text.trim();
    }


    /**
     * Converts HTML to text while keeping images, links and table markup.
     *
     * <p>Whitespace between adjacent tags is removed first. Block-level tags then become
     * {@code "\r\n"}, {@code <strong>}/{@code <font>} tags and {@code &nbsp;} are simplified, and
     * every remaining tag is deleted except {@code <img>}, {@code <a>}, {@code <table>},
     * {@code <tbody>}, {@code <tr>} and {@code <td>} (opening and closing forms).
     * Each kept {@code <img} is finally pushed onto its own line.
     *
     * @param src the HTML fragment; must not be {@code null}
     * @return the converted text, trimmed
     */
    public static String RemoveHTMLCodeWithImg(String src) {
        final String lineBreak = "\r\n";
        final String nothing = "";

        String html = src.replaceAll("(<[^>]*>)\\s*(<[^>]*>)", "$1$2");

        for (Pattern tag : new Pattern[]{divP, divRP, brP, br2P, brP2}) {
            html = tag.matcher(html).replaceAll(lineBreak);
        }
        html = spaceP.matcher(html).replaceAll(" ");
        html = html.replaceAll("&#8226;", "??");
        for (Pattern tag : new Pattern[]{strongP, strongRP}) {
            html = tag.matcher(html).replaceAll(nothing);
        }
        for (Pattern tag : new Pattern[]{pP, pRP}) {
            html = tag.matcher(html).replaceAll(lineBreak);
        }
        // Anchor and image tags are deliberately left alone in this variant.
        for (Pattern tag : new Pattern[]{fontP, fontRP}) {
            html = tag.matcher(html).replaceAll(nothing);
        }
        // Closing </tr> and </td> are also left alone so that tables survive.
        for (Pattern tag : new Pattern[]{hRP, ulRP, liRP}) {
            html = tag.matcher(html).replaceAll(lineBreak);
        }

        // Delete every tag that is not on the keep-list (the negative look-ahead).
        html = html.replaceAll("(?!(<img[^>]*>|<a[^>]*>|</a>|<table[^>]*>|</table>|<tbody[^>]*>|</tbody>|<tr[^>]*>|</tr>|<td[^>]*>|</td>))(<[^>]*>)", nothing);

        return html.replaceAll("<img", "\r\n<img").trim();
    }

    /**
     * Converts HTML to plain text by removing every tag.
     *
     * <p>Whitespace between adjacent tags is removed first. Block-level tags then become a blank
     * line ({@code "\n\n"}), inline tags ({@code <strong>}, {@code <a>}, {@code <img>},
     * {@code <font>}) are dropped, {@code &nbsp;} becomes a space, and any tag that is still
     * left is deleted.
     *
     * @param src the HTML fragment; must not be {@code null}
     * @return the remaining text, trimmed
     */
    public static String RemoveHTMLCode(String src) {
        final String blankLine = "\n\n";
        final String nothing = "";

        String html = src.replaceAll("(<[^>]*>)\\s*(<[^>]*>)", "$1$2");

        for (Pattern tag : new Pattern[]{divP, divRP, brP, brP2, br2P}) {
            html = tag.matcher(html).replaceAll(blankLine);
        }
        html = spaceP.matcher(html).replaceAll(" ");
        html = html.replaceAll("&#8226;", "??");
        for (Pattern tag : new Pattern[]{strongP, strongRP}) {
            html = tag.matcher(html).replaceAll(nothing);
        }
        for (Pattern tag : new Pattern[]{pP, pRP}) {
            html = tag.matcher(html).replaceAll(blankLine);
        }
        for (Pattern tag : new Pattern[]{aP, aRP, imgP, fontP, fontRP}) {
            html = tag.matcher(html).replaceAll(nothing);
        }
        for (Pattern tag : new Pattern[]{hRP, ulRP, liRP, trRP, tdRP}) {
            html = tag.matcher(html).replaceAll(blankLine);
        }

        // Anything that still looks like a tag goes away without a replacement.
        return html.replaceAll("<[^>]*>", nothing).trim();
    }

    /**
     * Decodes a small set of HTML entities and removes processing instructions.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>named entities ({@code &quot; &nbsp; &middot; &ldquo; &rdquo; &gt; &lt; &raquo;
     *       &times; &ccedil; &atilde; &ecirc;}) are replaced by their characters
     *       ({@code &nbsp;} becomes a plain space);</li>
     *   <li>{@code <?...>} processing instructions are deleted;</li>
     *   <li>decimal numeric references matched by {@code patHTMLNumCode} (4-5 digits) are
     *       replaced by the corresponding code point;</li>
     *   <li>{@code &amp;} is decoded last, so that text such as {@code &amp;lt;} yields the
     *       literal {@code &lt;} instead of being decoded twice.</li>
     * </ol>
     *
     * <p>Fixes over the previous version: no double decoding through {@code &amp;}; numeric
     * references that decode to {@code $} or a backslash (e.g. {@code &#0036;}) no longer throw;
     * code points above U+FFFF are no longer truncated; the input is scanned once instead of
     * being re-matched after every replacement; {@code &raquo;}, {@code &ccedil;} and
     * {@code &atilde;} produce their real characters instead of "??".
     *
     * @param str the text to decode; may be {@code null}
     * @return the decoded text, or {@code null} when {@code str} is {@code null}
     */
    public static String HTMLDecode(String str) {
        if (str == null) {
            return null;
        }

        // Named entities (literal replacement; no regex needed). &amp; is handled at the very end.
        str = str.replace("&quot;", "\"");
        str = str.replace("&nbsp;", " ");
        str = str.replace("&middot;", "\u00B7");
        str = str.replace("&ldquo;", "\u201C");
        str = str.replace("&rdquo;", "\u201D");
        str = str.replace("&gt;", ">");
        str = str.replace("&lt;", "<");
        str = str.replace("&raquo;", "\u00BB");
        str = str.replace("&times;", "\u00D7");
        str = str.replace("&ccedil;", "\u00E7");
        str = str.replace("&atilde;", "\u00E3");
        str = str.replace("&ecirc;", "\u00EA");

        // Drop processing instructions such as <?xml ... >.
        str = str.replaceAll("<\\?[^>]*>", "");

        // Decimal numeric character references, decoded in a single pass.
        Matcher matcher = patHTMLNumCode.matcher(str);
        StringBuffer decoded = new StringBuffer(str.length());
        while (matcher.find()) {
            int codePoint = Integer.parseInt(matcher.group(1));
            String replacement = new String(Character.toChars(codePoint));
            // quoteReplacement: the decoded character may itself be '$' or '\'.
            matcher.appendReplacement(decoded, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(decoded);

        // Decode the ampersand last to avoid creating new entities that would be decoded again.
        return decoded.toString().replace("&amp;", "&");
    }

    /**
     * Strips form controls from an HTML fragment.
     *
     * <p>{@code <textarea>}, {@code <select>}, {@code <option>}, {@code <label>} and
     * {@code <button>} elements are removed together with their content; {@code <input>} tags and
     * the opening/closing {@code <form>} tags are removed on their own.
     *
     * @param htmlText the HTML fragment; must not be {@code null}
     * @return the fragment without the controls listed above
     */
    public static String RemoveHTMLControl(String htmlText) {
        // Applied in this exact order; each pattern sees the output of the previous one.
        final Pattern[] controls = {
                textareaRemoveP, selectRemoveP, optionRemoveP, labelRemoveP,
                inputP, formP, buttonP, formRP
        };
        String stripped = htmlText;
        for (Pattern control : controls) {
            stripped = control.matcher(stripped).replaceAll("");
        }
        return stripped;
    }

    /**
     * Removes script, style and other non-content markup from an HTML document.
     *
     * <p>In order: paired {@code <script>} and {@code <style>} elements, their self-closed
     * forms, {@code <noscript>} and {@code <object>} elements, {@code <link>} tags and finally
     * HTML comments. Any exception raised on the way is printed and the text processed so far
     * is returned (a {@code null} input therefore comes back as {@code null}).
     *
     * @param content the HTML text
     * @return the text with the markup above removed
     */
    public static String RemoveStyleCode(String content) {
        // Case-insensitive block removals, applied top to bottom.
        final String[] blockRegexes = {
                "(?s)<script\\s*.*?>(.*?)</script>",
                "(?s)<style\\s*.*?>(.*?)</style>",
                "(?s)<script\\s*.*?/>",
                "(?s)<style\\s*.*?/>"
        };

        try {
            for (String regex : blockRegexes) {
                content = Pattern.compile(regex, Pattern.CASE_INSENSITIVE)
                        .matcher(content)
                        .replaceAll("");
            }

            for (Pattern shared : new Pattern[]{noscriptP, objectP, linkP}) {
                content = shared.matcher(content).replaceAll("");
            }

            // Strip comments.
            content = Pattern.compile("(?s)<!--.*?-->").matcher(content).replaceAll("");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Pass-through kept for API compatibility.
     *
     * <p>The removal of {@code \r} and {@code \n} that this method used to perform is
     * disabled, so the argument is returned unchanged.
     *
     * @param src any text, may be {@code null}
     * @return {@code src} itself
     */
    public static String RemoveReturnCode(String src) {
        return src;
    }

    /**
     * Pass-through kept for API compatibility.
     *
     * <p>Both the removal of line breaks between tags and the removal of {@code \r}/{@code \n}
     * are disabled, so the argument is returned unchanged.
     *
     * @param src any HTML text, may be {@code null}
     * @return {@code src} itself
     */
    public static String RemoveHTMLReturnCode(String src) {
        return src;
    }

    /**
     * Inserts {@code "\r\n"} after selected tags so that the markup is spread over several lines.
     *
     * <p>Matching is case-insensitive and the matched tag is rewritten in lower case. A line
     * break is added after {@code </div>}, after the text {@code <div} (i.e. inside the opening
     * tag), after {@code </p>}, {@code <p>}, {@code <br>} and {@code </li>}. Any exception is
     * printed and the text processed so far is returned.
     *
     * @param content the HTML text
     * @return the text with the line breaks inserted
     */
    public static String AddHTMLLine(String content) {
        // {regex, replacement} pairs, applied top to bottom.
        final String[][] rules = {
                {"(?s)</div>", "</div>\r\n"},
                {"(?s)<div", "<div\r\n"},
                {"(?s)</p>", "</p>\r\n"},
                {"(?s)<p>", "<p>\r\n"},
                {"(?s)<br>", "<br>\r\n"},
                {"(?s)</li>", "</li>\r\n"}
        };

        try {
            for (String[] rule : rules) {
                Pattern tag = Pattern.compile(rule[0], Pattern.CASE_INSENSITIVE);
                content = tag.matcher(content).replaceAll(rule[1]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Renders a byte array as a lower-case hexadecimal string, two digits per byte.
     *
     * @param src the bytes to render
     * @return the hex string, or {@code null} when {@code src} is {@code null} or empty
     */
    public static String bytesToHexString(byte[] src) {
        if (src == null || src.length == 0) {
            return null;
        }
        final char[] digits = "0123456789abcdef".toCharArray();
        StringBuilder hex = new StringBuilder(src.length * 2);
        for (byte b : src) {
            // High nibble first, then low nibble.
            hex.append(digits[(b >> 4) & 0x0F]);
            hex.append(digits[b & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Guesses the real type of a file from the first seven bytes of its stream.
     *
     * <p>The bytes are rendered as upper-case hex and checked, in this priority order, for the
     * signatures {@code 3C21444F} ("&lt;!DO"), {@code 3C68746D} ("&lt;htm"), {@code 3C48544D}
     * ("&lt;HTM"), {@code D0CF11E0}, {@code 255044462D312E} ("%PDF-1."), {@code 504B030414}
     * ("PK" followed by 03 04 14) and {@code 3C3F786D} ("&lt;?xm"). A read error is printed and
     * the classification continues with whatever is in the buffer. The stream is not closed.
     *
     * @param is the stream to read from, positioned at the start of the file
     * @return {@code "web"}, {@code "word"} or {@code "pdf"}; {@code null} for the XML signature;
     *         {@code "web"} when no signature is recognised
     */
    public static String getTypeByStream(FileInputStream is) {
        byte[] header = new byte[7];
        try {
            is.read(header, 0, header.length);
        } catch (IOException e) {
            e.printStackTrace();
        }

        String signature = bytesToHexString(header).toUpperCase();

        if (signature.contains("3C21444F") || signature.contains("3C68746D")
                || signature.contains("3C48544D")) {
            return "web";
        }
        if (signature.contains("D0CF11E0")) {
            return "word";
        }
        if (signature.contains("255044462D312E")) {
            return "pdf";
        }
        if (signature.contains("504B030414")) {
            return "word";
        }
        // XML is reported as null; everything else falls back to "web" (not "unknown").
        return signature.contains("3C3F786D") ? null : "web";
    }


    /**
     * Determines the type of a file from its leading bytes and, for office documents, from its
     * extension.
     *
     * <p>The content-based type comes from {@link #getTypeByStream(FileInputStream)}. When that
     * type is {@code "word"} (an office container), the file extension decides between
     * {@code "word"}, {@code "excel"} ({@code .xls}/{@code .xlsx}) and {@code "ppt"}
     * ({@code .ppt}/{@code .pptx}); an unknown or missing extension stays {@code "word"}.
     *
     * <p>Fixes over the previous version: the stream is always closed (it used to leak on the
     * early returns and on exceptions); a {@code null} content type (XML) no longer causes a
     * {@link NullPointerException}; a path without a dot no longer throws from
     * {@code substring}.
     *
     * @param file path of the file to inspect
     * @return {@code "web"}, {@code "word"}, {@code "excel"}, {@code "ppt"} or {@code "pdf"};
     *         {@code "unknown"} when the file cannot be read or its content type is not supported
     */
    public static String getFileType(String file) {
        String type;
        try (FileInputStream is = new FileInputStream(file)) {
            type = getTypeByStream(is);
        } catch (IOException e) {
            // Covers FileNotFoundException as well.
            e.printStackTrace();
            return "unknown";
        }

        if (type == null) {
            // getTypeByStream reports XML as null; this method has no dedicated value for it.
            return "unknown";
        }
        if (!"word".equals(type)) {
            return type;
        }

        // An office container may be a document, a spreadsheet or a presentation.
        int dot = file.lastIndexOf('.');
        String extension = dot < 0 ? "" : file.substring(dot).toLowerCase(Locale.ROOT);
        if (".xls".equals(extension) || ".xlsx".equals(extension)) {
            return "excel";
        }
        if (".ppt".equals(extension) || ".pptx".equals(extension)) {
            return "ppt";
        }
        return "word";
    }

    /**
     * Returns the file extension to use for a file.
     *
     * <p>The extraction of the real extension from {@code file} is disabled; the method
     * currently answers {@code ".html"} for every input.
     *
     * <p>Author: Liu Xiaopeng. Created: 2015-06-04 18:41:36.
     *
     * @param file the file name or path (ignored)
     * @return always {@code ".html"}
     * @version 1.0
     */
    public static String getFileExt(String file) {
        return ".html";
    }

    /**
     * Returns the last segment of a path, i.e. everything after the final path separator.
     *
     * <p>Both {@code '\'} and {@code '/'} count as separators and the one occurring last wins,
     * so mixed paths such as {@code C:\dir/sub/file.txt} yield {@code file.txt} (previously a
     * forward slash was ignored whenever the path also contained a backslash).
     *
     * @param file the path; may be {@code null}
     * @return the file name, the whole input when it contains no separator, an empty string when
     *         the path ends with a separator, or {@code null} when {@code file} is {@code null}
     */
    public static String getFileName(String file) {
        if (file == null) {
            return null;
        }
        int separator = Math.max(file.lastIndexOf('\\'), file.lastIndexOf('/'));
        return separator < 0 ? file : file.substring(separator + 1);
    }

    /**
     * Returns the directory part of a path, including the trailing separator.
     *
     * <p>Both {@code '\'} and {@code '/'} count as separators and the one occurring last wins,
     * so mixed paths such as {@code C:\dir/sub/file.txt} yield {@code C:\dir/sub/} (previously a
     * forward slash was ignored whenever the path also contained a backslash).
     *
     * @param file the path; may be {@code null}
     * @return the path up to and including the last separator, an empty string when the input
     *         contains no separator, or {@code null} when {@code file} is {@code null}
     */
    public static String getFilePath(String file) {
        if (file == null) {
            return null;
        }
        int separator = Math.max(file.lastIndexOf('\\'), file.lastIndexOf('/'));
        return separator < 0 ? "" : file.substring(0, separator + 1);
    }


    /**
     * Returns the file name of a path without its extension.
     *
     * <p>The name is taken from {@link #getFileName(String)} and cut at its last dot. A name
     * without a dot is returned whole; a name that starts with its only dot (e.g.
     * {@code .profile}) yields an empty string.
     *
     * @param file the path; may be {@code null}
     * @return the title, or {@code null} when {@code file} is {@code null}
     */
    public static String getFileTitle(String file) {
        if (file == null) {
            return null;
        }
        String name = getFileName(file);
        int dot = name.lastIndexOf(".");
        // dot == 0 gives substring(0, 0), i.e. the empty string.
        return dot < 0 ? name : name.substring(0, dot);
    }


    /**
     * Heuristic check that a string looks like an English sentence.
     *
     * <p>The string must have at least 10 characters, and the characters that remain after
     * deleting ASCII letters and spaces (and trimming the result) must not exceed 30% of the
     * original length.
     *
     * @param sentence the text to check; must not be {@code null}
     * @return {@code true} when the text passes both conditions
     */
    public static boolean isGoodEngSentence(String sentence) {
        int total = sentence.length();
        if (total < 10) {
            return false;
        }
        int leftover = sentence.replaceAll("[a-zA-Z ]", "").trim().length();
        double share = leftover * 1.0 / total;
        return !(share > 0.3);
    }

    /**
     * Configurable variant of the English sentence heuristic.
     *
     * <p>The length must lie within {@code [minlen, maxlen]}. The characters that remain after
     * deleting ASCII letters and spaces (and trimming the result) must not exceed 25% of the
     * original length and must not number more than {@code maxOtherLen}.
     *
     * @param sentence    the text to check; must not be {@code null}
     * @param minlen      minimum accepted length
     * @param maxlen      maximum accepted length
     * @param maxOtherLen maximum accepted count of leftover characters
     * @return {@code true} when the text passes all conditions
     */
    public static boolean isGoodEngSentenceX(String sentence, int minlen, int maxlen, int maxOtherLen) {
        int total = sentence.length();
        boolean lengthOk = !(total < minlen || total > maxlen);
        if (!lengthOk) {
            return false;
        }
        int leftover = sentence.replaceAll("[a-zA-Z ]", "").trim().length();
        double share = leftover * 1.0 / total;
        return !(share > 0.25 || leftover > maxOtherLen);
    }

    /**
     * Heuristic check that a string looks like a Chinese sentence.
     *
     * <p>The string must have at least 5 characters, and the characters that remain after
     * deleting CJK ideographs (U+4E00..U+9FA5) and spaces (and trimming the result) must not
     * exceed 40% of the original length.
     *
     * @param sentence the text to check; must not be {@code null}
     * @return {@code true} when the text passes both conditions
     */
    public static boolean isGoodChiSentence(String sentence) {
        int total = sentence.length();
        if (total < 5) {
            return false;
        }
        int leftover = sentence.replaceAll("[\\u4e00-\\u9fa5 ]", "").trim().length();
        double share = leftover * 1.0 / total;
        return !(share > 0.4);
    }

    /**
     * Configurable variant of the Chinese sentence heuristic.
     *
     * <p>The length must lie within {@code [minlen, maxlen]}. The characters that remain after
     * deleting CJK ideographs (U+4E00..U+9FA5) and spaces (and trimming the result) must not
     * exceed 30% of the original length and must not number more than {@code maxOtherLen}.
     *
     * @param sentence    the text to check; must not be {@code null}
     * @param minlen      minimum accepted length
     * @param maxlen      maximum accepted length
     * @param maxOtherLen maximum accepted count of leftover characters
     * @return {@code true} when the text passes all conditions
     */
    public static boolean isGoodChiSentenceX(String sentence, int minlen, int maxlen, int maxOtherLen) {
        int total = sentence.length();
        boolean lengthOk = !(total < minlen || total > maxlen);
        if (!lengthOk) {
            return false;
        }
        int leftover = sentence.replaceAll("[\\u4e00-\\u9fa5 ]", "").trim().length();
        double share = leftover * 1.0 / total;
        return !(share > 0.3 || leftover > maxOtherLen);
    }

    /**
     * Heuristic check that a string looks like a sentence in English and/or Chinese.
     *
     * <p>The string must have at least 10 characters, and the characters that remain after
     * deleting ASCII letters, CJK ideographs (U+4E00..U+9FA5) and spaces (and trimming the
     * result) must not exceed 40% of the original length.
     *
     * @param sentence the text to check; must not be {@code null}
     * @return {@code true} when the text passes both conditions
     */
    public static boolean isGoodSentence(String sentence) {
        int total = sentence.length();
        if (total < 10) {
            return false;
        }
        int leftover = sentence.replaceAll("[a-zA-Z\\u4e00-\\u9fa5 ]", "").trim().length();
        double share = leftover * 1.0 / total;
        return !(share > 0.4);
    }

    /**
     * Sentence heuristic without a minimum length.
     *
     * <p>The characters that remain after deleting ASCII letters, CJK ideographs
     * (U+4E00..U+9FA5) and spaces (and trimming the result) must not exceed 40% of the original
     * length. For the empty string the ratio is not a number, the comparison is false and the
     * method answers {@code true}.
     *
     * @param sentence the text to check; must not be {@code null}
     * @return {@code false} when the leftover share is above 40%, {@code true} otherwise
     */
    public static boolean isGoodSentence_simple(String sentence) {
        int leftover = sentence.replaceAll("[a-zA-Z\\u4e00-\\u9fa5 ]", "").trim().length();
        double share = leftover * 1.0 / sentence.length();
        // Written as a negation on purpose: a NaN share must yield true.
        return !(share > 0.4);
    }


    /**
     * Collects file paths below a directory.
     *
     * <p>When {@code directory} is a directory and {@code bIncludeSubDir} is {@code true}, all
     * regular files in the whole tree are added (absolute paths); when it is {@code false}, only
     * the regular files directly inside the directory are added. When {@code directory} names a
     * regular file, the given string itself is added. A path that does not exist adds nothing.
     *
     * <p>A directory whose content cannot be listed ({@link File#list()} returns {@code null},
     * e.g. because of missing permissions) is now skipped instead of raising a
     * {@link NullPointerException}.
     *
     * @param l              list to append to; a new list is created when {@code null}
     * @param directory      directory (or file) path to scan
     * @param bIncludeSubDir whether to descend into sub-directories
     * @return the list that received the paths (either {@code l} or the newly created list)
     */
    public static List<String> getFiles(List<String> l, String directory, boolean bIncludeSubDir) {
        if (l == null) {
            l = new ArrayList<String>();
        }

        File file = new File(directory);

        if (file.isDirectory()) {
            String[] children = file.list();
            if (children == null) {
                // Unreadable directory or I/O error: nothing to collect here.
                return l;
            }
            for (String child : children) {
                File childFile = new File(file, child);
                if (bIncludeSubDir) {
                    // The recursive call sorts out files versus directories.
                    getFiles(l, childFile.getAbsolutePath(), true);
                } else if (childFile.isFile()) {
                    l.add(childFile.getAbsolutePath());
                }
            }
        } else if (file.isFile()) {
            l.add(directory);
        }

        return l;
    }


    /**
     * Writes the paths of all regular files below a directory to a writer, one per line.
     *
     * <p>Directories are walked recursively using absolute child paths. When {@code directory}
     * names a regular file, the given string itself is written. A path that does not exist
     * writes nothing. The result of nested calls is not inspected.
     *
     * @param directory directory (or file) path to scan
     * @param bw        writer receiving one path per line
     * @return {@code true} when this level completed without an exception, {@code false} when
     *         an exception was caught (it is printed)
     */
    public static boolean getFiles(String directory, BufferedWriter bw) {
        try {
            File entry = new File(directory);
            if (!entry.isDirectory()) {
                if (entry.isFile()) {
                    bw.write(directory);
                    bw.newLine();
                }
                return true;
            }
            for (String child : entry.list()) {
                getFiles(new File(entry, child).getAbsolutePath(), bw);
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Copies a file, replacing the destination if it exists.
     *
     * <p>The copy is done channel-to-channel. Compared with the previous version, all streams
     * and channels are closed on every path (the input stream used to be left open and the
     * output leaked on failure), the file is no longer memory-mapped as a whole (which failed
     * for files above 2 GB), and the source is opened before the destination so that a missing
     * source no longer truncates an existing destination.
     *
     * @param inFile  path of the file to read
     * @param outFile path of the file to write
     * @return {@code true} when the whole file was copied; {@code false} when any exception
     *         occurred (it is printed) or the transfer stopped early
     */
    public static boolean copyFile(String inFile, String outFile) {
        try (FileInputStream input = new FileInputStream(inFile);
             FileOutputStream output = new FileOutputStream(outFile);
             FileChannel source = input.getChannel();
             FileChannel target = output.getChannel()) {
            long size = source.size();
            long position = 0;
            while (position < size) {
                long transferred = source.transferTo(position, size - position, target);
                if (transferred <= 0) {
                    // No progress (e.g. the source shrank meanwhile): stop instead of spinning.
                    break;
                }
                position += transferred;
            }
            return position >= size;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Tells whether the text contains at least one character matched by {@code patChi}
     * (U+4E00..U+9FA5).
     *
     * @param text the text to inspect; must not be {@code null}
     * @return {@code true} when such a character occurs anywhere in the text
     */
    public static boolean isContainedChiWord(String text) {
        return patChi.matcher(text).find();
    }

    /**
     * Tells whether the text contains at least one character matched by {@code patWord}
     * (an ASCII letter or a character in U+4E00..U+9FA5).
     *
     * @param text the text to inspect; must not be {@code null}
     * @return {@code true} when such a character occurs anywhere in the text
     */
    public static boolean isContainedWord(String text) {
        return patWord.matcher(text).find();
    }

    /**
     * Tells whether the text consists solely of word characters, i.e. contains nothing matched
     * by {@code patUnWord} (anything other than ASCII letters and U+4E00..U+9FA5). The empty
     * string therefore qualifies.
     *
     * @param text the text to inspect; must not be {@code null}
     * @return {@code true} when no non-word character is found
     */
    public static boolean isContainedOnlyWord(String text) {
        boolean hasOther = patUnWord.matcher(text).find();
        return !hasOther;
    }

    /**
     * Maps a word to its stem using the table in {@code data/model_eng.txt}.
     *
     * <p>The table is loaded on first use. Each line of the file is expected to hold a word,
     * some whitespace and the stem; lines that do not fit are ignored. When loading fails, a
     * message is printed, the trimmed word is returned as-is and the next call tries to load
     * again.
     *
     * <p>Changes over the previous version: the file handles are closed on every path (they
     * leaked on exceptions and the buffered reader was never closed), and the table is built in
     * a local map and only assigned to {@code stemMap} once it is complete, so a failure
     * half-way never leaves a partially filled table visible.
     *
     * @param word the word to stem; must not be {@code null}
     * @return the stem from the table, or the trimmed word when it has no entry
     */
    public static String stemming(String word) {
        String key = word.trim();

        if (stemMap == null) {
            HashMap<String, String> loaded = new HashMap<String, String>();
            Pattern entryPattern = Pattern.compile("(.+?)[\\s]+(.+)");
            try (FileInputStream fin = new FileInputStream("data/model_eng.txt");
                 BufferedReader br = new BufferedReader(new InputStreamReader(fin, "UTF-8"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    Matcher m = entryPattern.matcher(line);
                    if (m.find()) {
                        loaded.put(m.group(1), m.group(2));
                    }
                }
            } catch (Exception e) {
                System.out.println("Initialize stemming failed!");
                return key;
            }
            stemMap = loaded;
        }

        String stem = stemMap.get(key);
        return stem != null ? stem : key;
    }

    /**
     * Stems every space-separated token of a text.
     *
     * <p>The text is split on single spaces, each token goes through
     * {@link #stemming(String)}, and the results are joined with one space after every token
     * (so the result ends with a space).
     *
     * @param text the text to process; must not be {@code null}
     * @return the stemmed tokens, each followed by a space
     */
    public static String stemmingText(String text) {
        StringBuilder stemmed = new StringBuilder();
        for (String token : text.split(" ")) {
            stemmed.append(stemming(token)).append(" ");
        }
        return stemmed.toString();
    }


    /**
     * Returns a copy of a map whose iteration order is by value, largest first.
     *
     * <p>Entries with a {@code null} value are placed after all others. The sort is stable, so
     * entries with equal values keep the relative order in which the source map iterates them.
     * The source map is not modified.
     *
     * @param map the map to sort; must not be {@code null}
     * @param <K> key type
     * @param <V> value type, ordered by its natural ordering
     * @return a new {@link LinkedHashMap} holding the same entries in descending value order
     */
    public static <K, V extends Comparable<V>> Map<K, V> sortByValueDesc(Map<K, V> map) {
        List<Entry<K, V>> entries = new ArrayList<Entry<K, V>>(map.entrySet());

        Collections.sort(entries, new Comparator<Entry<K, V>>() {
            @Override
            public int compare(Entry<K, V> left, Entry<K, V> right) {
                V leftValue = left.getValue();
                V rightValue = right.getValue();
                if (leftValue == null && rightValue == null) {
                    return 0;
                }
                if (rightValue == null) {
                    // Non-null values sort before null ones.
                    return -1;
                }
                if (leftValue == null) {
                    return 1;
                }
                // Reversed operands give the descending order.
                return rightValue.compareTo(leftValue);
            }
        });

        Map<K, V> ordered = new LinkedHashMap<K, V>();
        for (Entry<K, V> entry : entries) {
            ordered.put(entry.getKey(), entry.getValue());
        }
        return ordered;
    }

    /**
     * Extracts the plain text from an HTML string, normalising line breaks and spaces.
     *
     * <p>The HTML goes through {@code RemoveHTMLReturnCode}, {@code RemoveStyleCode},
     * {@code RemoveHTMLCode} and {@code HTMLDecode}. The text is then cleaned up: indentation
     * made of a space plus two ideographic spaces becomes a line break, {@code \r\n} becomes
     * {@code \n}, runs of spaces collapse to one, non-breaking and ideographic spaces are
     * removed, spaces before a line break are dropped, every {@code '='} is removed and runs of
     * line breaks collapse to a single one.
     *
     * <p>The final step now collapses runs of any length; the previous five fixed passes left
     * extra blank lines behind whenever more than 32 line breaks followed each other.
     *
     * @param htmlText the HTML to convert
     * @return the extracted text
     */
    public static String TransferHTML2Text(String htmlText) {
        String text = Utility.HTMLDecode(Utility.RemoveHTMLCode(Utility.RemoveStyleCode(Utility.RemoveHTMLReturnCode(htmlText))));
        text = text.replaceAll(" 　　", "\n");
        text = text.replaceAll(" +\r\n", "\n");
        text = text.replaceAll("\r\n", "\n");
        text = text.replaceAll(" +", " ");
        text = text.replaceAll("[\\u00A0\\u3000]", "");
        text = text.replaceAll("　", "");
        text = text.replaceAll(" \n", "\n");
        text = text.replaceAll("=", "");
        // Collapse every run of two or more line breaks in one pass.
        text = text.replaceAll("\n{2,}", "\n");

        return text;
    }

    /**
     * Escapes XML special characters and removes unwanted characters while keeping line breaks.
     *
     * <p>{@code & < > " '} are replaced by {@code &amp; &lt; &gt; &quot; &apos;} (the ampersand
     * first, so the entities produced afterwards are not escaped again). The result is then
     * passed through {@code filterUnicode} and {@code filterASCIINotEnter}.
     *
     * @param str the text to clean; may be {@code null}
     * @return the cleaned text, or an empty string when {@code str} is {@code null} or empty
     */
    public static String getValueAfterReplaceSpecialWordNotEnter(String str) {
        if (org.apache.commons.lang3.StringUtils.isEmpty(str)) {
            return "";
        }
        String escaped = str.replace("&", "&amp;");
        escaped = escaped.replace("<", "&lt;");
        escaped = escaped.replace(">", "&gt;");
        escaped = escaped.replace("\"", "&quot;");
        escaped = escaped.replace("'", "&apos;");

        String withoutUnicodeNoise = filterUnicode(escaped);
        return filterASCIINotEnter(withoutUnicodeNoise);
    }

    /**
     * Replaces non-printable ASCII characters with a space, leaving line breaks untouched.
     *
     * <p>Affected are the characters below 0x20 and 0x7F, except line feed (0x0A, decimal 10)
     * and carriage return (0x0D, decimal 13). A {@code null}, empty or whitespace-only input
     * yields an empty string.
     *
     * @param source the text to filter
     * @return the filtered text, same length as the input unless the input was blank
     */
    private static String filterASCIINotEnter(String source) {
        if (org.apache.commons.lang3.StringUtils.isBlank(source)) {
            return "";
        }

        char[] chars = source.toCharArray();
        for (int pos = 0; pos < chars.length; pos++) {
            char current = chars[pos];
            boolean lineBreak = current == '\n' || current == '\r';
            boolean nonPrintable = current < ' ' || current == 0x7F;
            if (nonPrintable && !lineBreak) {
                chars[pos] = ' ';
            }
        }
        return String.valueOf(chars);
    }

    /**
     * Characters deleted by {@link #filterUnicode(String)}: DEL and the C1 controls
     * (U+007F-U+009F), the soft hyphen (U+00AD), zero-width and directional marks
     * (U+200B-U+200F, U+2028-U+202E), U+FEFF, U+FFFC and the other code points and ranges
     * listed in the expression. Compiled once because the expression is large.
     */
    private static final Pattern INVISIBLE_UNICODE_PATTERN = Pattern.compile("([\\u007f-\\u009f]|\\u00ad|[\\u0483-\\u0489]|[\\u0559-\\u055a]|\\u058a|[\\u0591-\\u05bd]|\\u05bf|[\\u05c1-\\u05c2]|[\\u05c4-\\u05c7]|[\\u0606-\\u060a]|[\\u063b-\\u063f]|\\u0674|[\\u06e5-\\u06e6]|\\u070f|[\\u076e-\\u077f]|\\u0a51|\\u0a75|\\u0b44|[\\u0b62-\\u0b63]|[\\u0c62-\\u0c63]|[\\u0ce2-\\u0ce3]|[\\u0d62-\\u0d63]|\\u135f|[\\u200b-\\u200f]|[\\u2028-\\u202e]|\\u2044|\\u2071|[\\uf701-\\uf70e]|[\\uf710-\\uf71a]|\\ufb1e|[\\ufc5e-\\ufc62]|\\ufeff|\\ufffc)");

    /**
     * Deletes every character matched by {@code INVISIBLE_UNICODE_PATTERN} from the given text.
     *
     * @param source text to filter; must not be {@code null}
     * @return {@code source} without the matched characters (unchanged content if none occur)
     */
    private static String filterUnicode(String source) {
        // replaceAll already returns the input text when nothing matches, so the former
        // find() pre-scan (and the per-call Pattern.compile) was pure overhead.
        return INVISIBLE_UNICODE_PATTERN.matcher(source).replaceAll("");
    }

    /**
     * Converts an HTML string to simple paragraph markup while keeping what
     * {@code RemoveHTMLCodeWithImg} leaves in place.
     *
     * <p>The markup is passed through {@code RemoveHTMLReturnCode}, {@code RemoveStyleCode},
     * {@code RemoveHTMLCodeWithImg} and {@code HTMLDecode} (in that order, all declared elsewhere
     * in this class). Line breaks are then normalised and every remaining line is wrapped in an
     * indented {@code <p>} element. When the text still contains a {@code <table}, lines that
     * carry table/row/cell tags are emitted untouched and only the other lines are wrapped.
     *
     * @param htmlText HTML source to convert
     * @return the text re-wrapped in {@code <p style='text-indent:2em;'>} paragraphs
     */
    public static String TransferHTML2TextWithImg(String htmlText) {
        String text = Utility.HTMLDecode(Utility.RemoveHTMLCodeWithImg(Utility.RemoveStyleCode(Utility.RemoveHTMLReturnCode(htmlText))));
        text = text.replaceAll(" 　　", "\n");
        text = text.replaceAll(" +\r\n", "\n");
        text = text.replaceAll("\r\n", "\n");
        text = text.replaceAll(" +", " ");
        text = text.replaceAll(" \n", "\n");
        // One pass collapses a blank-line run of any length; the former chain of six
        // "\n\n" -> "\n" passes left residue behind for runs longer than 64 line breaks.
        text = text.replaceAll("\n{2,}", "\n");

        final String paragraphOpen = "<p style='text-indent:2em;'>";

        // Tables are left as they are: only lines without table markup become paragraphs.
        if (text.contains("<table")) {
            StringBuilder html = new StringBuilder(text.length() + 64);
            for (String line : text.split("\n")) {
                boolean tableMarkup = line.contains("<table") || line.contains("<td") || line.contains("<tr")
                        || line.contains("table>") || line.contains("td>") || line.contains("tr>");
                if (tableMarkup) {
                    html.append(line);
                } else {
                    html.append(paragraphOpen).append(line).append("</p><br/>");
                }
            }
            return html.toString();
        }

        // No table: every line break (plus any whitespace that follows it) starts a new paragraph.
        final String paragraphBreak = "</p><br/>" + paragraphOpen;
        text = text.replaceAll("\n\\s+", paragraphBreak);
        text = text.replaceAll("\n", paragraphBreak);
        return paragraphOpen + text + "</p>";
    }

    /**
     * Literal replacements applied, in this order, by {@link #normalizeHtmlTransf(String)}.
     * {@code &amp;} is deliberately decoded after every other entity: decoding it earlier would
     * turn an escaped entity such as {@code &amp;lt;} into {@code &lt;} and then into {@code <},
     * i.e. decode the text twice.
     */
    private static final String[][] HTML_ENTITY_REPLACEMENTS = {
            {"&bull;", "·"},
            {"&middot;", "·"},
            {"&nbsp;", " "},
            {"&quot;", "\""},
            {"・", "·"},
            {"&ldquo;", "\""},
            {"&rdquo;", "\""},
            {"&hellip;", "..."},
            {"&lt;", "<"},
            {"&gt;", ">"},
            {"&mdash;", "—"},
            {"&ndash;", "–"},
            {"&tilde;", "~"},
            {"&lsquo;", "'"},
            {"&rsquo;", "'"},
            {"&sbquo;", ","},
            {"&lsaquo;", "‹"},
            {"&rsaquo;", "›"},
            {"&amp;", "&"},
            {"|", " "},
    };

    /**
     * Decodes a fixed set of named HTML entities and normalises a few punctuation characters.
     *
     * <p>Besides the entities, the wide middle dot is mapped to {@code ·} and every {@code '|'}
     * is replaced by a space. Each entity is decoded exactly once.
     *
     * @param s text that may contain HTML entities; must not be {@code null}
     * @return the text with the listed entities replaced by their characters
     */
    public static String normalizeHtmlTransf(String s) {
        String ret = s;
        for (String[] replacement : HTML_ENTITY_REPLACEMENTS) {
            ret = ret.replace(replacement[0], replacement[1]);
        }
        return ret;
    }


    /**
     * Extracts text from an HTML string, keeping what {@code RemoveHTMLCodeWithImg} leaves in
     * place, and normalises its line breaks.
     *
     * <p>The markup is passed through {@code RemoveHTMLReturnCode}, {@code RemoveStyleCode},
     * {@code RemoveHTMLCodeWithImg} and {@code HTMLDecode} (in that order, all declared elsewhere
     * in this class). Afterwards CRLF becomes LF, runs of spaces are squeezed, U+00A0 and U+3000
     * are deleted and consecutive line breaks are collapsed into a single one.
     *
     * @param htmlText HTML source to convert
     * @return the extracted text, using {@code \n} as the only line separator
     */
    public static String TransferHTML3TextWithImg(String htmlText) {
        String text = Utility.HTMLDecode(Utility.RemoveHTMLCodeWithImg(Utility.RemoveStyleCode(Utility.RemoveHTMLReturnCode(htmlText))));
        // A space followed by two wide spaces is treated as a paragraph indent, i.e. a line start.
        text = text.replaceAll(" 　　", "\n");
        text = text.replaceAll(" +\r\n", "\n");
        text = text.replaceAll("\r\n", "\n");
        text = text.replaceAll(" +", " ");
        text = text.replaceAll("[\\u00A0\\u3000]", "");
        text = text.replaceAll("　", "");
        text = text.replaceAll(" \n", "\n");
        // One pass collapses a blank-line run of any length; the former chain of six
        // "\n\n" -> "\n" passes left residue behind for runs longer than 64 line breaks.
        text = text.replaceAll("\n{2,}", "\n");
        return text;
    }

    /**
     * Strips markup that is treated as noise from an HTML string, including images and links.
     *
     * <p>The steps run strictly in sequence, each one on the output of the previous one:
     * style removal via {@code Utility.RemoveStyleCode}, {@code &nbsp;} replacement, removal of
     * hidden divs and form controls, attribute stripping via {@code removeAttrP}, removal of
     * comments/legends, rewriting of anchors, removal of embedded content, images and
     * {@code center} tags, removal of {@code cufon} tags and finally removal of empty
     * {@code ul/div/p/li/canvas} elements. Apart from {@code divNoneP} the patterns are declared
     * outside this method, so what exactly they match is not visible here.
     *
     * @param htmlText HTML source to clean
     * @return the cleaned HTML; if a step throws, the stack trace is printed and the text as it
     *         was before the failing step is returned
     */
    public static String RemoveUselessHTMLTag(String htmlText) {
        try {
            htmlText = Utility.RemoveStyleCode(htmlText);
            htmlText = htmlText.replaceAll("&nbsp;", " ");

            // Hidden blocks and form controls are dropped entirely.
            Pattern[] hiddenAndFormPatterns = {
                    divNoneP, textareaRemoveP, selectRemoveP, optionRemoveP,
                    labelRemoveP, inputP, formP, buttonP, formRP
            };
            for (Pattern pattern : hiddenAndFormPatterns) {
                htmlText = pattern.matcher(htmlText).replaceAll("");
            }

            // Every tag matched by removeAttrP is rewritten to "<" + group 1 + ">".
            htmlText = removeAttrP.matcher(htmlText).replaceAll("<$1>");

            for (Pattern pattern : new Pattern[]{commentP, legendRemoveP}) {
                htmlText = pattern.matcher(htmlText).replaceAll("");
            }

            // NOTE(review): "sapn" looks like a typo for "span"; it is kept as is because other
            // code may already rely on this spelling - confirm before changing.
            htmlText = aP.matcher(htmlText).replaceAll("<sapn>");
            htmlText = aRP.matcher(htmlText).replaceAll("</sapn>");

            Pattern[] embeddedAndLayoutPatterns = {
                    iframeP, noscriptP, objectP, imgP, imgRP, centerP, centerRP
            };
            for (Pattern pattern : embeddedAndLayoutPatterns) {
                htmlText = pattern.matcher(htmlText).replaceAll("");
            }

            for (String cufonTag : new String[]{"<cufontext>", "</cufontext>", "<cufon>", "</cufon>"}) {
                htmlText = htmlText.replaceAll(cufonTag, "");
            }

            // Elements that contain nothing but whitespace are removed last.
            String[] emptyElementRegexes = {
                    "(?s)<ul[^>]*>\\s*</ul>",
                    "(?s)<div[^>]*>\\s*</div>",
                    "(?s)<p[^>]*>\\s*</p>",
                    "(?s)<li[^>]*>\\s*</li>",
                    "(?s)<canvas[^>]*>\\s*</canvas>"
            };
            for (String emptyElementRegex : emptyElementRegexes) {
                htmlText = htmlText.replaceAll(emptyElementRegex, "");
            }

            return htmlText;
        } catch (Exception e) {
            e.printStackTrace();
            return htmlText;
        }
    }


    /**
     * Strips markup that is treated as noise from an HTML string, including links, but keeps
     * {@code img} tags.
     *
     * <p>The steps run strictly in sequence, each one on the output of the previous one. Image
     * tags are temporarily renamed to {@code <_img...>} before {@code removeAttrP} is applied and
     * renamed back afterwards, presumably so that they keep their attributes (the patterns are
     * declared outside this method - confirm against their definitions).
     *
     * @param htmlText HTML source to clean
     * @return the cleaned HTML; if a step throws, the stack trace is printed and the text as it
     *         was before the failing step is returned
     */
    public static String RemoveUselessHTMLTagX(String htmlText) {
        try {
            htmlText = Utility.RemoveStyleCode(htmlText);
            htmlText = htmlText.replaceAll("&nbsp;", " ");

            // Hidden blocks and form controls are dropped entirely.
            Pattern[] hiddenAndFormPatterns = {
                    divNoneP, textareaRemoveP, selectRemoveP, optionRemoveP,
                    labelRemoveP, inputP, formP, buttonP, formRP
            };
            for (Pattern pattern : hiddenAndFormPatterns) {
                htmlText = pattern.matcher(htmlText).replaceAll("");
            }

            // Rename img tags, rewrite the remaining tags, then restore the img tags.
            htmlText = imgReplaceP.matcher(htmlText).replaceAll("<_img$1>");
            htmlText = removeAttrP.matcher(htmlText).replaceAll("<$1>");
            htmlText = imgRevReplaceP.matcher(htmlText).replaceAll("<img$1>");

            for (Pattern pattern : new Pattern[]{commentP, legendRemoveP}) {
                htmlText = pattern.matcher(htmlText).replaceAll("");
            }

            // NOTE(review): "sapn" looks like a typo for "span"; it is kept as is because other
            // code may already rely on this spelling - confirm before changing.
            htmlText = aP.matcher(htmlText).replaceAll("<sapn>");
            htmlText = aRP.matcher(htmlText).replaceAll("</sapn>");

            // Unlike RemoveUselessHTMLTag, imgP / imgRP are not applied here.
            Pattern[] embeddedAndLayoutPatterns = {
                    iframeP, noscriptP, objectP, centerP, centerRP
            };
            for (Pattern pattern : embeddedAndLayoutPatterns) {
                htmlText = pattern.matcher(htmlText).replaceAll("");
            }

            for (String cufonTag : new String[]{"<cufontext>", "</cufontext>", "<cufon>", "</cufon>"}) {
                htmlText = htmlText.replaceAll(cufonTag, "");
            }

            // Elements that contain nothing but whitespace are removed last.
            String[] emptyElementRegexes = {
                    "(?s)<ul[^>]*>\\s*</ul>",
                    "(?s)<div[^>]*>\\s*</div>",
                    "(?s)<p[^>]*>\\s*</p>",
                    "(?s)<li[^>]*>\\s*</li>",
                    "(?s)<canvas[^>]*>\\s*</canvas>"
            };
            for (String emptyElementRegex : emptyElementRegexes) {
                htmlText = htmlText.replaceAll(emptyElementRegex, "");
            }

            return htmlText;
        } catch (Exception e) {
            e.printStackTrace();
            return htmlText;
        }
    }


    /**
     * Removes links that are not embedded in surrounding text, together with the containers
     * that become empty as a result.
     *
     * <p>The input is parsed with Jsoup and every {@code <a>} element is inspected. Note that the
     * result is the serialised whole document ({@code doc.outerHtml()}); only when the input
     * contains no {@code <a>} element at all is the original string returned unchanged.
     *
     * @param contentWithTag HTML to clean
     * @return the HTML of the parsed document after the removals, or {@code contentWithTag}
     *         itself when it contains no links
     */
    public static String RemoveUselessLink(String contentWithTag) {
        Document doc = Jsoup.parse(contentWithTag);
        Elements contentElems = doc.select("a");
        if ((contentElems == null) || (contentElems.size() == 0)) {
            return contentWithTag;
        }
        for (Element aElement : contentElems) {
            try {
                String elementText = aElement.text().trim();
                Element parentElement = aElement.parent();
                String parentText = parentElement.text().trim();
                // Wide spaces are ignored when comparing the link text with the parent text.
                elementText = elementText.replaceAll("　", "").trim();
                parentText = parentText.replaceAll("　", "").trim();
                if (parentText.equals(elementText)) {
                    // The link is the only text of its parent: drop the link.
                    aElement.remove();
                } else {
                    // Strip whatever aRemoveP matches (declared elsewhere; presumably whole
                    // anchor elements) from the parent's HTML and reduce the rest to plain text.
                    parentText = Utility.aRemoveP.matcher(parentElement.html()).replaceAll("");
                    parentText = Utility.TransferHTML2Text(parentText);
                    // No letter, digit or CJK character is left: drop the whole parent.
                    if (Utility.patWordAndNum.matcher(parentText).find() == false) {
                        parentElement.remove();
                    }
                }
                // Walk upwards and remove every ancestor whose text is empty.
                // NOTE(review): there is no null check on parent(); once the walk goes past the
                // top of the tree the resulting exception ends the loop via the catch below.
                while (parentElement.text().trim().isEmpty()) {
                    Element tempElement = parentElement;
                    parentElement = parentElement.parent();
                    tempElement.remove();
                }
            } catch (Exception e) {
                // Best effort: any failure on one link is ignored and the next link is processed.
                continue;
            }
        }


        return doc.outerHtml();

    }


    /**
     * Tells whether the text matches any of the date patterns {@code patDate0} to
     * {@code patDate8}, without checking that the match can actually be parsed as a date.
     *
     * @param content text to search
     * @return {@code true} if at least one pattern is found; {@code false} if none is found or
     *         an exception occurs (its stack trace is printed)
     */
    public static boolean ContainDateInfo_BAK(String content) {
        try {
            Pattern[] datePatterns = {
                    patDate0, patDate1, patDate2, patDate3, patDate4,
                    patDate5, patDate6, patDate7, patDate8
            };
            // Patterns are tried in index order; the first hit decides.
            for (Pattern datePattern : datePatterns) {
                if (datePattern.matcher(content).find()) {
                    return true;
                }
            }
            return false;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Tells whether the text contains a date that matches one of the patterns {@code patDate0}
     * to {@code patDate8} and that {@code Utility.transDate(String, int)} can convert.
     *
     * @param content text to search
     * @return {@code true} if the first match of some pattern converts to a non-null date;
     *         {@code false} otherwise or when an exception occurs (its stack trace is printed)
     */
    public static boolean ContainDateInfo(String content) {
        try {
            Pattern[] datePatterns = {
                    patDate0, patDate1, patDate2, patDate3, patDate4,
                    patDate5, patDate6, patDate7, patDate8
            };
            // The array index doubles as the "type" argument expected by transDate.
            for (int type = 0; type < datePatterns.length; type++) {
                Matcher candidate = datePatterns[type].matcher(content);
                if (candidate.find() && Utility.transDate(candidate.group(), type) != null) {
                    return true;
                }
            }
            return false;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }


    /**
     * Returns the matcher of the first date pattern ({@code patDate0} to {@code patDate8}, in
     * that order) that is found in the text, without checking that the match can be parsed.
     *
     * @param content text to search
     * @return the matcher positioned on its first match, or {@code null} if no pattern is found
     *         or an exception occurs (its stack trace is printed)
     */
    public static Matcher ContainedDateInfo_BAK(String content) {
        try {
            Pattern[] datePatterns = {
                    patDate0, patDate1, patDate2, patDate3, patDate4,
                    patDate5, patDate6, patDate7, patDate8
            };
            for (Pattern datePattern : datePatterns) {
                Matcher candidate = datePattern.matcher(content);
                if (candidate.find()) {
                    return candidate;
                }
            }
            return null;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }


    /**
     * Returns the matcher of the first date pattern ({@code patDate0} to {@code patDate8}, in
     * that order) whose first match {@code Utility.transDate(String, int)} can convert.
     *
     * @param content text to search
     * @return the matcher positioned on its first match, or {@code null} if no pattern yields a
     *         convertible date or an exception occurs (its stack trace is printed)
     */
    public static Matcher ContainedDateInfo(String content) {
        try {
            Pattern[] datePatterns = {
                    patDate0, patDate1, patDate2, patDate3, patDate4,
                    patDate5, patDate6, patDate7, patDate8
            };
            // The array index doubles as the "type" argument expected by transDate.
            for (int type = 0; type < datePatterns.length; type++) {
                Matcher candidate = datePatterns[type].matcher(content);
                if (candidate.find() && Utility.transDate(candidate.group(), type) != null) {
                    return candidate;
                }
            }
            return null;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Parses a date string with the formatter(s) that belong to the given pattern type.
     *
     * <p>The type selects among the {@code formatterN} fields declared elsewhere in this class;
     * the callers in this class pass the index N of the {@code patDateN} pattern that produced
     * {@code source}. Dates before the threshold (lazily parsed from {@code "1970-01-01"} with
     * {@code formatter0}) are rejected. Types 0, 3 and 5 have fallback formatters; for type 5 each
     * fallback is now tried even when the previous formatter fails to parse, whereas a parse
     * failure of {@code formatter5_2} or {@code formatter5_3} used to abort the whole method.
     *
     * <p>NOTE(review): the formatters appear to be shared static instances; if they are
     * {@code SimpleDateFormat} objects this method is not safe for concurrent use - confirm.
     *
     * @param source date text to parse
     * @param type   pattern index 0-8; type 1 has no active formatter and always yields
     *               {@code null}, as does any other value
     * @return the parsed date, or {@code null} if it cannot be parsed or lies before the threshold
     */
    public static Date transDate(String source, int type) {
        try {

            if (thresholdDate == null) {
                thresholdDate = formatter0.parse("1970-01-01");
            }
            Date date = null;
            switch (type) {
                case 0:
                    date = formatter0.parse(source);
                    // A result before the threshold gets a second chance with formatter0_1;
                    // the common check after the switch rejects it if it is still too old.
                    if (date.before(thresholdDate)) {
                        date = formatter0_1.parse(source);
                    }
                    break;
                case 1:
                    // Parsing for this type is disabled, so the result stays null.
                    break;
                case 2:
                    date = formatter2.parse(source);
                    break;
                case 3:
                    try {
                        date = formatter3_1.parse(source);
                    } catch (Exception ignored) {
                        date = null;
                    }
                    if (date == null) {
                        date = formatter3_2.parse(source);
                    }
                    break;
                case 4:
                    date = formatter4.parse(source);
                    break;
                case 5:
                    // Try the four formatters in order until one yields a date that is not
                    // before the threshold; a parse failure just moves on to the next one.
                    try {
                        date = formatter5_1.parse(source);
                    } catch (Exception ignored) {
                        date = null;
                    }
                    if ((date == null) || (date.before(thresholdDate))) {
                        try {
                            date = formatter5_2.parse(source);
                        } catch (Exception ignored) {
                            date = null;
                        }
                    }
                    if ((date == null) || (date.before(thresholdDate))) {
                        try {
                            date = formatter5_3.parse(source);
                        } catch (Exception ignored) {
                            date = null;
                        }
                    }
                    if ((date == null) || (date.before(thresholdDate))) {
                        try {
                            date = formatter5_4.parse(source);
                        } catch (Exception ignored) {
                            date = null;
                        }
                    }
                    break;
                case 6:
                    date = formatter6.parse(source);
                    break;
                case 7:
                    date = formatter7.parse(source);
                    break;
                case 8:
                    date = formatter8.parse(source);
                    break;
                default:
                    break;
            }

            if ((date != null) && (date.before(thresholdDate))) {
                return null;
            }

            return date;
        } catch (Exception e) {
            // Unparseable input is an expected outcome here and is reported as null.
            return null;
        }
    }


    /**
     * Finds the first date in the text and converts it.
     *
     * <p>The patterns {@code patDate0} to {@code patDate8} are tried in that order; for each one
     * only its first match is handed to {@code Utility.transDate(String, int)}.
     *
     * @param content text to search
     * @return the first successfully converted date, or {@code null} if there is none or an
     *         exception occurs (its stack trace is printed)
     */
    public static Date transDate(String content) {
        try {
            Pattern[] datePatterns = {
                    patDate0, patDate1, patDate2, patDate3, patDate4,
                    patDate5, patDate6, patDate7, patDate8
            };
            // The array index doubles as the "type" argument of the two-argument overload.
            for (int type = 0; type < datePatterns.length; type++) {
                Matcher candidate = datePatterns[type].matcher(content);
                if (!candidate.find()) {
                    continue;
                }
                Date parsed = Utility.transDate(candidate.group(), type);
                if (parsed != null) {
                    return parsed;
                }
            }
            return null;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Finds the first date in the text and renders it with {@code formatter0}.
     *
     * @param content text to search
     * @return the formatted date, or {@code null} if no date is found or any step fails
     */
    public static String transStandardDate(String content) {
        try {
            Date parsed = transDate(content);
            return parsed == null ? null : formatter0.format(parsed);
        } catch (Exception e) {
            return null;
        }
    }

    /**
     * Intended to collect the image paths found in the given content.
     * Author: Liu Xiaopeng. Created: 2015-11-13 17:27:27.
     *
     * <p>NOTE(review): the implementation is unfinished. The {@code src} value of every
     * {@code <img>} tag is located, but nothing is ever added to the result and {@code uri} is
     * not used, so the returned list is always empty.
     *
     * @param text HTML content to scan; must not be {@code null}
     * @param uri  address of the page the content comes from (currently unused)
     * @return a new, empty, mutable list
     * @version 1.0
     */
    public static List<String> getContentImgPath(String text, String uri) {
        List<String> imgPaths = new ArrayList<String>();
        Matcher imgTag = Pattern
                .compile("(<img.+?src=)(\"|')(.+?)(\"|')(.*?/?>)", Pattern.CASE_INSENSITIVE)
                .matcher(text);
        while (imgTag.find()) {
            // Group 3 is the quoted src value.
            String rawPath = imgTag.group(3);
            boolean plainHttp = rawPath.startsWith("http://") && !rawPath.startsWith("https://");
            if (plainHttp) {
                // Left empty on purpose: the handling of the path was never implemented.
            }
        }
        return imgPaths;
    }


    /**
     * Round-trips a string through the source charset and then through the target charset.
     * Author: Liu Xiaopeng. Created: 2015-10-28 09:35:01.
     *
     * <p>Because the text is encoded and decoded with the same charset at each stage, the result
     * equals the input except for characters that one of the charsets cannot represent; those
     * come back as that charset's replacement (for example {@code '?'}).
     *
     * @param content       text to convert; must not be {@code null}
     * @param sourceCharset name of the first charset
     * @param targetCharset name of the second charset
     * @return the text after both round trips
     * @throws UnsupportedEncodingException if either charset name is not supported
     * @version 1.0
     */
    public static String convertCharset(String content, String sourceCharset, String targetCharset) throws UnsupportedEncodingException {
        byte[] sourceBytes = content.getBytes(sourceCharset);
        String decodedFromSource = new String(sourceBytes, sourceCharset);
        byte[] targetBytes = decodedFromSource.getBytes(targetCharset);
        return new String(targetBytes, targetCharset);
    }

    /**
     * Returns the document-type suffix of an address, if it has one.
     * Author: Yang Hailong. Created: 2015-07-10 10:14:52.
     *
     * <p>The suffix is everything from the last {@code '.'} to the end of the string, compared
     * case-insensitively against the supported office/PDF extensions.
     *
     * @param sourceaddress address or file name to inspect; must not be {@code null}
     * @return the lower-cased suffix including the dot ({@code .pdf}, {@code .doc},
     *         {@code .docx}, {@code .ppt}, {@code .pptx}, {@code .xls} or {@code .xlsx}), or
     *         {@code null} when there is no dot or the suffix is not one of these
     * @version 1.0
     */
    public static String getFileSuffix(String sourceaddress) {
        int lastDot = sourceaddress.lastIndexOf(".");
        if (lastDot < 0) {
            return null;
        }
        String lowered = sourceaddress.substring(lastDot).toLowerCase();
        switch (lowered) {
            case ".pdf":
            case ".doc":
            case ".docx":
            case ".ppt":
            case ".pptx":
            case ".xls":
            case ".xlsx":
                return lowered;
            default:
                return null;
        }
    }


    /**
     * Normalises a URI by trimming it and dropping its trailing slashes.
     * Author: Liu Xiaopeng. Created: 2015-08-20 15:26:00.
     *
     * @param uri URI to normalise; must not be {@code null}
     * @return the trimmed URI without any {@code '/'} characters at its end
     * @version 1.0
     */
    public static String formatURI(String uri) {
        return uri.trim().replaceAll("/+$", "");
    }


    /**
     * Gives every image of the content a fixed inline style and flattens the markup with
     * {@code htmlEscape}.
     *
     * <p>The content is parsed with Jsoup. Images whose {@code src} is usable (according to
     * {@code isNotEmpty}) get their {@code style} attribute overwritten; the others are removed.
     * The serialised document is passed through {@code htmlEscape}, after which every
     * {@code </p>} and the leading html/head/body boilerplate literal are stripped.
     *
     * @param contentNoTag content to process
     * @return the processed markup
     */
    public static String dealImg(String contentNoTag) {
        Document parsed = Jsoup.parse(contentNoTag);
        for (Element img : parsed.select("img")) {
            if (!isNotEmpty(img.attr("src"))) {
                // An image without a source cannot be displayed, so it is dropped.
                img.remove();
                continue;
            }
            // Fixed width, horizontally offset via margins.
            img.attr("style", "width: 50%;margin-left:23%;margin-right:27%;");
        }

        String flattened = htmlEscape(parsed.outerHtml());
        String withoutClosingParagraphs = flattened.replace("</p>","");
        return withoutClosingParagraphs.replaceAll("<html>\\n <head></head>\\n <body>   \\n  ","");
    }
    /**
     * Post-processes content that still carries tags: line breaks are turned into section
     * boundaries, and each section is trimmed, stripped of leading blanks and appended to the
     * result.
     *
     * <p>CRLF and LF are replaced by {@code <br/>}, consecutive {@code <br/>} markers (optionally
     * separated by whitespace) are merged, and the text is split at the remaining markers. Blank
     * sections are skipped; the other sections are trimmed, lose their leading {@code &nbsp;}
     * entities and space-like characters and are concatenated without any separator.
     *
     * @param content markup to process; must not be {@code null}
     * @return the concatenated, cleaned sections
     */
    public static String htmlEscape(String content){
        content = content.replace("\r\n", "<br/>").replace("\n", "<br/>");

        // Merge runs of <br/> until nothing changes. The previous loop condition tested the
        // whitespace-stripped text, which also matched input such as "<br /><br/>" that the
        // replacement below cannot change - the loop then never terminated.
        String merged = content.replaceAll("<br/>\\s*<br/>", "<br/>");
        while (!merged.equals(content)) {
            content = merged;
            merged = content.replaceAll("<br/>\\s*<br/>", "<br/>");
        }

        // Stay compatible with data that was already processed by the old logic.
        content = content.replaceAll("</p ><p","</p ><br/><p").replace("  ","");

        StringBuilder convertContent = new StringBuilder(content.length());
        for (String section : content.split("<br/>")) {
            if (isBlankHtmlSection(section)) {
                continue;
            }
            section = section.trim();

            // Remove leading &nbsp; entities and space-like characters (nbsp, ensp, emsp ...).
            while (section.startsWith("&nbsp;") || section.startsWith(" ") || section.startsWith(" ")
                    || section.startsWith("　") || section.startsWith(" ")) {
                if (section.startsWith("&nbsp;")) {
                    section = section.replaceFirst("&nbsp;", "");
                } else {
                    section = section.substring(1).trim();
                }
            }
            convertContent.append(section);
        }
        return convertContent.toString();
    }

    /** Tells whether a section is empty or consists of whitespace characters only. */
    private static boolean isBlankHtmlSection(String section) {
        for (int i = 0; i < section.length(); i++) {
            if (!Character.isWhitespace(section.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether an object carries a usable value.
     *
     * @param object value to test; may be {@code null}
     * @return {@code false} for {@code null} and for objects equal to the empty string or to the
     *         string {@code "null"}; {@code true} otherwise
     */
    public static boolean isNotEmpty(Object object) {
        if (object == null) {
            return false;
        }
        boolean emptyText = object.equals("");
        boolean nullText = object.equals("null");
        return !emptyText && !nullText;
    }

    /**
     * Matches a complete script element, from its opening tag to the nearest closing tag. Tag
     * names are matched case-insensitively and whitespace before the closing {@code '>'} is
     * tolerated, as HTML permits both.
     */
    private static final Pattern SCRIPT_ELEMENT_PATTERN =
            Pattern.compile("<script[^>]*>[\\s\\S]*?</script\\s*>", Pattern.CASE_INSENSITIVE);

    /**
     * Removes every script element, including its content, from the given markup.
     *
     * @param contentWithTag markup to clean; must not be {@code null}
     * @return the markup without script elements
     */
    public static String removeHTMLScriptLabel(String contentWithTag) {
        return SCRIPT_ELEMENT_PATTERN.matcher(contentWithTag).replaceAll("");
    }

}
