package com.zzsn.knowbase.util;

import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HtmlUtil {
    /**
     * Formats HTML without any keyword-based paragraph filtering.
     * <p>
     * Convenience overload that forwards to
     * {@link #formatHtml(String, String, String)} with both keyword arguments
     * set to {@code null}.
     *
     * @param html the HTML fragment or document to format
     * @return the formatted HTML produced by the three-argument overload
     */
    public static String formatHtml(String html) {
        // Both filters are disabled; typed locals keep the overload choice explicit.
        final String noContainsFilter = null;
        final String noEqualsFilter = null;
        return formatHtml(html, noContainsFilter, noEqualsFilter);
    }
    /**
     * Normalizes rich-text HTML into a flat sequence of {@code <p>} paragraphs.
     * The original author notes that the result is meant to stay consistent with
     * the formatting done by the front-end CK rich-text editor, so the tag
     * patterns below are kept exactly as they were.
     * <p>
     * Steps, in order:
     * <ol>
     *   <li>every CR and LF character is dropped;</li>
     *   <li>{@code <figure>} blocks, then {@code <table>} blocks, are swapped for
     *       placeholders so the rewriting cannot touch them; they are written
     *       back verbatim at the end;</li>
     *   <li>{@code div} becomes {@code p}; html/body/head/em/u/li/span tags,
     *       {@code &nbsp;} and ideographic spaces (U+3000) are removed;</li>
     *   <li>every paragraph and line break is reduced to {@code <br />}, the text
     *       is split on it, blank pieces are dropped and keyword filters are
     *       applied;</li>
     *   <li>each remaining piece is wrapped in a styled {@code <p>}: centered when
     *       it starts with {@code "<img "}, first-line indented otherwise;</li>
     *   <li>links and script-related tags are stripped through
     *       {@link #removeLink(String)} and {@link #removeJavascript(String)}.</li>
     * </ol>
     *
     * @param html        the HTML to format; {@code null} yields an empty string
     * @param cleanInKeys JSON array of strings, or {@code null}/empty for no filter.
     *                    Each entry is one rule whose parts are separated by
     *                    {@code +}; a paragraph is dropped when it contains every
     *                    part of at least one rule
     * @param cleanEqKeys JSON array of strings, or {@code null}/empty for no filter.
     *                    A paragraph is dropped when its trimmed text equals one of
     *                    the entries
     * @return the formatted HTML
     */
    public static String formatHtml(String html,String cleanInKeys,String cleanEqKeys) {
        if (html == null) {
            return "";
        }
        // Line breaks are dropped up front, so no later step has to deal with them.
        html = html.replaceAll("\r","").replaceAll("\n","");

        // Protect figure blocks first, then table blocks, from the rewriting below.
        List<String> figureStrList = new ArrayList<>();
        html = stashBlocks(html, "<figure.*?</figure>", "current_figure_wlan", figureStrList);
        List<String> tableStrList = new ArrayList<>();
        html = stashBlocks(html, "<table.*?</table>", "current_table_wlan", tableStrList);

        // NOTE(review): these are prefix patterns, e.g. "<u[^>]*>" also matches
        // "<ul ...>" and "<p[^>]*>" also matches "<pre ...>". They are left
        // unchanged to stay in step with the front-end formatter - confirm before
        // tightening. <strong> is left in place (its removal was commented out).
        html = html.replaceAll("<div", "<p");
        html = html.replaceAll("</div>", "</p>");
        html = html.replaceAll("<html[^>]*>", "");
        html = html.replaceAll("</html>", "");
        html = html.replaceAll("<body[^>]*>", "");
        html = html.replaceAll("</body>", "");
        html = html.replaceAll("<head[^>]*>", "");
        html = html.replaceAll("</head>", "");
        html = html.replaceAll("<em[^>]*>", "");
        html = html.replaceAll("</em>", "");
        html = html.replaceAll("<u[^>]*>", "");
        html = html.replaceAll("</u>", "");
        html = html.replaceAll("<li[^>]*>", "");
        html = html.replaceAll("</li>", "");
        html = html.replaceAll("<span[^>]*>", "");
        html = html.replaceAll("</span>", "");
        html = html.replaceAll("&nbsp;", "");
        // U+3000 is the ideographic (full-width) space.
        html = html.replaceAll("\u3000", "");
        html = html.replaceAll("<p></p>", "");
        html = html.replaceAll("<a", "<a rel=\"nofollow\"");
        // <br data-cke-filler="true"> marks an empty line; drop attributes on br.
        html = html.replaceAll("<br[^>]*>","<br />");
        html = html.replaceAll("</br[^>]*>","<br />");

        // Reduce paragraphs to <br /> separators.
        html = html.replaceAll("<p[^>]*>","");
        html = html.replaceAll("</p>","<br />");
        html = html.replaceAll("<br /><br />","<br />");

        // Split on the separators and rebuild every non-blank piece as a paragraph.
        String[] segments = html.split("<br[^>]*>");
        // Keyword lists are parsed at most once, and only when a paragraph needs them.
        List<String> containsRules = null;
        List<String> equalsRules = null;
        StringBuilder rebuilt = new StringBuilder(html.length() + 64);
        for (String segment : segments) {
            String paragraph = segment.trim();
            if (paragraph.isEmpty()) {
                continue;
            }
            // Drop paragraphs that contain all parts of some "contains" rule.
            if (StringUtils.isNotEmpty(cleanInKeys)) {
                if (containsRules == null) {
                    containsRules = parseKeywordList(cleanInKeys);
                }
                // The untrimmed segment is checked, as the original code did.
                if (matchesAnyKeywordRule(segment, containsRules)) {
                    continue;
                }
            }
            // Drop paragraphs whose trimmed text equals one of the "equals" keys.
            if (StringUtils.isNotEmpty(cleanEqKeys)) {
                if (equalsRules == null) {
                    equalsRules = parseKeywordList(cleanEqKeys);
                }
                if (equalsRules.contains(paragraph)) {
                    continue;
                }
            }
            if (paragraph.startsWith("<img ")) {
                rebuilt.append("<p style=\"text-align:center;\">").append(paragraph).append("</p>");
            } else {
                rebuilt.append("<p style=\"text-indent:2em;\">").append(paragraph).append("</p>");
            }
        }
        html = rebuilt.toString();

        // Strip all links.
        html = removeLink(html);
        // Strip script-related markup.
        html = removeJavascript(html);
        // Remove paragraphs left empty by the stripping above.
        html = html.replaceAll("<p>\u3000\u3000</p>","");
        html = html.replaceAll("<p></p>","");
        html = html.replaceAll("<p style=\"text-indent:2em;\">\u3000\u3000</p>","");
        html = html.replaceAll("<p style=\"text-indent:2em;\"></p>","");
        html = html.replaceAll("<p style=\"text-indent:2em;\"><title></title></p>","");

        // Write tables back before figures: a table may contain a figure placeholder.
        html = restoreBlocks(html, "current_table_wlan", tableStrList);
        html = restoreBlocks(html, "current_figure_wlan", figureStrList);
        return html;
    }

    /** Collects every match of blockRegex into sink and replaces it with marker + index + marker. */
    private static String stashBlocks(String html, String blockRegex, String marker, List<String> sink) {
        Matcher matcher = Pattern.compile(blockRegex).matcher(html);
        while (matcher.find()) {
            sink.add(matcher.group());
        }
        for (int i = 0; i < sink.size(); i++) {
            // Literal replace: the stashed text must not be read as a regex.
            html = html.replace(sink.get(i), marker + i + marker);
        }
        return html;
    }

    /** Puts stashed blocks back, unwrapping a placeholder that ended up alone in an indented paragraph. */
    private static String restoreBlocks(String html, String marker, List<String> blocks) {
        for (int i = 0; i < blocks.size(); i++) {
            String placeholder = marker + i + marker;
            html = html.replace("<p style=\"text-indent:2em;\">" + placeholder + "</p>", blocks.get(i));
            html = html.replace(placeholder, blocks.get(i));
        }
        return html;
    }

    /** Parses a JSON array of strings; a JSON null becomes an empty list. */
    private static List<String> parseKeywordList(String json) {
        List<String> parsed = JSON.parseArray(json, String.class);
        return parsed == null ? Collections.<String>emptyList() : parsed;
    }

    /** Returns true when the text contains every '+'-separated part of at least one rule. */
    private static boolean matchesAnyKeywordRule(String text, List<String> rules) {
        for (String rule : rules) {
            if (rule == null) {
                continue;
            }
            String[] parts = rule.split("\\+");
            // A rule that splits into no parts (e.g. "+") never matches.
            boolean allPresent = parts.length > 0;
            for (String part : parts) {
                if (!text.contains(part)) {
                    allPresent = false;
                    break;
                }
            }
            if (allPresent) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes anchor tags from the given HTML while keeping the text between
     * them. Matching is case-insensitive, so {@code <A HREF=...>} is removed
     * just like {@code <a href=...>}.
     * <p>
     * NOTE(review): the patterns are prefix matches, so any tag whose name
     * starts with "a" (for example {@code <article>} or {@code <abbr>}) is
     * removed as well, opening and closing alike. This is kept as it was
     * because {@code formatHtml} rewrites every {@code "<a"} prefix before
     * calling this method; narrowing only one side would leave unbalanced tags.
     *
     * @param html the HTML to process; may be {@code null}
     * @return the HTML without anchor tags, or the argument itself when it is
     *         {@code null} or empty
     */
    public static String removeLink(String html) {
        if (html == null || html.isEmpty()) {
            return html;
        }
        html = html.replaceAll("(?i)<a[^>]*>","");
        html = html.replaceAll("(?i)</a[^>]*>","");
        return html;
    }

    /**
     * Removes script-related markup from the given HTML.
     * <ul>
     *   <li>{@code <script>...</script>} elements are removed together with
     *       their content, so script source no longer leaks into the text;
     *       any unpaired script tag is removed on its own;</li>
     *   <li>{@code <meta>}, {@code <iframe>}, {@code </iframe>} and
     *       {@code <frame>} tags are removed (iframe content is kept);</li>
     *   <li>the {@code javascript:} scheme is defused by replacing its colon
     *       with a full-width colon (U+FF1A).</li>
     * </ul>
     * All matching is case-insensitive, so {@code <SCRIPT>} and
     * {@code JavaScript:} are handled too.
     * <p>
     * NOTE(review): inline event-handler attributes such as {@code onclick}
     * are not touched by this method.
     *
     * @param html the HTML to process; may be {@code null}
     * @return the cleaned HTML, or the argument itself when it is {@code null}
     *         or empty
     */
    public static String removeJavascript(String html) {
        if (html == null || html.isEmpty()) {
            return html;
        }
        // (?s) lets '.' span line breaks so multi-line scripts are removed whole.
        html = html.replaceAll("(?is)<script[^>]*>.*?</script[^>]*>","");
        // Leftover unpaired script tags.
        html = html.replaceAll("(?i)<script[^>]*>","");
        html = html.replaceAll("(?i)</script[^>]*>","");
        html = html.replaceAll("(?i)<meta[^>]*>","");
        html = html.replaceAll("(?i)<iframe[^>]*>","");
        html = html.replaceAll("(?i)</iframe[^>]*>","");
        html = html.replaceAll("(?i)<frame[^>]*>","");
        // Keep the original letter case of the scheme; only the colon changes.
        html = html.replaceAll("(?i)(javascript):","$1\uFF1A");
        return html;
    }
    /**
     * Removes every {@code <table>...</table>} block, including its content.
     * <p>
     * The pattern is case-insensitive and lets {@code .} match line breaks, so
     * tables that span several lines are removed as well.
     * <p>
     * NOTE(review): the match is reluctant and stops at the first
     * {@code </table>}, so for nested tables the tail of the outer table is
     * left behind.
     *
     * @param html the HTML to process; may be {@code null}
     * @return the HTML without tables, or the argument itself when it is
     *         {@code null} or empty
     */
    public static String removeTabel(String html) {
        if (html == null || html.isEmpty()) {
            return html;
        }
        html = html.replaceAll("(?is)<table.*?</table>","");
        return html;
    }
    /**
     * Splits HTML content into plain-text chunks.
     * <p>
     * The HTML is first passed through {@link #formatHtml(String)}, tables are
     * removed, the remaining markup is reduced to {@code <br />} separators and
     * each piece is converted to text with
     * {@code ContentUtility.TransferHTML2Text}. Pieces are accumulated and
     * emitted as follows:
     * <ul>
     *   <li>accumulated text of 30 characters or fewer is held back and merged
     *       with the next piece;</li>
     *   <li>accumulated text of 31 to 500 characters is emitted as one chunk;</li>
     *   <li>accumulated text longer than 500 characters is cut at the
     *       ideographic full stop and emitted in chunks that are closed once
     *       they exceed 150 characters.</li>
     * </ul>
     * A full stop is re-attached only where the text actually had one, so the
     * final fragment of a long text no longer gains punctuation that was not
     * in the source.
     *
     * @param html the HTML to split; {@code null} yields an empty list
     * @return the text chunks in document order
     */
    public static List<String> splitContents(String html) {
        List<String> list = new ArrayList<>();
        if (html == null) {
            return list;
        }
        html = formatHtml(html);
        // Remove line breaks.
        html = html.replaceAll("\n|\r","");
        // Remove tables.
        html = html.replaceAll("<table.*?</table>","");
        // Normalize the remaining markup.
        html = html.replaceAll("</div>", "</p>");
        html = html.replaceAll("<div", "<p");
        html = html.replaceAll("<figure[^>]*>", "");
        html = html.replaceAll("</figure>", "");
        html = html.replaceAll("<strong[^>]*>", "");
        html = html.replaceAll("</strong>", "");
        html = html.replaceAll("<html[^>]*>", "");
        html = html.replaceAll("</html>", "");
        html = html.replaceAll("<body[^>]*>", "");
        html = html.replaceAll("</body>", "");
        html = html.replaceAll("<head[^>]*>", "");
        html = html.replaceAll("</head>", "");
        html = html.replaceAll("<em[^>]*>", "");
        html = html.replaceAll("</em>", "");
        html = html.replaceAll("<u[^>]*>", "");
        html = html.replaceAll("</u>", "");
        html = html.replaceAll("<li[^>]*>", "");
        html = html.replaceAll("</li>", "");
        html = html.replaceAll("<span[^>]*>", "");
        html = html.replaceAll("</span>", "");
        html = html.replaceAll("&nbsp;", "");
        // U+3000 is the ideographic (full-width) space.
        html = html.replaceAll("\u3000", "");
        html = html.replaceAll("<p></p>", "");
        html = html.replaceAll("<a", "<a rel=\"nofollow\"");
        // <br data-cke-filler="true"> marks an empty line; drop attributes on br.
        html = html.replaceAll("<br[^>]*>","<br />");
        html = html.replaceAll("</br[^>]*>","<br />");

        // Reduce paragraphs to <br /> separators.
        html = html.replaceAll("<p[^>]*>","");
        html = html.replaceAll("</p>","<br />");
        html = html.replaceAll("<br /><br />","<br />");

        StringBuilder pending = new StringBuilder();
        for (String segment : html.split("<br[^>]*>")) {
            pending.append(ContentUtility.TransferHTML2Text(segment.trim()));
            if (pending.length() > 500) {
                // Too long for one chunk: cut at full stops.
                String longText = pending.toString();
                // split() drops the delimiter; remember whether the text ended with one.
                boolean endsWithStop = longText.endsWith("。");
                String[] sentences = longText.split("。");
                StringBuilder chunk = new StringBuilder();
                for (int k = 0; k < sentences.length; k++) {
                    chunk.append(sentences[k].trim());
                    boolean lastSentence = (k == sentences.length - 1);
                    if (!lastSentence || endsWithStop) {
                        chunk.append("。");
                    }
                    if (chunk.length() > 150) {
                        list.add(chunk.toString());
                        chunk = new StringBuilder();
                    }
                }
                if (chunk.length() > 0) {
                    list.add(chunk.toString());
                }
                pending = new StringBuilder();
            } else if (pending.length() > 30) {
                // Between 31 and 500 characters: emit as is.
                list.add(pending.toString());
                pending = new StringBuilder();
            }
            // 30 characters or fewer: keep accumulating with the next piece.
        }
        if (pending.length() > 0) {
            list.add(pending.toString());
        }
        return list;
    }
    /**
     * Manual smoke test for {@code formatHtml}; prints the formatted result of
     * two hard-coded inputs to standard output.
     *
     * @param args command-line arguments, not used
     */
    public static void main(String[] args) {
        // Case 1: three short paragraphs with both keyword filters supplied
        // (one "contains" list using a '+'-joined rule, one "equals" list).
        System.out.println(formatHtml("<p>标题</p><p>摘要</p><p>正搜索文</p>","[\"正+文\",\"正文\"]","[\"标题\",\"要\"]"));
        // Case 2: a complete multi-line HTML document whose body is a price table
        // nested in two divs, followed by an empty paragraph; no keyword filters.
        System.out.println(formatHtml("<html>\n" +
                " <head>111</head>\n" +
                " <body>   \n" +
                "  <div> \n" +
                "   <div> \n" +
                "    <table>  \n" +
                "     <tbody> \n" +
                "      <tr> \n" +
                "       <td style=\"\" width=\"54\">品名</td> \n" +
                "       <td style=\"\" width=\"54\">规格（mm）</td> \n" +
                "       <td style=\"\" width=\"54\">材质</td> \n" +
                "       <td style=\"\" width=\"54\">钢厂/产地</td> \n" +
                "       <td style=\"\" width=\"54\">价格（元/吨）</td> \n" +
                "       <td style=\"\" width=\"54\">涨跌</td> \n" +
                "       <td style=\"\" width=\"54\">备注</td>  \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td align=\"right\">6</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">5050</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td align=\"right\">8</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4860</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td align=\"right\">10</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4760</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">经销</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td align=\"right\">12</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4290</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td align=\"right\">14</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4250</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>16-20</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4210</td> \n" +
                "       <td>-</td> \n" +
                "       <td><br></td> \n" +
                "       <td style=\"min-width: 30px\">代理</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>酒钢</td> \n" +
                "       <td align=\"right\">4190</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>柳钢</td> \n" +
                "       <td align=\"right\">4190</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>临钢</td> \n" +
                "       <td align=\"right\">4190</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>萍钢</td> \n" +
                "       <td align=\"right\">4190</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>南钢</td> \n" +
                "       <td align=\"right\">4190</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货</td> \n" +
                "       <td style=\"min-width: 30px\">代理</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普中板</td> \n" +
                "       <td>22-30</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4330</td> \n" +
                "       <td>-</td> \n" +
                "       <td><br></td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普厚板</td> \n" +
                "       <td>32-40</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4320</td> \n" +
                "       <td>-</td> \n" +
                "       <td><br></td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普厚板</td> \n" +
                "       <td align=\"right\">30</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>酒钢</td> \n" +
                "       <td align=\"right\">4290</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普厚板</td> \n" +
                "       <td align=\"right\">40</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>酒钢</td> \n" +
                "       <td align=\"right\">4270</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普厚板</td> \n" +
                "       <td align=\"right\">40</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>萍钢</td> \n" +
                "       <td align=\"right\">4290</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普厚板</td> \n" +
                "       <td align=\"right\">50</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4360</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">普厚板</td> \n" +
                "       <td align=\"right\">60</td> \n" +
                "       <td>Q235B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4410</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td align=\"right\">6</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4980</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td align=\"right\">8</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4830</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td align=\"right\">10</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4750</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td align=\"right\">12</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4490</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td align=\"right\">14</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4420</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td>16-20</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4400</td> \n" +
                "       <td>-</td> \n" +
                "       <td><br></td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>酒钢</td> \n" +
                "       <td align=\"right\">4380</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>萍钢</td> \n" +
                "       <td align=\"right\">4380</td> \n" +
                "       <td>-</td> \n" +
                "       <td>货少</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>临钢</td> \n" +
                "       <td align=\"right\">4380</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>柳钢</td> \n" +
                "       <td align=\"right\">4370</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金中板</td> \n" +
                "       <td>14-20</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>南钢</td> \n" +
                "       <td align=\"right\">4370</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金厚板</td> \n" +
                "       <td>22-30</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>重钢</td> \n" +
                "       <td align=\"right\">4450</td> \n" +
                "       <td>-</td> \n" +
                "       <td><br></td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "      <tr> \n" +
                "       <td style=\"\">低合金厚板</td> \n" +
                "       <td align=\"right\">30</td> \n" +
                "       <td>Q355B</td> \n" +
                "       <td>酒钢</td> \n" +
                "       <td align=\"right\">4430</td> \n" +
                "       <td>-</td> \n" +
                "       <td>无货gl</td> \n" +
                "       <td style=\"min-width: 30px\">商家</td> \n" +
                "      </tr> \n" +
                "     </tbody> \n" +
                "    </table> \n" +
                "    <p style=\"text-indent:2rem\"><br></p> \n" +
                "   </div> \n" +
                "  </div>  \n" +
                " </body>\n" +
                "</html>"));
    }

    /**
     * Highlights matching characters in a piece of text; intended for titles
     * and summaries.
     * <p>
     * {@code keys} is treated as a set of individual characters, not as whole
     * words. Existing markup is first stripped from {@code text} with
     * {@code ContentUtility.TransferHTML2Text}; then every character of the
     * result that also occurs in {@code keys} is wrapped in
     * {@code <span style='color: #f73131;'>...</span>}.
     * <p>
     * The text is scanned once by Unicode code point, so characters outside
     * the Basic Multilingual Plane are never cut in half, and no placeholder
     * characters are needed (text that itself contains such symbols is left
     * intact).
     *
     * @param text the original text, possibly containing HTML
     * @param keys the characters to highlight; when {@code null} or empty the
     *             text is returned unchanged and unstripped
     * @return the stripped text with every matching character wrapped in a
     *         highlight span
     */
    public static String gaoLiang(String text,String keys) {
        if(StringUtils.isEmpty(keys)){
            return text;
        }
        // Collect the distinct code points to highlight.
        Set<Integer> highlightPoints = new HashSet<>();
        for (int i = 0; i < keys.length(); ) {
            int codePoint = keys.codePointAt(i);
            highlightPoints.add(codePoint);
            i += Character.charCount(codePoint);
        }
        // Strip existing tags before highlighting.
        text = ContentUtility.TransferHTML2Text(text);
        if (StringUtils.isEmpty(text)) {
            return text;
        }
        StringBuilder highlighted = new StringBuilder(text.length() + 64);
        for (int i = 0; i < text.length(); ) {
            int codePoint = text.codePointAt(i);
            if (highlightPoints.contains(codePoint)) {
                highlighted.append("<span style='color: #f73131;'>")
                        .appendCodePoint(codePoint)
                        .append("</span>");
            } else {
                highlighted.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return highlighted.toString();
    }

}
