package com.zzsn.knowbase.controller;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IORuntimeException;
import cn.hutool.http.HtmlUtil;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.zzsn.knowbase.entity.AiReportScienceFile;
import com.zzsn.knowbase.entity.AiReportScienceFileMaterial;
import com.zzsn.knowbase.service.IAiReportScienceFileMaterialService;
import com.zzsn.knowbase.service.IAiReportScienceFileService;
import com.zzsn.knowbase.vo.AiReportScienceFileVo;
import com.zzsn.knowbase.vo.Result;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.MultiValueMap;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;

import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * @Version 1.0
 * @Author: ZhangJingKun
 * @Date: 2023/12/25 10:33
 * @Content:
 */
@RestController
@Slf4j
@RequestMapping("/knowbase/manage")
public class KnowBaseManageController {

    // Service used by the endpoints below to query and modify science-file records.
    @Resource
    private IAiReportScienceFileService aiReportScienceFileService;

    // Service for the per-paragraph "material" records that belong to a science file.
    @Resource
    private IAiReportScienceFileMaterialService aiReportScienceFileMaterialService;

    // Prefix of the online-preview URL; queryInfoById appends the Base64-encoded file path to it.
    // The query string requests PDF rendering for office files (officePreviewType=pdf).
    // NOTE(review): host and port are hard-coded here — consider moving them to configuration.
    private String kkfileUrl = "http://114.116.43.184:8012/onlinePreview?officePreviewType=pdf&tifPreviewType=jpg&url=";// renders PDF by default
    /**
     * Smoke-test endpoint: logs a line and answers with a fixed greeting.
     *
     * @return the literal string {@code "Hello!"}
     */
    @RequestMapping("hello")
    public String hello() {
        final String greeting = "Hello!";
        log.info("hello");
        return greeting;
    }

    /**
     * Paged list query for science files.
     *
     * @param aiReportScienceFileVo filter criteria bound from the request parameters
     * @param pageNo                page number to fetch (defaults to 1)
     * @param pageSize              page size (defaults to 10)
     * @param column                sort column (defaults to "common")
     * @param order                 sort direction (defaults to "desc")
     * @return the page returned by the service, wrapped in a success result
     */
    @GetMapping(value = "/list")
    public Result<?> queryPageList(AiReportScienceFileVo aiReportScienceFileVo,
                                   @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
                                   @RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize,
                                   @RequestParam(name = "column", defaultValue = "common") String column,
                                   @RequestParam(name = "order", defaultValue = "desc") String order) {
        // All paging and sorting is delegated to the service layer.
        IPage<AiReportScienceFile> page =
                aiReportScienceFileService.queryPageList(aiReportScienceFileVo, pageNo, pageSize, column, order);
        return Result.OK(page);
    }


    /**
     * Exports the content of the matching science files to plain-text files on the local disk.
     *
     * <p>The records are read page by page (100 per page, at most 299 pages), always sorted by
     * {@code id}. The {@code pageNo}, {@code pageSize} and {@code column} request parameters are
     * accepted for signature compatibility but are not used. Iteration stops as soon as a page
     * comes back empty. Records without content, or with a data type that has no export
     * directory, are skipped.
     *
     * @param aiReportScienceFileVo filter criteria bound from the request parameters
     * @param pageNo                ignored
     * @param pageSize              ignored
     * @param column                ignored; the export always sorts by {@code id}
     * @param order                 sort direction passed through to the service
     * @return an empty success result once the export loop has finished
     * @throws Exception if the underlying query fails
     */
    @GetMapping(value = "/listToTxt")
    public Result<?> listToTxt(AiReportScienceFileVo aiReportScienceFileVo,
                               @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
                               @RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize,
                               @RequestParam(name = "column", defaultValue = "common") String column,
                               @RequestParam(name = "order", defaultValue = "desc") String order) throws Exception {
        final String sortColumn = "id";
        for (int page = 1; page < 300; page++) {
            IPage<AiReportScienceFile> pageList =
                    aiReportScienceFileService.queryPageList(aiReportScienceFileVo, page, 100, sortColumn, order);
            List<AiReportScienceFile> records = pageList.getRecords();
            if (records == null || records.isEmpty()) {
                // Nothing on this page, so there is no point in requesting the remaining ones.
                break;
            }
            for (AiReportScienceFile hit : records) {
                if (StringUtils.isEmpty(hit.getContent())) {
                    continue;
                }
                log.info("{}==id==========：{}", page, hit.getId());
                String targetDir = exportDirFor(hit.getDataType());
                if (targetDir != null) {
                    generateTxt(hit.getContent(), hit.getTitle(), targetDir);
                }
            }
        }
        return Result.OK();
    }

    /**
     * Maps a file data type to the directory its text export is written to.
     *
     * @param dataType data type of the record; may be {@code null}
     * @return the export directory, or {@code null} when the type is not exported
     */
    private static String exportDirFor(Integer dataType) {
        if (dataType == null) {
            return null;
        }
        switch (dataType) {
            case 0:
                return "F:\\期刊txt\\";
            case 1:
            case 2:
                return "F:\\学术论文txt\\";
            default:
                return null;
        }
    }

    /**
     * Recommends material content for a question.
     *
     * @param text     the question text; must not be blank
     * @param type     the type passed to the service; must not be blank
     * @param pageNo   page number (defaults to 1)
     * @param pageSize page size (defaults to 5)
     * @return the page of suggestions, or an error result when {@code text} or {@code type} is blank
     */
    @GetMapping(value = "/suggestContent")
    public Result<?> suggestContent(String text, String type,
                                    @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
                                    @RequestParam(name = "pageSize", defaultValue = "5") Integer pageSize) {
        boolean paramsPresent = !StringUtils.isBlank(text) && !StringUtils.isBlank(type);
        if (paramsPresent) {
            IPage<Map<String, Object>> suggestions =
                    aiReportScienceFileService.suggestContent(text, type, pageNo, pageSize);
            return Result.OK(suggestions);
        }
        return Result.error("参数异常");
    }

    /**
     * Looks up the surrounding context of a paragraph.
     *
     * @param textId id of the paragraph; must not be blank
     * @return the context map returned by the service, or an error result when {@code textId}
     *         is blank or the lookup throws (the exception message is returned to the caller)
     */
    @GetMapping(value = "/getContentList")
    public Result<?> getContentList(String textId) {
        if (StringUtils.isBlank(textId)) {
            return Result.error("参数异常");
        }
        try {
            Map<String, Object> contextByTextId = aiReportScienceFileService.getContextByTextId(textId);
            return Result.OK(contextByTextId);
        } catch (Exception e) {
            // Keep the stack trace in the log; the response only carries the message.
            log.error("getContextByTextId failed, textId={}", textId, e);
            return Result.error(e.getMessage());
        }
    }


    /**
     * Fetches a single science file by id and, when it has an OBS file path, fills in its
     * preview URL.
     *
     * <p>The preview URL is {@code kkfileUrl} followed by the Base64 encoding of the file path.
     * The path is encoded as UTF-8 explicitly so the result does not depend on the JVM's default
     * charset.
     *
     * @param id id of the record; must not be blank
     * @return the record (possibly {@code null} inside the result when the id is unknown), or an
     *         error result when {@code id} is blank
     */
    @PostMapping(value = "/queryInfoById")
    public Result<?> queryInfoById(String id) {
        if (StringUtils.isBlank(id)) {
            return Result.error("参数异常!");
        }
        AiReportScienceFile aiReportScienceFile = aiReportScienceFileService.getById(id);
        if (aiReportScienceFile != null && StringUtils.isNotEmpty(aiReportScienceFile.getFilePathObs())) {
            byte[] pathBytes = aiReportScienceFile.getFilePathObs().getBytes(StandardCharsets.UTF_8);
            // NOTE(review): standard Base64 may contain '+', '/' and '='; confirm whether the
            // preview service expects this value to be URL-encoded as well.
            String encodedPath = Base64.getEncoder().encodeToString(pathBytes);
            aiReportScienceFile.setPreviewObs(kkfileUrl + encodedPath);
        }
        return Result.OK(aiReportScienceFile);
    }


    /**
     * Saves the edited fields of a science file by id.
     *
     * <p>If the stored record already has a non-empty {@code auditBy}, the incoming
     * {@code auditBy}/{@code auditTime} values are moved to {@code checkBy}/{@code checkTime}
     * and cleared on the incoming object before the update is issued.
     *
     * @param aiReportScienceFile the record to update; its id must not be blank
     * @return the submitted object wrapped in a success result, or an error result when the id
     *         is blank or no record with that id exists
     */
    @PostMapping(value = "/saveContent")
    public Result<?> saveContent(@RequestBody AiReportScienceFile aiReportScienceFile) {
        if (StringUtils.isBlank(aiReportScienceFile.getId())) {
            return Result.error("参数异常!");
        }

        AiReportScienceFile scienceFileFromDb = aiReportScienceFileService.getById(aiReportScienceFile.getId());
        if (scienceFileFromDb == null) {
            // Without this guard an unknown id ended in a NullPointerException below.
            return Result.error("数据不存在");
        }
        if (StringUtils.isNotEmpty(scienceFileFromDb.getAuditBy())) {
            aiReportScienceFile.setCheckBy(aiReportScienceFile.getAuditBy());
            aiReportScienceFile.setCheckTime(aiReportScienceFile.getAuditTime());
            aiReportScienceFile.setAuditBy(null);
            aiReportScienceFile.setAuditTime(null);
        }
        aiReportScienceFileService.updateById(aiReportScienceFile);

        return Result.OK(aiReportScienceFile);
    }

    /**
     * Audits a science file: marks it with status 2 and the current audit time, saves it, then
     * removes its previously derived data and splits its stored content into materials again.
     *
     * @param request             the HTTP request (not used by the visible code)
     * @param aiReportScienceFile the record being audited; its id must not be blank
     * @return a success result, an error result when the id is blank, or an error result with
     *         the message "拆分失败!" when anything in the delete/split phase throws
     */
    @PostMapping(value = "/auditScience")
    public Result<?> auditScience(HttpServletRequest request, @RequestBody AiReportScienceFile aiReportScienceFile) {
        if (StringUtils.isBlank(aiReportScienceFile.getId())) {
            return Result.error("参数异常!");
        }
        // If content was submitted, it is saved first and only then split into the vector store.
        aiReportScienceFile.setStatus(2);
        //TODO editor information
//        LoginUser user = (LoginUser) SecurityUtils.getSubject().getPrincipal();
//        aiReportScienceFile.setAuditBy(user.getRealname());
        aiReportScienceFile.setAuditTime(new Date());
        // The Result returned by saveContent is not inspected here.
        this.saveContent(aiReportScienceFile);

        try {
            // Re-read the record so the split below works on the stored content.
            AiReportScienceFile scienceFile = aiReportScienceFileService.getById(aiReportScienceFile.getId());
            aiReportScienceFileService.deleteDataByFileId(aiReportScienceFile.getId());
            try {
                // Collect the ids of the paragraphs to delete so they can be pushed to Kafka.
                // The push itself is currently disabled, so `collect` is computed but unused.
                QueryWrapper<AiReportScienceFileMaterial> queryWrapper = new QueryWrapper<>();
                queryWrapper.lambda().eq(AiReportScienceFileMaterial::getFileId, scienceFile.getId())
                        .select(AiReportScienceFileMaterial::getId);
                List<AiReportScienceFileMaterial> list = aiReportScienceFileMaterialService.list(queryWrapper);
                if (CollectionUtil.isNotEmpty(list)) {
                    List<String> collect = list.stream().map(AiReportScienceFileMaterial::getId).collect(Collectors.toList());
                    //ReportUtil.sendKafka(collect);  //TODO send to Kafka
                }
            } catch (Exception e) {
                // Best effort: a failure here is logged and does not abort the split below.
                log.error("删除数据入向量库失败",e);
            }
            aiReportScienceFileMaterialService.splitScienceFileMaterial(scienceFile.getContent(), scienceFile.getId());
        } catch (Exception e) {
            log.error("拆分失败!", e);
            return Result.error("拆分失败!");
        }
        return Result.OK();
    }


/*
    @Resource
    private StreamBridge streamBridge;
    @GetMapping(value = "/sendFileMaterial")
    public void sendFileMaterial() {
        QueryWrapper query = new QueryWrapper();
        query.eq("deleted", "0");
        query.eq("status","2");
        query.gt("audit_time","2023-12-19 11:00:00");
        query.select("id","origin","data_type");
        query.orderByAsc("id");
        List<AiReportScienceFile> listAiReportScienceFile = aiReportScienceFileService.list(query);

        int i=0;
        int j=0;
        for(AiReportScienceFile obj:listAiReportScienceFile){
            i++;
            System.out.println("i====:"+i+"=====id============="+obj.getId());
            QueryWrapper query1 = new QueryWrapper();
            query1.eq("file_id", obj.getId());
            query1.eq("deleted", "0");
            query1.ne("content_type","img");
            query1.ne("content_type","table");
            List<AiReportScienceFileMaterial> listAiReportScienceFileMaterial = aiReportScienceFileMaterialService.list(query1);
            //素材kafka推送

            streamBridge = GetBeanUtil.getApplicationContext().getBean(StreamBridge.class);
            log.debug("期刊论文素材推送kafka开始======");
            int k=0;
            int a=0;
            for(AiReportScienceFileMaterial obj1:listAiReportScienceFileMaterial){
                a++;
                if(StringUtils.isNotEmpty(obj1.getContent()) && obj1.getContent().contains("<p") && obj1.getContent().contains("</p>")
                        && countChineseCharacters(Utility.TransferHTML2Text(Utility.RemoveUselessHTMLTag(obj1.getContent())))<36){
                    System.out.println("过滤======a============:"+a);
                    continue;
                }
                //推送kafka
                JSONObject jo = new JSONObject();
                jo.put("origin", obj.getOrigin());
                jo.put("fileId", obj.getId());
                jo.put("status", "2");
                jo.put("fileType", getFileType(obj.getDataType()));
                jo.put("textId", obj1.getId());
                jo.put("text", Utility.TransferHTML2Text(Utility.RemoveUselessHTMLTag(obj1.getContent())));
                jo.put("textType", getTextType(obj1.getContentType()));
                j++;
                k++;
                System.out.println("推送====j==================:"+j+"====a============:"+a+"====k============:"+k);
                streamBridge.send("science_file_1219", jo);
                log.debug("推送成功，段落id："+obj1.getId());
            }
//            ReportUtil.sendKafka2(listAiReportScienceFileMaterial, obj.getId(), obj.getDataType(),obj.getOrigin(),"2");
        }
    }
    */


    /**
     * Translates a content-type tag into its display label.
     *
     * <p>{@code "p"} maps to "内容", any value containing the letter {@code h} to "标题",
     * {@code "img"} to "图片" and {@code "table"} to "表格". Everything else, including
     * {@code null}, falls back to "内容".
     *
     * @param text the content-type tag, e.g. {@code p}, {@code h1}, {@code img}; may be {@code null}
     * @return the display label, never {@code null}
     */
    private static String getTextType(String text){
        if (text == null || "p".equals(text)) {
            return "内容";
        }
        if (text.contains("h")) {
            return "标题";
        }
        if ("img".equals(text)) {
            return "图片";
        }
        if ("table".equals(text)) {
            return "表格";
        }
        return "内容";
    }
    /**
     * Translates a numeric file data type into its display label.
     *
     * <p>0 → "期刊", 1 → "博士论文", 2 → "硕士论文", 3 → "图书", 4 → "研报". Any other value,
     * including {@code null}, falls back to "期刊".
     *
     * @param type the data-type code; may be {@code null}
     * @return the display label, never {@code null}
     */
    private static String getFileType(Integer type){
        if (type == null) {
            // Previously a null type failed while unboxing; treat it like any unknown code.
            return "期刊";
        }
        switch (type) {
            case 1:
                return "博士论文";
            case 2:
                return "硕士论文";
            case 3:
                return "图书";
            case 4:
                return "研报";
            default:
                return "期刊";
        }
    }
    /**
     * Counts the characters of a string that lie in the range U+4E00..U+9FA5.
     *
     * @param str the string to scan; must not be {@code null}
     * @return the number of characters inside that range
     */
    public static int countChineseCharacters(String str) {
        long matches = str.chars()
                .filter(ch -> ch >= 0x4e00 && ch <= 0x9fa5)
                .count();
        return (int) matches;
    }
    /**
     * Soft-deletes a science file by id: the record's deleted flag is set to "1" and the data
     * derived from the file is removed through the service.
     *
     * @param id id of the record; must not be blank
     * @return a success result, or an error result when the id is blank or unknown
     */
    @GetMapping(value = "/deleteById")
    public Result<?> delete(@RequestParam(name = "id") String id) {
        if (StringUtils.isBlank(id)) {
            return Result.error("id不能为空");
        }
        AiReportScienceFile target = aiReportScienceFileService.getById(id);
        if (target != null) {
            target.setDeleted("1");
            // Derived data is removed first, then the flagged record is persisted.
            aiReportScienceFileService.deleteDataByFileId(id);
            aiReportScienceFileService.updateById(target);
            return Result.OK("删除成功!");
        }
        return Result.error("数据不存在");
    }

    /**
     * Returns the list of audit persons provided by the service.
     *
     * @return the list of names wrapped in a success result
     */
    @GetMapping(value = "/getAuditPersonList")
    public Result<?> getAuditPersonList() {
        List<String> auditPersons = aiReportScienceFileService.getAuditPersonList();
        return Result.OK(auditPersons);
    }

    /**
     * Uploads journals, theses or books.
     *
     * <p>The upload itself is delegated to the service. When the service throws, the failure is
     * logged and reported as an error result; previously it was returned as a success result
     * carrying a failure message, which callers could not distinguish from success.
     *
     * @param request             the HTTP request, passed through to the service
     * @param aiReportScienceFile metadata bound from the request parameters
     * @return a success result, or an error result when the upload throws
     */
    @PostMapping(value = "/uploadScience")
    public Result<?> uploadScience(HttpServletRequest request, AiReportScienceFile aiReportScienceFile) {
        try {
            // NOTE(review): the Boolean returned by uploadAiMaterial is not inspected — confirm
            // whether a false value should also be reported as a failure.
            aiReportScienceFileService.uploadAiMaterial(aiReportScienceFile, request);
            return Result.OK("操作成功");
        } catch (Exception e) {
            log.error("uploadScience failed", e);
            return Result.error("操作失败");
        }
    }



    /**
     *
     * @param
     * @return
     */
    /*
    @PostMapping(value = "/uploadScienceTuShu")
    public Result<?> uploadScienceTuShu(HttpServletRequest request, AiReportScienceFile aiReportScienceFile) {
        try {
            try {
                int i = 0;
                MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
                MultiValueMap<String, MultipartFile> files = multipartRequest.getMultiFileMap();
                LinkedList<MultipartFile> aa = (LinkedList<MultipartFile>) files.get("file");
                for (MultipartFile file : aa) {
                    i++;
                    long time1 = new Date().getTime();
                    String fileName = file.getOriginalFilename();//获取文件名
                    String prefix = fileName.substring(fileName.lastIndexOf(".") + 1);

                    AiReportScienceFile reportScienceFile = new AiReportScienceFile();
                    if (fileName.contains("_")) {
                        reportScienceFile.setTitle(fileName.substring(0, fileName.lastIndexOf("_")));
                    } else if (fileName.contains(".")) {
                        reportScienceFile.setTitle(fileName.substring(0, fileName.indexOf(".")));
                    }
                    if (!"docx".equals(prefix) && !"doc".equals(prefix) && !"pdf".equals(prefix)) {
                        throw new Exception("目前支持doc、docx、txt、pdf格式");
                    }
                    if (file.getOriginalFilename().endsWith("docx") || file.getOriginalFilename().endsWith("doc")) {
                        //文件上传
                        System.out.println(i + "：标题：" + reportScienceFile.getTitle() + "-开始拆分");
                        //获取目录
                        InputStream fileInputStream = file.getInputStream();
                        XWPFDocument document = new XWPFDocument(fileInputStream);

                        String content = ReportUtil.getwordHtmlOnlyText(document);

                        List<String> contentList = ReportUtil.beautifyContent2(content);//lxp
                        reportScienceFile.setContent(String.join("", contentList));

                        System.out.println(i + "：标题：" + reportScienceFile.getTitle() + "-拆分完成");
                        long time2 = new Date().getTime();
                        System.out.println("拆分耗时："+(time2 - time1));
                        if(StringUtils.isNotEmpty(reportScienceFile.getContent())){
                            // 处理搜索结果...
                            String title1 = reportScienceFile.getTitle();
                            String content1 = reportScienceFile.getContent();
                            generateTxt(content1,title1,"D:\\图书txt\\");
                            long time3 = new Date().getTime();
                            System.out.println("txt耗时："+(time3 - time2));
                        }

//                        byteArrayOutputStream.close();
                        document.close();
                        fileInputStream.close();
//                        inputStream.close();
                    } else if (file.getOriginalFilename().endsWith("pdf")) {

                    } else {
                        System.out.println("上传文件类型错误！");
                    }
                }
            } catch (Exception e) {
                log.error(e.getMessage());
            }
            return Result.OK("操作成功");
        } catch (Exception e) {
            return Result.OK("操作失败");
        }
    }

     */

    // Sample document path on a developer machine.
    // NOTE(review): not referenced by any active code in this class — candidate for removal.
    public static String path = "G:\\论文（word）\\论文（word）\\8.8期刊word31\\地方国资国企改革背景下内审体系的创新发展_赵金祥.docx";
    // NOTE(review): not referenced by any active code in this class.
    private static int offset = 100;// offset
    /**
     * Developer scratch entry point: Base64-encodes a sample file URL, decodes it again and
     * prints both values followed by an empty line.
     *
     * <p>Both conversions use UTF-8 explicitly so the output does not depend on the platform's
     * default charset.
     *
     * @param args ignored
     * @throws Exception declared for compatibility; the current body does not throw checked exceptions
     */
    public static void main(String[] args) throws Exception {
        // The string to encode.
        String originalString = "http://zzsn.luyuen.com/report/reportScience/5becff31-198e-49b6-89cb-6be2d111e032.docx";
        // Base64-encode it.
        String encodedString = Base64.getEncoder().encodeToString(originalString.getBytes(StandardCharsets.UTF_8));
        System.out.println("Encoded String: " + encodedString);

        // Base64-decode it again.
        byte[] decodedBytes = Base64.getDecoder().decode(encodedString);
        String decodedString = new String(decodedBytes, StandardCharsets.UTF_8);
        System.out.println("Decoded String: " + decodedString);

        System.out.println();
    }
    /**
     * Parses an HTML string, selects its paragraph/image/table/heading elements and promotes
     * paragraphs that look like numbered section titles to heading tags.
     *
     * <p>Heading detection only starts after the first element whose text contains "关键词".
     * A candidate is a non-English-dominated text shorter than 50 characters that starts with a
     * Chinese numeral, a digit or an opening parenthesis and does not start with "中图分类号".
     * Its numbering style key comes from {@link #getTilteNum(String)}; each new key is assigned
     * the next heading level in order of first appearance, and a key seen before reuses its level.
     *
     * @param text the HTML to analyse
     * @return the selected elements in document order, with detected titles re-tagged as headings;
     *         elements with empty text and heading candidates without a numbering key are omitted
     */
    public static Elements getDirectory(String text) {
        // Parse the HTML string.
        Document doc = Jsoup.parse(text);
        // Select the p, img, table and h1-h4 elements.
        Elements elements = doc.select("p:not(:has(img)),p > img,img,table,h1,h2,h3,h4");
        Elements elements1 = new Elements();
        // NOTE(review): `hh` is never read.
        String hh = "";

        // Detect the section titles of the article.
        // startflag becomes 1 once an element containing "关键词" has been seen.
        int startflag=0;
        // Highest heading level handed out so far.
        int maxlever = 0;
        // Numbering style key -> heading level (as a string).
        Map<String, String> titleLev = new HashMap<String, String>();
        for (Element element : elements) {
            // Images and tables are kept untouched.
            if(element.toString().contains("<img") || element.toString().contains("<table")){
                elements1.add(element);
            }else{
                if(!element.text().equals("")){
                    // parentText may be rewritten below; parentText2 keeps the original text.
                    String parentText = element.text();
                    String parentText2 = element.text();
                    if (parentText.contains("关键词") && startflag == 0) {
                        startflag = 1;
                    }
                    if(!calculateEnglishRatio(parentText)){
                        // Decide whether this text is a section title.
                        if((parentText.matches("[一二三四五六七八九十]+.*") ||
                                parentText.startsWith("(") ||
                                parentText.startsWith("（") ||
                                parentText.matches("\\d+.*") ) &&
                                parentText.length()<50 &&
                                !parentText.startsWith("中图分类号") &&
                                startflag == 1

                        ){
                            String tKey = getTilteNum(parentText);
                            // No usable numbering key: the element is skipped entirely and is
                            // NOT added to the result.
                            if (null==tKey || tKey.trim().length()==0) {
                                continue;
                            }
                            // Look up the level already assigned to this numbering style.
                            String lever = titleLev.get(tKey);
                            if (null!=lever ) {
                                parentText = "<h"+lever+">"+parentText+"</h"+lever+">";
                            } else if (titleLev.size()==0) {
                                // First style seen becomes level 1.
                                maxlever++;
                                titleLev.put(tKey,String.valueOf(maxlever));
                                parentText = "<h1>"+parentText+"</h1>";
                            } else {
                                // Every further new style gets the next deeper level.
                                maxlever++;
                                titleLev.put(tKey,String.valueOf(maxlever));
                                parentText = "<h"+maxlever+">"+parentText+"</h"+maxlever+">";

                            }
                            parentText = parentText.replaceAll(" ","").replaceAll("&nbsp;","");
                            // Debug output of the detected heading.
                            System.out.println(parentText);

                        }
                        if(parentText.startsWith("<h")){
                            // Characters 1-2 of "<hN>..." form the tag name; the element is
                            // re-tagged and its body reset to the original text.
                            // NOTE(review): only one digit is taken, so a level of 10 or more
                            // would be cut to "h1" — confirm levels cannot get that deep.
                            String tag = parentText.substring(1,3);
                            element.tagName(tag);
                            element.html(parentText2);
                        }
                    }
                    elements1.add(element);
                }
            }
        }
        return elements1;
    }


    /**
     * Derives a key describing the numbering style of a title, so titles numbered in the same
     * style share one key.
     *
     * <ul>
     *   <li>Digits only, or {@code null}: empty string.</li>
     *   <li>Leading digits/dots: the leading run with every group of digits collapsed to
     *       {@code 1}, e.g. {@code "1.2 x"} and {@code "10.12 x"} both give {@code "1.1"}.
     *       Groups are collapsed as a whole so multi-digit numbers get the same key as
     *       single-digit ones.</li>
     *   <li>Leading Chinese numeral: {@code "一"} when the title contains "、", a space or a dot,
     *       otherwise empty.</li>
     *   <li>Leading parenthesis (half- or full-width): {@code "(1)"} when a digit follows,
     *       {@code "(一)"} when a Chinese numeral follows, otherwise empty.</li>
     * </ul>
     *
     * @param title the title text; may be {@code null}
     * @return the style key, or an empty string when none can be derived
     */
    public static String getTilteNum(String title) {
        String result = "";
        if (title == null || Pattern.matches("\\d+", title)) {
            return result;
        }

        if (title.matches("\\d+.*")) {
            // Collect the leading run of digits and dots.
            StringBuilder prefix = new StringBuilder();
            for (char c : title.toCharArray()) {
                if (Character.isDigit(c) || c == '.') {
                    prefix.append(c);
                } else {
                    break;
                }
            }
            // Collapse each whole number, not each digit, so "10." and "1." share a key.
            result = prefix.toString().replaceAll("\\d+", "1");
        } else if (title.matches("[一二三四五六七八九十]+.*")) {
            if (title.contains("、") || title.contains(" ") || title.contains(".")) {
                result = "一";
            }
        } else if (title.startsWith("(") || title.startsWith("（")) {
            String afterParen = title.substring(1);
            if (afterParen.matches("\\d+.*")) {
                result = "(1)";
            } else if (afterParen.matches("[一二三四五六七八九十]+.*")) {
                result = "(一)";
            }
        }

        return result;
    }
    /**
     * Tells whether a string consists of digits only (pattern {@code ^\d+$}).
     *
     * @param str the string to test; must not be {@code null}
     * @return {@code true} when the pattern is found in {@code str}
     */
    public static boolean chunshuzi (String str) {
        return Pattern.compile("^\\d+$").matcher(str).find();
    }
    /**
     * Turns an ordered list of HTML fragments into flat node maps carrying tree information.
     *
     * <p>Each node gets a generated {@code id}, its {@code text}, a {@code level} derived from
     * its heading tag, a {@code parent} id resolved against a stack of open headings, its
     * {@code contentType} and a 1-based {@code sort} index.
     *
     * @param originList the HTML fragments in document order
     * @return one map per fragment, in the same order
     */
    public static List<Map<String, Object>> getList(List<String> originList) {
        // Sentinel bottom entry of the heading stack.
        Map<String, Object> sentinel = new HashMap<>();
        sentinel.put("id", null);
        sentinel.put("text", null);
        sentinel.put("level", 1000);

        Stack<Map<String, Object>> openHeadings = new Stack<>();
        openHeadings.push(sentinel);

        List<Map<String, Object>> nodes = new ArrayList<>();
        for (String fragment : originList) {
            Map<String, Object> node = new HashMap<>();
            node.put("id", UUID.randomUUID().toString().replaceAll("-", ""));
            node.put("text", fragment);
            node.put("level", getLevel(fragment));

            // Resolve the parent first (this may pop the stack), then decide whether to push.
            getParent(node, openHeadings);
            boolean pushable = canPush(node, openHeadings);
            if (pushable) {
                openHeadings.push(node);
            }

            node.put("contentType", getContentType(fragment));
            node.put("sort", nodes.size() + 1);
            nodes.add(node);
        }

        openHeadings.clear();
        return nodes;
    }
    /**
     * Tells whether a string starts with a bracketed number such as {@code [12]}, using either
     * half-width or full-width square brackets.
     *
     * @param str the string to test; must not be {@code null}
     * @return {@code true} when the string starts with such a marker
     */
    public static boolean cankaowenxian (String str) {
        boolean halfWidthMarker = Pattern.compile("^\\[[1234567890]+\\]").matcher(str).find();
        if (halfWidthMarker) {
            return true;
        }
        return Pattern.compile("^\\［[1234567890]+\\］").matcher(str).find();
    }
    /**
     * Determines the content type of an HTML fragment from its opening tag.
     *
     * @param text the HTML fragment; must not be {@code null}
     * @return one of {@code p}, {@code h1}, {@code h2}, {@code h3}, {@code h4}, {@code img},
     *         {@code table}, or an empty string when the fragment starts with none of them
     */
    private static String getContentType(String text) {
        // Checked in this order; the first prefix match wins.
        String[] knownTags = {"p", "h1", "h2", "h3", "h4", "img", "table"};
        for (String tag : knownTags) {
            if (text.startsWith("<" + tag)) {
                return tag;
            }
        }
        return "";
    }

    /**
     * Decides whether a node may be pushed onto the heading stack.
     *
     * <p>Anything may be pushed onto an empty stack. Otherwise only fragments starting with
     * {@code <h} qualify, and only when their level is greater than the level on top of the stack.
     *
     * @param source the candidate node (reads its {@code text} and {@code level})
     * @param stack  the current heading stack
     * @return {@code true} when the node should be pushed
     */
    private static boolean canPush(Map<String, Object> source, Stack<Map<String, Object>> stack) {
        if (stack.isEmpty()) {
            return true;
        }
        String fragment = (String) source.get("text");
        boolean isHeading = fragment.startsWith("<h");
        if (isHeading) {
            int topLevel = (int) stack.peek().get("level");
            int candidateLevel = (int) source.get("level");
            return candidateLevel > topLevel;
        }
        return false;
    }

    /**
     * Assigns the {@code parent} attribute of a node from the heading stack.
     *
     * <p>Entries whose level is greater than or equal to the node's level are popped. The node's
     * parent is then the id of the entry left on top, or {@code "0"} when the stack is (or
     * becomes) empty. An empty stack on entry is handled explicitly; previously that case fell
     * through to {@code peek()} and threw {@code EmptyStackException}.
     *
     * @param source the node to annotate (reads {@code level}, writes {@code parent})
     * @param stack  the heading stack; entries may be popped as a side effect
     */
    private static void getParent(Map<String, Object> source, Stack<Map<String, Object>> stack) {
        if (stack.isEmpty()) {
            source.put("parent", "0");
            return;
        }

        int sourceLevel = (int) source.get("level");
        // Unwind until the top entry is a strict ancestor (smaller level) of the node.
        while (!stack.isEmpty() && sourceLevel <= (int) stack.peek().get("level")) {
            stack.pop();
        }

        if (stack.isEmpty()) {
            source.put("parent", "0");
        } else {
            source.put("parent", (String) stack.peek().get("id"));
        }
    }


    /** Matches a bare opening heading tag {@code <h1>}..{@code <h6>} at the start of a fragment. */
    private static final Pattern HEADING_OPEN_TAG = Pattern.compile("^<h([1-6])>");

    /**
     * Determines the hierarchy level of an HTML fragment from its opening heading tag.
     *
     * <p>Only a bare tag without attributes ({@code <h1>} .. {@code <h6>}) at the very start
     * counts as a heading. Every other fragment, including {@code null} and blank text, gets
     * level 100. A value is always returned because callers unbox the result directly.
     *
     * @param text the HTML fragment; may be {@code null}
     * @return 1-6 for headings, 100 otherwise; never {@code null}
     */
    private static Integer getLevel(String text) {
        if (text == null) {
            return 100;
        }
        Matcher matcher = HEADING_OPEN_TAG.matcher(text);
        if (matcher.find()) {
            return Integer.valueOf(matcher.group(1));
        }
        return 100;
    }

    /**
     * Tells whether more than half of the characters of a string are English letters.
     *
     * @param str the string to inspect; must not be {@code null}
     * @return {@code true} when the share of English letters exceeds 0.5
     */
    public static boolean calculateEnglishRatio(String str) {
        long englishLetters = str.chars()
                .filter(ch -> isEnglish((char) ch))
                .count();
        double share = (double) englishLetters / str.length();
        return share > 0.5;
    }

    /**
     * Tells whether a character is an ASCII letter ({@code a-z} or {@code A-Z}).
     *
     * @param c the character to test
     * @return {@code true} for ASCII letters, {@code false} otherwise
     */
    public static boolean isEnglish(char c) {
        boolean lowerCase = 'a' <= c && c <= 'z';
        if (lowerCase) {
            return true;
        }
        return 'A' <= c && c <= 'Z';
    }

    /**
     * Tells whether the last character of a string is a digit.
     *
     * @param str the string to inspect; may be {@code null} or empty
     * @return {@code true} when the string is non-empty and ends with a digit; {@code false}
     *         for {@code null} and empty strings
     */
    public static boolean isDigit(String str) {
        if (str == null || str.isEmpty()) {
            // There is no last character to look at.
            return false;
        }
        return Character.isDigit(str.charAt(str.length() - 1));
    }


    // Platform line separator, inserted after block-level closing tags before the HTML is stripped.
    final String lineSeparator = System.getProperty("line.separator");

    /**
     * Writes the text of an HTML document to {@code <path><title>.txt}, encoded as UTF-8.
     *
     * <p>A line break is inserted after every closing {@code p}/{@code h1}-{@code h4} tag, runs
     * of spaces are collapsed, {@code style}/{@code script}/{@code img}/{@code table} elements
     * are removed and the remaining tags stripped. Nothing is written when the resulting text is
     * shorter than 20 characters or when {@code content} or {@code title} is {@code null}.
     * Characters that are not allowed in Windows file names are replaced by {@code _} in the
     * title; the sanitised title is also written as the first line of the file. A write failure
     * is logged and otherwise ignored so one bad file does not abort a batch export.
     *
     * @param content the HTML content
     * @param title   the document title, used as file name and first line
     * @param path    the target directory including the trailing separator
     */
    private void generateTxt(String content,String title,String path) {
        if (content == null || title == null) {
            log.warn("generateTxt skipped: content or title is null, title={}", title);
            return;
        }

        for (String closingTag : new String[]{"</p>", "</h1>", "</h2>", "</h3>", "</h4>"}) {
            content = content.replace(closingTag, closingTag + lineSeparator);
        }
        content = content.replaceAll(" +", " ");
        content = HtmlUtil.cleanHtmlTag(HtmlUtil.removeHtmlTag(content,"style","script","img","table"));
        if (content.length() < 20) {
            return;
        }

        // The export directories are Windows paths, so every character Windows rejects in a
        // file name is replaced, not only '/' and ':'.
        String safeTitle = title.replaceAll("[\\\\/:*?\"<>|]", "_");

        try {
            File file = FileUtil.touch(new File(path + safeTitle + ".txt"));
            FileUtil.writeString(safeTitle + "\n" + content, file, StandardCharsets.UTF_8);
        } catch (IORuntimeException e) {
            log.warn("generateTxt failed to write {}{}.txt", path, safeTitle, e);
        }
    }

}
