package com.zzsn.knowbase.service.impl;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.http.HtmlUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.zzsn.knowbase.entity.AiReportScienceFile;
import com.zzsn.knowbase.entity.AiReportScienceFileMaterial;
import com.zzsn.knowbase.mapper.AiReportScienceFileMapper;
import com.zzsn.knowbase.service.IAiReportScienceFileMaterialService;
import com.zzsn.knowbase.service.IAiReportScienceFileService;
import com.zzsn.knowbase.util.DateUtil;
import com.zzsn.knowbase.util.HttpUtil;
import com.zzsn.knowbase.util.MD5Util;
import com.zzsn.knowbase.util.ReportUtil;
import com.zzsn.knowbase.vo.AiReportScienceFileVo;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.MultiValueMap;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;

import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * @Version: V1.0
 */
@Service
public class AiReportScienceFileServiceImpl extends ServiceImpl<AiReportScienceFileMapper, AiReportScienceFile> implements IAiReportScienceFileService {
    // Injected handle to this service's own interface; the query methods below call it
    // instead of `this` (presumably so calls go through the Spring-managed bean — confirm).
    @Autowired
    private IAiReportScienceFileService aiReportScienceFileService;
    // Service for the paragraph-level "material" rows that belong to a science file.
    @Autowired
    private IAiReportScienceFileMaterialService aiReportScienceFileMaterialService;
    // Redis client; suggestContent uses it to cache recommendation hits.
    @Resource
    private StringRedisTemplate stringRedisTemplate;


    // private static final String BASE_URL = "http://116.63.179.212:7862/";
    // Base URL of the remote endpoints called below (books/books_chat and books/books_delete).
    private static final String BASE_URL = "http://114.115.172.99:10013/";

    /**
     * Pages through non-deleted science files that match the filters carried by
     * {@code aiReportScienceFileVo}. The {@code content} column is not selected for list rows.
     *
     * @param aiReportScienceFileVo filter values (author, origin, data type, publish/audit ranges, search words, status)
     * @param pageNo                page number passed to {@link Page}
     * @param pageSize              page size passed to {@link Page}
     * @param column                column(s) to sort by in descending order; blank or {@code "common"} selects the
     *                              default ordering (status ascending, then audit_time/update_time descending)
     * @param order                 currently unused; custom sorting is always descending
     * @return the requested page of matching files
     */
    @Override
    public IPage<AiReportScienceFile> queryPageList(AiReportScienceFileVo aiReportScienceFileVo, Integer pageNo, Integer pageSize, String column, String order) {
        IPage<AiReportScienceFile> page = new Page<>(pageNo, pageSize);
        QueryWrapper<AiReportScienceFile> queryWrapper = new QueryWrapper<>();
        if (StringUtils.isNotEmpty(aiReportScienceFileVo.getAuthor())) {
            queryWrapper.like("author", aiReportScienceFileVo.getAuthor());
        }
        if (StringUtils.isNotEmpty(aiReportScienceFileVo.getOrigin())) {
            queryWrapper.like("origin", aiReportScienceFileVo.getOrigin());
        }

        // Data type 5 is a combined filter over types 1 and 2; for it, publish dates are compared
        // as "<yyyy>年" strings. Computed null-safely because the date filters may arrive without a data type.
        boolean yearGranularity = aiReportScienceFileVo.getDataType() != null
                && aiReportScienceFileVo.getDataType() == 5;
        if (aiReportScienceFileVo.getDataType() != null) {
            if (yearGranularity) {
                queryWrapper.in("data_type", Arrays.asList(1, 2));
            } else {
                queryWrapper.eq("data_type", aiReportScienceFileVo.getDataType());
            }
        }
        if (StringUtils.isNotBlank(aiReportScienceFileVo.getPubStartTime())) {
            String pubStart = aiReportScienceFileVo.getPubStartTime();
            queryWrapper.ge("publish_date", yearGranularity ? toPublishYearBound(pubStart) : pubStart);
        }
        if (StringUtils.isNotBlank(aiReportScienceFileVo.getPubEndTime())) {
            String pubEnd = aiReportScienceFileVo.getPubEndTime();
            queryWrapper.le("publish_date", yearGranularity ? toPublishYearBound(pubEnd) : pubEnd);
        }

        if (StringUtils.isNotEmpty(aiReportScienceFileVo.getSearchWords())) {
            if ("标题".equals(aiReportScienceFileVo.getSearchType())) {
                queryWrapper.like("title", aiReportScienceFileVo.getSearchWords());
            } else {
                queryWrapper.like("content", aiReportScienceFileVo.getSearchWords());
            }
        }

        // The sort column is concatenated into the ORDER BY clause as-is, so only plain
        // identifiers (optionally comma separated) are accepted; anything else falls back to the default.
        boolean useDefaultOrder = StringUtils.isBlank(column) || "common".equals(column);
        if (!useDefaultOrder && !column.matches("[A-Za-z0-9_]+(\\s*,\\s*[A-Za-z0-9_]+)*")) {
            log.warn("排序字段不合法，已使用默认排序: " + column);
            useDefaultOrder = true;
        }
        if (useDefaultOrder) {
            queryWrapper.orderBy(true, true, "status")
                    .orderBy(true, false, "audit_time", "update_time");
        } else {
            queryWrapper.orderBy(true, false, column);
        }

        if (StringUtils.isNotBlank(aiReportScienceFileVo.getAuditBy())) {
            queryWrapper.like("audit_by", aiReportScienceFileVo.getAuditBy());
        }
        if (StringUtils.isNotBlank(aiReportScienceFileVo.getAuditTimeStart())) {
            // A bare date (10 characters) is widened to the start of that day.
            if (aiReportScienceFileVo.getAuditTimeStart().length() == 10) {
                aiReportScienceFileVo.setAuditTimeStart(aiReportScienceFileVo.getAuditTimeStart() + " 00:00:00");
            }
            queryWrapper.ge("audit_time", aiReportScienceFileVo.getAuditTimeStart());
        }
        if (StringUtils.isNotBlank(aiReportScienceFileVo.getAuditTimeEnd())) {
            // A bare date (10 characters) is widened to the end of that day.
            if (aiReportScienceFileVo.getAuditTimeEnd().length() == 10) {
                aiReportScienceFileVo.setAuditTimeEnd(aiReportScienceFileVo.getAuditTimeEnd() + " 23:59:59");
            }
            queryWrapper.le("audit_time", aiReportScienceFileVo.getAuditTimeEnd());
        }

        if (aiReportScienceFileVo.getStatus() != null) {
            queryWrapper.eq("status", aiReportScienceFileVo.getStatus());
        }
        queryWrapper.eq("deleted", "0");

        // List rows deliberately leave out the content column.
        String[] queryColumn = {"id", "content_type", "file_path_obs", "title", "cover_obs", "preview_obs",
                "year", "author", "origin", "data_type","check_by","check_time",
                "zip_file_url", "status", "publish_date",
                "create_by", "create_time", "update_by", "update_time","audit_by","audit_time"
        };
        queryWrapper.select(queryColumn);
        return this.baseMapper.selectPage(page, queryWrapper);
    }

    /**
     * Builds the "<yyyy>年" bound used for year-granularity publish dates.
     * Values shorter than four characters are used whole instead of failing in substring.
     */
    private static String toPublishYearBound(String dateText) {
        String year = dateText.length() > 4 ? dateText.substring(0, 4) : dateText;
        return year + "年";
    }


    /**
     * Material recommendation.
     * <ol>
     *   <li>Fetch the most relevant text hits from the model endpoint, or from the Redis cache when present.</li>
     *   <li>Drop hits whose material row no longer exists in the database and asynchronously ask the
     *       vector store to delete them.</li>
     *   <li>For the requested page, look up the owning files (and, for hits of type "标题", the
     *       paragraphs following the title) and assemble the records to return.</li>
     * </ol>
     *
     * @param text     query text
     * @param type     comma-separated type filter forwarded to the model endpoint
     * @param pageNo   1-based page number
     * @param pageSize page size
     * @return a page whose records hold author, publishDate, title, content, textId and origin; the total
     *         is the number of hits that still exist in the database
     */
    @Override
    public IPage<Map<String, Object>> suggestContent(String text, String type, Integer pageNo, Integer pageSize) {
        // Look in the cache first.
        String key = MD5Util.MD5Encode("ReportScienceFile:"+text + type, "UTF-8");
        String value = stringRedisTemplate.opsForValue().get(key);
        JSONArray jsonArray = value != null ? JSONArray.parseArray(value) : postSuggest(text, type);
        if (jsonArray == null) {
            // Neither source is guaranteed to hand back an array; treat "nothing" as an empty result.
            jsonArray = new JSONArray();
        }

        List<Map<String, Object>> list = new ArrayList<>();
        if (!jsonArray.isEmpty()) {
            QueryWrapper<AiReportScienceFileMaterial> existsQuery = getAiReportScienceFileMaterialQueryWrapper(jsonArray);
            // A set keeps the per-hit existence check constant-time.
            Set<String> existsId = aiReportScienceFileMaterialService.list(existsQuery).stream()
                    .map(AiReportScienceFileMaterial::getId)
                    .collect(Collectors.toSet());
            if (jsonArray.size() > existsId.size()) {
                List<String> notExistsIds = new ArrayList<>();
                jsonArray.removeIf(object -> {
                    String hitTextId = ((JSONObject) object).getString("textId");
                    boolean missing = !existsId.contains(hitTextId);
                    if (missing) {
                        notExistsIds.add(hitTextId);
                    }
                    return missing;
                });
                if (!notExistsIds.isEmpty()) {
                    // These hits are no longer in the database, so remove them from the vector store too.
                    CompletableFuture.runAsync(()-> deleteDataByContentIds(notExistsIds));
                }
            }

            // Slice out the requested page, clamped to the available hits.
            int fromIndex = Math.max(0, (pageNo - 1) * pageSize);
            int toIndex = Math.min(pageNo * pageSize, jsonArray.size());
            List<Object> objects = fromIndex < toIndex ? jsonArray.subList(fromIndex, toIndex) : new ArrayList<>();

            List<String> fileIds = new ArrayList<>();
            List<String> fileMaterialIds = new ArrayList<>();
            List<String> titleTextIds = new ArrayList<>();
            for (Object object : objects) {
                JSONObject jsonObject = (JSONObject) object;
                String hitTextId = jsonObject.getString("textId");
                if ("标题".equals(jsonObject.getString("textType"))) {
                    titleTextIds.add(hitTextId);
                }
                fileIds.add(jsonObject.getString("fileId"));
                fileMaterialIds.add(hitTextId);
            }

            if (!fileIds.isEmpty() && !fileMaterialIds.isEmpty()) {
                // Load the book / periodical metadata for the files on this page.
                LambdaQueryWrapper<AiReportScienceFile> queryWrapper = new LambdaQueryWrapper<>();
                queryWrapper.in(AiReportScienceFile::getId, fileIds);
                queryWrapper.select(AiReportScienceFile::getId,AiReportScienceFile::getAuthor,AiReportScienceFile::getPublishDate,AiReportScienceFile::getTitle,AiReportScienceFile::getOrigin);
                List<AiReportScienceFile> scienceFiles = aiReportScienceFileService.list(queryWrapper);
                Map<String, AiReportScienceFile> filesById = scienceFiles.stream().collect(Collectors.toMap(AiReportScienceFile::getId, v -> v));

                List<AiReportScienceFileMaterial> fileMaterials = aiReportScienceFileMaterialService.listByIds(fileMaterialIds);
                Map<String, AiReportScienceFileMaterial> fileMaterialMap = fileMaterials.stream().collect(Collectors.toMap(AiReportScienceFileMaterial::getId, v -> v));

                // For title hits, fetch the paragraphs that follow the title.
                Map<String, String> titleContentMap = new HashMap<>(titleTextIds.size());
                for (String titleTextId : titleTextIds) {
                    // A row can disappear between the existence check and this lookup.
                    if (fileMaterialMap.containsKey(titleTextId)) {
                        titleContentMap.put(titleTextId, getSubContent(titleTextId, fileMaterialMap));
                    }
                }

                for (Object object : objects) {
                    JSONObject jsonObject = (JSONObject) object;
                    String id = jsonObject.getString("fileId");
                    String textId = jsonObject.getString("textId");
                    String content = jsonObject.getString("text");
                    AiReportScienceFile scienceFile = filesById.get(id);
                    if (scienceFile == null) {
                        // The material row exists but its file does not; there is no metadata to show, so skip the hit.
                        log.warn("素材推荐结果对应的文件不存在, fileId=" + id + ", textId=" + textId);
                        continue;
                    }

                    // Text found under the title, if this hit is a title.
                    String titleContent = Optional.ofNullable(titleContentMap.get(textId)).orElse("");

                    Map<String, Object> map = new HashMap<>();
                    map.put("author", scienceFile.getAuthor());
                    map.put("publishDate", scienceFile.getPublishDate());
                    map.put("title", scienceFile.getTitle());
                    map.put("content", content + titleContent);
                    map.put("textId", textId);
                    map.put("origin", scienceFile.getOrigin());
                    list.add(map);
                }
            }
            // Cache the filtered hits for ten minutes.
            stringRedisTemplate.opsForValue().set(key, JSON.toJSONString(jsonArray), 10, TimeUnit.MINUTES);
        }
        IPage<Map<String, Object>> resultList = new Page<>(pageNo, pageSize);
        resultList.setRecords(list);
        resultList.setTotal(jsonArray.size());
        return resultList;
    }

//    @NotNull  //TODO  注释
    private static QueryWrapper<AiReportScienceFileMaterial> getAiReportScienceFileMaterialQueryWrapper(JSONArray jsonArray) {
        List<String> materialIds = new ArrayList<>();
        for (Object object : jsonArray) {
            JSONObject jsonObject = (JSONObject) object;
            String textId = jsonObject.getString("textId");
            materialIds.add(textId);
        }
        // 只查询id字段增加速度，条件为 in materialIds
        QueryWrapper<AiReportScienceFileMaterial> queryWrapper = new QueryWrapper<>();
        queryWrapper.in("id", materialIds);
        queryWrapper.select("id");
        return queryWrapper;
    }

    /**
     * Using a segment id and that segment's sort position, collects the text of the paragraphs
     * that follow it in the same file.
     * <p>
     * At most three following rows are read, with content types "img" and "table" excluded;
     * collection stops after the first row whose level is "100".
     *
     * @param titleTextId     id of the title segment
     * @param fileMaterialMap material rows keyed by id; expected to contain {@code titleTextId}
     * @return the following paragraphs with HTML tags stripped, each preceded by a newline;
     *         an empty string when the segment is unknown or nothing follows it
     */
    private String getSubContent(String titleTextId, Map<String, AiReportScienceFileMaterial> fileMaterialMap) {
        AiReportScienceFileMaterial titleMaterial = fileMaterialMap == null ? null : fileMaterialMap.get(titleTextId);
        if (titleMaterial == null) {
            // Without the title row there is no file or position to start from.
            return "";
        }

        LambdaQueryWrapper<AiReportScienceFileMaterial> fileMaterialQuery = new LambdaQueryWrapper<>();
        fileMaterialQuery.eq(AiReportScienceFileMaterial::getFileId, titleMaterial.getFileId());
        fileMaterialQuery.gt(AiReportScienceFileMaterial::getSort, titleMaterial.getSort());
        fileMaterialQuery.notIn(AiReportScienceFileMaterial::getContentType, "img","table");
        fileMaterialQuery.orderByAsc(AiReportScienceFileMaterial::getSort);
        fileMaterialQuery.last(" limit 3 ");
        List<AiReportScienceFileMaterial> followingRows = aiReportScienceFileMaterialService.list(fileMaterialQuery);

        StringBuilder content = new StringBuilder();
        if (CollUtil.isNotEmpty(followingRows)) {
            for (AiReportScienceFileMaterial row : followingRows) {
                if (row == null) {
                    continue;
                }
                // Rows without content contribute nothing but still take part in the stop rule below.
                if (row.getContent() != null) {
                    content.append("\n").append(HtmlUtil.cleanHtmlTag(row.getContent()));
                }
                if ("100".equals(row.getLevel())) {
                    break;
                }
            }
        }
        return content.toString();
    }

    /**
     * Returns the surrounding context of a paragraph identified by {@code textId}.
     * <p>
     * For files whose data type is 0 (periodicals) the whole article is returned; for every other
     * data type (including an unset one) a window of neighbouring paragraphs is returned.
     *
     * @param textId id of the material paragraph
     * @return a map with fileName, content, origin, author and publishDate, or {@code null} when
     *         {@code textId} is blank
     * @throws IllegalArgumentException when the paragraph or its owning file no longer exists
     */
    @Override
    public Map<String, Object> getContextByTextId(String textId) {
        if (StringUtils.isBlank(textId)) {
            return null;
        }
        // Load the paragraph itself; its file id and sort position drive the lookups below.
        AiReportScienceFileMaterial fileMaterial = aiReportScienceFileMaterialService.getById(textId);
        if (fileMaterial == null) {
            throw new IllegalArgumentException("素材文件已被删除或重新发起审核");
        }

        LambdaQueryWrapper<AiReportScienceFile> queryWrapper = new LambdaQueryWrapper<>();
        queryWrapper.eq(AiReportScienceFile::getId, fileMaterial.getFileId());
        queryWrapper.select(AiReportScienceFile::getId,AiReportScienceFile::getAuthor,AiReportScienceFile::getPublishDate,
                AiReportScienceFile::getTitle,AiReportScienceFile::getOrigin,AiReportScienceFile::getDataType);

        AiReportScienceFile scienceFile = aiReportScienceFileService.getOne(queryWrapper, false);
        if (scienceFile == null) {
            // The paragraph outlived its file; report it the same way as a missing paragraph.
            throw new IllegalArgumentException("素材文件已被删除或重新发起审核");
        }

        // Periodicals return the whole article; everything else returns a window around the paragraph.
        Integer dataType = scienceFile.getDataType();
        boolean periodical = dataType != null && dataType == 0;
        String content = periodical ? getPeriodicalContent(fileMaterial) : getContent(fileMaterial);

        Map<String, Object> map = new HashMap<>();
        map.put("fileName", scienceFile.getTitle());
        map.put("content", content);
        map.put("origin", scienceFile.getOrigin());
        map.put("author", scienceFile.getAuthor());
        map.put("publishDate", scienceFile.getPublishDate());

        return map;
    }

    /**
     * Assembles the article text of a periodical from all paragraphs of the file that
     * {@code fileMaterial} belongs to, in sort order.
     * <p>
     * Output starts at the index chosen by {@link #startWithNum(List)} and stops at the first
     * paragraph whose tag-stripped text contains "参考文献" and is shorter than ten characters
     * (i.e. a references heading). Paragraphs keep only their markup with attributes removed
     * from {@code p} and {@code span} tags.
     *
     * @param fileMaterial any paragraph of the periodical; only its file id is used
     * @return the concatenated article HTML, or an empty string when the file has no content
     */
    public String getPeriodicalContent(AiReportScienceFileMaterial fileMaterial) {
        QueryWrapper<AiReportScienceFileMaterial> queryWrapper = new QueryWrapper<>();
        queryWrapper.eq("file_id", fileMaterial.getFileId());
        queryWrapper.orderByAsc("sort");
        queryWrapper.select("content");
        List<AiReportScienceFileMaterial> rows = aiReportScienceFileMaterialService.list(queryWrapper);
        if (CollectionUtil.isEmpty(rows)) {
            return "";
        }

        // Drop rows without content up front so the HTML helpers never receive null.
        List<AiReportScienceFileMaterial> paragraphs = rows.stream()
                .filter(row -> row != null && row.getContent() != null)
                .collect(Collectors.toList());
        if (paragraphs.isEmpty()) {
            return "";
        }

        StringBuilder sb = new StringBuilder();
        int startWithNum = startWithNum(paragraphs);
        for (int i = 0; i < paragraphs.size(); i++) {
            String html = paragraphs.get(i).getContent();
            String plainText = HtmlUtil.cleanHtmlTag(html);
            // A short paragraph mentioning "参考文献" is the references heading: nothing after it is included.
            if (plainText.contains("参考文献") && plainText.length() < 10) {
                break;
            }
            // Only paragraphs from the chosen start position onwards are emitted.
            if (i >= startWithNum) {
                sb.append(HtmlUtil.removeAllHtmlAttr(html, "p", "span"));
            }
        }
        return sb.toString();
    }

    /**
     * Decides from which paragraph index a periodical's content should start.
     * <p>
     * The index of the first paragraph whose content contains "摘要" is used, unless no such
     * paragraph exists or it lies beyond the first half of the list, in which case the whole
     * article is used (index 0).
     *
     * @param list paragraphs in display order; may be null or empty
     * @return the zero-based start index
     */
    public int startWithNum(List<AiReportScienceFileMaterial> list) {
        if (CollectionUtil.isEmpty(list)) {
            return 0;
        }
        int index = 0;
        boolean abstractFound = false;
        for (AiReportScienceFileMaterial paragraph : list) {
            // Missing rows or missing content simply cannot match.
            String content = paragraph == null ? null : paragraph.getContent();
            if (content != null && content.contains("摘要")) {
                abstractFound = true;
                break;
            }
            index++;
        }
        // No abstract, or an abstract that only shows up in the second half: return the full article.
        if (!abstractFound || (double) index / list.size() > 0.5) {
            return 0;
        }
        return index;
    }

    /**
     * Builds the context for a paragraph of a non-periodical file.
     * <p>
     * Starting from {@code fileMaterial}, neighbouring paragraphs (by sort position) are added
     * alternately before and after until the tag-stripped text reaches 4000 characters or no
     * neighbour is found on either side. Neighbours come from the window loaded by
     * {@link #getListBySort(String, int, int)}. The result is emitted in sort order with
     * attributes removed from {@code p} and {@code span} tags.
     *
     * @param fileMaterial the paragraph to build context around
     * @return concatenated HTML of the paragraph and its selected neighbours
     */
    public String getContent(AiReportScienceFileMaterial fileMaterial) {
        final int targetLength = 4000;
        int totalSuggestContentNum = plainTextLength(fileMaterial);

        List<AiReportScienceFileMaterial> list = new ArrayList<>();
        list.add(fileMaterial);
        Integer centerSort = fileMaterial.getSort();
        // Without a sort position there is nothing to expand around.
        if (totalSuggestContentNum < targetLength && centerSort != null) {
            int firstSort = centerSort;
            int lastSort = centerSort;
            List<AiReportScienceFileMaterial> listBySort = getListBySort(fileMaterial.getFileId(), firstSort, lastSort);
            // Index the window by sort; if two rows share a sort value the first one wins instead of failing.
            Map<Integer, AiReportScienceFileMaterial> bySort = new HashMap<>();
            if (listBySort != null) {
                for (AiReportScienceFileMaterial candidate : listBySort) {
                    if (candidate != null) {
                        bySort.putIfAbsent(candidate.getSort(), candidate);
                    }
                }
            }
            while (totalSuggestContentNum < targetLength) {
                AiReportScienceFileMaterial firstFileMaterial = bySort.get(--firstSort);
                AiReportScienceFileMaterial lastFileMaterial = bySort.get(++lastSort);
                if (firstFileMaterial == null && lastFileMaterial == null) {
                    break;
                }
                if (firstFileMaterial != null) {
                    totalSuggestContentNum += plainTextLength(firstFileMaterial);
                    list.add(firstFileMaterial);
                }
                if (lastFileMaterial != null) {
                    totalSuggestContentNum += plainTextLength(lastFileMaterial);
                    list.add(lastFileMaterial);
                }
            }
            list.sort(Comparator.comparingInt(AiReportScienceFileMaterial::getSort));
        }

        StringBuilder stringBuilder = new StringBuilder();
        for (AiReportScienceFileMaterial scienceFileMaterial : list) {
            if (scienceFileMaterial.getContent() != null) {
                stringBuilder.append(HtmlUtil.removeAllHtmlAttr(scienceFileMaterial.getContent(), "p","span"));
            }
        }
        return stringBuilder.toString();
    }

    /** Length of a paragraph's content with HTML tags stripped; a paragraph without content counts as 0. */
    private static int plainTextLength(AiReportScienceFileMaterial material) {
        String html = material.getContent();
        return html == null ? 0 : HtmlUtil.cleanHtmlTag(html).length();
    }
    /**
     * Loads the paragraphs of a file whose sort position lies in the window
     * {@code [firstSort - 25, lastSort + 45]}, ordered by sort ascending.
     * The lower bound is clamped to 0 and the upper bound to {@link Integer#MAX_VALUE}.
     *
     * @param fileId    id of the owning file
     * @param firstSort sort position the window extends backwards from
     * @param lastSort  sort position the window extends forwards from
     * @return the matching paragraphs
     */
    public List<AiReportScienceFileMaterial> getListBySort(String fileId, int firstSort, int lastSort) {
        // Compute the bounds in long arithmetic so the offsets cannot wrap around before clamping.
        int min = (int) Math.max(0L, (long) firstSort - 25);
        int max = (int) Math.min((long) lastSort + 45, Integer.MAX_VALUE);

        QueryWrapper<AiReportScienceFileMaterial> queryWrapper = new QueryWrapper<>();
        queryWrapper.eq("file_id", fileId);
        queryWrapper.orderByAsc("sort");
        queryWrapper.between("sort", min, max);
        return aiReportScienceFileMaterialService.list(queryWrapper);
    }


    /**
     * Asks the remote books_delete endpoint to remove the entries of one file from the
     * "yjzx_books_vdb" knowledge base. An I/O failure is logged and not propagated.
     *
     * @param id id of the file whose entries should be removed
     */
    @Override
    public void deleteDataByFileId(String id) {
        final String deleteUrl = BASE_URL + "books/books_delete";
        String[] fileIds = {id};

        Map<String, Object> payload = new HashMap<>();
        payload.put("knowledge_base_id", "yjzx_books_vdb");
        payload.put("fileId", fileIds);
        JSONObject requestBody = new JSONObject(payload);

        try {
            HttpUtil.doPost(deleteUrl, requestBody, 10000);
        } catch (IOException ioFailure) {
            log.error(ioFailure.getMessage(), ioFailure);
        }
    }

    /**
     * Asks the remote books_delete endpoint to remove the given paragraph ids from the
     * "yjzx_books_vdb" knowledge base. Best effort: every failure is logged with its cause and
     * never propagated, since this runs as a background clean-up.
     *
     * @param contentIds paragraph (text) ids to remove; null or empty does nothing
     */
    private void deleteDataByContentIds(List<String> contentIds) {
        if (CollectionUtil.isEmpty(contentIds)) {
            return;
        }
        try {
            Map<String, Object> param = new HashMap<>();
            param.put("knowledge_base_id", "yjzx_books_vdb");
            param.put("textId", contentIds.toArray());

            // TODO: also push the removed ids to Kafka
            //ReportUtil.sendKafka(contentIds);

            HttpUtil.doPost(BASE_URL + "books/books_delete", new JSONObject(param), 10000);
        } catch (IOException e) {
            log.error("向量库数据删除失败", e);
        } catch (Exception e) {
            // Keep the cause in the log; the original message alone gave nothing to diagnose.
            log.error("删除素材推荐数据失败", e);
        }
    }

    /**
     * Queries the remote books_chat endpoint for text hits relevant to {@code text}.
     * <p>
     * Example of the request format accepted by the endpoint:
     * <pre>
     * {
     * "knowledge_base_id": "yjzx_books_test",
     * "question": "亚马逊科技能带来什么",
     * "history": [],
     * "llm_answer": false,
     * "score_threshold":600,
     * "vector_search_top_k": 16384,
     * "type": [],
     * "ids": []
     * }
     * </pre>
     *
     * @param text question text to search for
     * @param type comma-separated type filter; null or blank means no filter
     * @return the {@code results} array of a response with code 200; otherwise (non-200 code,
     *         missing results, or any failure) an empty array, never {@code null}
     */
    private JSONArray postSuggest(String text, String type) {
        // A blank filter must become an empty array, not an array holding one empty string.
        String[] types = StringUtils.isNotBlank(type) ? type.split(",") : new String[]{};
        Map<String, Object> param = new HashMap<>();
        param.put("question", text);
        param.put("knowledge_base_id", "yjzx_books_vdb");
        // param.put("llm_answer", false);
        param.put("score_threshold", 500);
        param.put("vector_search_top_k", 500);
        param.put("type", types);
        // param.put("ids",new String[]{});

        try {
            String responseStr = HttpUtil.doPost(BASE_URL + "books/books_chat", new JSONObject(param), 10000);
            JSONObject jsonObject = JSON.parseObject(responseStr);
            // Compare via equals so a response without "code" does not fail on unboxing.
            if (jsonObject != null && Integer.valueOf(200).equals(jsonObject.getInteger("code"))) {
                JSONArray results = jsonObject.getJSONArray("results");
                if (results != null) {
                    return results;
                }
            }
        } catch (Exception e) {
            log.error("素材推荐接口调用失败", e);
        }
        return new JSONArray();
    }


    /**
     * Lists the distinct auditors recorded on files whose deleted flag is 0.
     *
     * @return one entry per distinct non-null {@code auditBy} value
     */
    @Override
    public List<String> getAuditPersonList() {
        LambdaQueryWrapper<AiReportScienceFile> auditorQuery = new LambdaQueryWrapper<AiReportScienceFile>()
                .select(AiReportScienceFile::getAuditBy)
                .eq(AiReportScienceFile::getDeleted, 0)
                .isNotNull(AiReportScienceFile::getAuditBy)
                .groupBy(AiReportScienceFile::getAuditBy);

        List<String> auditors = new ArrayList<>();
        for (AiReportScienceFile row : this.list(auditorQuery)) {
            auditors.add(row.getAuditBy());
        }
        return auditors;
    }

////    @Override
//    public Boolean uploadAiMaterialOld(AiReportScienceFile aiReportScienceFile, HttpServletRequest request) throws Exception {
//        try {
//            int i = 0;
//            int j = 0;
//            MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
//            MultiValueMap<String, MultipartFile> files = multipartRequest.getMultiFileMap();
//            LinkedList<MultipartFile> aa = (LinkedList<MultipartFile>) files.get("file");
//            for (MultipartFile file : aa) {
//                try{
//                    i++;
//                    String fileName = file.getOriginalFilename();//获取文件名
//                    String prefix = fileName.substring(fileName.lastIndexOf(".") + 1);
//
//                    String author = "";
//                    AiReportScienceFile reportScienceFile = new AiReportScienceFile();
//                    if (fileName.contains("_")) {
//                        reportScienceFile.setTitle(fileName.substring(0, fileName.lastIndexOf("_")));
//                        author = fileName.substring(fileName.lastIndexOf("_")+1,fileName.lastIndexOf("."));
//                    } else if (fileName.contains(".")) {
//                        reportScienceFile.setTitle(fileName.substring(0, fileName.indexOf(".")));
//                    }
//                    if (!"docx".equals(prefix) && !"doc".equals(prefix) && !"pdf".equals(prefix)) {
//                        throw new Exception("目前支持doc、docx、txt、pdf格式");
//                    }
////                    QueryWrapper query = new QueryWrapper();
////                    query.eq("title", reportScienceFile.getTitle());
////                    if(!author.equals("")){
////                        query.like("author", author);
////                    }
////                    List<AiReportScienceFile> list = aiReportScienceFileService.list(query);
////                    if (list.size() == 0 || (
////                            StringUtils.isNotEmpty(list.get(0).getContent()) && StringUtils.isNotEmpty(list.get(0).getFilePathObs())
////                    )) {
////                        System.out.println(i + "：标题：" + reportScienceFile.getTitle() + "-已拆分");
////                        continue;
////                    }
//                    j++;
//                    if (file.getOriginalFilename().endsWith("docx") || file.getOriginalFilename().endsWith("doc")) {
//                        //文件上传
//                        String text = DocUtil.convertDocStream2Html(file.getInputStream());
//                        reportScienceFile.setContent(text);
//                        ReportUtil.formatFile(reportScienceFile, prefix,file);
////                        if(StringUtils.isNotEmpty(list.get(0).getContent()) && StringUtils.isEmpty(list.get(0).getFilePathObs())){
////                            list.get(0).setFilePathObs(reportScienceFile.getFilePathObs());
////                            aiReportScienceFileService.updateById(list.get(0));
////                            System.out.println(j + "：标题：" + reportScienceFile.getTitle() + "-更新文件地址");
////                            continue;
////                        }
//                        System.out.println(j + "：标题：" + reportScienceFile.getTitle() + "-开始拆分");
//                    /*
//                    获取目录
//                    */
//                        FileInputStream fileInputStream = (FileInputStream) file.getInputStream();
//                        XWPFDocument document = new XWPFDocument(fileInputStream);
//                        List<String> directoryList = new ArrayList<>();
//                        ReportUtil.setNoDirectory(document, directoryList);
//
//                        InputStream inputStream = null;
//                        java.io.ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
//                        document.write(byteArrayOutputStream);
//                        inputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray());
//                        String content = DocUtil.convertDocStream2Html(inputStream);
//                        content = content.replaceAll("__space__one1", "<h1>");
//                        content = content.replaceAll("__space__one2", "</h1>");
//                        content = content.replaceAll("__space__two1", "<h2>");
//                        content = content.replaceAll("__space__two2", "</h2>");
//                        content = content.replaceAll("__space__three1", "<h3>");
//                        content = content.replaceAll("__space__three2", "</h3>");
//                        content = content.replaceAll("&nbsp;", "");
//                        if(content.contains("<body") && content.contains("</body>")){
//                            content = content.substring(content.indexOf("<body"), content.indexOf("</body>"));
//                        }
//
//
//                    /*
//                    美化内容（去空格、段落美化）
//                    */
//                        Elements elements = ReportUtil.getDirectory(content);
////                    List<String> contentList = ReportUtil.beautifyContent(content);//zgz
//                        List<String> contentList = ReportUtil.beautifyContent2(elements);//lxp
//
//                        reportScienceFile.setContent(String.join("", contentList));
//                        reportScienceFile.setDeleted("0");
//                        reportScienceFile.setPublishDate(DateUtil.getStringDate(new Date()));
//                        reportScienceFile.setStatus(0);
//                        reportScienceFile.setDataType(aiReportScienceFile.getDataType());
//
//                        System.out.println(j + "：标题：" + reportScienceFile.getTitle() + "-拆分完成");
//                        String fileId = "";
//                        Integer fileType = reportScienceFile.getDataType();
//                        String origin = "";
////                        if (list.size() > 0) {
////                            list.get(0).setContent(reportScienceFile.getContent());
////                            list.get(0).setDeleted("0");
////                            list.get(0).setFilePathObs(reportScienceFile.getFilePathObs());
////                            list.get(0).setPreviewObs(reportScienceFile.getPreviewObs());
////                            list.get(0).setCoverObs(reportScienceFile.getCoverObs());
////                            fileId = list.get(0).getId();
////                            fileType = list.get(0).getDataType();
////                            origin = list.get(0).getOrigin();
////                            aiReportScienceFileService.updateById(list.get(0));
////                        } else {
////                            aiReportScienceFileService.save(reportScienceFile);
////                            fileId = reportScienceFile.getId();
////                        }
//                        aiReportScienceFileService.save(reportScienceFile);
//                        fileId = reportScienceFile.getId();
//
//                    /*
//                    获取所有段落层级数据
//                    */
//                        List<Map<String, Object>> li = ReportUtil.getList(contentList);
//
//                    /*
//                    删除历史素材
//                    */
//                        QueryWrapper queryOld = new QueryWrapper();
//                        queryOld.eq("file_id", fileId);
//                        aiReportScienceFileMaterialService.remove(queryOld);
//                        // 输出提取的内容
//                        List<AiReportScienceFileMaterial> listMaterial = new ArrayList<>();
//                        for (Map<String, Object> str : li) {
//                            AiReportScienceFileMaterial aiReportScienceFileMaterial = new AiReportScienceFileMaterial();
//                            aiReportScienceFileMaterial.setId(str.get("id").toString());
//                            aiReportScienceFileMaterial.setParent(str.get("parent").toString());
//                            aiReportScienceFileMaterial.setContentType(str.get("contentType").toString());
//                            aiReportScienceFileMaterial.setFileId(fileId);
//                            aiReportScienceFileMaterial.setContent(str.get("text").toString());
//                            aiReportScienceFileMaterial.setLevel(String.valueOf(str.get("level")));
//                            aiReportScienceFileMaterial.setSort((Integer) str.get("sort"));
//                            aiReportScienceFileMaterial.setDeleted("0");
//                            listMaterial.add(aiReportScienceFileMaterial);
//                        }
//                        aiReportScienceFileMaterialService.saveBatch(listMaterial);
//                        QueryWrapper query1 = new QueryWrapper();
//                        query1.eq("file_id", fileId);
//                        query1.eq("deleted", "0");
//                        query1.ne("content_type","img");
//                        query1.ne("content_type","table");
//                        List<AiReportScienceFileMaterial> listAiReportScienceFileMaterial = aiReportScienceFileMaterialService.list(query1);
//
//                    /*
//                    素材kafka推送
//                    */
//                        ReportUtil.sendKafka(listAiReportScienceFileMaterial, fileId, fileType,origin);
//
//                        byteArrayOutputStream.close();
//                        document.close();
//                        fileInputStream.close();
//                        inputStream.close();
//                    } else if (file.getOriginalFilename().endsWith("pdf")) {
//
//                    } else {
//                        System.out.println("上传文件类型错误！");
//                    }
//                }catch (Exception e){
//                    System.out.println("处理异常！跳过");
//                }
//            }
//        } catch (Exception e) {
//            log.error(e.getMessage());
//        }
//        return true;
//    }


    /**
     * Imports every multipart part named {@code file} from the request as an AI report material.
     *
     * <p>For each uploaded file the title is derived from the file name. Word files are passed to
     * {@code ReportUtil.formatFile}, converted to HTML, stored as a new {@link AiReportScienceFile}
     * and split into {@link AiReportScienceFileMaterial} rows. PDF files pass validation but are
     * not processed. A failure on one file is logged with its cause and the loop moves on to the
     * next file.
     *
     * @param aiReportScienceFile template object; only its {@code dataType} is copied onto each new record
     * @param request             the incoming request; must be a {@link MultipartHttpServletRequest}
     * @return always {@code true}, including when the request is not multipart, carries no
     *         {@code file} parts, or individual files fail (failures are only logged)
     * @throws Exception declared by the service interface; this implementation catches and logs
     *                   every exception instead of propagating it
     */
    @Override
    public Boolean uploadAiMaterial(AiReportScienceFile aiReportScienceFile, HttpServletRequest request) throws Exception {
        try {
            if (!(request instanceof MultipartHttpServletRequest)) {
                log.error("uploadAiMaterial expects a multipart request but got: "
                        + (request == null ? null : request.getClass().getName()));
                return true;
            }
            MultiValueMap<String, MultipartFile> files = ((MultipartHttpServletRequest) request).getMultiFileMap();
            // MultiValueMap only promises a List; do not downcast to a concrete list implementation.
            List<MultipartFile> uploads = files.get("file");
            if (uploads == null || uploads.isEmpty()) {
                log.warn("uploadAiMaterial: request contains no \"file\" parts");
                return true;
            }

            int i = 0; // 1-based position of the file within the request
            int j = 0; // 1-based count of files that passed validation
            for (MultipartFile file : uploads) {
                i++;
                String fileName = file.getOriginalFilename();
                try {
                    if (fileName == null) {
                        throw new IllegalArgumentException("uploaded part has no original file name");
                    }
                    int lastDot = fileName.lastIndexOf('.');
                    String extension = lastDot < 0 ? "" : fileName.substring(lastDot + 1);
                    if (!"docx".equals(extension) && !"doc".equals(extension) && !"pdf".equals(extension)) {
                        throw new IllegalArgumentException("目前支持doc、docx、pdf格式");
                    }

                    AiReportScienceFile reportScienceFile = new AiReportScienceFile();
                    reportScienceFile.setTitle(deriveUploadTitle(fileName));

                    // A journal-specific lookup of existing records by title/author used to run
                    // here (update-in-place instead of insert). It was marked as not needed and is
                    // disabled, so every accepted file is inserted as a new record.
                    j++;

                    if ("pdf".equals(extension)) {
                        // PDF is accepted by validation, but no PDF import is implemented.
                        continue;
                    }

                    // Per the original comment this step uploads the file; it receives the record
                    // so it can presumably fill in storage paths — TODO confirm in ReportUtil.
                    ReportUtil.formatFile(reportScienceFile, extension, file);
                    System.out.println(j + "：标题：" + reportScienceFile.getTitle() + "-开始拆分");

                    // NOTE(review): XWPFDocument reads OOXML (.docx). A legacy binary .doc presumably
                    // fails here and is skipped by the catch below, after formatFile already ran —
                    // confirm whether .doc should be converted or rejected up front.
                    String content;
                    try (InputStream wordStream = file.getInputStream();
                         XWPFDocument document = new XWPFDocument(wordStream)) {
                        content = ReportUtil.getwordHtml(document);
                    }
                    List<String> contentList = ReportUtil.beautifyContent2(content);

                    reportScienceFile.setContent(String.join("", contentList));
                    reportScienceFile.setDeleted("0");
                    reportScienceFile.setPublishDate(DateUtil.getStringDate(new Date()));
                    reportScienceFile.setStatus(0);
                    reportScienceFile.setDataType(aiReportScienceFile.getDataType());
                    System.out.println(j + "：标题：" + reportScienceFile.getTitle() + "-拆分完成");

                    aiReportScienceFileService.save(reportScienceFile);
                    replaceFileMaterials(reportScienceFile.getId(), contentList);

                    // The Kafka push of the stored materials is currently disabled. When it is
                    // re-enabled, query the non-deleted materials of this file whose content_type
                    // is neither "img" nor "table" and pass them to ReportUtil.sendKafka.

                    // Logged only after the record and its materials are persisted.
                    // NOTE(review): kept at error level as in the original — presumably because the
                    // logger in scope offers no info level; confirm before changing.
                    log.error(j + "：标题：" + fileName + "-处理成功");
                } catch (Exception e) {
                    // Best effort: one bad file must not abort the batch, but keep the cause.
                    log.error(i + "：标题：" + fileName + "-处理异常！跳过", e);
                }
            }
        } catch (Exception e) {
            log.error("uploadAiMaterial failed: " + e.getMessage(), e);
        }
        return true;
    }

    /**
     * Derives the record title from an uploaded file name.
     *
     * <p>The name is cut at its last {@code _}; if it has none, at its last {@code -}; otherwise at
     * its first {@code .}. The dropped trailing segment was treated as the author by the disabled
     * lookup code. Remaining {@code _} separators become spaces, except that a segment ending in
     * "国资委" is followed by {@code :}. The title is then truncated at the first "—" and at the
     * first "...".
     *
     * @param fileName original file name, non-null
     * @return the derived title, possibly empty
     */
    private static String deriveUploadTitle(String fileName) {
        String title;
        if (fileName.contains("_")) {
            title = fileName.substring(0, fileName.lastIndexOf("_"));
        } else if (fileName.contains("-")) {
            title = fileName.substring(0, fileName.lastIndexOf("-"));
        } else {
            int firstDot = fileName.indexOf('.');
            title = firstDot < 0 ? fileName : fileName.substring(0, firstDot);
        }

        if (title.contains("_")) {
            StringBuilder joined = new StringBuilder();
            for (String part : title.split("_")) {
                if (part.isEmpty()) {
                    continue;
                }
                joined.append(part).append(part.endsWith("国资委") ? ":" : " ");
            }
            title = joined.toString().trim();
        }
        if (title.contains("—")) {
            title = title.substring(0, title.indexOf("—")).trim();
        }
        if (title.contains("...")) {
            title = title.substring(0, title.indexOf("..."));
        }
        return title;
    }

    /**
     * Replaces the stored materials of a file with the segments built from its content.
     *
     * <p>Builds the segment maps via {@code ReportUtil.getList}, removes all material rows whose
     * {@code file_id} equals {@code fileId}, then batch-saves one row per segment.
     *
     * @param fileId      id of the owning {@link AiReportScienceFile}
     * @param contentList beautified content fragments of the file
     * @throws Exception if building the segments or persisting them fails
     */
    private void replaceFileMaterials(String fileId, List<String> contentList) throws Exception {
        List<Map<String, Object>> segments = ReportUtil.getList(contentList);

        QueryWrapper<AiReportScienceFileMaterial> staleMaterials = new QueryWrapper<>();
        staleMaterials.eq("file_id", fileId);
        aiReportScienceFileMaterialService.remove(staleMaterials);

        List<AiReportScienceFileMaterial> materials = new ArrayList<>(segments.size());
        for (Map<String, Object> segment : segments) {
            AiReportScienceFileMaterial material = new AiReportScienceFileMaterial();
            material.setId(segment.get("id").toString());
            material.setParent(segment.get("parent").toString());
            material.setContentType(segment.get("contentType").toString());
            material.setFileId(fileId);
            material.setContent(segment.get("text").toString());
            material.setLevel(String.valueOf(segment.get("level")));
            material.setSort((Integer) segment.get("sort"));
            material.setDeleted("0");
            materials.add(material);
        }
        aiReportScienceFileMaterialService.saveBatch(materials);
    }
}
