package com.zzsn.knowbase.service.impl;

import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.zzsn.knowbase.entity.AiReportScienceFile;
import com.zzsn.knowbase.entity.AiReportScienceFileMaterial;
import com.zzsn.knowbase.mapper.AiReportScienceFileMaterialMapper;
import com.zzsn.knowbase.service.IAiReportScienceFileMaterialService;
import com.zzsn.knowbase.service.IAiReportScienceFileService;
import com.zzsn.knowbase.util.ReportUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Service that splits the HTML of a science report file into material rows
 * ({@link AiReportScienceFileMaterial}) and stores them keyed by file id.
 *
 * @version V1.0
 */
@Service
public class AiReportScienceFileMaterialServiceImpl extends ServiceImpl<AiReportScienceFileMaterialMapper, AiReportScienceFileMaterial> implements IAiReportScienceFileMaterialService {

    /**
     * Jsoup selector for the elements that become materials: paragraphs that
     * contain no image, images, tables and headings.
     */
    private static final String MATERIAL_SELECTOR =
            "p:not(:has(img)),p > img,img,table,h1,h2,h3,h4,h5,h6,h7,h8,h9";

    /** Value written to the {@code deleted} field of every newly created row. */
    private static final String NOT_DELETED = "0";

    // Reference to this same service type, injected by Spring. Persistence calls go
    // through it rather than through `this` — presumably so container-applied
    // behavior (e.g. transactions) is honored; TODO confirm before simplifying.
    @Autowired
    private IAiReportScienceFileMaterialService aiReportScienceFileMaterialService;

    // Currently only needed by the pending Kafka publishing step (see TODO below).
    @Autowired
    private IAiReportScienceFileService aiReportScienceFileService;

    /**
     * Replaces the stored materials of a file with the ones extracted from {@code html}.
     *
     * <p>The new rows are fully built before anything is deleted, so a failure while
     * parsing or mapping the HTML leaves the previously stored rows untouched.
     *
     * <p>NOTE(review): no transaction boundary is declared in this class, so the
     * delete and the batch insert may not be atomic — confirm how callers handle a
     * failed insert.
     *
     * @param html   HTML content of the file; passed to {@link Jsoup#parse(String)}
     * @param fileId id of the owning file; existing rows with this {@code file_id}
     *               are removed and every new row is stamped with it
     */
    public void splitScienceFileMaterial(String html, String fileId) {
        List<AiReportScienceFileMaterial> materials = buildMaterials(html, fileId);

        QueryWrapper<AiReportScienceFileMaterial> previousRows = new QueryWrapper<>();
        previousRows.eq("file_id", fileId);
        aiReportScienceFileMaterialService.remove(previousRows);

        // Nothing extracted means nothing to insert; skip the batch call entirely.
        if (!materials.isEmpty()) {
            aiReportScienceFileMaterialService.saveBatch(materials);
        }

        // TODO publish to Kafka. The disabled implementation loaded the file via
        //  aiReportScienceFileService.getById(fileId), queried this file's rows with
        //  deleted = "0" and content_type not in ("img", "table"), and called
        //  ReportUtil.sendKafka(rows, fileId, file.getDataType(), file.getOrigin(), "2").
    }

    /**
     * Parses {@code html}, hands the selected elements to
     * {@link ReportUtil#getList(List)} and maps each returned entry to an unsaved entity.
     */
    private List<AiReportScienceFileMaterial> buildMaterials(String html, String fileId) {
        Document document = Jsoup.parse(html);
        Elements selected = document.select(MATERIAL_SELECTOR);

        // ReportUtil works on the HTML text of each selected element.
        List<String> fragments = new ArrayList<>(selected.size());
        for (Element element : selected) {
            fragments.add(element.toString());
        }

        List<Map<String, Object>> entries = ReportUtil.getList(fragments);
        List<AiReportScienceFileMaterial> materials = new ArrayList<>(entries.size());
        for (Map<String, Object> entry : entries) {
            AiReportScienceFileMaterial material = new AiReportScienceFileMaterial();
            material.setParent(entry.get("parent").toString());
            material.setContentType(entry.get("contentType").toString());
            material.setFileId(fileId);
            material.setContent(entry.get("text").toString());
            material.setLevel(String.valueOf(entry.get("level")));
            material.setSort((Integer) entry.get("sort"));
            material.setDeleted(NOT_DELETED);
            materials.add(material);
        }
        return materials;
    }
}
