提交 310640c1 作者: yanxin

整合专题数据

编辑逻辑优化
上级 ea4d4ea6
......@@ -187,10 +187,10 @@
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>21.1.0</version>
<version>15.12.0</version>
<type>jar</type>
<scope>system</scope>
<systemPath>${project.basedir}/lib/aspose-words-21.1-jdk17.jar</systemPath>
<systemPath>${project.basedir}/lib/aspose-words-15.12.0-jdk16.jar</systemPath>
</dependency>
<dependency>
......
......@@ -20,8 +20,8 @@ public class Constants {
//新领导讲话索引(22.08.24)
public final static String LEADER_SPEECH_BASE_DATA = "leaderspeech_new";
//新专题库(22.04.24)
public final static String ES_SUBJECT_DATA = "subjectdata";
//专题库
public final static String ES_SUBJECT_DATA = "subjectdatabase";
//审计对接旧索引(废弃)
public final static String ES_SUBJECT_DEV_DATA = "subjectdatabase_dev";
......@@ -29,7 +29,7 @@ public class Constants {
//处理后的专题资讯信息存储索引。
public final static String ES_DATA_FOR_SUBJECT = "subjectdatabase_2023";
//知识库
public final static String ES_DATA_FOR_KNOWLEDGE = "knowledgedatabase";
public final static String ES_DATA_FOR_KNOWLEDGE = "knowledgedatabase_nt";
/*
* 专题内容主次关系索引
*/
......
......@@ -31,6 +31,35 @@ public class KbKnowledges extends Model<KbKnowledges> {
*/
@TableField("name")
private String name;
    /**
     * Ids of the subjects bound to this knowledge base; multiple ids are comma-separated.
     */
    @TableField("subject_id")
    private String subjectId;
    /**
     * Data statuses of subject documents to synchronize, comma-separated.
     * When empty, documents of every status are synchronized.
     */
    @TableField("subject_status")
    private String subjectStatus;
    /**
     * Publish statuses of subject documents to synchronize, comma-separated.
     * When empty, documents of every publish status are synchronized.
     */
    @TableField("subject_publish_status")
    private String subjectPublishStatus;
    /**
     * Second-level category.
     */
    @TableField("project_id")
    private String projectId;
    /**
     * First-level category.
     */
    @TableField("type_id")
    private String typeId;
    /**
     * Synchronization switch: 1 = enabled, 0 = disabled.
     */
    @TableField("sync_status")
    private Integer syncStatus;
}
......@@ -54,18 +54,17 @@ public class Knowledge implements Serializable {
@ApiModelProperty(value = "创建日期")
private String createTime;
/**
* 更新人
* 专题id
*/
@ApiModelProperty(value = "更新人")
private String updateBy;
private String subjectId;
/**
* 更新日期
* 专题id
*/
private String updateTime;
private String contentWithTag;
/**
* 状态(1启用 0不启用)
* 更新日期
*/
private Integer status;
private String updateDate;
/**
* 来源
*/
......@@ -104,6 +103,10 @@ public class Knowledge implements Serializable {
private String kbKnowledgeId;
private String contentAll;
/**
* 链接地址
*/
private String sourceAddress;;
/**
* 类型
*/
private String typeId;
......@@ -113,7 +116,7 @@ public class Knowledge implements Serializable {
private List<KnowFile> files;
private Integer score;
/**
* 0word 1excel 2ppt 3pdf
* 0word 1excel&subject 2ppt 3pdf
*/
private Integer importData;
......
package com.zzsn.knowbase.entity.subject;
import lombok.Data;
import java.util.List;
/**
 * A document of the subject ("专题") index, as deserialized from its JSON source.
 *
 * <p>NOTE(review): fields suffixed {@code Raw} carry no comment in the original; presumably
 * they hold the unprocessed/original-language value of the sibling field — confirm.
 */
@Data
public class IntelligenceInfo {
    // Author
    private String author;
    private String authorRaw;
    // Review status (0: not reviewed, 1: approved, 2: rejected, 3: tentative, 4: duplicate; defaults to 0)
    private Integer checkStatus;
    // Publish status, effective only when checkStatus = 1
    // (0/null: waiting to be published, 1: published, 2: taken down; defaults to null)
    private Integer publishStatus;
    // Body text
    private String content;
    private String contentRaw;
    // Body text with markup tags
    private String contentWithTag;
    private String contentWithTagRaw;
    // Creation time
    private String createDate;
    // Deletion flag
    private Integer deleteFlag;
    // Document id
    private String id;
    // Keywords
    private String keyWords;
    // Language
    private String lang;
    // Origin (source name)
    private String origin;
    private String originRaw;
    // Publish time
    private String publishDate;
    // Score
    private Double score;
    // Information source id
    private String sid;
    // Source address (URL)
    private String sourceAddress;
    // Subject id
    private String subjectId;
    // Summary
    private String summary;
    private String summaryRaw;
    // Title
    private String title;
    private String titleRaw;
    // Pinned-to-top ordering value
    private Integer topNum;
    private String type;
    // Labels attached to this document
    private List<Label> labels;
    // Sentiment orientation (positive / negative)
    private String orientation;
    // Last update time
    private String updateDate;
    // Image processing status
    private String imgDisposeStatus;
    // Data type — qbyw: intelligence headlines, qbnc: internal intelligence reference, cgbg: regular report
    private String dataType;
    // ABI report URL
    private String abiUrl;
    // ABI report id
    private String abiId;
    // Column (programa) list; values correspond to the "code" field of column management
    private List<String> programaIds;
    // Resource catalog; corresponds to the "code" field of resource catalog management
    private String resourceCatalogId;
    // Keyword list
    private List<String> keyWordsList;
    // Time the document entered the subject index
    private String processDate;
}
package com.zzsn.knowbase.entity.subject;
import lombok.Data;
/**
 * A label (tag) that has been attached to a base information document.
 */
@Data
public class Label {
    // Hit marker
    private String hitRemarks;
    // Label identifier
    private String labelMark;
    // Label remarks
    private String labelRemarks;
    // Project label id
    private String projectLabelId;
    // Related label id
    private String relationId;
    // Related label name
    private String relationName;
    // Review status
    private Integer status;
}
......@@ -34,6 +34,8 @@ public interface IKnowledgeService {
*/
void deleteKnowledge(List<Knowledge> knowledgeList);
    /**
     * Requests deletion of one knowledge document from the external store.
     * NOTE(review): judging by the name this is the Python-side (vector) service — confirm.
     *
     * @param id              id of the knowledge document to delete
     * @param knowledgeBaseId id of the knowledge base the document belongs to
     */
    void deleteForPython(String id, String knowledgeBaseId);
/**
* 分页检索
*
......
......@@ -3,6 +3,8 @@ package com.zzsn.knowbase.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.zzsn.knowbase.entity.KbKnowledges;
import java.util.List;
/**
* <p>
* 服务类
......@@ -13,4 +15,5 @@ import com.zzsn.knowbase.entity.KbKnowledges;
*/
public interface KbKnowledgesService extends IService<KbKnowledges> {
    /**
     * Returns the knowledge bases taking part in subject synchronization.
     * NOTE(review): the exact selection criteria live in the implementation — presumably
     * "has a bound subject and sync is enabled"; confirm there.
     *
     * @return the knowledge base configurations to synchronize
     */
    List<KbKnowledges> syncSubjectConf();
}
......@@ -831,17 +831,19 @@ public class KbAuthorizedUserServiceImpl extends ServiceImpl<KbAuthorizedUserMap
boolean success = res.isSuccess();
if(200 == res.getCode() && success){
/**校验成功,获取到第三方用户信息*/
KbAuthorizedUser bean = JSONUtil.toBean(JSONUtil.toJsonStr(res.getResult()), KbAuthorizedUser.class);
/**校验第三方用户是否进行了授权*/
KbAuthorizedUser one = JSONUtil.toBean(JSONUtil.toJsonStr(res.getResult()), KbAuthorizedUser.class);
one.setIsAll(0);
one.setRoleId("1742844597970673665");
/**校验第三方用户是否进行了授权*//*
List<KbAuthorizedUser> list = super.list(Wrappers.<KbAuthorizedUser>lambdaQuery().eq(KbAuthorizedUser::getUserId, bean.getUserId()));
if (CollectionUtil.isEmpty(list)){
HttpServletResponse httpServletResponse = SpringContextUtils.getHttpServletResponse();
httpServletResponse.setStatus(401);
return Result.error("该用户还未进行授权访问,请联系管理员进行授权");
}
/**查询授权用户的角色和权限*/
*//**查询授权用户的角色和权限*//*
UserInfoVo userInfoVo = new UserInfoVo();
/**查询授权用户,“所有”节点的角色优先*/
*//**查询授权用户,“所有”节点的角色优先*//*
QueryWrapper<KbAuthorizedUser> query = new QueryWrapper<>();
query.eq("a.user_id",bean.getUserId());
query.eq("b.knowledge_project_id","0");
......@@ -852,7 +854,7 @@ public class KbAuthorizedUserServiceImpl extends ServiceImpl<KbAuthorizedUserMap
one = authorizedUserMapper.getOne(query1);
}
/**是否是所有节点的管理员*/
*//**是否是所有节点的管理员*//*
List<KbAuthuserKnowledgeprojectMap> list1 = authuserKnowledgeprojectMapService.list(Wrappers.<KbAuthuserKnowledgeprojectMap>lambdaQuery()
.eq(KbAuthuserKnowledgeprojectMap::getAuthUserId, one.getId())
.eq(KbAuthuserKnowledgeprojectMap::getKnowledgeProjectId, "0")
......@@ -863,7 +865,7 @@ public class KbAuthorizedUserServiceImpl extends ServiceImpl<KbAuthorizedUserMap
one.setIsAll(0);
}else {
one.setIsAll(1);
}
}*/
List<KbRole> kbRoles = roleService.listByIds(Arrays.asList(one.getRoleId().split(",")));
......@@ -877,6 +879,7 @@ public class KbAuthorizedUserServiceImpl extends ServiceImpl<KbAuthorizedUserMap
// e.setPermissions(permissionsService.list(Wrappers.<KbPermissions>lambdaQuery().in(KbPermissions::getId,longs)));
// }
// });
UserInfoVo userInfoVo = new UserInfoVo();
if (CollectionUtil.isNotEmpty(permissionMaps)) {
List<KbPermissions> permissions = permissionsService.list(Wrappers.<KbPermissions>lambdaQuery().in(KbPermissions::getId, permissionMaps.stream().map(KbRolePermissionMap::getPermissionId).collect(Collectors.toList())));
userInfoVo.setPermissions(permissions);
......@@ -942,42 +945,44 @@ public class KbAuthorizedUserServiceImpl extends ServiceImpl<KbAuthorizedUserMap
}
@Override
public Result<?> doCheckAndGetUser(String token) {
/**调用第三方系统校验接口*/
/**调用第三方系统校验接口*/
Result res = doCheck(token);
boolean success = res.isSuccess();
if(200 == res.getCode() && success) {
/**校验成功,获取到第三方用户信息*/
KbAuthorizedUser bean = JSONUtil.toBean(JSONUtil.toJsonStr(res.getResult()), KbAuthorizedUser.class);
/**校验第三方用户是否进行了授权*/
List<KbAuthorizedUser> list = super.list(Wrappers.<KbAuthorizedUser>lambdaQuery().eq(KbAuthorizedUser::getUserId, bean.getUserId()));
if (CollectionUtil.isEmpty(list)) {
HttpServletResponse httpServletResponse = SpringContextUtils.getHttpServletResponse();
httpServletResponse.setStatus(401);
return Result.error("该用户还未进行授权访问,请联系管理员进行授权");
}
/**查询授权用户,“所有”节点的角色优先*/
QueryWrapper<KbAuthorizedUser> query = new QueryWrapper<>();
query.eq("a.user_id",bean.getUserId());
query.eq("b.knowledge_project_id","0");
KbAuthorizedUser one = authorizedUserMapper.getOne(query);
if (ObjectUtil.isEmpty(one) || StringUtils.isBlank(one.getRoleId())){
QueryWrapper<KbAuthorizedUser> query1 = new QueryWrapper<>();
query1.eq("a.user_id",bean.getUserId());
one = authorizedUserMapper.getOne(query1);
}
/**是否是所有节点的管理员*/
List<KbAuthuserKnowledgeprojectMap> list1 = authuserKnowledgeprojectMapService.list(Wrappers.<KbAuthuserKnowledgeprojectMap>lambdaQuery()
.eq(KbAuthuserKnowledgeprojectMap::getAuthUserId, one.getId())
.eq(KbAuthuserKnowledgeprojectMap::getKnowledgeProjectId, "0")
.eq(KbAuthuserKnowledgeprojectMap::getSign, 0)
);
if (CollectionUtil.isNotEmpty(list1)){
one.setIsAll(0);
}else {
one.setIsAll(1);
}
return Result.OK(one);
// /**校验第三方用户是否进行了授权*/
// List<KbAuthorizedUser> list = super.list(Wrappers.<KbAuthorizedUser>lambdaQuery().eq(KbAuthorizedUser::getUserId, bean.getUserId()));
// if (CollectionUtil.isEmpty(list)) {
// HttpServletResponse httpServletResponse = SpringContextUtils.getHttpServletResponse();
// httpServletResponse.setStatus(401);
// return Result.error("该用户还未进行授权访问,请联系管理员进行授权");
// }
// /**查询授权用户,“所有”节点的角色优先*/
// QueryWrapper<KbAuthorizedUser> query = new QueryWrapper<>();
// query.eq("a.user_id",bean.getUserId());
// query.eq("b.knowledge_project_id","0");
// KbAuthorizedUser one = authorizedUserMapper.getOne(query);
// if (ObjectUtil.isEmpty(one) || StringUtils.isBlank(one.getRoleId())){
// QueryWrapper<KbAuthorizedUser> query1 = new QueryWrapper<>();
// query1.eq("a.user_id",bean.getUserId());
// one = authorizedUserMapper.getOne(query1);
// }
// /**是否是所有节点的管理员*/
// List<KbAuthuserKnowledgeprojectMap> list1 = authuserKnowledgeprojectMapService.list(Wrappers.<KbAuthuserKnowledgeprojectMap>lambdaQuery()
// .eq(KbAuthuserKnowledgeprojectMap::getAuthUserId, one.getId())
// .eq(KbAuthuserKnowledgeprojectMap::getKnowledgeProjectId, "0")
// .eq(KbAuthuserKnowledgeprojectMap::getSign, 0)
// );
//
// if (CollectionUtil.isNotEmpty(list1)){
// one.setIsAll(0);
// }else {
// one.setIsAll(1);
// }
bean.setIsAll(0);
bean.setRoleId("1742844597970673665");
return Result.OK(bean);
}else {
return Result.error("第三方系统未通过用户校验");
}
......
......@@ -193,7 +193,7 @@ public class KbKnowledgeProjectServiceImpl extends ServiceImpl<KbKnowledgeProjec
e.setRoles(list);
e.setPermissions(permissions);
});
//查询此用户,在其他节点有无特殊权限
/*//查询此用户,在其他节点有无特殊权限
List<UserKnowPermissionVo> res = authorizedUserMapper.getUserPermission(userInfo.getId());
List<UserKnowPermissionVo> collect = res.stream().filter(f -> !"0".equals(f.getKnowledgeProjectId())).collect(Collectors.toList());
if (CollectionUtil.isNotEmpty(collect)){
......@@ -225,7 +225,7 @@ public class KbKnowledgeProjectServiceImpl extends ServiceImpl<KbKnowledgeProjec
e.setPermissions(permissionsList);
}
});
}
}*/
return Result.OK(kbKnowledgeProjects);
}else {
......
package com.zzsn.knowbase.service.impl;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.zzsn.knowbase.entity.KbKnowledges;
import com.zzsn.knowbase.mapper.KbKnowledgesMapper;
import com.zzsn.knowbase.service.KbKnowledgesService;
import org.springframework.stereotype.Service;
import java.util.List;
/**
* <p>
* 服务实现类
......@@ -18,4 +22,12 @@ import org.springframework.stereotype.Service;
@Service
public class KbKnowledgesServiceImpl extends ServiceImpl<KbKnowledgesMapper, KbKnowledges> implements KbKnowledgesService {

    /** Value of the sync-status column that marks synchronization as enabled. */
    private static final int SYNC_ENABLED = 1;

    /**
     * Lists the knowledge bases that have a non-null, non-empty subject id and whose
     * sync status equals {@link #SYNC_ENABLED}.
     *
     * @return the matching knowledge base rows
     */
    @Override
    public List<KbKnowledges> syncSubjectConf() {
        LambdaQueryWrapper<KbKnowledges> enabledWithSubject = Wrappers.<KbKnowledges>lambdaQuery()
                .isNotNull(KbKnowledges::getSubjectId)
                .ne(KbKnowledges::getSubjectId, "")
                .eq(KbKnowledges::getSyncStatus, SYNC_ENABLED);
        return list(enabledWithSubject);
    }
}
package com.zzsn.knowbase.service.impl;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
......@@ -20,7 +21,6 @@ import com.zzsn.knowbase.vo.Result;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.ListUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.pdfbox.pdmodel.PDDocument;
......@@ -62,8 +62,6 @@ import java.io.*;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* @Description: 知识
......@@ -302,6 +300,20 @@ class KnowledgeServiceImpl implements IKnowledgeService {
}
BeanUtils.copyProperties(knowledge, knowledgeMessage);
knowledgeMessage.setType(knowledge.getTypeId());
if(StringUtils.isNotEmpty(knowledge.getContentWithTag())){
//片段切分
List<Content> contents = new ArrayList<>();
List<String> splitContents = HtmlUtil.splitContents(knowledge.getContentWithTag());
for (String content : splitContents) {
System.out.println(content.length());
System.out.println(content);
contents.add(Content.builder()
.contentId(codeGenerateUtil.geneIdNo(Constants.FINANCE, 8))
.content(content)
.build());
}
knowledge.setContents(contents);
}
//id为空表示新增
if (null == knowledge.getId()) {
knowledge.setId(codeGenerateUtil.geneIdNo(Constants.FINANCE, 8));
......@@ -345,7 +357,10 @@ class KnowledgeServiceImpl implements IKnowledgeService {
@Override
public void deleteKnowledge(List<Knowledge> knowledgeList) {
for (Knowledge knowledge : knowledgeList) {
esOpUtil.docDeleteById(Constants.ES_DATA_FOR_KNOWLEDGE, knowledge.getId());
knowledge.setDeleteFlag(1);
//esOpUtil.docDeleteById(Constants.ES_DATA_FOR_KNOWLEDGE, knowledge.getId());
//修改为标记删除
esOpUtil.docUpdateById(Constants.ES_DATA_FOR_KNOWLEDGE, knowledge.getId(), JSON.toJSONString(knowledge));
}
CompletableFuture.runAsync(() -> {
......@@ -353,10 +368,9 @@ class KnowledgeServiceImpl implements IKnowledgeService {
deleteForPython(knowledge.getId(), knowledge.getKbKnowledgeId());
}
});
}
@Override
public void deleteForPython(String id, String knowledgeBaseId) {
JSONObject params = new JSONObject();
List<String> lidList = new ArrayList<>();
......@@ -364,11 +378,8 @@ class KnowledgeServiceImpl implements IKnowledgeService {
params.put("id", lidList);
params.put("knowledge_base_id", knowledgeBaseId);
log.info("delete param:{}", params);
try {
HttpUtil.doPost(deleteUrl, params, 120000);
} catch (IOException e) {
e.printStackTrace();
}
String res = HttpUtil.post(deleteUrl, params.toJSONString(), 5000);
log.info("delete res:{}", res);
}
......@@ -523,6 +534,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
if (StringUtils.isNotBlank(knowledgeParam.getVerifyEndTime())) {
boolQuery.filter(QueryBuilders.rangeQuery("verifyTime").lte(EsDateUtil.esFieldDateFormat(knowledgeParam.getEndTime())));
}
boolQuery.mustNot(QueryBuilders.termQuery("deleteFlag",1));
outer.should(boolQuery);
searchSourceBuilder.query(outer);
searchRequest.source(searchSourceBuilder);
......@@ -604,12 +616,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
if (knowledgeParam.getSearchScope().equals(2)) {
params.put("question", knowledgeParam.getSearchInfo());
}
String result = null;
try {
result = HttpUtil.doPost(searchUrl, params, 120000);
} catch (IOException e) {
e.printStackTrace();
}
String result = HttpUtil.post(searchUrl, params.toJSONString(), 120000);
if (result == null || !result.contains("result")) {
return null;
}
......@@ -831,11 +838,19 @@ class KnowledgeServiceImpl implements IKnowledgeService {
continue;
}
if (StringUtils.isNotEmpty(info.get(1))) {
specialInformation.setContentAll(info.get(1));
// //段落切分
// specialInformation.setContents(Collections.singletonList(Content.builder()
// .contentId(codeGenerateUtil.geneIdNo(Constants.FINANCE, 8))
// .content(info.get(1)).build()));
specialInformation.setContentWithTag(info.get(1));
//片段切分
List<Content> contents = new ArrayList<>();
List<String> splitContents = HtmlUtil.splitContents(specialInformation.getContentWithTag());
for (String content : splitContents) {
System.out.println(content.length());
System.out.println(content);
contents.add(Content.builder()
.contentId(codeGenerateUtil.geneIdNo(Constants.FINANCE, 8))
.content(content)
.build());
}
specialInformation.setContents(contents);
} else {
log.error("上传的数据{}正文为空,此条数据忽略", info.get(0));
continue;
......@@ -850,13 +865,13 @@ class KnowledgeServiceImpl implements IKnowledgeService {
specialInformation.setPublishDate(EsDateUtil.esFieldDateFormat(info.get(4)));
}
specialInformation.setDeleteFlag(0);
specialInformation.setCreateTime(cn.hutool.core.date.DateUtil.format(new Date(), "yyyy-MM-dd'T'HH:mm:ss"));
specialInformation.setCreateTime(EsDateUtil.esFieldDateFormat(cn.hutool.core.date.DateUtil.formatDateTime(new Date())));
esOpUtil.docSavaByEntity(Constants.ES_DATA_FOR_KNOWLEDGE, specialInformation.getId(), specialInformation);
KnowledgeMessage knowledgeMessage = new KnowledgeMessage();
BeanUtils.copyProperties(specialInformation, knowledgeMessage);
knowledgeMessage.setType(specialInformation.getTypeId());
knowledgeMessage.setContents(specialInformation.getContents());
produceInfo.sendKnowledgeExcelContents(knowledgeMessage);
//produceInfo.sendKnowledgeExcelContents(knowledgeMessage);
produceInfo.sendKnowledgeContents(knowledgeMessage);
} catch (NumberFormatException e) {
log.error("处理异常");
}
......
......@@ -311,18 +311,10 @@ public class LocalFileServiceImpl implements ILocalFileService {
String publish = EsDateUtil.esFieldDateFormat(knowledge.getPublishDate());
knowledge.setPublishDate(publish);
KbAuthorizedUser userInfo = SpringContextUtils.getUserInfo();
knowledge.setUpdateBy(userInfo.getName());
knowledge.setUpdateTime(cn.hutool.core.date.DateUtil.formatDateTime(new Date()).replace(" ", "T"));
Integer verifyStatus = knowledge.getVerifyStatus();
if(verifyStatus != null && (verifyStatus == 1 || verifyStatus == 2))
knowledge.setVerifyTime(cn.hutool.core.date.DateUtil.formatDateTime(new Date()).replace(" ", "T"));
knowledge.setVerifyTime(EsDateUtil.esFieldDateFormat(cn.hutool.core.date.DateUtil.formatDateTime(new Date())));
knowledge.setVerifierId(userInfo.getUserId());
knowledge.setVerifierName(userInfo.getUsername());
if(Integer.valueOf("0").equals(knowledge.getImportData())){
List<KnowFile> files = knowledge.getFiles();
KnowFile knowFile = files.get(0);
String filePath = knowFile.getFilePath();
......
package com.zzsn.knowbase.task;
import com.alibaba.fastjson.JSON;
import com.zzsn.knowbase.constant.Constants;
import com.zzsn.knowbase.entity.Content;
import com.zzsn.knowbase.entity.KbKnowledges;
import com.zzsn.knowbase.entity.Knowledge;
import com.zzsn.knowbase.entity.subject.IntelligenceInfo;
import com.zzsn.knowbase.kafka.message.KnowledgeMessage;
import com.zzsn.knowbase.kafka.producer.ProduceInfo;
import com.zzsn.knowbase.service.IKnowledgeService;
import com.zzsn.knowbase.service.KbKnowledgesService;
import com.zzsn.knowbase.util.*;
import com.zzsn.knowbase.vo.KnowledgeVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.*;
import java.util.stream.Collectors;
/**
* 同步专题数据到知识库
*/
@Slf4j
@RestController
@RequestMapping("subjectDataSyncTask")
public class SubjectDataSyncTask {
@Autowired
private KbKnowledgesService kbKnowledgesService;
@Autowired
private IKnowledgeService knowledgeService;
@Autowired
private EsOpUtil esOpUtil;
@Autowired
private CodeGenerateUtil codeGenerateUtil;
@Autowired
private ProduceInfo produceInfo;
@Scheduled(cron = "0 */10 * * * ?")
@GetMapping("startTask")
public void startTask() {
List<KbKnowledges> confList = kbKnowledgesService.syncSubjectConf();
for (KbKnowledges kbKnowledges : confList) {
sync(kbKnowledges);
}
}
@GetMapping("syncById")
public void syncById(String id) {
sync(kbKnowledgesService.getById(id));
}
public void sync(KbKnowledges kbKnowledges) {
//按审核状态分组
Map<Integer, List<String>> checkStatusMap = new HashMap<>();
//提取发布状态分组
Map<Integer, List<String>> publishStatusMap = new HashMap<>();
//已删除数据
List<String> deleteList = new ArrayList<>();
getSubjectIdGroup(kbKnowledges,checkStatusMap,publishStatusMap,deleteList);
//根据专题配置,提取有效数据
List<String> subjectValidList1 = new ArrayList<>();
List<String> subjectValidList2 = new ArrayList<>();
if(StringUtils.isNotEmpty(kbKnowledges.getSubjectStatus())){
String subjectStatus = kbKnowledges.getSubjectStatus();
for (String status : subjectStatus.split(",")) {
if(checkStatusMap.get(Integer.parseInt(status)) != null){
subjectValidList1.addAll(checkStatusMap.get(Integer.parseInt(status)));
}
}
}else{
for (Map.Entry<Integer, List<String>> entry : checkStatusMap.entrySet()) {
subjectValidList1.addAll(entry.getValue());
}
}
if(StringUtils.isNotEmpty(kbKnowledges.getSubjectPublishStatus())){
String subjectStatus = kbKnowledges.getSubjectPublishStatus();
for (String status : subjectStatus.split(",")) {
if(publishStatusMap.get(Integer.parseInt(status)) != null){
subjectValidList2.addAll(publishStatusMap.get(Integer.parseInt(status)));
}
}
}else{
for (Map.Entry<Integer, List<String>> entry : publishStatusMap.entrySet()) {
subjectValidList2.addAll(entry.getValue());
}
}
//专题有效数据 (交集)
List<String> subjectValidList = new ArrayList<>(subjectValidList1);
subjectValidList.retainAll(subjectValidList2);
//知识库有效数据
List<String> knowValidList = new ArrayList<>();
//知识库删除数据
List<String> knowDeleteList = new ArrayList<>();
getKnowId(kbKnowledges,knowValidList,knowDeleteList);
//待删除数据id
List<String> hasDeleteList = new ArrayList<>();
//待新增数据id
List<String> hasAddList = new ArrayList<>();
if(!subjectValidList.isEmpty()){
//提取需要新增或更新的数据
hasAddList = new ArrayList<>(subjectValidList);
hasAddList.removeAll(knowValidList);
}
if(!knowValidList.isEmpty()){
//提取需要删除的数据(知识库存在有效数据,专题库非配置状态范围内数据)
hasDeleteList = new ArrayList<>(knowValidList);
hasDeleteList.removeAll(subjectValidList);
//本次更新数据不需要删除
hasDeleteList.removeAll(hasAddList);
}
log.info("知识库:{},需要新增或更新的数据:{}",kbKnowledges.getName(),hasAddList);
log.info("知识库:{},需要删除的数据:{}",kbKnowledges.getName(),hasDeleteList);
//处理待修改数据
for (String item : hasAddList) {
String id = item.split(",")[0];
Map<String, Object> mapItem = esOpUtil.searchDoc(Constants.ES_SUBJECT_DATA, id);
IntelligenceInfo subjectItem = JSON.parseObject(JSON.toJSONString(mapItem), IntelligenceInfo.class);
Knowledge knowledge = new Knowledge();
knowledge.setUpdateDate(subjectItem.getUpdateDate());
knowledge.setPublishDate(subjectItem.getPublishDate());
knowledge.setAuthor(subjectItem.getAuthor());
knowledge.setContentWithTag(subjectItem.getContentWithTag());
knowledge.setId(id);
knowledge.setDeleteFlag(0);
knowledge.setCreateTime(EsDateUtil.esFieldDateFormat(cn.hutool.core.date.DateUtil.formatDateTime(new Date())));
knowledge.setTitle(subjectItem.getTitle());
knowledge.setSubjectId(kbKnowledges.getSubjectId());
knowledge.setVerifyStatus(1);
knowledge.setOrigin(subjectItem.getOrigin());
knowledge.setSourceAddress(subjectItem.getSourceAddress());
knowledge.setKbKnowledgeId(getId(kbKnowledges));
knowledge.setKnowledgeProjectId(kbKnowledges.getProjectId());
knowledge.setType("专题数据");
knowledge.setTypeId(kbKnowledges.getTypeId());
knowledge.setImportData(1);
//片段切分
List<Content> contents = new ArrayList<>();
List<String> splitContents = HtmlUtil.splitContents(subjectItem.getContentWithTag());
for (String content : splitContents) {
System.out.println(content.length());
System.out.println(content);
contents.add(Content.builder()
.contentId(codeGenerateUtil.geneIdNo(Constants.FINANCE, 8))
.content(content)
.build());
}
knowledge.setContents(contents);
//判断是否存在
if(esOpUtil.docExists(Constants.ES_DATA_FOR_KNOWLEDGE, id)){
//更新数据
esOpUtil.docUpdateById(Constants.ES_DATA_FOR_KNOWLEDGE, id, JSON.toJSONString(knowledge));
log.info("知识库数据更新id:{},title:{}",knowledge.getId(),knowledge.getTitle());
//删除向量库已有数据
knowledgeService.deleteForPython(id, kbKnowledges.getId());
}else {
//添加数据
esOpUtil.docSaveByJson(Constants.ES_DATA_FOR_KNOWLEDGE, id, JSON.toJSONString(knowledge));
log.info("知识库数据新增id:{},title:{}",knowledge.getId(),knowledge.getTitle());
}
//通过kafka同步到向量库
KnowledgeMessage knowledgeMessage = new KnowledgeMessage();
BeanUtils.copyProperties(knowledge, knowledgeMessage);
knowledgeMessage.setType(knowledge.getTypeId());
produceInfo.sendKnowledgeContents(knowledgeMessage);
log.info("通过kafka同步到向量库id:{},title:{}",knowledge.getId(),knowledge.getTitle());
}
//处理待删除数据
for (String item : hasDeleteList) {
String id = item.split(",")[0];
//标记删除
Knowledge knowledge = new Knowledge();
knowledge.setId(id);
knowledge.setDeleteFlag(1);
esOpUtil.docUpdateById(Constants.ES_DATA_FOR_KNOWLEDGE, id, JSON.toJSONString(knowledge));
knowledgeService.deleteForPython(id, kbKnowledges.getId());
log.info("知识库数据删除id:{}",knowledge.getId());
}
}
private static String getId(KbKnowledges kbKnowledges) {
return kbKnowledges.getId();
}
public void getSubjectIdGroup(KbKnowledges kbKnowledges, Map<Integer, List<String>> checkStatusMap, Map<Integer, List<String>> publishStatusMap, List<String> deleteList) {
//查询待同步数据id
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("subjectId", kbKnowledges.getSubjectId().split(",")));
//只查询更新时间在最近一周的数据
boolQuery.filter(QueryBuilders.rangeQuery("updateDate").gte(DateUtil.getStringDate(new Date(System.currentTimeMillis() - 7 * 24 * 60 * 60 * 1000L))));
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//更新时间倒序
searchSourceBuilder.sort("updateDate", SortOrder.DESC);
searchSourceBuilder.trackTotalHits(true);
searchSourceBuilder.query(boolQuery);
//查询指定字段
searchSourceBuilder.fetchSource(new String[]{"id","deleteFlag","checkStatus","publishStatus","updateDate"}, null);
Map<String, Object> map = esOpUtil.searchByQuery(Constants.ES_SUBJECT_DATA ,
0, 10000, searchSourceBuilder);
List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("data");
if(list != null && !list.isEmpty()) {
//分组提取id列表
for (Map<String, Object> mapItem : list) {
Object deleteFlag = mapItem.get("deleteFlag");
if(deleteFlag != null && "1".equals(deleteFlag.toString())){
//已删除数据
deleteList.add(mapItem.get("id").toString()+","+mapItem.get("updateDate").toString());
continue;
}
groupMap(checkStatusMap, mapItem, mapItem.get("checkStatus"));
groupMap(publishStatusMap, mapItem, mapItem.get("publishStatus"));
}
}
}
public void getKnowId(KbKnowledges kbKnowledges, List<String> validList, List<String> deleteList) {
//查询待同步数据id
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("subjectId", kbKnowledges.getSubjectId().split(",")));
//只查询更新时间在最近一周的数据
boolQuery.filter(QueryBuilders.rangeQuery("updateDate").gte(DateUtil.getStringDate(new Date(System.currentTimeMillis() - 7 * 24 * 60 * 60 * 1000L))));
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//更新时间倒序
searchSourceBuilder.sort("updateDate", SortOrder.DESC);
searchSourceBuilder.trackTotalHits(true);
searchSourceBuilder.query(boolQuery);
//查询指定字段
searchSourceBuilder.fetchSource(new String[]{"id","deleteFlag","verifyStatus","updateDate"}, null);
Map<String, Object> map = esOpUtil.searchByQuery(Constants.ES_DATA_FOR_KNOWLEDGE ,
0, 10000, searchSourceBuilder);
List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("data");
if(list != null || !list.isEmpty()) {
//分组提取id列表
for (Map<String, Object> mapItem : list) {
Object deleteFlag = mapItem.get("deleteFlag");
if(deleteFlag != null && "1".equals(deleteFlag.toString())){
//已删除数据
deleteList.add(mapItem.get("id").toString()+","+mapItem.get("updateDate").toString());
}else{
//只要未删除,均为有效数据
validList.add(mapItem.get("id").toString()+","+mapItem.get("updateDate").toString());
}
}
}
}
private void groupMap(Map<Integer, List<String>> statusMap, Map<String, Object> mapItem, Object status) {
if(status == null){
status = 0;
}
Integer statusInt = Integer.valueOf(status.toString());
if(statusMap.containsKey(statusInt)){
statusMap.get(statusInt).add(mapItem.get("id").toString()+","+mapItem.get("updateDate").toString());
}else{
List<String> tempList = new ArrayList<>();
tempList.add(mapItem.get("id").toString()+","+mapItem.get("updateDate").toString());
statusMap.put(statusInt,tempList);
}
}
}
package com.zzsn.knowbase.util;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ContentUtility {
static Pattern divNoneP = Pattern.compile("(?s)<div[^>]*display:none[^>]*>.*?</div>", Pattern.CASE_INSENSITIVE);
static Pattern divP = Pattern.compile("<div>", Pattern.CASE_INSENSITIVE);
static Pattern divRP = Pattern.compile("</div>", Pattern.CASE_INSENSITIVE);
static Pattern brP = Pattern.compile("<br />", Pattern.CASE_INSENSITIVE);
static Pattern br2P = Pattern.compile("<br>", Pattern.CASE_INSENSITIVE);
static Pattern spaceP = Pattern.compile("&nbsp;", Pattern.CASE_INSENSITIVE);
static Pattern strongP = Pattern.compile("<strong>", Pattern.CASE_INSENSITIVE);
static Pattern strongRP = Pattern.compile("</strong>", Pattern.CASE_INSENSITIVE);
// Precompiled patterns for the HTML clean-up helpers of this class.
// Every pattern compiled with Pattern.CASE_INSENSITIVE matches tags regardless of letter case;
// a leading (?s) lets '.' span line breaks so that a whole element can be matched.

// Attribute-less paragraph tags: exactly "<p>" and "</p>".
static Pattern pP = Pattern.compile("<p>", Pattern.CASE_INSENSITIVE);
static Pattern pRP = Pattern.compile("</p>", Pattern.CASE_INSENSITIVE);
// <center ...> opening tag (any attributes) and its closing tag.
static Pattern centerP = Pattern.compile("<center[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern centerRP = Pattern.compile("</center>", Pattern.CASE_INSENSITIVE);
// Any opening tag: group 1 is the tag name (letters/digits), the remainder up to '>' is the attribute text.
static Pattern removeAttrP = Pattern.compile("<([a-zA-Z0-9]+)[^>]*>", Pattern.CASE_INSENSITIVE);
// Span from a "<!--...>" opener to the next "<!...-->" closer, including everything in between.
static Pattern commentP = Pattern.compile("(?s)<!--[^>]*>.*?<![^>]*-->", Pattern.CASE_INSENSITIVE);
// Form controls: <input> tag, <form> opening/closing tags, whole <button> element.
static Pattern inputP = Pattern.compile("<input[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern formP = Pattern.compile("<form[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern formRP = Pattern.compile("</form>", Pattern.CASE_INSENSITIVE);
static Pattern buttonP = Pattern.compile("(?s)<button[^>]*>.*?</button>", Pattern.CASE_INSENSITIVE);
// Whole embedded elements (tag plus content): iframe, noscript, object; and the <link> tag.
static Pattern iframeP = Pattern.compile("(?s)<iframe[^>]*>.*?</iframe>", Pattern.CASE_INSENSITIVE);
static Pattern noscriptP = Pattern.compile("(?s)<noscript>.*?</noscript>", Pattern.CASE_INSENSITIVE);
static Pattern objectP = Pattern.compile("(?s)<object[^>]*>.*?</object>", Pattern.CASE_INSENSITIVE);
static Pattern linkP = Pattern.compile("(?s)<link[^>]*>", Pattern.CASE_INSENSITIVE);
// Image tags. imgReplaceP/imgRevReplaceP capture the attribute text in group 1; "<_img" is
// presumably a temporary marker form of <img> (only referenced from commented-out code in this part of the file).
static Pattern imgReplaceP = Pattern.compile("<img([^>]*)>", Pattern.CASE_INSENSITIVE);
static Pattern imgRevReplaceP = Pattern.compile("<_img([^>]*)>", Pattern.CASE_INSENSITIVE);
static Pattern imgP = Pattern.compile("<img[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern imgRP = Pattern.compile("</img>", Pattern.CASE_INSENSITIVE);
// Whole <a> element including its text (public, so usable from other classes).
public static Pattern aRemoveP = Pattern.compile("(?s)<a[^>]*>.*?</a>", Pattern.CASE_INSENSITIVE);
// Whole <legend> element including its text.
static Pattern legendRemoveP = Pattern.compile("(?s)<legend[^>]*>.*?</legend>", Pattern.CASE_INSENSITIVE);
// Anchor tags only (text is kept). Note: "<a[^>]*>" also matches any other tag whose name starts with "a".
static Pattern aP = Pattern.compile("<a[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern aRP = Pattern.compile("</a>", Pattern.CASE_INSENSITIVE);
// <font> opening/closing tags.
static Pattern fontP = Pattern.compile("<font[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern fontRP = Pattern.compile("</font>", Pattern.CASE_INSENSITIVE);
// Heading tags <h0>..<h9> (one digit after "h"), opening and closing.
static Pattern hP = Pattern.compile("<h\\d[^>]*>", Pattern.CASE_INSENSITIVE);
static Pattern hRP = Pattern.compile("</h\\d>", Pattern.CASE_INSENSITIVE);
// Closing tags of list and table structures.
static Pattern ulRP = Pattern.compile("</ul>", Pattern.CASE_INSENSITIVE);
static Pattern liRP = Pattern.compile("</li>", Pattern.CASE_INSENSITIVE);
static Pattern trRP = Pattern.compile("</tr>", Pattern.CASE_INSENSITIVE);
static Pattern tdRP = Pattern.compile("</td>", Pattern.CASE_INSENSITIVE);
// Whole form-related elements including their content: textarea, select, option, label.
static Pattern textareaRemoveP = Pattern.compile("(?s)<textarea[^>]*>.*?</textarea>", Pattern.CASE_INSENSITIVE);
static Pattern selectRemoveP = Pattern.compile("(?s)<select[^>]*>.*?</select>", Pattern.CASE_INSENSITIVE);
static Pattern optionRemoveP = Pattern.compile("(?s)<option[^>]*>.*?</option>", Pattern.CASE_INSENSITIVE);
static Pattern labelRemoveP = Pattern.compile("(?s)<label[^>]*>.*?</label>", Pattern.CASE_INSENSITIVE);
// Decimal numeric character reference with 4 or 5 digits, e.g. "&#8226;"; group 1 is the number.
static String regHTMLNumcode = "&#(\\d{4,5});";
static Pattern patHTMLNumCode = Pattern.compile(regHTMLNumcode);
/**
 * Removes markup that carries no article content while leaving the remaining tags in place.
 * <p>
 * Steps, in order: script/style removal via {@link #RemoveStyleCode(String)}, "&amp;nbsp;"
 * to space, removal of hidden-div / form-control elements, attribute stripping via
 * {@link #removeUselessAtt(String)}, removal of comment / legend / iframe / noscript /
 * object elements and of center tags, removal of cufon wrapper tags, and finally removal
 * of empty ul / div / p / li / canvas elements.
 *
 * @param htmlText HTML to clean
 * @return the cleaned HTML; if a step throws, the stack trace is printed and the text as
 *         processed up to that step is returned
 */
public static String RemoveUselessHTMLTagX(String htmlText) {
    try {
        htmlText = ContentUtility.RemoveStyleCode(htmlText);
        htmlText = htmlText.replaceAll("&nbsp;", " ");

        // Elements and tags dropped before attributes are stripped.
        Pattern[] beforeAttributeCleanup = {
            divNoneP, textareaRemoveP, selectRemoveP, optionRemoveP, labelRemoveP,
            inputP, formP, buttonP, formRP
        };
        for (Pattern removal : beforeAttributeCleanup) {
            htmlText = removal.matcher(htmlText).replaceAll("");
        }

        htmlText = removeUselessAtt(htmlText);

        // Elements and tags dropped after attributes are stripped.
        Pattern[] afterAttributeCleanup = {
            commentP, legendRemoveP, iframeP, noscriptP, objectP, centerP, centerRP
        };
        for (Pattern removal : afterAttributeCleanup) {
            htmlText = removal.matcher(htmlText).replaceAll("");
        }

        // Cufon wrapper tags first, then containers that hold nothing but whitespace.
        String[] trailingRegexes = {
            "<cufontext>",
            "</cufontext>",
            "<cufon>",
            "</cufon>",
            "(?s)<ul[^>]*>\\s*</ul>",
            "(?s)<div[^>]*>\\s*</div>",
            "(?s)<p[^>]*>\\s*</p>",
            "(?s)<li[^>]*>\\s*</li>",
            "(?s)<canvas[^>]*>\\s*</canvas>"
        };
        for (String regex : trailingRegexes) {
            htmlText = htmlText.replaceAll(regex, "");
        }
        return htmlText;
    } catch (Exception e) {
        e.printStackTrace();
        return htmlText;
    }
}
/**
 * Converts an HTML fragment to plain text by replacing block-level tags with blank lines
 * and deleting every remaining tag.
 * <p>
 * Fix: the entity "&amp;#8226;" is now mapped to the bullet character U+2022. The previous
 * replacement text was the two-character mojibake "??", which injected question marks
 * into the extracted text.
 *
 * @param src HTML fragment; must not be null
 * @return the text with all tags removed, trimmed
 */
public static String RemoveHTMLCode(String src) {
    // Drop whitespace that sits between two adjacent tags.
    src = src.replaceAll("(<[^>]*>)\\s*(<[^>]*>)", "$1$2");
    // Block boundaries become a blank line.
    src = divP.matcher(src).replaceAll("\n\n");
    src = divRP.matcher(src).replaceAll("\n\n");
    src = brP.matcher(src).replaceAll("\n\n");
    src = br2P.matcher(src).replaceAll("\n\n");
    src = spaceP.matcher(src).replaceAll(" ");
    // Bullet entity -> bullet character (written as an escape so the source encoding cannot damage it).
    src = src.replaceAll("&#8226;", "\u2022");
    // Inline formatting tags are removed without leaving a separator.
    src = strongP.matcher(src).replaceAll("");
    src = strongRP.matcher(src).replaceAll("");
    src = pP.matcher(src).replaceAll("\n\n");
    src = pRP.matcher(src).replaceAll("\n\n");
    src = aP.matcher(src).replaceAll("");
    src = aRP.matcher(src).replaceAll("");
    src = imgP.matcher(src).replaceAll("");
    src = fontP.matcher(src).replaceAll("");
    src = fontRP.matcher(src).replaceAll("");
    // Closing tags of headings, lists and table rows/cells also end a paragraph.
    src = hRP.matcher(src).replaceAll("\n\n");
    src = ulRP.matcher(src).replaceAll("\n\n");
    src = liRP.matcher(src).replaceAll("\n\n");
    src = trRP.matcher(src).replaceAll("\n\n");
    src = tdRP.matcher(src).replaceAll("\n\n");
    // Whatever tag is still left is deleted.
    src = src.replaceAll("<[^>]*>", "");
    return src.trim();
}
/**
 * Strips the attributes from every opening tag except td, th, img and a,
 * whose attributes are kept unchanged.
 * <p>
 * Changes compared with the previous implementation:
 * <ul>
 *   <li>The keep-list is checked against the tag name case-insensitively, so mixed-case
 *       tags such as {@code <Td ...>} keep their attributes like {@code <td ...>} does.</li>
 *   <li>The text is rewritten in one pass over the matches instead of one whole-document
 *       string replacement per distinct tag, so a tag is only rewritten where it was
 *       actually matched (text inside a kept tag is no longer touched).</li>
 * </ul>
 *
 * @param htmlText HTML to process; null or empty input is returned unchanged
 * @return the HTML with attributes removed from all other opening tags
 */
public static String removeUselessAtt(String htmlText) {
    if (htmlText == null || htmlText.isEmpty()) {
        return htmlText;
    }
    Matcher m = removeAttrP.matcher(htmlText);
    StringBuffer out = new StringBuffer(htmlText.length());
    while (m.find()) {
        String tagName = m.group(1);
        boolean keepAttributes = "td".equalsIgnoreCase(tagName)
                || "th".equalsIgnoreCase(tagName)
                || "img".equalsIgnoreCase(tagName)
                || "a".equalsIgnoreCase(tagName);
        String replacement = keepAttributes ? m.group() : "<" + tagName + ">";
        // quoteReplacement: attribute text may contain '$' or '\', which are special in replacements.
        m.appendReplacement(out, Matcher.quoteReplacement(replacement));
    }
    m.appendTail(out);
    return out.toString();
}
/**
 * Decodes a fixed set of named HTML entities and 4-5 digit decimal character references,
 * and removes processing instructions of the form {@code <?...>}.
 * <p>
 * Fixes compared with the previous implementation:
 * <ul>
 *   <li>"&amp;raquo;", "&amp;ccedil;" and "&amp;atilde;" decode to their characters instead
 *       of the mojibake "??".</li>
 *   <li>The trailing split on ";" / "&amp;#" was removed: it deleted every semicolon from
 *       the text and turned any bare 4-5 digit token between separators into a character.</li>
 *   <li>Numeric references are decoded in a single pass and code points above U+FFFF are
 *       no longer truncated by a cast to char.</li>
 * </ul>
 * NOTE(review): "&amp;amp;" is decoded before "&amp;lt;"/"&amp;gt;", so double-encoded input
 * such as "&amp;amp;lt;" ends up as "&lt;". The order is kept as it was; confirm whether
 * callers rely on it.
 *
 * @param str text to decode; must not be null
 * @return the decoded text
 */
public static String HTMLDecode(String str) {
    // Named entities. Non-ASCII results are written as escapes so the file encoding cannot damage them.
    str = str.replace("&quot;", "\"");
    str = str.replace("&nbsp;", " ");
    str = str.replace("&middot;", "\u00B7");
    str = str.replace("&amp;", "&");
    str = str.replace("&ldquo;", "\u201C");
    str = str.replace("&rdquo;", "\u201D");
    str = str.replace("&gt;", ">");
    str = str.replace("&lt;", "<");
    str = str.replace("&raquo;", "\u00BB");
    str = str.replace("&times;", "\u00D7");
    str = str.replace("&ccedil;", "\u00E7");
    str = str.replace("&atilde;", "\u00E3");
    str = str.replace("&ecirc;", "\u00EA");

    // Remove processing instructions such as <?xml ... >.
    str = str.replaceAll("<\\?[^>]*>", "");

    // Decimal character references with 4-5 digits.
    Matcher matcher = patHTMLNumCode.matcher(str);
    StringBuffer decoded = new StringBuffer(str.length());
    while (matcher.find()) {
        int codePoint = Integer.parseInt(matcher.group(1));
        String character = new String(Character.toChars(codePoint));
        matcher.appendReplacement(decoded, Matcher.quoteReplacement(character));
    }
    matcher.appendTail(decoded);
    return decoded.toString();
}
/**
 * Deletes every carriage return and line feed from the given text.
 *
 * @param src text to flatten; must not be null
 * @return the text without any CR or LF characters
 */
public static String RemoveHTMLReturnCode(String src) {
    String withoutCarriageReturns = src.replace("\r", "");
    return withoutCarriageReturns.replace("\n", "");
}
/**
 * Extracts the plain text of an HTML string.
 * <p>
 * The input is passed through, in order, {@link #RemoveHTMLReturnCode(String)},
 * {@link #RemoveStyleCode(String)}, {@link #RemoveHTMLCode(String)} and
 * {@link #HTMLDecode(String)}; afterwards a fixed list of whitespace clean-up
 * replacements is applied.
 *
 * @param htmlText HTML string, may be null
 * @return the extracted text, or null when the input is null
 */
public static String TransferHTML2Text(String htmlText) {
    if (null == htmlText) {
        return null;
    }
    String singleLine = ContentUtility.RemoveHTMLReturnCode(htmlText);
    String withoutScripts = ContentUtility.RemoveStyleCode(singleLine);
    String withoutTags = ContentUtility.RemoveHTMLCode(withoutScripts);
    String text = ContentUtility.HTMLDecode(withoutTags);

    // {regex, replacement} pairs, applied top to bottom.
    String[][] whitespaceCleanups = {
        {"   ", "\r\n"},
        {" +\r\n", "\r\n"},
        {" +", " "},
        {"[\\u00A0\\u3000]", ""},
        {" ", ""}
    };
    for (String[] cleanup : whitespaceCleanups) {
        text = text.replaceAll(cleanup[0], cleanup[1]);
    }
    return text;
}
/** Whole script element including its body. */
private static final Pattern STYLE_CODE_SCRIPT_BLOCK_P =
        Pattern.compile("(?s)<script\\s*.*?>(.*?)</script>", Pattern.CASE_INSENSITIVE);
/** Whole style element including its body. */
private static final Pattern STYLE_CODE_STYLE_BLOCK_P =
        Pattern.compile("(?s)<style\\s*.*?>(.*?)</style>", Pattern.CASE_INSENSITIVE);
/** Self-closed script tag; the match cannot run past the tag's own '>'. */
private static final Pattern STYLE_CODE_SCRIPT_SELF_CLOSED_P =
        Pattern.compile("<script\\b[^>]*/>", Pattern.CASE_INSENSITIVE);
/** Self-closed style tag; the match cannot run past the tag's own '>'. */
private static final Pattern STYLE_CODE_STYLE_SELF_CLOSED_P =
        Pattern.compile("<style\\b[^>]*/>", Pattern.CASE_INSENSITIVE);
/** Self-closed img tag; the match cannot run past the tag's own '>'. */
private static final Pattern STYLE_CODE_IMG_SELF_CLOSED_P =
        Pattern.compile("<img\\b[^>]*/>", Pattern.CASE_INSENSITIVE);
/** HTML comment. */
private static final Pattern STYLE_CODE_COMMENT_P = Pattern.compile("(?s)<!--.*?-->");

/**
 * Removes script and style elements, noscript / object elements, link tags,
 * self-closed img tags and HTML comments.
 * <p>
 * Fix: the self-closed patterns used to be {@code <img\s*.*?/>} (likewise for script and
 * style). With a lazy '.' in DOTALL mode such a pattern starts at an unclosed tag like
 * {@code <img src="a.jpg">} and runs to the next "/>" anywhere later in the document
 * (for example a {@code <br/>}), deleting all content in between. The patterns now stop
 * at the tag's own '>'. They are also compiled once instead of on every call.
 *
 * @param content HTML to clean, may be null
 * @return the cleaned HTML, or null when the input is null
 */
public static String RemoveStyleCode(String content) {
    if (content == null) {
        return null;
    }
    content = STYLE_CODE_SCRIPT_BLOCK_P.matcher(content).replaceAll("");
    content = STYLE_CODE_STYLE_BLOCK_P.matcher(content).replaceAll("");
    content = STYLE_CODE_SCRIPT_SELF_CLOSED_P.matcher(content).replaceAll("");
    content = STYLE_CODE_STYLE_SELF_CLOSED_P.matcher(content).replaceAll("");
    content = noscriptP.matcher(content).replaceAll("");
    content = objectP.matcher(content).replaceAll("");
    content = linkP.matcher(content).replaceAll("");
    content = STYLE_CODE_IMG_SELF_CLOSED_P.matcher(content).replaceAll("");
    // Comments go last, as before.
    content = STYLE_CODE_COMMENT_P.matcher(content).replaceAll("");
    return content;
}
/**
 * Host part of a URL: dot-separated labels up to the first '/', ':' or '?', or up to the
 * end of the input.
 */
private static final Pattern DOMAIN_URL_PATTERN =
        Pattern.compile("(?<=//|)((\\w)+\\.)+[\\s\\S]+?(?=\\/|\\:|\\?|$)", Pattern.CASE_INSENSITIVE);

/**
 * Returns the domain contained in a URL, e.g. "www.baidu.com" for
 * "http://www.baidu.com/s?wd=1".
 * <p>
 * Changes compared with the previous implementation:
 * <ul>
 *   <li>A URL that ends right after the host (no path, port or query), such as
 *       "http://www.baidu.com", now yields the host instead of an empty string.</li>
 *   <li>"No match" is handled with a plain check instead of a caught
 *       IllegalStateException and a printed stack trace.</li>
 *   <li>The pattern is compiled once.</li>
 * </ul>
 *
 * @param sourceAddress URL to inspect, may be null
 * @return the matched domain, or "" when the input is null, blank or contains no domain
 */
public static String domainURL(String sourceAddress){
    if (sourceAddress == null || sourceAddress.trim().length() == 0) {
        return "";
    }
    // Trim so that surrounding whitespace cannot become part of an end-of-input match.
    Matcher matcher = DOMAIN_URL_PATTERN.matcher(sourceAddress.trim());
    return matcher.find() ? matcher.group() : "";
}
/**
 * One label followed by a known suffix at the end of the input. Dots are escaped so that
 * they only match a literal '.'; matching is case-insensitive.
 */
private static final Pattern DOMAIN_SUFFIX_PATTERN = Pattern.compile(
        "[a-zA-Z0-9-]+\\.(cn|com|cdt|com\\.mo|nl|us|biz|de|org\\.sa|info|ee|org\\.zw|co\\.uk|ie|com\\.sg"
        + "|co\\.ke|be|eu|com\\.cn|gov\\.cn|co\\.kr|sh\\.cn|cssn\\.cn|org|ac\\.cn|co|org\\.cn|net|org\\.uk"
        + "|hk|fr|no|se|org\\.sg|bg|org\\.pl|cz|at|org\\.nz|or\\.jp|mu|org\\.pe|com\\.hk|net\\.cn|mil|edu"
        + "|edu\\.cn|cas\\.cn|tw|tv|me|cc)$",
        Pattern.CASE_INSENSITIVE);

/**
 * Removes the leading (channel) labels of a domain and keeps the last label before a
 * known suffix plus that suffix, e.g. "finance.sina.com.cn" becomes "sina.com.cn".
 * <p>
 * Fix: the separator dots in the pattern were unescaped and therefore matched any
 * character, so a string without a dot such as "sinacom" was returned as a domain.
 * The pattern is now compiled once, and the duplicate / upper-case alternatives
 * (redundant under case-insensitive matching) were dropped.
 *
 * @param domainStr domain name, may be null
 * @return the shortened domain, or "" when the input is null or has no known suffix
 */
public static String cutDomainPrefix(String domainStr){
    if (domainStr == null) {
        return "";
    }
    Matcher matcher = DOMAIN_SUFFIX_PATTERN.matcher(domainStr);
    return matcher.find() ? matcher.group() : "";
}
}
......@@ -578,7 +578,47 @@ public class EsOpUtil {
return null;
}
}
/**
 * Runs a query against one index and returns the total hit count together with the
 * stored source of every returned hit.
 *
 * @param index               index to search
 * @param start               offset of the first hit; only applied when start and size are both &gt;= 0
 * @param size                number of hits to return; only applied when start and size are both &gt;= 0
 * @param searchSourceBuilder query definition; it is modified by this method (total-hit
 *                            tracking is switched on and from/size are set when paging applies)
 * @return a map with "total" (Long) and "data" (list of source maps), or null when the
 *         search call throws an IOException (the stack trace is printed)
 */
public Map<String, Object> searchByQuery(String index, int start, int size, SearchSourceBuilder searchSourceBuilder) {
    Map<String, Object> resultMap = new HashMap<>();
    SearchRequest request = new SearchRequest(index);
    // The total is capped at 10000 by default; tracking total hits reports the exact number.
    searchSourceBuilder.trackTotalHits(true);
    boolean pagingRequested = !(start < 0 || size < 0);
    if (pagingRequested) {
        searchSourceBuilder.from(start);
        searchSourceBuilder.size(size);
    }
    request.source(searchSourceBuilder);

    SearchResponse response;
    try {
        response = client.search(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    SearchHits hits = response.getHits();
    Long total = hits.getTotalHits().value;
    resultMap.put("total", total);

    // Collect the stored fields of each hit, in result order.
    SearchHit[] hitArray = hits.getHits();
    List<Map<String, Object>> sources = new ArrayList<>(hitArray.length);
    for (int i = 0; i < hitArray.length; i++) {
        sources.add(hitArray[i].getSourceAsMap());
    }
    resultMap.put("data", sources);
    return resultMap;
}
public boolean existBySourceAddress(String index, String sourceAddress) {
SearchRequest searchRequest = new SearchRequest(index);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
......
package com.zzsn.knowbase.util;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class HtmlUtil {
/***
 * Formats HTML without any keyword-based paragraph removal: empty lines are dropped and
 * paragraphs get a first-line indent, intended to stay consistent with the formatting of
 * the front-end CK rich-text editor.
 * Delegates to the three-argument overload with both keyword arguments set to null.
 * @param html HTML to format
 * @return the formatted HTML
 */
public static String formatHtml(String html) {
return formatHtml(html,null,null);
}
/***
 * Formats HTML: flattens the markup into paragraphs, drops empty ones, wraps each remaining
 * paragraph in a p tag with a first-line indent (centered for paragraphs starting with an
 * img tag), intended to stay consistent with the front-end CK rich-text editor.
 * figure and table elements are cut out before formatting and written back unchanged.
 * @param html HTML to format; must not be null
 * @param cleanInKeys JSON array of strings; a paragraph is dropped when it contains every
 *                    "+"-separated word of at least one entry. Null/empty disables the check.
 * @param cleanEqKeys JSON array of strings; a paragraph is dropped when its trimmed text
 *                    equals one of the entries. Null/empty disables the check.
 * @return the formatted HTML
 */
public static String formatHtml(String html,String cleanInKeys,String cleanEqKeys) {
// Cut out figure elements so they are not formatted. Line breaks are removed first, so
// the non-DOTALL patterns below can span what used to be several lines.
html = html.replaceAll("\r","").replaceAll("\n","");
Pattern figurePtn = Pattern.compile("<figure.*?</figure>");
Matcher matcher = figurePtn.matcher(html);
List<String> figureStrList = new ArrayList<>();
while(matcher.find()) {
figureStrList.add(matcher.group());
}
if (figureStrList.size()>0) {
for (int i = 0; i < figureStrList.size(); i++) {
// Each figure is swapped for an indexed placeholder that is restored at the end.
html = html.replace(figureStrList.get(i), "current_figure_wlan"+i+"current_figure_wlan");
}
}
// Cut out table elements so they are not formatted (same placeholder technique).
Pattern tablePtn = Pattern.compile("<table.*?</table>");
Matcher matchertable = tablePtn.matcher(html);
List<String> tableStrList = new ArrayList<>();
while(matchertable.find()) {
tableStrList.add(matchertable.group());
}
if (tableStrList.size()>0) {
for (int i = 0; i < tableStrList.size(); i++) {
html = html.replace(tableStrList.get(i), "current_table_wlan"+i+"current_table_wlan");
}
}
// Normalise the markup: div becomes p, document-level and inline formatting tags are removed.
html = html.replaceAll("<div", "<p");
html = html.replaceAll("</div>", "</p>");
//html = html.replaceAll("<strong[^>]*>", "");
//html = html.replaceAll("</strong>", "");
html = html.replaceAll("<html[^>]*>", "");
html = html.replaceAll("</html>", "");
html = html.replaceAll("<body[^>]*>", "");
html = html.replaceAll("</body>", "");
html = html.replaceAll("<head[^>]*>", "");
html = html.replaceAll("</head>", "");
html = html.replaceAll("<em[^>]*>", "");
html = html.replaceAll("</em>", "");
// NOTE(review): "<u[^>]*>" and "<li[^>]*>" also match other tags starting with the same
// letters (e.g. <ul ...>, <link ...>); confirm this is acceptable.
html = html.replaceAll("<u[^>]*>", "");
html = html.replaceAll("</u>", "");
html = html.replaceAll("<li[^>]*>", "");
html = html.replaceAll("</li>", "");
html = html.replaceAll("<span[^>]*>", "");
html = html.replaceAll("</span>", "");
html = html.replaceAll("&nbsp;", "");
// NOTE(review): as written this deletes every space character, including the ones that
// separate attributes inside the tags still present; confirm which whitespace is meant.
html = html.replaceAll(" ", "");
html = html.replaceAll("<p></p>", "");
html = html.replaceAll("<a", "<a rel=\"nofollow\"");
// <br data-cke-filler="true"> denotes an empty line; strip attributes from br tags.
html = html.replaceAll("<br[^>]*>","<br />");
html = html.replaceAll("</br[^>]*>","<br />");
// Turn paragraph boundaries into <br />: opening p tags vanish, closing ones become a break.
html = html.replaceAll("<p[^>]*>","");
html = html.replaceAll("</p>","<br />");
html = html.replaceAll("<br /><br />","<br />");
// Line breaks were already removed at the top of the method, so these two calls find nothing here.
html = html.replaceAll("\n", "<br />");
html = html.replaceAll("\r", "");
// Split on the breaks and rebuild every non-empty piece as a p element.
String[] bb = html.split("<br[^>]*>");
String aa="";
for(int i=0;i<bb.length;i++){
if(StringUtils.isEmpty(bb[i]) || StringUtils.isEmpty(bb[i].trim())){
continue;
}
// Drop paragraphs that contain a configured keyword combination.
if(StringUtils.isNotEmpty(cleanInKeys)){
List<String> list = JSON.parseArray(cleanInKeys, String.class);
boolean flag = false;
for (String keys : list) {
for (String key : keys.split("\\+")) {
if(bb[i].contains(key)){
flag = true;
}else{
// A combination only counts when every one of its words is present.
flag = false;
break;
}
}
if(flag){
// One fully matching combination is enough to drop the paragraph.
break;
}
}
if(flag){
continue;
}
}
// Drop paragraphs whose trimmed text equals a configured keyword.
if(StringUtils.isNotEmpty(cleanEqKeys)) {
List<String> list = JSON.parseArray(cleanEqKeys, String.class);
if(list.contains(bb[i].trim())){
continue;
}
}
// Paragraphs starting with an img tag are centered, all others get a two-em first-line indent.
if(bb[i].trim().startsWith("<img ")){
aa = aa+"<p style=\"text-align:center;\">"+bb[i].trim()+"</p>";
}else{
aa = aa+"<p style=\"text-indent:2em;\">"+bb[i].trim()+"</p>";
}
}
// First-line indent (superseded by the loop above).
//html = aa.replaceAll("<p[^>]*>", "<p style=\"text-indent:2em;\">");
html = aa;
// Remove all links.
html = removeLink(html);
// Remove javascript-related tags.
html = removeJavascript(html);
// Remove empty paragraphs.
html = html.replaceAll("<p>  </p>","");
html = html.replaceAll("<p></p>","");
html = html.replaceAll("<p style=\"text-indent:2em;\">  </p>","");
html = html.replaceAll("<p style=\"text-indent:2em;\"></p>","");
html = html.replaceAll("<p style=\"text-indent:2em;\"><title></title></p>","");
// Write the tables back: first the case where the placeholder is alone in an indented
// paragraph (the wrapper is dropped), then any remaining occurrence.
if (tableStrList.size()>0) {
for (int i = 0; i < tableStrList.size(); i++) {
html = html.replace("<p style=\"text-indent:2em;\">current_table_wlan"+i+"current_table_wlan</p>",tableStrList.get(i));
html = html.replace("current_table_wlan"+i+"current_table_wlan",tableStrList.get(i));
}
}
// Write the figures back the same way; tables come first because a table may contain figure placeholders.
if (figureStrList.size()>0) {
for (int i = 0; i < figureStrList.size(); i++) {
html = html.replace("<p style=\"text-indent:2em;\">current_figure_wlan"+i+"current_figure_wlan</p>",figureStrList.get(i));
html = html.replace("current_figure_wlan"+i+"current_figure_wlan",figureStrList.get(i));
}
}
return html;
}
/**
 * Removes link tags while keeping the text between them.
 * Opening tags matching {@code <a[^>]*>} are removed first, then closing tags matching
 * {@code </a[^>]*>}. Both expressions also match other tags whose name starts with "a".
 *
 * @param html HTML to process; must not be null
 * @return the HTML without those tags
 */
public static String removeLink(String html) {
    String withoutOpeningTags = html.replaceAll("<a[^>]*>","");
    return withoutOpeningTags.replaceAll("</a[^>]*>","");
}
/**
 * Removes meta, script, iframe and frame tags. Only the tags themselves are deleted;
 * text between an opening and a closing tag stays in the result.
 * <p>
 * NOTE(review): the final replacement maps "javascript:" to the identical string and so
 * changes nothing as written; presumably a neutralised form was intended, confirm.
 *
 * @param html HTML to process; must not be null
 * @return the HTML without those tags
 */
public static String removeJavascript(String html) {
    String[] tagRegexes = {
        "<meta[^>]*>",
        "<script[^>]*>",
        "</script[^>]*>",
        "<iframe[^>]*>",
        "</iframe[^>]*>",
        "<frame[^>]*>"
    };
    String cleaned = html;
    for (String tagRegex : tagRegexes) {
        cleaned = cleaned.replaceAll(tagRegex, "");
    }
    return cleaned.replaceAll("javascript:","javascript:");
}
/** Whole table element; DOTALL so the element may span lines, case-insensitive tag names. */
private static final Pattern TABLE_ELEMENT_PTN = Pattern.compile("(?is)<table\\b.*?</table>");

/**
 * Removes every table element, including its content.
 * <p>
 * Fix: the previous expression had no DOTALL flag, so a table whose markup spans several
 * lines was silently left in place. Upper-case tags are now matched as well, and a null
 * input is returned as null instead of raising a NullPointerException.
 *
 * @param html HTML to process, may be null
 * @return the HTML without table elements, or null when the input is null
 */
public static String removeTabel(String html) {
    if (html == null) {
        return null;
    }
    return TABLE_ELEMENT_PTN.matcher(html).replaceAll("");
}
/**
 * Clean-up applied by {@link #splitContents(String)} after formatting, as
 * {regex, replacement} pairs. The order is significant and must not be changed.
 */
private static final String[][] SPLIT_CLEANUP_STEPS = {
    // line breaks and tables
    {"\n|\r", ""},
    {"<table.*?</table>", ""},
    // div becomes p; figure/strong/document-level/inline tags are removed
    {"</div>", "</p>"},
    {"<div", "<p"},
    {"<figure[^>]*>", ""},
    {"</figure>", ""},
    {"<strong[^>]*>", ""},
    {"</strong>", ""},
    {"<html[^>]*>", ""},
    {"</html>", ""},
    {"<body[^>]*>", ""},
    {"</body>", ""},
    {"<head[^>]*>", ""},
    {"</head>", ""},
    {"<em[^>]*>", ""},
    {"</em>", ""},
    {"<u[^>]*>", ""},
    {"</u>", ""},
    {"<li[^>]*>", ""},
    {"</li>", ""},
    {"<span[^>]*>", ""},
    {"</span>", ""},
    {"&nbsp;", ""},
    {" ", ""},
    {"<p></p>", ""},
    {"<a", "<a rel=\"nofollow\""},
    // br tags lose their attributes; paragraph boundaries become <br />
    {"<br[^>]*>", "<br />"},
    {"</br[^>]*>", "<br />"},
    {"<p[^>]*>", ""},
    {"</p>", "<br />"},
    {"<br /><br />", "<br />"},
    {"\n", "<br />"},
    {"\r", ""}
};

/**
 * Splits HTML content into plain-text chunks.
 * <p>
 * The HTML is formatted with {@link #formatHtml(String)}, tables are removed, and the
 * remaining markup is flattened into paragraphs. Paragraph text is accumulated until it
 * is longer than 30 characters and then emitted as one chunk; accumulated text longer
 * than 500 characters is split at the full stop "。" into chunks of a little over 150
 * characters.
 * <p>
 * Fix: when long text was split at "。", a full stop was appended after every piece,
 * including a final piece that did not end with one in the source text. The last piece
 * now only gets a full stop when the text itself ended with one.
 *
 * @param html HTML to split; must not be null
 * @return the text chunks in document order
 */
public static List<String> splitContents(String html) {
    List<String> list = new ArrayList<>();
    html = formatHtml(html);
    for (String[] step : SPLIT_CLEANUP_STEPS) {
        html = html.replaceAll(step[0], step[1]);
    }
    StringBuilder item = new StringBuilder();
    for (String s : html.split("<br[^>]*>")) {
        item.append(ContentUtility.TransferHTML2Text(s.trim()));
        if (item.length() > 500) {
            // Longer than 500 characters: cut at sentence ends.
            addSentenceChunks(item.toString(), list);
            item = new StringBuilder();
        } else if (item.length() > 30) {
            // Between 31 and 500 characters: emit as is.
            list.add(item.toString());
            item = new StringBuilder();
        }
        // 30 characters or fewer: keep accumulating into the next paragraph.
    }
    if (item.length() > 0) {
        list.add(item.toString());
    }
    return list;
}

/** Splits text at "。" and appends chunks of more than 150 characters (plus a final remainder) to out. */
private static void addSentenceChunks(String text, List<String> out) {
    // split() drops trailing empty strings, so the last element is the last non-empty piece.
    String[] sentences = text.split("。");
    boolean endsWithFullStop = text.endsWith("。");
    StringBuilder chunk = new StringBuilder();
    for (int i = 0; i < sentences.length; i++) {
        chunk.append(sentences[i].trim());
        boolean isLastPiece = i == sentences.length - 1;
        if (!isLastPiece || endsWithFullStop) {
            chunk.append("。");
        }
        if (chunk.length() > 150) {
            out.add(chunk.toString());
            chunk = new StringBuilder();
        }
    }
    if (chunk.length() > 0) {
        out.add(chunk.toString());
    }
}
/**
 * Manual smoke test for formatHtml: prints the result for a three-paragraph snippet with
 * both keyword filters set, then for a complete HTML document whose body is a nested div
 * containing a price table followed by an empty paragraph.
 */
public static void main(String[] args) {
// Case 1: "contains" filter with a "+" combination and "equals" filter.
System.out.println(formatHtml("<p>标题</p><p>摘要</p><p>正搜索文</p>","[\"正+文\",\"正文\"]","[\"标题\",\"要\"]"));
// Case 2: full document with a table, formatted without keyword filters.
System.out.println(formatHtml("<html>\n" +
" <head>111</head>\n" +
" <body> \n" +
" <div> \n" +
" <div> \n" +
" <table> \n" +
" <tbody> \n" +
" <tr> \n" +
" <td style=\"\" width=\"54\">品名</td> \n" +
" <td style=\"\" width=\"54\">规格(mm)</td> \n" +
" <td style=\"\" width=\"54\">材质</td> \n" +
" <td style=\"\" width=\"54\">钢厂/产地</td> \n" +
" <td style=\"\" width=\"54\">价格(元/吨)</td> \n" +
" <td style=\"\" width=\"54\">涨跌</td> \n" +
" <td style=\"\" width=\"54\">备注</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td align=\"right\">6</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">5050</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td align=\"right\">8</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4860</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td align=\"right\">10</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4760</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">经销</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td align=\"right\">12</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4290</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td align=\"right\">14</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4250</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>16-20</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4210</td> \n" +
" <td>-</td> \n" +
" <td><br></td> \n" +
" <td style=\"min-width: 30px\">代理</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q235B</td> \n" +
" <td>酒钢</td> \n" +
" <td align=\"right\">4190</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q235B</td> \n" +
" <td>柳钢</td> \n" +
" <td align=\"right\">4190</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q235B</td> \n" +
" <td>临钢</td> \n" +
" <td align=\"right\">4190</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q235B</td> \n" +
" <td>萍钢</td> \n" +
" <td align=\"right\">4190</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q235B</td> \n" +
" <td>南钢</td> \n" +
" <td align=\"right\">4190</td> \n" +
" <td>-</td> \n" +
" <td>无货</td> \n" +
" <td style=\"min-width: 30px\">代理</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普中板</td> \n" +
" <td>22-30</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4330</td> \n" +
" <td>-</td> \n" +
" <td><br></td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普厚板</td> \n" +
" <td>32-40</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4320</td> \n" +
" <td>-</td> \n" +
" <td><br></td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普厚板</td> \n" +
" <td align=\"right\">30</td> \n" +
" <td>Q235B</td> \n" +
" <td>酒钢</td> \n" +
" <td align=\"right\">4290</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普厚板</td> \n" +
" <td align=\"right\">40</td> \n" +
" <td>Q235B</td> \n" +
" <td>酒钢</td> \n" +
" <td align=\"right\">4270</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普厚板</td> \n" +
" <td align=\"right\">40</td> \n" +
" <td>Q235B</td> \n" +
" <td>萍钢</td> \n" +
" <td align=\"right\">4290</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普厚板</td> \n" +
" <td align=\"right\">50</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4360</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">普厚板</td> \n" +
" <td align=\"right\">60</td> \n" +
" <td>Q235B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4410</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td align=\"right\">6</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4980</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td align=\"right\">8</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4830</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td align=\"right\">10</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4750</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td align=\"right\">12</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4490</td> \n" +
" <td>-</td> \n" +
" <td>无货</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td align=\"right\">14</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4420</td> \n" +
" <td>-</td> \n" +
" <td>无货</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td>16-20</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4400</td> \n" +
" <td>-</td> \n" +
" <td><br></td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q355B</td> \n" +
" <td>酒钢</td> \n" +
" <td align=\"right\">4380</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q355B</td> \n" +
" <td>萍钢</td> \n" +
" <td align=\"right\">4380</td> \n" +
" <td>-</td> \n" +
" <td>货少</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q355B</td> \n" +
" <td>临钢</td> \n" +
" <td align=\"right\">4380</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q355B</td> \n" +
" <td>柳钢</td> \n" +
" <td align=\"right\">4370</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金中板</td> \n" +
" <td>14-20</td> \n" +
" <td>Q355B</td> \n" +
" <td>南钢</td> \n" +
" <td align=\"right\">4370</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金厚板</td> \n" +
" <td>22-30</td> \n" +
" <td>Q355B</td> \n" +
" <td>重钢</td> \n" +
" <td align=\"right\">4450</td> \n" +
" <td>-</td> \n" +
" <td><br></td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" <tr> \n" +
" <td style=\"\">低合金厚板</td> \n" +
" <td align=\"right\">30</td> \n" +
" <td>Q355B</td> \n" +
" <td>酒钢</td> \n" +
" <td align=\"right\">4430</td> \n" +
" <td>-</td> \n" +
" <td>无货gl</td> \n" +
" <td style=\"min-width: 30px\">商家</td> \n" +
" </tr> \n" +
" </tbody> \n" +
" </table> \n" +
" <p style=\"text-indent:2rem\"><br></p> \n" +
" </div> \n" +
" </div> \n" +
" </body>\n" +
"</html>"));
}
/**
 * Highlights characters in a text; meant for titles and summaries.
 * <p>
 * The keys string is split into its individual characters (duplicates removed), the text
 * is reduced to plain text with {@code ContentUtility.TransferHTML2Text}, and every
 * occurrence of each character is wrapped in a red span. Spans inserted for earlier
 * characters are masked with placeholder symbols while the next character is processed,
 * so the characters of the span markup itself are never highlighted.
 *
 * @param text original text
 * @param keys characters to highlight; when null or empty the text is returned unchanged
 * @return the plain text with highlight spans inserted
 */
public static String gaoLiang(String text,String keys) {
    if (StringUtils.isEmpty(keys)) {
        return text;
    }
    final String spanOpen = "<span style='color: #f73131;'>";
    final String spanClose = "</span>";
    // Placeholders standing in for the span markup during a replacement round.
    final String openMark = "☛";
    final String closeMark = "☚";

    Set<String> distinctChars = new HashSet<>(Arrays.asList(keys.split("")));
    // Strip any existing markup before highlighting.
    String highlighted = ContentUtility.TransferHTML2Text(text);
    for (String ch : distinctChars) {
        String masked = highlighted.replace(spanOpen, openMark).replace(spanClose, closeMark);
        String wrapped = masked.replace(ch, spanOpen + ch + spanClose);
        highlighted = wrapped.replace(openMark, spanOpen).replace(closeMark, spanClose);
    }
    return highlighted;
}
}
......@@ -78,5 +78,6 @@ public class KnowledgeParam {
*/
private String verifyEndTime;
private String kbKnowledgeId;
private String subjectId;
}
......@@ -20,25 +20,40 @@ import java.util.List;
@NoArgsConstructor
@AllArgsConstructor
public class KnowledgeVO {
private String content;
private String id;
private String title;
private String verifyTime;
private String publishDate;
private String updateDate;
private String createTime;
private String kbKnowledgeId;
private String KnowledgeProjectId;
private String type;
private String typeId;
private String verifierName;
private String origin;
private String author;
private String contentAll;
private Integer score;
private Integer verifyStatus;
private Integer importData;
private String subjectId;
private String contentWithTag;
private String contentAll;
private List<Content> contents;
private List<KnowFile> files;
/**
* 审核时间
*/
private String verifyTime;
/**
* Review status (0 = not reviewed, 1 = approved, 2 = rejected)
*/
private Integer verifyStatus;
/**
* 审核人id
*/
private String verifierId;
/**
* 审核人名字
*/
private String verifierName;
private Integer deleteFlag;
}
......@@ -61,8 +61,8 @@ document:
host: http://114.116.116.241:9088
# host: http://192.168.1.71:9088
files:
storage: /storage/know/
# storage: C:/know/
storage: /zzsn/nt/storage/know/
# storage: D:/storage/know/
docservice:
url:
site: http://114.116.116.241:80/
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论