提交 ff26c5fc 作者: 925993793@qq.com

自定义专题-保存为数据集接口

上级 af2be853
...@@ -26,6 +26,7 @@ import java.util.ArrayList; ...@@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.CompletableFuture;
/** /**
* 资讯 * 资讯
...@@ -104,7 +105,7 @@ public class InformationController { ...@@ -104,7 +105,7 @@ public class InformationController {
* @date 2024/2/28 * @date 2024/2/28
*/ */
@GetMapping("/search/condition/list") @GetMapping("/search/condition/list")
public Result<?> queryInfo(@RequestParam String relationId) { public Result<?> queryInfo(@RequestParam("subjectId") String relationId) {
UserVo user = AuthUtil.getLoginUser(); UserVo user = AuthUtil.getLoginUser();
String userId = user.getId(); String userId = user.getId();
LambdaQueryWrapper<SubjectUserCondition> queryWrapper = Wrappers.lambdaQuery(); LambdaQueryWrapper<SubjectUserCondition> queryWrapper = Wrappers.lambdaQuery();
...@@ -192,17 +193,35 @@ public class InformationController { ...@@ -192,17 +193,35 @@ public class InformationController {
/** /**
* 事件对应专题库的资讯分页列表 * 事件对应专题库的资讯分页列表
* *
* @param subjectInfoVo 筛选条件 * @param searchCondition 筛选条件
* @author lkg * @author lkg
* @date 2024/5/6 * @date 2024/5/6
*/ */
@PostMapping("/subjectPageList") @PostMapping("/subjectPageList")
public Result<?> subjectPageList(@RequestBody InfoDataSearchCondition subjectInfoVo) { public Result<?> subjectPageList(@RequestBody InfoDataSearchCondition searchCondition) {
UserVo userVo = AuthUtil.getLoginUser(); UserVo userVo = AuthUtil.getLoginUser();
IPage<DisplayInfo> pageList = informationService.subjectPageList(userVo, subjectInfoVo); IPage<DisplayInfo> pageList = informationService.subjectPageList(userVo, searchCondition);
return Result.OK(pageList); return Result.OK(pageList);
} }
/**
 * Saves the information matched by the given filter as a data set.
 * <p>
 * The request is rejected when no data set id is supplied. Otherwise the actual
 * save is handed to {@link CompletableFuture#runAsync(Runnable)} and the endpoint
 * answers immediately, without waiting for the save to finish.
 *
 * @param searchCondition filter conditions; must carry the target data set id
 * @return a failure result when the data set id is empty, otherwise an
 *         "in progress" result
 * @author lkg
 * @date 2024/5/6
 */
@PostMapping("/saveAsDataSet")
public Result<?> saveAsDataSet(@RequestBody InfoDataSearchCondition searchCondition) {
    // Guard clause: nothing can be tagged without a target data set id.
    if (StringUtils.isEmpty(searchCondition.getDataSetId())) {
        return Result.FAIL("数据集id不能为空");
    }
    // Fire-and-forget: the returned future is intentionally not observed here.
    // NOTE(review): a failure inside the task is therefore not reported to the caller.
    Runnable saveTask = () -> informationService.saveAsDataSet(searchCondition);
    CompletableFuture.runAsync(saveTask);
    return Result.OK("数据集保存中。。。");
}
/** /**
* 通过id查询(专题库) * 通过id查询(专题库)
...@@ -284,7 +303,7 @@ public class InformationController { ...@@ -284,7 +303,7 @@ public class InformationController {
* @author lkg * @author lkg
* @date 2024/12/19 * @date 2024/12/19
*/ */
@PostMapping(value = "/deleteInfoBatch") @PostMapping(value = "/deleteBatch")
public Result<?> deleteBatch(@RequestBody Map<String, Object> map) { public Result<?> deleteBatch(@RequestBody Map<String, Object> map) {
UserVo userVo = AuthUtil.getLoginUser(); UserVo userVo = AuthUtil.getLoginUser();
informationService.deleteBatch(map, userVo); informationService.deleteBatch(map, userVo);
...@@ -298,7 +317,7 @@ public class InformationController { ...@@ -298,7 +317,7 @@ public class InformationController {
* @author lkg * @author lkg
* @date 2024/12/19 * @date 2024/12/19
*/ */
@PostMapping(value = "/deleteBatch") @PostMapping(value = "/removeBatch")
public Result<?> removeBatch(@RequestBody Map<String, Object> map) { public Result<?> removeBatch(@RequestBody Map<String, Object> map) {
informationService.removeBatch(map); informationService.removeBatch(map);
return Result.OK(); return Result.OK();
......
...@@ -25,7 +25,7 @@ import java.util.Map; ...@@ -25,7 +25,7 @@ import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
/** /**
* 统计分析 * 统计分析-研究中心
* *
* @author lkg * @author lkg
* @date 2024/12/25 * @date 2024/12/25
......
...@@ -1131,13 +1131,15 @@ public class EsService { ...@@ -1131,13 +1131,15 @@ public class EsService {
searchSourceBuilder.trackTotalHits(true); searchSourceBuilder.trackTotalHits(true);
//创建查询对象 //创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
//判断ids字段是否为空,若不为空,则表示按照id勾选导出 //判断ids字段是否为空,若不为空,则表示按照id勾选
List<String> ids = searchCondition.getIds(); List<String> ids = searchCondition.getIds();
if (CollectionUtils.isNotEmpty(ids)) { if (CollectionUtils.isNotEmpty(ids)) {
searchSourceBuilder.size(ids.size()); searchSourceBuilder.size(ids.size());
boolQuery.must(QueryBuilders.termsQuery("id", ids)); boolQuery.must(QueryBuilders.termsQuery("id", ids));
List<String> sourceIdList = searchCondition.getSourceIdList(); List<String> sourceIdList = searchCondition.getSourceIdList();
boolQuery.must(QueryBuilders.termsQuery("sid.keyword", sourceIdList)); if (CollectionUtils.isNotEmpty(sourceIdList)) {
boolQuery.must(QueryBuilders.termsQuery("sid.keyword", sourceIdList));
}
} else { } else {
//设置分页参数 //设置分页参数
Integer pageNo = searchCondition.getPageNo(); Integer pageNo = searchCondition.getPageNo();
...@@ -1167,6 +1169,7 @@ public class EsService { ...@@ -1167,6 +1169,7 @@ public class EsService {
String queryInfo = hit.getSourceAsString(); String queryInfo = hit.getSourceAsString();
SpecialInformation info = JSON.parseObject(queryInfo, SpecialInformation.class); SpecialInformation info = JSON.parseObject(queryInfo, SpecialInformation.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate())); info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
info.setIndex(hit.getIndex());
dataList.add(info); dataList.add(info);
} }
} catch (IOException e) { } catch (IOException e) {
...@@ -1516,7 +1519,7 @@ public class EsService { ...@@ -1516,7 +1519,7 @@ public class EsService {
* @date 2024/12/26 * @date 2024/12/26
*/ */
public List<CountVO> groupByClassificationType(InfoDataSearchCondition searchCondition) { public List<CountVO> groupByClassificationType(InfoDataSearchCondition searchCondition) {
return groupByTerm(searchCondition, "group", "classificationType",false); return groupByTerm(searchCondition, "group", "classificationType", false);
} }
/** /**
...@@ -1742,8 +1745,9 @@ public class EsService { ...@@ -1742,8 +1745,9 @@ public class EsService {
if (StringUtils.isNotBlank(crawler)) { if (StringUtils.isNotBlank(crawler)) {
boolQuery.must(QueryBuilders.termQuery("source.keyword", crawler)); boolQuery.must(QueryBuilders.termQuery("source.keyword", crawler));
} }
Set<String> relationIdSet = new HashSet<>(searchCondition.getLabelIds()); List<String> labelIds = searchCondition.getLabelIds();
if (CollectionUtils.isNotEmpty(relationIdSet)) { if (CollectionUtils.isNotEmpty(labelIds)) {
Set<String> relationIdSet = new HashSet<>(labelIds);
BoolQueryBuilder nestedBoolQueryBuilder = QueryBuilders.boolQuery(); BoolQueryBuilder nestedBoolQueryBuilder = QueryBuilders.boolQuery();
for (String relationId : relationIdSet) { for (String relationId : relationIdSet) {
TermQueryBuilder relationIdQuery = QueryBuilders.termQuery("labels.relationId", relationId); TermQueryBuilder relationIdQuery = QueryBuilders.termQuery("labels.relationId", relationId);
......
...@@ -30,14 +30,23 @@ public interface InformationService { ...@@ -30,14 +30,23 @@ public interface InformationService {
/** /**
* 专题库资讯分页列表 * 专题库资讯分页列表
* *
* @param userVo 用户信息 * @param userVo 用户信息
* @param subjectInfo 筛选条件 * @param subjectInfo 筛选条件
* @author lkg * @author lkg
* @date 2024/5/6 * @date 2024/5/6
*/ */
IPage<DisplayInfo> subjectPageList(UserVo userVo, InfoDataSearchCondition subjectInfo); IPage<DisplayInfo> subjectPageList(UserVo userVo, InfoDataSearchCondition subjectInfo);
/** /**
* 保存数据集-研究中心
*
* @param searchCondition 筛选条件
* @author lkg
* @date 2025/1/6
*/
void saveAsDataSet(InfoDataSearchCondition searchCondition);
/**
* 资讯详情 * 资讯详情
* *
* @param type 类别(1-采集库;2-专题库) * @param type 类别(1-采集库;2-专题库)
......
...@@ -167,6 +167,54 @@ public class InformationServiceImpl implements InformationService { ...@@ -167,6 +167,54 @@ public class InformationServiceImpl implements InformationService {
} }
@Override @Override
public void saveAsDataSet(InfoDataSearchCondition searchCondition) {
    // Purpose: tag every document matched by searchCondition with the data set id from
    // searchCondition.getDataSetId(). Documents are read page by page (only "id" and
    // "labels" are fetched), the data set label is appended where it is missing, and the
    // new "labels" values are written back with one bulk update per index.
    String dataSetId = searchCondition.getDataSetId();
    if (dataSetId == null || dataSetId.isEmpty()) {
        // Without an id the label would be written with a null relationId.
        log.warn("保存数据集:数据集id为空,跳过");
        return;
    }
    searchCondition.setFetchFields(new String[]{"id", "labels"});
    searchCondition.setPageSize(300);
    Label dataSet = new Label();
    dataSet.setRelationId(dataSetId);
    // NOTE(review): when ids are supplied, the ES query appears to size the result by
    // ids.size() and to ignore pageNo, so one call returns every selected document and
    // a second call would return the same hits again. Stop after the first page then.
    boolean selectedByIds = CollectionUtils.isNotEmpty(searchCondition.getIds());
    // totalMap layout: index >> document id >> fields to update
    Map<String, Map<String, Map<String, Object>>> totalMap = new HashMap<>();
    for (int pageNo = 1; ; pageNo++) {
        searchCondition.setPageNo(pageNo);
        List<SpecialInformation> informationList = esService.informationList(searchCondition);
        // Compute the size null-safely: the list is only checked for emptiness afterwards.
        int fetched = informationList == null ? 0 : informationList.size();
        log.info("保存数据集:本次循环-{},数据量为-{}", pageNo, fetched);
        if (fetched == 0) {
            break;
        }
        for (SpecialInformation information : informationList) {
            List<Label> labels = information.getLabels();
            if (labels == null) {
                // Documents without a "labels" field deserialize to null.
                labels = new ArrayList<>();
            }
            // Constant-first equals tolerates labels whose relationId is missing.
            boolean present = labels.stream()
                    .anyMatch(label -> label != null && dataSetId.equals(label.getRelationId()));
            if (present) {
                // Already part of the data set: no update needed for this document.
                continue;
            }
            labels.add(dataSet);
            Map<String, Object> updateField = new HashMap<>();
            updateField.put("labels", labels);
            // inner map layout: document id >> fields to update
            totalMap.computeIfAbsent(information.getIndex(), key -> new HashMap<>())
                    .put(information.getId(), updateField);
        }
        if (selectedByIds) {
            break;
        }
    }
    for (Map.Entry<String, Map<String, Map<String, Object>>> entry : totalMap.entrySet()) {
        esOpUtil.bulkUpdateFields(entry.getKey(), entry.getValue());
    }
}
@Override
public List<List<String>> statisticsExportList(String subjectId, String startDate, String endDate) { public List<List<String>> statisticsExportList(String subjectId, String startDate, String endDate) {
List<String> subjectIdList = new ArrayList<>(); List<String> subjectIdList = new ArrayList<>();
//查询类别id的所有明细id //查询类别id的所有明细id
...@@ -199,7 +247,7 @@ public class InformationServiceImpl implements InformationService { ...@@ -199,7 +247,7 @@ public class InformationServiceImpl implements InformationService {
sum += Math.toIntExact(count); sum += Math.toIntExact(count);
} }
} }
List<String> list = new ArrayList<>(subjectStatisticsVo.toExcelList()); List<String> list = subjectStatisticsVo.toExcelList();
//信息源在时间段内的采集总量 //信息源在时间段内的采集总量
list.add(String.valueOf(sum)); list.add(String.valueOf(sum));
//信息源采集到数据的日期集合 //信息源采集到数据的日期集合
......
package com.zzsn.event.util; package com.zzsn.event.util;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.ObjectUtil;
import com.alibaba.fastjson2.JSON; import com.alibaba.fastjson2.JSON;
...@@ -701,12 +702,12 @@ public class EsOpUtil<T> { ...@@ -701,12 +702,12 @@ public class EsOpUtil<T> {
* 根据UpdateResponse的结果进行处理,如果更新成功,则打印日志;如果没有进行任何更改,则打印日志;如果更新失败,则打印日志。 * 根据UpdateResponse的结果进行处理,如果更新成功,则打印日志;如果没有进行任何更改,则打印日志;如果更新失败,则打印日志。
*/ */
public void updateById(String index, String id, Map<String, Object> args) { public void updateById(String index, String id, Map<String, Object> args) {
if (CollectionUtil.isEmpty(args)) { if (CollUtil.isEmpty(args)) {
return; return;
} }
// 执行更新请求 // 执行更新请求
try { try {
UpdateResponse response = client.update(createUpdateRequest(index, id, args), RequestOptions.DEFAULT); UpdateResponse response = client.update(createUpdateRequest(index, id, args,true), RequestOptions.DEFAULT);
RestStatus status = response.status(); RestStatus status = response.status();
if (status.getStatus() != 200) { if (status.getStatus() != 200) {
log.info("{},更新失败",id); log.info("{},更新失败",id);
...@@ -714,10 +715,8 @@ public class EsOpUtil<T> { ...@@ -714,10 +715,8 @@ public class EsOpUtil<T> {
} catch (IOException e) { } catch (IOException e) {
log.info("{},更新异常",id); log.info("{},更新异常",id);
} }
} }
/** /**
* 根据id更新,批量更新 * 根据id更新,批量更新
* 参数:index表示索引名称,batch表示要批量更新的文档,其中batch是一个Map,key为文档的id,value为要更新的字段和对应的值。 * 参数:index表示索引名称,batch表示要批量更新的文档,其中batch是一个Map,key为文档的id,value为要更新的字段和对应的值。
...@@ -727,32 +726,29 @@ public class EsOpUtil<T> { ...@@ -727,32 +726,29 @@ public class EsOpUtil<T> {
* 使用client.bulk方法执行批量更新请求,返回一个BulkResponse对象。 * 使用client.bulk方法执行批量更新请求,返回一个BulkResponse对象。
* 根据BulkResponse的结果进行处理,如果有更新失败的情况,则打印日志;如果全部更新成功,则打印日志。 * 根据BulkResponse的结果进行处理,如果有更新失败的情况,则打印日志;如果全部更新成功,则打印日志。
*/ */
public void bulkUpdateDocuments(String index, Map<String, Map<String, Object>> batch) throws IOException { public void bulkUpdateFields(String index, Map<String, Map<String, Object>> args) {
if (CollUtil.isEmpty(args)) {
if (CollectionUtil.isEmpty(batch)) {
return; return;
} }
BulkRequest bulkRequest = new BulkRequest(); BulkRequest bulkRequest = new BulkRequest();
// 添加批量更新的请求 args.forEach((id, args1) -> {
batch.forEach((id, args) -> {
try { try {
bulkRequest.add(createUpdateRequest(index, id, args)); bulkRequest.add(createUpdateRequest(index, id, args1,false));
} catch (IOException e) { } catch (IOException e) {
log.info("添加更新请求异常"); e.printStackTrace();
} }
}); });
// 添加更多的更新请求 // 执行更新请求
try {
// 执行批量更新请求 BulkResponse bulk = client.bulk(bulkRequest, RequestOptions.DEFAULT);
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT); BulkItemResponse[] bulkItemResponses = bulk.getItems();
for (BulkItemResponse response : bulkItemResponses) {
// 处理批量更新的响应结果 if (response.isFailed()) {
if (bulkResponse.hasFailures()) { log.info("批量更新字段[{}]过程中,id为[{}]的更新失败,失败原因[{}]", response.getIndex(), response.getId(), response.getFailureMessage());
// 处理失败的情况 }
log.info("批量更新失败{}", batch); }
} else { } catch (IOException e) {
// 处理成功的情况 e.printStackTrace();
log.info("批量更新成功{}", batch);
} }
} }
...@@ -764,10 +760,12 @@ public class EsOpUtil<T> { ...@@ -764,10 +760,12 @@ public class EsOpUtil<T> {
* 将contentBuilder设置为更新请求的内容。 * 将contentBuilder设置为更新请求的内容。
* 返回更新请求UpdateRequest对象。 * 返回更新请求UpdateRequest对象。
*/ */
private UpdateRequest createUpdateRequest(String index, String documentId, Map<String, Object> args) throws IOException { private UpdateRequest createUpdateRequest(String index, String documentId, Map<String, Object> args,Boolean refreshPolicy) throws IOException {
UpdateRequest request = new UpdateRequest(index, documentId); UpdateRequest request = new UpdateRequest(index, documentId);
//刷新策略,立即刷新 if (Boolean.TRUE.equals(refreshPolicy)) {
request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); //刷新策略,立即刷新
request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
}
// 创建要更新的内容 // 创建要更新的内容
XContentBuilder contentBuilder = XContentFactory.jsonBuilder(); XContentBuilder contentBuilder = XContentFactory.jsonBuilder();
contentBuilder.startObject(); contentBuilder.startObject();
......
...@@ -60,23 +60,27 @@ public class InfoDataSearchCondition { ...@@ -60,23 +60,27 @@ public class InfoDataSearchCondition {
private String isSubject = "1"; private String isSubject = "1";
//爬虫类型 //爬虫类型
private String crawler; private String crawler;
//企业标签id集合 //标签id集合
private List<String> labelTypeIds; private List<String> labelTypeIds;
//关联标签id集合 //关联标签id集合
private List<String> labelIds; private List<String> labelIds;
//信息源id //信息源id
private String sourceId; private String sourceId;
//搜索词信息集合 //搜索词信息集合-研究中心
private List<SearchWordVO> searchWordList; private List<SearchWordVO> searchWordList;
//关键词信息;推荐信息源时使用 //数据集id,保存数据集时使用-研究中心
private String dataSetId;
//关键词信息;推荐信息源时使用-研究中心
private List<String> wordsList; private List<String> wordsList;
//是否免审核(1-是;0-否) //是否免审核(1-是;0-否)
private Integer isFreeCheck; private Integer isFreeCheck;
//得分范围 //得分范围-研究中心
//最小得分 //最小得分
private Integer minScore; private Integer minScore;
//最大得分 //最大得分
...@@ -97,10 +101,10 @@ public class InfoDataSearchCondition { ...@@ -97,10 +101,10 @@ public class InfoDataSearchCondition {
//每页返回条数 //每页返回条数
private Integer pageSize = 10; private Integer pageSize = 10;
//资讯id集合,导出/专题分析时 使用 //资讯id集合,导出/专题分析时使用-研究中心
private List<String> ids; private List<String> ids;
/*------资讯导出类参数---start-------------------*/ /*------资讯导出类参数-研究中心---start-------------------*/
//导出方式(1-摘要;2-正文) //导出方式(1-摘要;2-正文)
private Integer exportType; private Integer exportType;
/*------资讯导出类参数---end-------------------*/ /*------资讯导出类参数---end-------------------*/
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论