提交 00256774 作者: chenshiqiang

commit

上级 7fd0fc67
...@@ -50,6 +50,12 @@ ...@@ -50,6 +50,12 @@
<artifactId>commons-lang3</artifactId> <artifactId>commons-lang3</artifactId>
<version>3.12.0</version> <version>3.12.0</version>
</dependency> </dependency>
<!--hanlp-->
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.8.3</version>
</dependency>
<!-- json--> <!-- json-->
<dependency> <dependency>
......
...@@ -29,7 +29,7 @@ public class Constants { ...@@ -29,7 +29,7 @@ public class Constants {
//专题事件脉络展示 伪事件脉络 的资讯数量阈值 //专题事件脉络展示 伪事件脉络 的资讯数量阈值
public static final int FAKE_NUM = 6; public static final int FAKE_NUM = 6;
//kafka 发送分析命令 主题 //kafka 发送分析命令 主题
public static final String EVENT_ANALYSIS_TOPIC = "event-analysis"; public static final String EVENT_VIEWPOINT_SEND_DATA = "event_viewpoint_send_data";
//kafka 发送 事件脉络所需信息 主题 //kafka 发送 事件脉络所需信息 主题
public static final String EVENT_CONTEXT_SEND_TOPIC = "event_context_send_topic"; public static final String EVENT_CONTEXT_SEND_TOPIC = "event_context_send_topic";
//kafka 发送 伪事件脉络所需信息 主题 //kafka 发送 伪事件脉络所需信息 主题
......
...@@ -46,6 +46,7 @@ public class KafkaConsumer { ...@@ -46,6 +46,7 @@ public class KafkaConsumer {
@KafkaListener(topics = {Constants.VIEWPOINT_RECEIVE_TOPIC}) @KafkaListener(topics = {Constants.VIEWPOINT_RECEIVE_TOPIC})
public void viewPointAnalysis(ConsumerRecord<String, String> record) { public void viewPointAnalysis(ConsumerRecord<String, String> record) {
String value = record.value(); String value = record.value();
log.info("viewpointMessage:{}",value);
if (StringUtils.isNotEmpty(value)) { if (StringUtils.isNotEmpty(value)) {
String subjectId = null; String subjectId = null;
try { try {
......
...@@ -4,7 +4,6 @@ import cn.hutool.core.date.DateField; ...@@ -4,7 +4,6 @@ import cn.hutool.core.date.DateField;
import cn.hutool.core.date.DateTime; import cn.hutool.core.date.DateTime;
import cn.hutool.core.date.DateUnit; import cn.hutool.core.date.DateUnit;
import cn.hutool.core.date.DateUtil; import cn.hutool.core.date.DateUtil;
import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.core.metadata.IPage;
...@@ -12,13 +11,17 @@ import com.zzsn.event.constant.Constants; ...@@ -12,13 +11,17 @@ import com.zzsn.event.constant.Constants;
import com.zzsn.event.entity.*; import com.zzsn.event.entity.*;
import com.zzsn.event.producer.ProduceInfo; import com.zzsn.event.producer.ProduceInfo;
import com.zzsn.event.service.*; import com.zzsn.event.service.*;
import com.zzsn.event.util.*; import com.zzsn.event.util.CalculateUtil;
import com.zzsn.event.util.HttpUtil;
import com.zzsn.event.util.ObjectUtil;
import com.zzsn.event.util.RedisUtil;
import com.zzsn.event.vo.*; import com.zzsn.event.vo.*;
import com.zzsn.event.xxljob.entity.KeyWords; import com.zzsn.event.xxljob.entity.KeyWords;
import com.zzsn.event.xxljob.service.IXxlJobInfoService; import com.zzsn.event.xxljob.service.IXxlJobInfoService;
import io.swagger.annotations.Api; import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiOperation;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
...@@ -26,9 +29,11 @@ import org.springframework.beans.factory.annotation.Value; ...@@ -26,9 +29,11 @@ import org.springframework.beans.factory.annotation.Value;
import org.springframework.web.bind.annotation.*; import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest; import org.springframework.web.multipart.MultipartHttpServletRequest;
import org.springframework.web.util.WebUtils;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.*; import java.util.*;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
...@@ -69,6 +74,10 @@ public class EventController { ...@@ -69,6 +74,10 @@ public class EventController {
private LabelEntityService labelEntityService; private LabelEntityService labelEntityService;
@Autowired @Autowired
private SubjectDisplayServive subjectDisplayServive; private SubjectDisplayServive subjectDisplayServive;
@Autowired
private SubjectAnalysisService subjectAnalysisService;
@Autowired
private ColumnService columnService;
/** /**
* 1.1 分页列表查询 * 1.1 分页列表查询
...@@ -82,11 +91,13 @@ public class EventController { ...@@ -82,11 +91,13 @@ public class EventController {
@ApiOperation(value = "事件-分页列表查询", notes = "事件-分页列表查询") @ApiOperation(value = "事件-分页列表查询", notes = "事件-分页列表查询")
@GetMapping(value = "/list") @GetMapping(value = "/list")
public Result<?> queryPageList(Event event, public Result<?> queryPageList(Event event,
@RequestParam(name = "order", required = false) String order,
@RequestParam(name = "orderType", defaultValue = "asc") String orderType,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo, @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize, @RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize,
HttpServletRequest req) { HttpServletRequest req) {
IPage<Event> pageList = eventService.pageList(event, pageNo, pageSize); IPage<Event> pageList = eventService.pageList(event, pageNo, pageSize, order, orderType);
return Result.OK(pageList); return Result.OK(pageList);
} }
...@@ -183,6 +194,22 @@ public class EventController { ...@@ -183,6 +194,22 @@ public class EventController {
return Result.OK(event); return Result.OK(event);
} }
/**
 * 1.7 Publish an event: copies the publish date and status from the request
 * body onto the persisted entity and refreshes its update time.
 *
 * @param event request body carrying id, publishDate and publishStatus
 * @return success message, or an error result when the id is unknown
 */
@PostMapping(value = "/publish")
public Result<?> publish(@RequestBody Event event) {
    Event byId = eventService.getById(event.getId());
    // Guard against an unknown id - the original code threw an NPE here.
    if (byId == null) {
        return Result.error("事件不存在!");
    }
    byId.setUpdateTime(new Date());
    byId.setPublishDate(event.getPublishDate());
    byId.setPublishStatus(event.getPublishStatus());
    eventService.updateById(byId);
    return Result.OK("发布成功!");
}
/** /**
* 2.1 专题信息源绑定 * 2.1 专题信息源绑定
...@@ -475,7 +502,46 @@ public class EventController { ...@@ -475,7 +502,46 @@ public class EventController {
String url = eventService.upload(file); String url = eventService.upload(file);
return Result.OK(url); return Result.OK(url);
} }
/**
 * 2.18 Article detail.
 *
 * @param articleId id of the article to load
 * @return the article's display information
 */
@GetMapping(value = "/articleDetail")
public Result<DisplayInfo> articleDetail(@RequestParam(name = "articleId") String articleId) {
    // Typed Result instead of the raw type; payload and behavior unchanged.
    DisplayInfo displayInfo = eventService.articleDetail(articleId);
    return Result.OK(displayInfo);
}
/**
 * 2.19 Hot words of a single article.
 *
 * NOTE(review): the method is named articleList but serves /hotWords -
 * consider renaming it for clarity (the mapping path is what callers use).
 *
 * @param id     article id
 * @param number maximum number of hot words to return (defaults to 200)
 * @return hot-word result built by the service layer
 */
@ApiOperation(value = "单篇文章热词", notes = "单篇文章热词")
@GetMapping(value = "/hotWords")
public Result<?> articleList(@RequestParam("id") String id,
@RequestParam(name = "number", defaultValue = "200") Integer number) {
return eventService.hotWords(id,number);
}
/**
 * 2.20 Related recommendations - pages through articles matching a title.
 *
 * @param title    title to match (defaults to empty)
 * @param pageNo   page number, 1-based
 * @param pageSize number of rows per page
 * @return a page of recommended articles
 * @throws Exception propagated from the query layer
 */
@GetMapping(value = "/recommendList")
public Result<?> recommendList(@RequestParam(name = "title", defaultValue = "") String title,
                               @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
                               @RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize) throws Exception {
    // Delegate to the display service and wrap the resulting page directly.
    return Result.OK(subjectDisplayServive.queryRecommendList(title, pageNo, pageSize));
}
/** /**
* 3.1 传播路径 * 3.1 传播路径
...@@ -486,6 +552,7 @@ public class EventController { ...@@ -486,6 +552,7 @@ public class EventController {
public Result propagationPath(@RequestParam String eventId) { public Result propagationPath(@RequestParam String eventId) {
String key = Constants.SUBJECT_ANALYSIS_PRE + Constants.PROPAGATION_KEY + eventId; String key = Constants.SUBJECT_ANALYSIS_PRE + Constants.PROPAGATION_KEY + eventId;
PropagationPathVo pathVo = (PropagationPathVo) redisUtil.get(key); PropagationPathVo pathVo = (PropagationPathVo) redisUtil.get(key);
pathVo = analysisService.propagationPath(eventId);
if (ObjectUtils.isEmpty(pathVo)) { if (ObjectUtils.isEmpty(pathVo)) {
pathVo = analysisService.propagationPath(eventId); pathVo = analysisService.propagationPath(eventId);
} }
...@@ -544,8 +611,13 @@ public class EventController { ...@@ -544,8 +611,13 @@ public class EventController {
Object count = map.get("totalCount"); Object count = map.get("totalCount");
String divide = CalculateUtil.divide(String.valueOf(count), String.valueOf(hours), 2); String divide = CalculateUtil.divide(String.valueOf(count), String.valueOf(hours), 2);
map.put("spread", divide); map.put("spread", divide);
// String mainReport = esStatisticsService.mainReport(subjectId); String mainReport = esStatisticsService.mainReport(subjectId);
map.put("mainReport", "mainReport"); map.put("mainReport", mainReport);
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
LocalDateTime now = LocalDateTime.now();
LocalDateTime previousHour = now.minus(1, ChronoUnit.HOURS);
Map<String, Object> map2 = esStatisticsService.totalAndMax(subjectId, previousHour.format(formatter), now.format(formatter));
map.put("lastHour", map2.get("totalCount"));
return Result.OK(map); return Result.OK(map);
} }
...@@ -640,18 +712,12 @@ public class EventController { ...@@ -640,18 +712,12 @@ public class EventController {
* 3.7 事件资讯列表展示 * 3.7 事件资讯列表展示
* *
* @param subjectInfoVo 部分筛选条件封装 * @param subjectInfoVo 部分筛选条件封装
* @param userId 用户id
* @param video * @param video
* @param pageNo 当前页 * @param pageNo 当前页
* @param pageSize 返回条数 * @param pageSize 返回条数
* @param column 排序字段 * @param column 排序字段
* @param isCustomer 是否为客户 1是 0 否
* @param order 排序方式 * @param order 排序方式
* @param crawler * @param crawler
* @param objectType
* @param objectId
* @param isSubject
* @param subjectType
* @param labelIds * @param labelIds
* @param sourceId * @param sourceId
* @author lkg * @author lkg
...@@ -660,26 +726,110 @@ public class EventController { ...@@ -660,26 +726,110 @@ public class EventController {
@ApiOperation(value = "专题信息列表-分页列表查询", notes = "专题信息列表-分页列表查询") @ApiOperation(value = "专题信息列表-分页列表查询", notes = "专题信息列表-分页列表查询")
@PostMapping(value = "/listArticle") @PostMapping(value = "/listArticle")
public Result<?> queryPageList(@RequestBody SubjectInfoVo subjectInfoVo, public Result<?> queryPageList(@RequestBody SubjectInfoVo subjectInfoVo,
@RequestParam(name = "userId", defaultValue = "") String userId,
@RequestParam(name = "video", defaultValue = "") String video, @RequestParam(name = "video", defaultValue = "") String video,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo, @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "isCustomer", defaultValue = "0") Integer isCustomer,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize, @RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize,
@RequestParam(name = "column", defaultValue = "common") String column, @RequestParam(name = "column", defaultValue = "common") String column,
@RequestParam(name = "order", defaultValue = "desc") String order, @RequestParam(name = "order", defaultValue = "desc") String order,
@RequestParam(name = "crawler", defaultValue = "") String crawler, @RequestParam(name = "crawler", defaultValue = "") String crawler,
@RequestParam(name = "objectType", defaultValue = "", required = false) String objectType,
@RequestParam(name = "objectId", defaultValue = "", required = false) String objectId,
@RequestParam(name = "isSubject", defaultValue = "1") String isSubject,
@RequestParam(name = "subjectType", defaultValue = "1") String subjectType,
@RequestParam(name = "labelIds", required = false) String labelIds, @RequestParam(name = "labelIds", required = false) String labelIds,
@RequestParam(name = "sourceId", required = false) String sourceId) throws Exception { @RequestParam(name = "sourceId", required = false) String sourceId) throws Exception {
List<String> socialCreditCodeList = new ArrayList<>(); List<String> socialCreditCodeList = new ArrayList<>();
//获取数据 //获取数据
IPage<DisplayInfo> pageList = subjectDisplayServive.frontListByPage(objectType, objectId, userId, subjectInfoVo, video, pageNo, pageSize, column, order, crawler, isSubject, subjectType, labelIds, socialCreditCodeList, sourceId, isCustomer); IPage<DisplayInfo> pageList = subjectDisplayServive.frontListByPage(subjectInfoVo, video, pageNo, pageSize, column, order, crawler, labelIds, socialCreditCodeList, sourceId);
return Result.OK(pageList); return Result.OK(pageList);
} }
/**
 * 3.8 Event viewpoint/analysis list for one event.
 *
 * @param eventId id of the event (stored as subjectId on the analysis rows)
 * @param type    analysis type filter: 4 selects only type-4 rows,
 *                any other value excludes type-4 rows
 * @return analysis records of category 2 for the event
 * @author lkg
 * @date 2024/1/12
 */
@ApiOperation(value = "事件观点分析列表", notes = "事件观点分析列表")
@GetMapping(value = "/listEventAnalysis")
public Result<List<SubjectAnalysis>> listEventAnalysis(
        @RequestParam(name = "eventId") String eventId,
        @RequestParam(name = "type", defaultValue = "0") Integer type) {
    // category 2 = viewpoint analysis records.
    LambdaQueryWrapper<SubjectAnalysis> wrapper = new LambdaQueryWrapper<SubjectAnalysis>()
            .eq(SubjectAnalysis::getSubjectId, eventId)
            .eq(SubjectAnalysis::getCategory, 2);
    // Type 4 is a distinct analysis kind: request it explicitly or exclude it.
    if (type == 4) {
        wrapper.eq(SubjectAnalysis::getType, 4);
    } else {
        wrapper.ne(SubjectAnalysis::getType, 4);
    }
    List<SubjectAnalysis> list = subjectAnalysisService.list(wrapper);
    return Result.OK(list);
}
/**
 * 4.1 Overall event statistics: number of events, total article count,
 * and articles added over the last 24 hours.
 *
 * @return map with keys eventCount, total and totalAdd
 * @author lkg
 * @date 2024/1/12
 */
@GetMapping(value = "/eventData")
public Result<?> eventData() {
    Map<String, Integer> map = new HashMap<>();
    List<Event> list = this.eventService.list();
    map.put("eventCount", list.size());
    List<String> eventIdList = list.stream().map(Event::getId).collect(Collectors.toList());
    long total = esStatisticsService.totalCount(eventIdList, null, null);
    map.put("total", (int) total);
    DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    LocalDateTime now = LocalDateTime.now();
    // Window is the past DAY; the original local was misleadingly named previousHour.
    LocalDateTime previousDay = now.minus(1, ChronoUnit.DAYS);
    long totalAdd = esStatisticsService.totalCount(eventIdList, previousDay.format(formatter), now.format(formatter));
    map.put("totalAdd", (int) totalAdd);
    return Result.OK(map);
}
/**
 * 4.2 Regional distribution across every event.
 *
 * @return top regions with their document counts
 * @author lkg
 * @date 2024/2/2
 */
@GetMapping("/region")
public Result<List<CountVO>> region() {
    // Join every event id into a comma-separated list, then ask the
    // column service for the regional breakdown over all of them.
    List<Event> allEvents = this.eventService.list();
    String eventIds = allEvents.stream()
            .map(Event::getId)
            .collect(Collectors.joining(","));
    return Result.OK(columnService.region(null, null, eventIds, null, null));
}
/**
 * 4.3 Flow data for the 10 hottest events (by totalHot), each annotated
 * with a percentage share of its own flow total.
 *
 * NOTE(review): {@code total} is seeded from countVO.getValue() BEFORE
 * supply() runs, and {@code value} is re-read AFTER supply(). Unless
 * supply() mutates the value, the ratio is always 1 (100%) - confirm
 * supply()'s side effects against its implementation (defined below,
 * outside this view).
 *
 * @param startTime window start (inclusive)
 * @param endTime   window end (inclusive)
 * @param type      flow granularity selector passed through to the ES layer
 * @author lkg
 * @date 2024/2/2
 */
@GetMapping("/hotEventFlow")
public Result<?> hotEventFlow(@RequestParam String startTime,
@RequestParam String endTime, @RequestParam Integer type) {
// Top 10 events ordered by total hotness.
List<Event> list = this.eventService.list(new LambdaQueryWrapper<Event>()
.orderByDesc(Event::getTotalHot).last(" limit 10"));
List<CountVO> countVOList = new ArrayList<>(10);
for (Event event : list) {
AtomicLong total = new AtomicLong();
CountVO countVO = esStatisticsService.flowData(event.getId(), startTime, endTime, type);
// Snapshot the pre-supply value as the denominator.
total.addAndGet(countVO.getValue());
// Fills in missing time buckets (may adjust countVO's value).
supply(countVO, startTime, endTime, type);
long value = countVO.getValue();
long totalCount = total.get();
String divide = CalculateUtil.divide(String.valueOf(value), String.valueOf(totalCount));
String percentage = "0%";
if (StringUtils.isNotEmpty(divide)) {
percentage = CalculateUtil.percentage(Double.parseDouble(divide), false);
}
countVO.setPercentage(percentage);
countVOList.add(countVO);
}
return Result.OK(countVOList);
}
/** /**
* 补充缺失的时间 * 补充缺失的时间
......
...@@ -604,13 +604,8 @@ public class ReportApiController { ...@@ -604,13 +604,8 @@ public class ReportApiController {
* @param pageNo 当前页 * @param pageNo 当前页
* @param pageSize 返回条数 * @param pageSize 返回条数
* @param column 排序字段 * @param column 排序字段
* @param isCustomer 是否为客户 1是 0 否
* @param order 排序方式 * @param order 排序方式
* @param crawler * @param crawler
* @param objectType
* @param objectId
* @param isSubject
* @param subjectType
* @param labelIds * @param labelIds
* @param sourceId * @param sourceId
* @author lkg * @author lkg
...@@ -622,22 +617,19 @@ public class ReportApiController { ...@@ -622,22 +617,19 @@ public class ReportApiController {
@RequestParam(name = "subjectId", defaultValue = "") String subjectId, @RequestParam(name = "subjectId", defaultValue = "") String subjectId,
@RequestParam(name = "video", defaultValue = "") String video, @RequestParam(name = "video", defaultValue = "") String video,
@RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo, @RequestParam(name = "pageNo", defaultValue = "1") Integer pageNo,
@RequestParam(name = "isCustomer", defaultValue = "0") Integer isCustomer,
@RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize, @RequestParam(name = "pageSize", defaultValue = "10") Integer pageSize,
@RequestParam(name = "column", defaultValue = "common") String column, @RequestParam(name = "column", defaultValue = "common") String column,
@RequestParam(name = "order", defaultValue = "desc") String order, @RequestParam(name = "order", defaultValue = "desc") String order,
@RequestParam(name = "crawler", defaultValue = "") String crawler, @RequestParam(name = "crawler", defaultValue = "") String crawler,
@RequestParam(name = "objectType", defaultValue = "", required = false) String objectType,
@RequestParam(name = "objectId", defaultValue = "", required = false) String objectId,
@RequestParam(name = "isSubject", defaultValue = "1") String isSubject,
@RequestParam(name = "subjectType", defaultValue = "1") String subjectType,
@RequestParam(name = "labelIds", required = false) String labelIds, @RequestParam(name = "labelIds", required = false) String labelIds,
@RequestParam(name = "sourceId", required = false) String sourceId) throws Exception { @RequestParam(name = "sourceId", required = false) String sourceId) throws Exception {
SubjectInfoVo subjectInfoVo=new SubjectInfoVo(); SubjectInfoVo subjectInfoVo=new SubjectInfoVo();
subjectInfoVo.setSubjectId(subjectId); subjectInfoVo.setSubjectId(subjectId);
List<String> socialCreditCodeList = new ArrayList<>(); List<String> socialCreditCodeList = new ArrayList<>();
//获取数据 //获取数据
IPage<DisplayInfo> pageList = subjectDisplayServive.frontListByPage(objectType, objectId, null, subjectInfoVo, video, pageNo, pageSize, column, order, crawler, isSubject, subjectType, labelIds, socialCreditCodeList, sourceId, isCustomer); IPage<DisplayInfo> pageList = subjectDisplayServive.frontListByPage(subjectInfoVo, video, pageNo, pageSize, column, order, crawler, labelIds, socialCreditCodeList, sourceId);
return Result.OK(pageList); return Result.OK(pageList);
} }
} }
package com.zzsn.event.entity;
import com.baomidou.mybatisplus.annotation.TableName;
import io.swagger.annotations.ApiModelProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * @Description: Expert entity mapped to the cms_expert table.
 * @Author: jeecg-boot
 * @Date: 2024-03-14
 * @Version: V1.0
 */
@Data
@TableName("cms_expert")
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class CmsExpert {
/** Expert id (primary key). */
private String expertId;
/** Path/URL of the expert's head picture. */
private String headPic;
}
...@@ -127,6 +127,7 @@ public class Event { ...@@ -127,6 +127,7 @@ public class Event {
private Integer otherHot; private Integer otherHot;
private Integer publishStatus; private Integer publishStatus;
private String relationEvents; private String relationEvents;
private String publishDate;
@TableField(exist = false) @TableField(exist = false)
...@@ -137,4 +138,18 @@ public class Event { ...@@ -137,4 +138,18 @@ public class Event {
private List<Event> relatedEventList; private List<Event> relatedEventList;
@TableField(exist = false) @TableField(exist = false)
private EventTag eventTag; private EventTag eventTag;
@TableField(exist = false)
private String extractIndustryTag;
@TableField(exist = false)
private String extractCompanyTag;
@TableField(exist = false)
private String extractPersonTag;
@TableField(exist = false)
private String extractSentimentTag;
@TableField(exist = false)
private String extractKeywordsTag;
@TableField(exist = false)
private String extractLocationTag;
@TableField(exist = false)
private String extractTimeTag;
} }
...@@ -46,4 +46,5 @@ public class SubjectAnalysis implements Serializable { ...@@ -46,4 +46,5 @@ public class SubjectAnalysis implements Serializable {
/*分析时间*/ /*分析时间*/
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss") @DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
private Date analysisDate; private Date analysisDate;
private String professionName;
} }
package com.zzsn.event.entity;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.annotation.JsonFormat;
import io.swagger.annotations.ApiModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import org.jeecgframework.poi.excel.annotation.Excel;
import org.springframework.format.annotation.DateTimeFormat;
import java.io.Serializable;
import java.util.Date;
/**
 * @Description: Domestic region table (provinces / cities hierarchy).
 * @Author: jeecg-boot
 * @Date: 2022-03-09
 * @Version: V1.0
 */
@Data
@TableName("sys_base_domestic_region")
@ApiModel(value="sys_base_domestic_region对象", description="国内地域表")
public class SysBaseDomesticRegion implements Serializable {
private static final long serialVersionUID = 1L;
/** Primary key. */
@TableId(type = IdType.ASSIGN_ID)
@ApiModelProperty(value = "主键")
private String id;
/** Region name. */
@Excel(name = "名称", width = 15)
@ApiModelProperty(value = "名称")
private String name;
/** Alias name. */
@Excel(name = "别名", width = 15)
@ApiModelProperty(value = "别名")
private String alias;
/** Administrative division code. */
@Excel(name = "行政编码", width = 15)
@ApiModelProperty(value = "行政编码")
private String code;
/** Id of the owning province or municipality. */
@Excel(name = "所属省或直辖市", width = 15)
@ApiModelProperty(value = "所属省或直辖市")
private String topId;
/** Geographic area. */
@Excel(name = "地理区", width = 15)
@ApiModelProperty(value = "地理区")
private String area;
/** Economic zone. */
@Excel(name = "经济区", width = 15)
@ApiModelProperty(value = "经济区")
private String economy;
/** All ancestor pids (path of parent ids). */
@Excel(name = "所有pid", width = 15)
@ApiModelProperty(value = "所有pid")
private String pathIds;
/** Composite search words for this region. */
@Excel(name = "组合词", width = 15)
@ApiModelProperty(value = "组合词")
private String composeWords;
/** Hierarchy level (1 = province - see provinceList usage). */
@Excel(name = "层级", width = 15)
@ApiModelProperty(value = "层级")
private Integer level;
/** Created by. */
@ApiModelProperty(value = "创建人")
private String createBy;
/** Creation date. */
@JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
@ApiModelProperty(value = "创建日期")
private Date createTime;
/** Updated by. */
@ApiModelProperty(value = "更新人")
private String updateBy;
/** Update date. */
@JsonFormat(timezone = "GMT+8",pattern = "yyyy-MM-dd HH:mm:ss")
@DateTimeFormat(pattern="yyyy-MM-dd HH:mm:ss")
@ApiModelProperty(value = "更新日期")
private Date updateTime;
/** Owning department/org code. */
@ApiModelProperty(value = "所属部门")
private String sysOrgCode;
/** Parent node id. */
@Excel(name = "父级节点", width = 15)
@ApiModelProperty(value = "父级节点")
private String pid;
/** Whether this node has children. */
@Excel(name = "是否有子节点", width = 15, dicCode = "yn")
@ApiModelProperty(value = "是否有子节点")
private String hasChild;
/** Longitude. */
private String longitude;
/** Latitude. */
private String latitude;
}
...@@ -21,7 +21,7 @@ public interface EventMapper extends BaseMapper<Event> { ...@@ -21,7 +21,7 @@ public interface EventMapper extends BaseMapper<Event> {
List<SubjectKafkaVo> eventSubjectList(int i); List<SubjectKafkaVo> eventSubjectList(int i);
List<Event> pageList(@Param("event")Event event, Integer offset, Integer pageSize); List<Event> pageList(@Param("event")Event event, Integer offset, Integer pageSize,String order,String orderType);
Integer totalCount(@Param("event") Event event); Integer totalCount(@Param("event") Event event);
......
package com.zzsn.event.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.zzsn.event.entity.SysBaseDomesticRegion;
import org.apache.ibatis.annotations.Mapper;
/**
 * @Description: Mapper for the domestic region table; inherits all CRUD
 *               operations from MyBatis-Plus BaseMapper, no custom SQL.
 * @Author: jeecg-boot
 * @Date: 2022-03-09
 * @Version: V1.0
 */
@Mapper
public interface SysBaseDomesticRegionMapper extends BaseMapper<SysBaseDomesticRegion> {
}
...@@ -10,8 +10,9 @@ ...@@ -10,8 +10,9 @@
</select> </select>
<select id="pageList" resultType="com.zzsn.event.entity.Event"> <select id="pageList" resultType="com.zzsn.event.entity.Event">
select t2.type_name,t1.* from event t1 select t2.type_name,t1.*,t3.* from event t1
left join event_category t2 on t1.event_type =t2.id left join event_category t2 on t1.event_type =t2.id
left join event_tag t3 on t1.id=t3.event_id
where 1=1 where 1=1
<if test="event.eventName!=null and event.eventName != ''"> <if test="event.eventName!=null and event.eventName != ''">
and t1.event_name like CONCAT('%',#{event.eventName},'%') and t1.event_name like CONCAT('%',#{event.eventName},'%')
...@@ -25,7 +26,12 @@ ...@@ -25,7 +26,12 @@
<if test="event.endDate!=null and event.endDate != ''"> <if test="event.endDate!=null and event.endDate != ''">
and t1.create_time <![CDATA[ <= ]]> #{event.endDate} and t1.create_time <![CDATA[ <= ]]> #{event.endDate}
</if> </if>
order by t1.create_time desc <if test="order!=null and order != ''">
order by t1.${order}
<if test="orderType!=null and orderType != ''">
${orderType}
</if>
</if>
limit #{offset}, #{pageSize} limit #{offset}, #{pageSize}
</select> </select>
<select id="totalCount" resultType="java.lang.Integer"> <select id="totalCount" resultType="java.lang.Integer">
...@@ -50,7 +56,7 @@ ...@@ -50,7 +56,7 @@
select s.id,s.event_name as subject_name,s.start_time as time_enable,s.end_time as time_disable,s.incre_ana_rule, select s.id,s.event_name as subject_name,s.start_time as time_enable,s.end_time as time_disable,s.incre_ana_rule,
s.total_ana_rule,s.time_ana_rule,s.analysis_time,s.event_time s.total_ana_rule,s.time_ana_rule,s.analysis_time,s.event_time
from event s from event s
where 1=1 and id='1702575766171996162' where 1=1
<if test="endDate != null"> <if test="endDate != null">
and (s.end_time is null or s.end_time <![CDATA[ >= ]]> #{endDate}) and (s.end_time is null or s.end_time <![CDATA[ >= ]]> #{endDate})
</if> </if>
......
...@@ -5,6 +5,7 @@ import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; ...@@ -5,6 +5,7 @@ import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.zzsn.event.constant.Constants; import com.zzsn.event.constant.Constants;
import com.zzsn.event.entity.SysBaseDomesticRegion;
import com.zzsn.event.enums.InfluenceEnum; import com.zzsn.event.enums.InfluenceEnum;
import com.zzsn.event.enums.SourceEnum; import com.zzsn.event.enums.SourceEnum;
import com.zzsn.event.util.CalculateUtil; import com.zzsn.event.util.CalculateUtil;
...@@ -57,6 +58,8 @@ public class ColumnService { ...@@ -57,6 +58,8 @@ public class ColumnService {
private RestHighLevelClient client; private RestHighLevelClient client;
@Autowired @Autowired
private IInfoSourceService infoSourceService; private IInfoSourceService infoSourceService;
@Autowired
private ISysBaseDomesticRegionService domesticRegionService;
/** /**
* 获取栏目或栏目组下的信息数量 * 获取栏目或栏目组下的信息数量
...@@ -494,7 +497,7 @@ public class ColumnService { ...@@ -494,7 +497,7 @@ public class ColumnService {
dataTypeBoolQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("dataType"))); dataTypeBoolQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("dataType")));
boolQuery.must(dataTypeBoolQuery); boolQuery.must(dataTypeBoolQuery);
if (StringUtils.isNotEmpty(subjectId)) { if (StringUtils.isNotEmpty(subjectId)) {
boolQuery.must(QueryBuilders.termQuery("subjectId.keyword", subjectId)); boolQuery.must(QueryBuilders.termsQuery("subjectId.keyword", subjectId.split(",")));
} }
if (StringUtils.isNotEmpty(startTime)) { if (StringUtils.isNotEmpty(startTime)) {
boolQuery.filter(QueryBuilders.rangeQuery("publishDate").gte(EsDateUtil.esFieldDateFormat(startTime))); boolQuery.filter(QueryBuilders.rangeQuery("publishDate").gte(EsDateUtil.esFieldDateFormat(startTime)));
...@@ -528,4 +531,86 @@ public class ColumnService { ...@@ -528,4 +531,86 @@ public class ColumnService {
return percentage; return percentage;
} }
/**
 * Regional distribution: top provinces ranked by labelled document count.
 *
 * @param column    column id(s) or column-group id(s), comma separated
 * @param ynGroup   whether the ids refer to column groups
 * @param subjectId subject id(s), comma separated
 * @param startTime start of the publish-date window (inclusive)
 * @param endTime   end of the publish-date window (inclusive)
 * @return up to 10 provinces with counts, ordered descending
 * @author lkg
 * @date 2024/2/2
 */
public List<CountVO> region(String column, Boolean ynGroup, String subjectId, String startTime, String endTime) {
    List<CountVO> list = new ArrayList<>();
    SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
    SearchSourceBuilder searchSourceBuilder = getSearchSourceBuilder(column, ynGroup, subjectId, startTime, endTime, null);
    searchSourceBuilder.size(0);
    searchSourceBuilder.trackTotalHits(true);
    // Nested aggregation: group by label mark, then by region id, top 10 per mark.
    NestedAggregationBuilder nestedAggregationBuilder = AggregationBuilders.nested("labels", "labels")
            .subAggregation(AggregationBuilders.terms("group_type")
                    .field("labels.labelMark.keyword")
                    .subAggregation(AggregationBuilders.terms("group_code")
                            .field("labels.relationId")
                            .order(BucketOrder.count(false))
                            .size(10)));
    searchSourceBuilder.aggregation(nestedAggregationBuilder);
    searchRequest.source(searchSourceBuilder);
    try {
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = searchResponse.getAggregations();
        ParsedNested labels = aggregations.get("labels");
        Aggregations labelsAggregations = labels.getAggregations();
        // Only the "region_out" bucket carries region-id -> doc-count pairs.
        Terms groupType = labelsAggregations.get("group_type");
        List<? extends Terms.Bucket> typeBuckets = groupType.getBuckets();
        Map<String, Long> regionMap = new HashMap<>();
        for (Terms.Bucket type : typeBuckets) {
            if ("region_out".equals(type.getKeyAsString())) {
                Terms groupCode = type.getAggregations().get("group_code");
                for (Terms.Bucket code : groupCode.getBuckets()) {
                    regionMap.put(code.getKeyAsString(), code.getDocCount());
                }
                break;
            }
        }
        // Map region ids back to province names, stopping at 10 results.
        // NOTE(review): provinces absent from the aggregation get a "0"
        // CountVO that is never added to the list (same as the original) -
        // confirm whether zero-count rows should appear in the output.
        List<SysBaseDomesticRegion> regionList = provinceList();
        for (SysBaseDomesticRegion region : regionList) {
            if (list.size() >= 10) {
                break;
            }
            CountVO countVO = new CountVO();
            countVO.setKey(region.getName());
            Long count = regionMap.get(region.getId());
            if (count != null) {
                countVO.setCount(String.valueOf(count));
                list.add(countVO);
            } else {
                countVO.setCount("0");
            }
        }
        // Sort numerically, descending. The original compared the count
        // STRINGS lexicographically, which mis-orders e.g. "9" vs "10".
        list.sort((o1, o2) -> Long.compare(Long.parseLong(o2.getCount()), Long.parseLong(o1.getCount())));
        for (int i = 0; i < list.size(); i++) {
            list.get(i).setOrder(i + 1);
        }
    } catch (Exception e) {
        // TODO(review): route through a proper logger instead of stderr.
        e.printStackTrace();
    }
    return list;
}
/**
 * Loads every province-level region (level == 1), fetching only the id,
 * name and level columns needed by the region aggregation.
 */
private List<SysBaseDomesticRegion> provinceList() {
    LambdaQueryWrapper<SysBaseDomesticRegion> wrapper = Wrappers.lambdaQuery();
    wrapper.select(SysBaseDomesticRegion::getId, SysBaseDomesticRegion::getName, SysBaseDomesticRegion::getLevel)
            .eq(SysBaseDomesticRegion::getLevel, 1);
    return domesticRegionService.list(wrapper);
}
} }
...@@ -18,6 +18,11 @@ import org.elasticsearch.index.query.QueryBuilders; ...@@ -18,6 +18,11 @@ import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
...@@ -291,4 +296,37 @@ public class EsService { ...@@ -291,4 +296,37 @@ public class EsService {
} }
return null; return null;
} }
public List<String> groupByOrigin(String title, String publishDate) {
List<String> originList = new ArrayList<>();
SearchRequest searchRequest = new SearchRequest(Constants.COLLECT_INDEX);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//只返回分组聚合结果,不返回具体数据
searchSourceBuilder.size(0);
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
String[] arr = new String[]{"title"};
boolQuery.must(QueryBuilders.multiMatchQuery(title,arr));
boolQuery.filter(QueryBuilders.rangeQuery("publishDate").gt(EsDateUtil.esFieldDateFormat(publishDate)));
TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("group_origin")
.field("origin.keyword")
.size(20)
.order(BucketOrder.count(false));
searchSourceBuilder.query(boolQuery);
searchSourceBuilder.aggregation(aggregationBuilder);
searchRequest.source(searchSourceBuilder);
try {
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
Terms groupOrigin = aggregations.get("group_origin");
List<? extends Terms.Bucket> buckets = groupOrigin.getBuckets();
for (Terms.Bucket bucket : buckets) {
String origin = bucket.getKeyAsString();
originList.add(origin);
}
} catch (Exception e) {
e.printStackTrace();
}
return originList;
}
} }
...@@ -146,4 +146,6 @@ public interface EsStatisticsService { ...@@ -146,4 +146,6 @@ public interface EsStatisticsService {
Page<NegativeDataVO> labelPageList(String labelId, String startTime, String endTime, Integer pageNo, Integer pageSize); Page<NegativeDataVO> labelPageList(String labelId, String startTime, String endTime, Integer pageNo, Integer pageSize);
CountVO flowData(String subjectId, String startTime, String endTime, Integer type); CountVO flowData(String subjectId, String startTime, String endTime, Integer type);
long totalCount(List<String> eventIdList, String startTime, String endTime);
} }
...@@ -4,9 +4,7 @@ import com.baomidou.mybatisplus.core.metadata.IPage; ...@@ -4,9 +4,7 @@ import com.baomidou.mybatisplus.core.metadata.IPage;
import com.zzsn.event.entity.Event; import com.zzsn.event.entity.Event;
import com.baomidou.mybatisplus.extension.service.IService; import com.baomidou.mybatisplus.extension.service.IService;
import com.zzsn.event.entity.LabelEntity; import com.zzsn.event.entity.LabelEntity;
import com.zzsn.event.vo.AddEventParam; import com.zzsn.event.vo.*;
import com.zzsn.event.vo.KeyWordsPage;
import com.zzsn.event.vo.SubjectKafkaVo;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import java.util.Date; import java.util.Date;
...@@ -29,7 +27,7 @@ public interface IEventService extends IService<Event> { ...@@ -29,7 +27,7 @@ public interface IEventService extends IService<Event> {
List<SubjectKafkaVo> eventSubjectList(); List<SubjectKafkaVo> eventSubjectList();
IPage<Event> pageList(Event event, Integer pageNo, Integer pageSize); IPage<Event> pageList(Event event, Integer pageNo, Integer pageSize,String order,String orderType);
void extractHotWords(AddEventParam event); void extractHotWords(AddEventParam event);
...@@ -46,4 +44,8 @@ public interface IEventService extends IService<Event> { ...@@ -46,4 +44,8 @@ public interface IEventService extends IService<Event> {
String upload(MultipartFile file); String upload(MultipartFile file);
List<LabelEntity> listByType(String labelTypeId); List<LabelEntity> listByType(String labelTypeId);
DisplayInfo articleDetail(String articleId);
Result<?> hotWords(String id, Integer number);
} }
package com.zzsn.event.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.zzsn.event.entity.SysBaseDomesticRegion;
/**
* @Description: 国内地域表
* @Author: jeecg-boot
* @Date: 2022-03-09
* @Version: V1.0
*/
public interface ISysBaseDomesticRegionService extends IService<SysBaseDomesticRegion> {
    // Marker interface: all standard CRUD operations are inherited from MyBatis-Plus IService.
}
package com.zzsn.event.service; package com.zzsn.event.service;
import cn.hutool.json.JSONUtil; import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson.JSON;
import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
...@@ -9,6 +10,8 @@ import com.zzsn.event.util.DateUtil; ...@@ -9,6 +10,8 @@ import com.zzsn.event.util.DateUtil;
import com.zzsn.event.util.EsDateUtil; import com.zzsn.event.util.EsDateUtil;
import com.zzsn.event.util.EsIndexUtil; import com.zzsn.event.util.EsIndexUtil;
import com.zzsn.event.vo.DisplayInfo; import com.zzsn.event.vo.DisplayInfo;
import com.zzsn.event.vo.RepeatHold;
import com.zzsn.event.vo.SpecialInformation;
import com.zzsn.event.vo.SubjectInfoVo; import com.zzsn.event.vo.SubjectInfoVo;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
...@@ -21,12 +24,21 @@ import org.elasticsearch.index.query.BoolQueryBuilder; ...@@ -21,12 +24,21 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;
import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.stream.Collectors;
@Slf4j @Slf4j
@Service @Service
...@@ -47,9 +59,8 @@ public class SubjectDisplayServive { ...@@ -47,9 +59,8 @@ public class SubjectDisplayServive {
* @param sourceId * @param sourceId
* @throws Exception 异常 * @throws Exception 异常
*/ */
public IPage<DisplayInfo> frontListByPage(String objectType, String objectId, String userId, SubjectInfoVo subjectInfo, String video, int offset, int pageSize, public IPage<DisplayInfo> frontListByPage(SubjectInfoVo subjectInfo, String video, int offset, int pageSize,
String column, String order, String crawler, String isSubject, String subjectType, String labelIds, List<String> socialCreditCodeList, String sourceId, Integer isCustomer) throws Exception { String column, String order, String crawler, String labelIds, List<String> socialCreditCodeList, String sourceId) throws Exception {
String[] indexs = EsIndexUtil.getIndexIntervalYear(Constants.ES_DATA_FOR_SUBJECT, subjectInfo.getStartTime(), subjectInfo.getEndTime());
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT); SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//设置分页参数 //设置分页参数
...@@ -175,21 +186,137 @@ public class SubjectDisplayServive { ...@@ -175,21 +186,137 @@ public class SubjectDisplayServive {
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits(); SearchHit[] searchHits = searchResponse.getHits().getHits();
List<DisplayInfo> list = new ArrayList<>(); List<DisplayInfo> list = new ArrayList<>();
int i = 0;
for (SearchHit hit : searchHits) { for (SearchHit hit : searchHits) {
i++;
String queryInfo = hit.getSourceAsString(); String queryInfo = hit.getSourceAsString();
DisplayInfo info = JSONUtil.toBean(queryInfo, DisplayInfo.class); DisplayInfo info = JSONUtil.toBean(queryInfo, DisplayInfo.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate())); info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
String index = hit.getIndex(); String index = hit.getIndex();
info.setIndex(index); info.setIndex(index);
info.setSimilarNumber(0);
list.add(info); list.add(info);
} }
Map<String, Integer> map = getSimilarNumber(list.stream().map(DisplayInfo::getId).collect(Collectors.toList()));
for (DisplayInfo displayInfo : list) {
displayInfo.setSimilarNumber(map.get(displayInfo.getId()));
}
IPage<DisplayInfo> pageData = new Page<>(offset, pageSize, searchResponse.getHits().getTotalHits().value); IPage<DisplayInfo> pageData = new Page<>(offset, pageSize, searchResponse.getHits().getTotalHits().value);
pageData.setRecords(list); pageData.setRecords(list);
return pageData; return pageData;
} }
/**
* 获取相似文章数
*
* @param articleIdList 资讯id
* @return
*/
private Map<String, Integer> getSimilarNumber(List<String> articleIdList) {
Map<String, Integer> map = new HashMap<>();
articleIdList.add("70694089423478814");
articleIdList.add("71056913337311317");
articleIdList.add("71056913337311282");
Map<String, String> markmap = getMark(articleIdList);
List<String> markList = new ArrayList<>(markmap.keySet());
SearchRequest searchRequest = new SearchRequest(Constants.ES_REPEAT_OLD);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("repeatMark", markList));
searchSourceBuilder.size(0);
searchSourceBuilder.trackTotalHits(true);
TermsAggregationBuilder aggregationBuilder = AggregationBuilders.terms("group_mark")
.field("repeatMark")
.order(BucketOrder.count(false))
.size(1);
searchSourceBuilder.aggregation(aggregationBuilder);
searchSourceBuilder.query(boolQuery);
searchRequest.source(searchSourceBuilder);
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
Aggregations aggregations = response.getAggregations();
Terms groupSource = aggregations.get("group_mark");
List<? extends Terms.Bucket> buckets = groupSource.getBuckets();
if (org.apache.commons.collections4.CollectionUtils.isNotEmpty(buckets)) {
for (Terms.Bucket bucket : buckets) {
map.put(markmap.get(bucket.getKeyAsString()), (int) bucket.getDocCount());
}
}
} catch (Exception e) {
e.printStackTrace();
}
return map;
}
private Map<String, String> getMark(List<String> articleIdList) {
Map<String, String> map = new HashMap<>();
String mark = null;
SearchRequest searchRequest = new SearchRequest(Constants.ES_REPEAT_OLD);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
boolQuery.must(QueryBuilders.termsQuery("articleId", articleIdList));
searchSourceBuilder.query(boolQuery);
searchRequest.source(searchSourceBuilder);
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] hits = response.getHits().getHits();
if (hits != null && hits.length != 0) {
for (SearchHit hit : hits) {
String queryInfo = hit.getSourceAsString();
RepeatHold info = JSONUtil.toBean(queryInfo, RepeatHold.class);
map.put(info.getRepeatMark(), info.getArticleId());
}
}
} catch (Exception e) {
e.printStackTrace();
}
return map;
}
/**
* 根据标题获取相关推荐文章
*
* @param title
* @return
*/
public IPage<SpecialInformation> queryRecommendList(String title, Integer pageNo, Integer pageSize) throws IOException {
String[] indexs = EsIndexUtil.getIndexLatelyTwoYear(Constants.ES_DATA_FOR_SUBJECT);
SearchRequest searchRequest = new SearchRequest(indexs);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
//设置分页参数
searchSourceBuilder.size(pageSize);
searchSourceBuilder.from((pageNo - 1) * pageSize);
searchSourceBuilder.sort("score", SortOrder.DESC);
searchSourceBuilder.sort("publishDate", SortOrder.DESC);
//默认最大数量是10000,设置为true后,显示准确数量
searchSourceBuilder.trackTotalHits(true);
//创建查询对象
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
List<Integer> checkStatus = new ArrayList<>();
checkStatus.add(1);
boolQuery.must(QueryBuilders.termsQuery("checkStatus", checkStatus));
boolQuery.must(QueryBuilders.matchQuery("title", title));
boolQuery.mustNot(QueryBuilders.matchQuery("type", "video"));
searchSourceBuilder.query(boolQuery);
searchRequest.source(searchSourceBuilder);
searchSourceBuilder.collapse(new CollapseBuilder("sourceAddress.keyword"));
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] searchHits = searchResponse.getHits().getHits();
List<SpecialInformation> list = new ArrayList<>();
for (SearchHit hit : searchHits) {
String queryInfo = hit.getSourceAsString();
SpecialInformation info = JSON.parseObject(queryInfo, SpecialInformation.class);
info.setPublishDate(EsDateUtil.esFieldDateMapping(info.getPublishDate()));
list.add(info);
}
IPage<SpecialInformation> pageData = new Page<>(pageNo, pageSize, searchResponse.getHits().getTotalHits().value);
pageData.setRecords(list);
return pageData;
}
} }
...@@ -10,6 +10,7 @@ import com.zzsn.event.service.*; ...@@ -10,6 +10,7 @@ import com.zzsn.event.service.*;
import com.zzsn.event.util.DateUtil; import com.zzsn.event.util.DateUtil;
import com.zzsn.event.vo.PropagationPathVo; import com.zzsn.event.vo.PropagationPathVo;
import com.zzsn.event.vo.SubjectDataVo; import com.zzsn.event.vo.SubjectDataVo;
import io.netty.util.Constant;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils; import org.springframework.beans.BeanUtils;
...@@ -104,83 +105,57 @@ public class AnalysisServiceImpl implements AnalysisService { ...@@ -104,83 +105,57 @@ public class AnalysisServiceImpl implements AnalysisService {
@Override @Override
public PropagationPathVo propagationPath(String subjectId) { public PropagationPathVo propagationPath(String subjectId) {
PropagationPathVo top = null;
Event event = eventService.getById(subjectId); Event event = eventService.getById(subjectId);
String subjectName = event.getEventName(); String subjectName = event.getEventName();
List<PropagationPathVo> children = new ArrayList<>(); List<PropagationPathVo> children = new ArrayList<>();
//获取专题数据 //获取专题数据
List<SubjectDataVo> specialDataList = getSubjectData(event, "0", Constants.FETCH_FIELDS_STATISTIC,1); List<SubjectDataVo> specialDataList = getSubjectData(event, "0", Constants.FETCH_FIELDS_STATISTIC,1);
if (CollectionUtils.isNotEmpty(specialDataList)) { if (com.baomidou.mybatisplus.core.toolkit.CollectionUtils.isNotEmpty(specialDataList)) {
//用于来源去重
//最早发布的时间 List<String> allOriginList = new ArrayList<>();
String publishDate = specialDataList.get(0).getPublishDate(); top = new PropagationPathVo();
String earlyTime; top.setName(subjectName);
if (publishDate.length() > 10) { //获取发布时间最早的前10的资讯(来源不重复)
earlyTime = DateUtil.formatStr(publishDate, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd"); List<PropagationPathVo> secondList = new ArrayList<>();
} else { List<SubjectDataVo> earlyList = topN(specialDataList,6);
earlyTime = publishDate; earlyList.forEach(e->allOriginList.add(e.getOrigin()));
} for (SubjectDataVo subjectDataVo : earlyList) {
//获取最早发布的信息,若过多,取前一个 String origin = subjectDataVo.getOrigin();
List<SubjectDataVo> earlyList = specialDataList.stream() PropagationPathVo second = new PropagationPathVo();
.filter(subjectDataVo -> { second.setName(origin);
String date = subjectDataVo.getPublishDate(); secondList.add(second);
if (date.length() > 10) { List<String> thirdList = esService.groupByOrigin(subjectDataVo.getTitle(), subjectDataVo.getPublishDate());
date = DateUtil.formatStr(date, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd"); thirdList.removeAll(allOriginList);
} List<PropagationPathVo> lastList = new ArrayList<>();
return earlyTime.equals(date); if (thirdList.size() > 3) {
}) thirdList = thirdList.subList(0,3);
.collect(Collectors.toList());
if (earlyList.size() > 1) {
earlyList = earlyList.subList(0, 1);
}
Map<SubjectDataVo, List<SubjectDataVo>> map = new HashMap<>();
//获取跟最早发布的信息的重复信息
earlyList.forEach(e -> {
List<SubjectDataVo> subjectDataVoList = esService.dataById(subjectId, e.getId());
if (CollectionUtils.isNotEmpty(subjectDataVoList)) {
map.put(e, subjectDataVoList);
}
});
//若最早发布的信息没有重复数据,则随机补充10个重复数最多的信息来源
// 本不该有的逻辑,奈何数据采集、处理 不给力
if (map.isEmpty()) {
for (SubjectDataVo subjectDataVo : earlyList) {
PropagationPathVo vo = new PropagationPathVo();
String topOrigin = subjectDataVo.getOrigin();
vo.setName(topOrigin);
//
vo.setChildren(pathByRepeat(topOrigin, event));
children.add(vo);
} }
} else {//正常逻辑 for (String s : thirdList) {
Map<SubjectDataVo, LinkedHashMap<String, List<SubjectDataVo>>> dataMap = new HashMap<>(); PropagationPathVo third = new PropagationPathVo();
for (Map.Entry<SubjectDataVo, List<SubjectDataVo>> entry : map.entrySet()) { third.setName(s);
SubjectDataVo information = entry.getKey(); lastList.add(third);
List<SubjectDataVo> value = entry.getValue();
//按来源分组
Map<String, List<SubjectDataVo>> originMap = value.stream().filter(e -> StringUtils.isNotEmpty(e.getOrigin()))
.collect(Collectors.groupingBy(SubjectDataVo::getOrigin));
//按统一来源信息数量 倒序 并截取前10
LinkedHashMap<String, List<SubjectDataVo>> orderMap = new LinkedHashMap<>();
originMap.entrySet().stream()
.sorted((o1, o2) -> o2.getValue().size() - o1.getValue().size()).limit(10)
.collect(Collectors.toList()).forEach(info -> orderMap.put(info.getKey(), info.getValue()));
dataMap.put(information, orderMap);
} }
children = getPath(dataMap); second.setChildren(lastList);
allOriginList.addAll(thirdList);
} }
top.setChildren(secondList);
} }
PropagationPathVo pathVo = null; return top;
if (CollectionUtils.isNotEmpty(children)) { }
pathVo = new PropagationPathVo(); //获取发布时间最早的前N条资讯(来源不重复)
pathVo.setName(subjectName); private List<SubjectDataVo> topN(List<SubjectDataVo> list,Integer num){
if(null!=children.get(0)){ TreeSet<SubjectDataVo> subjectDataVos = new TreeSet<>(Comparator.comparing(SubjectDataVo::getOrigin));
pathVo.setChildren(children.get(0).getChildren()); for (SubjectDataVo subjectDataVo : list) {
subjectDataVos.add(subjectDataVo);
if (subjectDataVos.size() == num) {
break;
} }
} }
return pathVo; return new ArrayList<>(subjectDataVos);
} }
//
//
// @Override // @Override
// public Map<String, Object> statisticAnalysis(String subjectId) { // public Map<String, Object> statisticAnalysis(String subjectId) {
// Map<String, Object> map = new HashMap<>(); // Map<String, Object> map = new HashMap<>();
......
...@@ -42,6 +42,7 @@ import java.util.ArrayList; ...@@ -42,6 +42,7 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
/** /**
* 舆情信息统计 es查询工具类 * 舆情信息统计 es查询工具类
...@@ -449,6 +450,9 @@ public class EsStatisticsServiceImpl implements EsStatisticsService { ...@@ -449,6 +450,9 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
try { try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT); SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
SearchHit[] hits = response.getHits().getHits(); SearchHit[] hits = response.getHits().getHits();
if(hits.length==0){
return " There is nothing about this event in repeathold ";
}
SearchHit hit = hits[0]; SearchHit hit = hits[0];
title = hit.getSourceAsMap().get("title").toString(); title = hit.getSourceAsMap().get("title").toString();
} catch (Exception e) { } catch (Exception e) {
...@@ -533,6 +537,7 @@ public class EsStatisticsServiceImpl implements EsStatisticsService { ...@@ -533,6 +537,7 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
if (StringUtils.isNotEmpty(relationId)) { if (StringUtils.isNotEmpty(relationId)) {
param.setSearchLabelIds(relationId); param.setSearchLabelIds(relationId);
} }
param.setSubjectId(subjectId);
if (StringUtils.isNotBlank(startTime)) { if (StringUtils.isNotBlank(startTime)) {
param.setStartTime(startTime); param.setStartTime(startTime);
} }
...@@ -552,6 +557,7 @@ public class EsStatisticsServiceImpl implements EsStatisticsService { ...@@ -552,6 +557,7 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
return dataSearchService.packageSearQuery(param); return dataSearchService.packageSearQuery(param);
} }
// /** // /**
// * 获取一级地域信息 // * 获取一级地域信息
// * // *
...@@ -621,4 +627,24 @@ public class EsStatisticsServiceImpl implements EsStatisticsService { ...@@ -621,4 +627,24 @@ public class EsStatisticsServiceImpl implements EsStatisticsService {
return countVO; return countVO;
} }
@Override
public long totalCount(List<String> eventIdList, String startTime, String endTime) {
Map<String, Object> map = new HashMap<>();
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
String eventIds = eventIdList.stream().collect(Collectors.joining(","));
SearchSourceBuilder searchSourceBuilder = formatSourceBuilder(eventIds, null, startTime, endTime, null);
searchSourceBuilder.size(0);
searchSourceBuilder.trackTotalHits(true);
searchRequest.source(searchSourceBuilder);
try {
SearchResponse response = client.search(searchRequest, RequestOptions.DEFAULT);
long value = response.getHits().getTotalHits().value;
return value;
} catch (Exception e) {
e.printStackTrace();
}
return 0;
}
} }
package com.zzsn.event.service.impl; package com.zzsn.event.service.impl;
import com.alibaba.fastjson.JSON;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.core.toolkit.Wrappers;
...@@ -13,11 +14,23 @@ import com.zzsn.event.mapper.EventMapper; ...@@ -13,11 +14,23 @@ import com.zzsn.event.mapper.EventMapper;
import com.zzsn.event.producer.ProduceInfo; import com.zzsn.event.producer.ProduceInfo;
import com.zzsn.event.service.*; import com.zzsn.event.service.*;
import com.zzsn.event.util.*; import com.zzsn.event.util.*;
import com.zzsn.event.vo.AddEventParam; import com.zzsn.event.vo.*;
import com.zzsn.event.vo.KeyWordsDTO;
import com.zzsn.event.vo.KeyWordsPage;
import com.zzsn.event.vo.SubjectKafkaVo;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.ParsedTopHits;
import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.BeanUtils; import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
...@@ -66,6 +79,11 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements ...@@ -66,6 +79,11 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
private RedisUtil redisUtil; private RedisUtil redisUtil;
@Value("${files.storage}") @Value("${files.storage}")
String filesStorage; String filesStorage;
@Autowired
private RestHighLevelClient client;
@Autowired
private EsOpUtil esOpUtil;
@Value("${scoreRule.weekScore}") @Value("${scoreRule.weekScore}")
Integer weekScore; Integer weekScore;
...@@ -98,9 +116,7 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements ...@@ -98,9 +116,7 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
event.setWechatHot(wechatHot); event.setWechatHot(wechatHot);
event.setOtherHot(otherHot); event.setOtherHot(otherHot);
this.updateById(event); this.updateById(event);
} }
} }
...@@ -147,12 +163,14 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements ...@@ -147,12 +163,14 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
} }
@Override @Override
public IPage<Event> pageList(Event event, Integer pageNo, Integer pageSize) { public IPage<Event> pageList(Event event, Integer pageNo, Integer pageSize, String order, String orderType) {
Integer offset = (pageNo - 1) * pageSize; Integer offset = (pageNo - 1) * pageSize;
if (pageNo == 0) { if (pageNo == 0) {
offset = 0; offset = 0;
} }
List<Event> pageList = baseMapper.pageList(event, offset, pageSize); List<Event> pageList = baseMapper.pageList(event, offset, pageSize, order, orderType);
Map<String, Object> map = getFistMap(pageList);
//获取总条数 //获取总条数
Integer count = baseMapper.totalCount(event); Integer count = baseMapper.totalCount(event);
IPage<Event> pageData = new Page<>(pageNo, pageSize, count); IPage<Event> pageData = new Page<>(pageNo, pageSize, count);
...@@ -160,6 +178,45 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements ...@@ -160,6 +178,45 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
return pageData; return pageData;
} }
private Map<String, Object> getFistMap(List<Event> pageList) {
Map<String, Object> resultlist = new HashMap<>();
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
//根据设备查询设备的相关信息
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.from(0);
searchSourceBuilder.size(0);
searchSourceBuilder.query(boolQueryBuilder);
searchSourceBuilder.sort("publishDate", SortOrder.ASC);
// 聚合搜索
TermsAggregationBuilder one = AggregationBuilders.terms("one").field("subjectId.keyword");
// TermsAggregationBuilder two = AggregationBuilders.terms("two").field("media");
// TermsAggregationBuilder three = AggregationBuilders.terms("three").field("pubTime.keyword");
//ES分组取每组第一条Java写法
TopHitsAggregationBuilder topHitsAggregationBuilder = AggregationBuilders.topHits("top_docs").size(1);
one.subAggregation(topHitsAggregationBuilder);
searchSourceBuilder.aggregation(one);
SearchRequest searchRequest = new SearchRequest(Constants.ES_DATA_FOR_SUBJECT);
searchRequest.source(searchSourceBuilder);
// 查询ES
SearchResponse searchResponse = null;
try {
searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
} catch (IOException e) {
e.printStackTrace();
}
ParsedStringTerms oneTerms = searchResponse.getAggregations().get("one");
List<? extends Terms.Bucket> onebuckets = oneTerms.getBuckets();
for (Terms.Bucket threebucket : onebuckets) {
ParsedTopHits topDetail = threebucket.getAggregations().get("top_docs");
SearchHit[] hits = topDetail.getHits().getHits();
Map<String, Object> latestDocument = hits[0].getSourceAsMap();
resultlist.put(latestDocument.get("subjectId").toString(), latestDocument);
}
return resultlist;
}
@Override @Override
public void extractHotWords(AddEventParam event) { public void extractHotWords(AddEventParam event) {
if (!StringUtils.isEmpty(event.getExtractHotWords()) && "1".equals(event.getExtractHotWords())) { if (!StringUtils.isEmpty(event.getExtractHotWords()) && "1".equals(event.getExtractHotWords())) {
...@@ -256,6 +313,33 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements ...@@ -256,6 +313,33 @@ public class EventServiceImpl extends ServiceImpl<EventMapper, Event> implements
return baseMapper.listByType(labelTypeId); return baseMapper.listByType(labelTypeId);
} }
@Override
public DisplayInfo articleDetail(String articleId) {
Map<String, Object> map = esOpUtil.searchDoc(Constants.SUBJECT_INDEX, articleId);
DisplayInfo displayInfo = new DisplayInfo();
if (map != null) {
displayInfo = JSON.parseObject(JSON.toJSONString(map), DisplayInfo.class);
}
return displayInfo;
}
@Override
public Result<?> hotWords(String id, Integer number) {
Map<String, Object> map = esOpUtil.searchDoc(Constants.ES_DATA_FOR_SUBJECT, id);
String content = map.get("content").toString();
List<Map.Entry<String, Integer>> keywordsList = HanlpUtil.extractKeyWordsByText(content, number);
List<StatisticsKeyWordVo> rn = new ArrayList<>();
if (CollectionUtils.isNotEmpty(keywordsList)) {
for (Map.Entry<String, Integer> entry : keywordsList) {
StatisticsKeyWordVo statisticsKeyWordVo = new StatisticsKeyWordVo();
statisticsKeyWordVo.setName(entry.getKey());
statisticsKeyWordVo.setValue(entry.getValue());
rn.add(statisticsKeyWordVo);
}
}
return Result.OK(rn);
}
//生成文件夹路径 //生成文件夹路径
private String getFilePath() { private String getFilePath() {
LocalDate currentDate = LocalDate.now(); LocalDate currentDate = LocalDate.now();
......
package com.zzsn.event.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.zzsn.event.entity.SysBaseDomesticRegion;
import com.zzsn.event.mapper.SysBaseDomesticRegionMapper;
import com.zzsn.event.service.ISysBaseDomesticRegionService;
import org.springframework.stereotype.Service;
/**
* @Description: 国内地域表
* @Author: jeecg-boot
* @Date: 2022-03-09
* @Version: V1.0
*/
@Service
public class SysBaseDomesticRegionServiceImpl extends ServiceImpl<SysBaseDomesticRegionMapper, SysBaseDomesticRegion> implements ISysBaseDomesticRegionService {
    // Marker implementation: all CRUD behavior is inherited from MyBatis-Plus ServiceImpl.
}
package com.zzsn.event.task; package com.zzsn.event.task;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson2.JSON; import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONWriter; import com.alibaba.fastjson2.JSONWriter;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
...@@ -61,7 +62,7 @@ public class AnalysisTask { ...@@ -61,7 +62,7 @@ public class AnalysisTask {
* 按天发送kafka 获取进行中(未结束)的事件专题列表 * 按天发送kafka 获取进行中(未结束)的事件专题列表
*/ */
// @Scheduled(cron = "0 0 0 * * ?") // @Scheduled(cron = "0 0 0 * * ?")
@Scheduled(cron = "0 * * * * ?") // @Scheduled(cron = "0 * * * * ?")
public void subjectList() { public void subjectList() {
List<SubjectKafkaVo> data = new ArrayList<>(); List<SubjectKafkaVo> data = new ArrayList<>();
Date today = new Date(); Date today = new Date();
...@@ -79,22 +80,27 @@ public class AnalysisTask { ...@@ -79,22 +80,27 @@ public class AnalysisTask {
analysisTime = e.getTimeEnable(); analysisTime = e.getTimeEnable();
} }
Integer betweenTwoDate = DateUtil.betweenTwoDate(analysisTime, today); Integer betweenTwoDate = DateUtil.betweenTwoDate(analysisTime, today);
if (increCount.compareTo(e.getIncreAnaRule()) > 0 try {
|| totalCount.compareTo(e.getTotalAnaRule()) > 0 if (increCount.compareTo(e.getIncreAnaRule()) > 0
|| betweenTwoDate.compareTo(e.getTimeAnaRule()) > 0) { || totalCount.compareTo(e.getTotalAnaRule()) > 0
e.setIndexName(Constants.SUBJECT_INDEX); || betweenTwoDate.compareTo(e.getTimeAnaRule()) > 0) {
String query = queryString.replace("subject_id", subjectId); e.setIndexName(Constants.SUBJECT_INDEX);
e.setQuery(query); String query = queryString.replace("subject_id", subjectId);
String beforeQuery = beforeQueryString.replace("subject_id", subjectId).replace("start_time", startTime).replace("end_time", endTime); e.setQuery(query);
e.setBeforeQuery(beforeQuery); String beforeQuery = beforeQueryString.replace("subject_id", subjectId).replace("start_time", startTime).replace("end_time", endTime);
String repeatNumQuery = repeatNumQueryString.replace("subject_id", subjectId); e.setBeforeQuery(beforeQuery);
e.setRepeatNumQuery(repeatNumQuery); String repeatNumQuery = repeatNumQueryString.replace("subject_id", subjectId);
e.setAnalysisTime(today); e.setRepeatNumQuery(repeatNumQuery);
data.add(e); e.setAnalysisTime(today);
data.add(e);
}
}catch (Exception exception){
log.error("事件{},报错信息:{}",e,exception.getMessage());
} }
}); });
if (CollectionUtils.isNotEmpty(data)) { if (CollectionUtils.isNotEmpty(data)) {
kafkaTemplate.send(Constants.EVENT_ANALYSIS_TOPIC, JSON.toJSONString(data)); kafkaTemplate.send(Constants.EVENT_VIEWPOINT_SEND_DATA, JSON.toJSONString(data));
} }
log.info("进行中(未结束)的事件专题列表数据{}发送成功!",data); log.info("进行中(未结束)的事件专题列表数据{}发送成功!",data);
} }
...@@ -104,7 +110,7 @@ public class AnalysisTask { ...@@ -104,7 +110,7 @@ public class AnalysisTask {
* 按天发送 事件脉络 所需信息到kafka对应的topic * 按天发送 事件脉络 所需信息到kafka对应的topic
* 保证信息采集的及时性,审核人员审核的及时性 * 保证信息采集的及时性,审核人员审核的及时性
*/ */
@Scheduled(cron = "0 5 0 * * ?") // @Scheduled(cron = "0 5 0 * * ?")
// @Scheduled(cron = "0 * * * * ?") // @Scheduled(cron = "0 * * * * ?")
public void eventContext() { public void eventContext() {
Date today = new Date(); Date today = new Date();
...@@ -141,7 +147,7 @@ public class AnalysisTask { ...@@ -141,7 +147,7 @@ public class AnalysisTask {
* 每天凌晨0点20分执行一次 * 每天凌晨0点20分执行一次
* 发送 伪事件脉络 所需信息到kafka对应的topic * 发送 伪事件脉络 所需信息到kafka对应的topic
*/ */
@Scheduled(cron = "0 20 0 * * ?") // @Scheduled(cron = "0 20 0 * * ?")
// @Scheduled(cron = "0 * * * * ?") // @Scheduled(cron = "0 * * * * ?")
public void eventContext_fake() { public void eventContext_fake() {
Date today = new Date(); Date today = new Date();
...@@ -191,7 +197,7 @@ public class AnalysisTask { ...@@ -191,7 +197,7 @@ public class AnalysisTask {
* 定时生成传播路径 * 定时生成传播路径
* 每天凌晨0点10分执行一次 * 每天凌晨0点10分执行一次
*/ */
@Scheduled(cron = "0 0 0 * * ?") // @Scheduled(cron = "0 0 0 * * ?")
// @Scheduled(cron = "0 * * * * ?") // @Scheduled(cron = "0 * * * * ?")
public void propagationPath() { public void propagationPath() {
Date today = new Date(); Date today = new Date();
...@@ -209,6 +215,7 @@ public class AnalysisTask { ...@@ -209,6 +215,7 @@ public class AnalysisTask {
if (ObjectUtils.isNotEmpty(pathVo)) { if (ObjectUtils.isNotEmpty(pathVo)) {
redisUtil.set(key, pathVo); redisUtil.set(key, pathVo);
} }
log.info("专题id为-{}-的专题-传播路径数据:{}-缓存成功!", subjectId, JSONObject.toJSON(pathVo));
} }
} else {//已经结束的事件专题,缓存有效期一天 } else {//已经结束的事件专题,缓存有效期一天
PropagationPathVo pathVo = analysisService.propagationPath(subjectId); PropagationPathVo pathVo = analysisService.propagationPath(subjectId);
......
package com.zzsn.event.util;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.utility.SentencesUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
 * Named-entity extraction helpers built on HanLP: organisations, persons and places.
 * The repeated per-label if/else chains of the original are collapsed into shared
 * helpers driven by label sets; results are trimmed and de-duplicated preserving
 * first-seen order.
 */
public class CompanyUtil {

    /** Perceptron analyzer used for compound-word NER; stays null if model loading fails. */
    private static PerceptronLexicalAnalyzer analyzer = null;

    // HanLP part-of-speech labels for organisations, persons and places.
    private static final Set<String> ORG_LABELS =
            new HashSet<>(Arrays.asList("nt", "ntc", "ntcf", "nto", "ntch", "nth"));
    private static final Set<String> PERSON_LABELS =
            new HashSet<>(Arrays.asList("nr", "nrj", "nr2", "nrf"));
    private static final Set<String> PLACE_LABELS =
            new HashSet<>(Arrays.asList("ns", "nsf"));

    static {
        try {
            analyzer = new PerceptronLexicalAnalyzer(HanLP.Config.PerceptronCWSModelPath,
                    HanLP.Config.PerceptronPOSModelPath,
                    HanLP.Config.PerceptronNERModelPath);
        } catch (IOException e) {
            // Model files missing/unreadable: analyzer stays null and the analyzer-based
            // methods will fail; this utility has no logger, so stderr is the only channel.
            e.printStackTrace();
        }
    }

    /**
     * Recognize organisation names (companies, institutions, ...) in the given text.
     *
     * @param content free text to analyse
     * @return de-duplicated, trimmed organisation names in order of first appearance
     */
    public static List<String> entityRecognize(String content) {
        return recognizeByAnalyzer(content, ORG_LABELS);
    }

    /**
     * Recognize person names in the given text using the perceptron analyzer.
     *
     * @param content free text to analyse
     * @return de-duplicated, trimmed person names in order of first appearance
     */
    public static List<String> entityPersonRecognize(String content) {
        return recognizeByAnalyzer(content, PERSON_LABELS);
    }

    /**
     * Recognize person names using the basic HanLP segmenter (legacy variant).
     *
     * @param content free text to analyse
     * @return de-duplicated, trimmed person names in order of first appearance
     */
    public static List<String> entityPersonRecognizeOld(String content) {
        return recognizeBySegment(content, PERSON_LABELS);
    }

    /**
     * Recognize place names using the basic HanLP segmenter.
     *
     * @param content free text to analyse
     * @return de-duplicated, trimmed place names in order of first appearance
     */
    public static List<String> entityAdrssRecognize(String content) {
        return recognizeBySegment(content, PLACE_LABELS);
    }

    /**
     * Recognize places and persons in one call (organisation extraction is
     * intentionally disabled, matching the original behaviour).
     *
     * @param content free text to analyse
     * @return place names followed by person names
     */
    public static List<String> entityAll(String content) {
        List<String> result = new ArrayList<>(entityAdrssRecognize(content));
        result.addAll(entityPersonRecognizeOld(content));
        return result;
    }

    /** Shared extraction path for the perceptron analyzer (IWord covers compound words too). */
    private static List<String> recognizeByAnalyzer(String content, Set<String> wantedLabels) {
        List<String> hits = new ArrayList<>();
        for (String sentence : SentencesUtil.toSentenceList(content)) {
            Sentence analyzed = analyzer.analyze(sentence);
            for (IWord word : analyzed) {
                // getLabel()/getValue() are declared on IWord, so no CompoundWord cast is needed.
                if (wantedLabels.contains(word.getLabel())) {
                    hits.add(word.getValue());
                }
            }
        }
        return dedupeTrimmed(hits);
    }

    /** Shared extraction path for HanLP.segment-based recognition. */
    private static List<String> recognizeBySegment(String content, Set<String> wantedNatures) {
        if (null != content && content.trim().length() > 0) {
            // "丨" breaks segmentation; replace it with a space first.
            content = content.replaceAll("丨", " ");
        }
        List<String> hits = new ArrayList<>();
        for (String sentence : SentencesUtil.toSentenceList(content)) {
            for (Term term : HanLP.segment(sentence)) {
                if (term.nature.equals(Nature.w)) {
                    continue; // skip punctuation
                }
                if (wantedNatures.contains(term.nature.toString())) {
                    hits.add(term.word);
                }
            }
        }
        return dedupeTrimmed(hits);
    }

    /** Trim, drop null/empty entries and de-duplicate, preserving first-seen order. */
    private static List<String> dedupeTrimmed(List<String> raw) {
        Set<String> unique = new LinkedHashSet<>();
        for (String candidate : raw) {
            if (candidate != null && !candidate.isEmpty()) {
                unique.add(candidate.trim());
            }
        }
        return new ArrayList<>(unique);
    }

    public static void main(String args[]) {
        String aa = "三星从越南撤离返回中国";
        for (String org : CompanyUtil.entityRecognize(aa)) {
            System.out.println(org);
        }
    }
}
package com.zzsn.event.util;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.ObjectUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.DocWriteResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.MainResponse;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.DeleteByQueryRequest;
import org.elasticsearch.index.reindex.UpdateByQueryRequest;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
/**
* Es操作相关工具栏
*
* @author kongliufeng
* @create 2020-08-03 16:48
*/
@Slf4j
@Component
public class EsOpUtil<T> {
/**
 * Expose the underlying high-level REST client for callers that need raw access.
 *
 * @return the injected {@link RestHighLevelClient}
 */
public RestHighLevelClient getClient() {
    return client;
}
@Autowired
RestHighLevelClient client;
/**
 * Always returns {@code false}.
 * NOTE(review): method name is a typo of "isClient"; kept unchanged because renaming
 * would break existing callers.
 *
 * @return {@code false}
 */
public Boolean isClinet() {
    return false;
}
/**
 * Fetch basic cluster and node information from Elasticsearch.
 *
 * @return a map containing cluster name/uuid, node name and version details,
 *         or {@code null} when the request fails
 */
public Map<String, Object> getEsInfo() {
    try {
        Map<String, Object> map = new HashMap<>(16);
        // Query the cluster "root" endpoint for identity and version metadata.
        MainResponse response = client.info(RequestOptions.DEFAULT);
        map.put("clusterName", response.getClusterName());
        map.put("clusterUuid", response.getClusterUuid());
        map.put("nodeName", response.getNodeName());
        MainResponse.Version version = response.getVersion();
        map.put("version", version);
        map.put("buildDate", version.getBuildDate());
        map.put("buildFlavor", version.getBuildFlavor());
        map.put("buildHash", version.getBuildHash());
        map.put("buildType", version.getBuildType());
        map.put("luceneVersion", version.getLuceneVersion());
        map.put("minimumIndexCompatibilityVersion", version.getMinimumIndexCompatibilityVersion());
        map.put("minimumWireCompatibilityVersion", version.getMinimumWireCompatibilityVersion());
        map.put("number", version.getNumber());
        return map;
    } catch (IOException e) {
        // Log instead of printStackTrace; callers must handle the null return.
        log.error("failed to fetch ES cluster info", e);
    }
    return null;
}
/**
 * Get the low-level REST client backing the high-level client, for requests
 * the high-level API does not cover.
 *
 * @return the low-level {@link RestClient}
 */
public RestClient getLowLevelClient() {
    return client.getLowLevelClient();
}
/**
 * Create an index if it does not already exist.
 *
 * @param index index name
 * @return true when the creation was acknowledged; false when the index already
 *         exists or the request failed
 */
public boolean indexCreate(String index) {
    try {
        if (!indexExist(index)) {
            CreateIndexRequest request = new CreateIndexRequest(index);
            CreateIndexResponse createIndexResponse = client.indices().create(request, RequestOptions.DEFAULT);
            log.info(createIndexResponse.isAcknowledged() ? "创建索引[{}]成功" : "创建索引[{}]失败", index);
            return createIndexResponse.isAcknowledged();
        }
    } catch (Exception e) {
        // Log with the exception attached instead of printStackTrace.
        log.error("create index [{}] failed", index, e);
    }
    return false;
}
/**
 * Check whether all of the given indices exist.
 *
 * @param indices index names to check
 * @return true when the indices exist
 */
@SneakyThrows
public boolean indexExist(String... indices) {
    return client.indices().exists(new GetIndexRequest(indices), RequestOptions.DEFAULT);
}
/**
 * Delete the given index.
 *
 * @param index index name
 * @return true when the deletion was acknowledged
 */
@SneakyThrows
public boolean deleteIndex(String index) {
    AcknowledgedResponse response =
            client.indices().delete(new DeleteIndexRequest(index), RequestOptions.DEFAULT);
    return response.isAcknowledged();
}
/**
 * Check whether a document with the given id exists, without fetching its body.
 *
 * @param index index name
 * @param id    document id
 * @return true when the document exists
 */
@SneakyThrows
public boolean docExists(String index, String id) {
    GetRequest request = new GetRequest(index, id);
    // Skip _source extraction and stored fields — only existence matters here.
    request.fetchSourceContext(new FetchSourceContext(false));
    request.storedFields("_none_");
    return client.exists(request, RequestOptions.DEFAULT);
}
/**
 * Check whether NO document with the given sourceAddress exists in the index.
 *
 * @param url   source address to look for
 * @param index index name
 * @return true when the URL is absent; false when present or when the query fails
 */
public Boolean isNotExistUrl(String url, String index) {
    try {
        SearchRequest searchRequest = new SearchRequest(index);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        searchSourceBuilder.query(QueryBuilders.termsQuery("sourceAddress", url));
        // Only the hit count matters; don't transfer the document bodies.
        searchSourceBuilder.fetchSource(false);
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        long total = searchResponse.getHits().getTotalHits().value;
        if (total > 1) {
            log.warn("url为[{}]在es库中存在[{}]条数据", url, total);
        }
        return total == 0;
    } catch (Exception e) {
        // One handler replaces the original duplicated IOException/Exception catches.
        log.error("isNotExistUrl query failed, url=[{}], index=[{}]", url, index, e);
        return false;
    }
}
/**
 * Bulk-save entities synchronously. Each entity must declare an "id" field,
 * whose value becomes the ES document _id; entities without one are skipped.
 *
 * @param index target index
 * @param list  entities to save
 */
public void docSaveBulk(String index, List<T> list) {
    BulkRequest request = new BulkRequest();
    request.timeout(TimeValue.timeValueMinutes(10));
    for (T item : list) {
        String id;
        try {
            // Read the entity's "id" field reflectively — it is the ES _id.
            Field field = item.getClass().getDeclaredField("id");
            field.setAccessible(true);
            id = (String) field.get(item);
        } catch (NoSuchFieldException e) {
            log.info("实体没有id字段");
            continue;
        } catch (IllegalAccessException e) {
            log.info("无权限访问id字段");
            continue;
        }
        request.add(new IndexRequest(index).id(id).source(
                JSON.toJSONString(item), XContentType.JSON));
    }
    try {
        BulkResponse bulk = client.bulk(request, RequestOptions.DEFAULT);
        for (BulkItemResponse response : bulk.getItems()) {
            if (response.isFailed()) {
                log.info("批量保存[{}]过程中,id为[{}]的保存失败,失败原因[{}]", response.getIndex(), response.getId(), response.getFailureMessage());
            } else {
                log.info("批量保存[{}]过程中,id为[{}]的保存成功,状态[{}],version[{}]", response.getIndex(), response.getId(), response.status(), response.getVersion());
            }
        }
    } catch (IOException e) {
        // Keep the warn message but attach the exception instead of printStackTrace.
        log.warn("批量[{}]保存失败", index, e);
    }
}
/**
 * Bulk-save entities asynchronously. Each entity must declare an "id" field,
 * whose value becomes the ES document _id; entities without one are skipped.
 * Per-item results are logged from the completion callback.
 *
 * @param index target index
 * @param list  entities to save
 */
public void docSavaBulkAsync(String index, List<T> list) {
    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout(TimeValue.timeValueMinutes(10));
    for (T entity : list) {
        String docId;
        try {
            Field idField = entity.getClass().getDeclaredField("id");
            idField.setAccessible(true);
            docId = (String) idField.get(entity);
        } catch (NoSuchFieldException e) {
            log.info("实体没有id字段");
            continue;
        } catch (IllegalAccessException e) {
            log.info("无权限访问id字段");
            continue;
        }
        bulkRequest.add(new IndexRequest(index).id(docId).source(
                JSON.toJSONString(entity), XContentType.JSON));
    }
    client.bulkAsync(bulkRequest, RequestOptions.DEFAULT, new ActionListener<BulkResponse>() {
        @Override
        public void onResponse(BulkResponse bulkItemResponses) {
            for (BulkItemResponse response : bulkItemResponses.getItems()) {
                if (response.isFailed()) { // per-item failure
                    log.info("批量保存[{}]过程中,id为[{}]的保存失败,失败原因[{}]", response.getIndex(), response.getId(), response.getFailureMessage());
                } else { // per-item success
                    log.info("批量保存[{}]过程中,id为[{}]的保存成功,状态[{}],version[{}]", response.getIndex(), response.getId(), response.status(), response.getVersion());
                }
            }
        }

        @Override
        public void onFailure(Exception e) {
            log.warn("批量[{}]保存失败,失败原因[{}]", index, e.getMessage());
        }
    });
}
/**
 * Save an entity as a document; null fields are serialized explicitly
 * (SerializerFeature.WriteMapNullValue).
 *
 * @param index  index name
 * @param id     document id
 * @param object entity to serialize
 * @return the saved document id (or the index name on failure — see docSaveByJson)
 */
public String docSavaByEntity(String index, String id, Object object) {
    return docSaveByJson(index, id, JSON.toJSONString(object, SerializerFeature.WriteMapNullValue));
}
/** Async variant of docSavaByEntity: serialize (including null fields) and save asynchronously. */
public void docSavaByEntityAsync(String index, String id, Object object) {
    docSaveByJsonAsync(index, id, JSON.toJSONString(object, SerializerFeature.WriteMapNullValue));
}
/**
 * Save a JSON string as a document.
 *
 * @param index   index name
 * @param id      document id
 * @param jsonStr document body as JSON
 * @return the saved document id; NOTE(review): on failure this historically returns
 *         the index name (not an id) — behaviour kept because callers may rely on a
 *         non-null return
 */
public String docSaveByJson(String index, String id, String jsonStr) {
    try {
        IndexRequest request = new IndexRequest(index)
                .id(id)
                .source(jsonStr, XContentType.JSON);
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        return indexResponse.getId();
    } catch (IOException e) {
        // Attach the exception so the failure cause is not lost.
        log.warn("同步保存doc失败, _index=[{}], _id=[{}]", index, id, e);
    }
    return index;
}
/**
 * Create a document asynchronously; on async failure, fall back to one
 * synchronous attempt before giving up.
 *
 * @param index   index name
 * @param id      document id
 * @param jsonStr document body as JSON
 */
public void docSaveByJsonAsync(String index, String id, String jsonStr) {
    IndexRequest request = new IndexRequest(index);
    request.id(id);
    request.source(jsonStr, XContentType.JSON);
    client.indexAsync(request, RequestOptions.DEFAULT, new ActionListener<IndexResponse>() {
        @Override
        public void onResponse(IndexResponse indexResponse) {
            log.info("异步保存doc, _index=[{}], _id=[{}]成功, _version=[{}], _result=[{}]", index, indexResponse.getId(), indexResponse.getVersion(), indexResponse.getResult());
        }

        @Override
        public void onFailure(Exception e) {
            // Attach the exception to the log instead of printStackTrace.
            log.warn("异步保存失败,尝试同步方式保存doc, ex=[{}]", e.getMessage(), e);
            try {
                // Synchronous fallback attempt with the same request.
                IndexResponse response = client.index(request, RequestOptions.DEFAULT);
                DocWriteResponse.Result result = response.getResult();
                if (!(result == DocWriteResponse.Result.UPDATED || result == DocWriteResponse.Result.CREATED)) {
                    log.warn("同步保存doc失败,_index=[{}], _id=[{}], _body=[{}]", index, id, jsonStr);
                }
            } catch (IOException io) {
                log.warn("同步保存doc失败,_index=[{}], _id=[{}], _body=[{}]", index, id, jsonStr, io);
            }
        }
    });
}
/**
 * Save a map as a document.
 *
 * @param index index name
 * @param id    document id
 * @param map   document body
 * @return the saved document id; NOTE(review): returns the index name on failure
 *         (pre-existing quirk, kept for callers)
 */
public String docSaveByMap(String index, String id, Map<String, Object> map) {
    try {
        IndexRequest request = new IndexRequest(index).id(id)
                .source(map);
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        return indexResponse.getId();
    } catch (IOException e) {
        // Log instead of printStackTrace so the failure is visible in application logs.
        log.error("docSaveByMap failed, _index=[{}], _id=[{}]", index, id, e);
    }
    return index;
}
/**
 * Save a fastjson JSONObject as a document.
 *
 * @param index  index name
 * @param id     document id
 * @param object document body
 * @return the saved document id, or {@code null} on failure
 */
public String docSaveByJsonObject(String index, String id, JSONObject object) {
    IndexRequest request = new IndexRequest(index);
    request.id(id);
    try {
        request.source(JSON.toJSONString(object), XContentType.JSON);
        IndexResponse indexResponse = client.index(request, RequestOptions.DEFAULT);
        return indexResponse.getId();
    } catch (Exception e) {
        // Log instead of printStackTrace; caller must handle the null return.
        log.error("docSaveByJsonObject failed, _index=[{}], _id=[{}]", index, id, e);
    }
    return null;
}
/**
 * Delete a document by id.
 *
 * @param index index name
 * @param id    document id
 * @return true when ES answered 200 OK; false otherwise or on failure
 */
public Boolean docDeleteById(String index, String id) {
    try {
        DeleteRequest deleteRequest = new DeleteRequest(index, id);
        DeleteResponse delete = client.delete(deleteRequest, RequestOptions.DEFAULT);
        if (delete.status() == RestStatus.OK) {
            log.info("DELETE /{}/_doc/{}/\r\n", index, id);
            return true;
        }
    } catch (IOException e) {
        // Log instead of printStackTrace so failures appear in application logs.
        log.error("docDeleteById failed, _index=[{}], _id=[{}]", index, id, e);
    }
    return false;
}
/** Asynchronously delete a document by id, logging the outcome from the callback. */
public void docDeleteByIdAsync(String index, String id) {
    DeleteRequest request = new DeleteRequest(index, id);
    try {
        client.deleteAsync(request, RequestOptions.DEFAULT, new ActionListener<DeleteResponse>() {
            @Override
            public void onResponse(DeleteResponse deleteResponse) {
                log.info("删除doc成功, _index=[{}], _id=[{}]", index, deleteResponse.getId());
            }

            @Override
            public void onFailure(Exception e) {
                // Attach the exception instead of printStackTrace.
                log.warn("删除doc失败, _index=[{}], _id=[{}]", index, id, e);
            }
        });
    } catch (Exception e) {
        log.error("docDeleteByIdAsync failed, _index=[{}], _id=[{}]", index, id, e);
    }
}
/**
 * Delete every document matching the query, refreshing the index afterwards.
 *
 * @param index index name
 * @param query selection query
 * @return number of deleted documents, or {@code null} on I/O failure
 */
public Long docDeleteByQuery(final String index, QueryBuilder query) {
    try {
        DeleteByQueryRequest request = new DeleteByQueryRequest(index);
        request.setQuery(query).setRefresh(true);
        BulkByScrollResponse bulkByScrollResponse = client.deleteByQuery(request, RequestOptions.DEFAULT);
        return bulkByScrollResponse.getDeleted();
    } catch (IOException e) {
        // Log instead of printStackTrace; callers must handle the null return.
        log.error("docDeleteByQuery failed, _index=[{}]", index, e);
        return null;
    }
}
/**
 * Partially update a document and refresh immediately so the change is visible
 * to the next search.
 *
 * @param index   index name
 * @param id      document id
 * @param jsonStr partial document as JSON
 * @return true when the update returned 200 OK
 */
public boolean docUpdateById(String index, String id, String jsonStr) {
    UpdateRequest request = new UpdateRequest(index, id);
    // IMMEDIATE already means refresh=true; the original's extra
    // setRefreshPolicy("true") call was redundant and has been removed.
    request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
    request.doc(jsonStr, XContentType.JSON);
    try {
        UpdateResponse response = client.update(request, RequestOptions.DEFAULT);
        return response.status() == RestStatus.OK;
    } catch (IOException e) {
        // Attach the exception so the failure cause is preserved in the log.
        log.warn("更新doc失败, _index=[{}], _id=[{}],_jsonStr=[{}]", index, id, jsonStr, e);
    }
    return false;
}
/**
 * Check whether an article is already associated with the subject
 * (guards against duplicate imports).
 *
 * @param index     index name
 * @param id        article id
 * @param subjectId subject id
 * @return true when a matching document already exists; false otherwise or on failure
 */
public Boolean isExistSubjectAndArticle(String index, String id, String subjectId) {
    try {
        SearchRequest searchRequest = new SearchRequest(index);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery()
                .must(QueryBuilders.matchQuery("subjectId", subjectId))
                .must(QueryBuilders.matchQuery("id", id));
        searchSourceBuilder.query(boolQueryBuilder);
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        long total = searchResponse.getHits().getTotalHits().value;
        if (total > 0) {
            log.info("isExistSubjectAndArticle[index:{}][id:{}][subject:{}]重复,库中已存在", index, id, subjectId);
            return true;
        }
    } catch (IOException e) {
        // The original log line had four placeholders but only three arguments;
        // the exception message is now supplied as the fourth.
        log.info("isExistSubjectAndArticle[index:{}][id:{}][subject:{}]发生异常,异常信息为[{}]", index, id, subjectId, e.getMessage());
        return false;
    }
    return false;
}
/**
 * Search an index with an optional query and paging window.
 *
 * @param index        index name
 * @param start        from-offset (applied only when both start and size are >= 0)
 * @param size         page size
 * @param queryBuilder optional query; null means match-all
 * @return map with "total" (hit count) and "data" (list of _source maps, each with
 *         an added "score" entry), or {@code null} on I/O failure
 */
public Map<String, Object> searchByQuery(String index, int start, int size, QueryBuilder queryBuilder) {
    try {
        SearchRequest searchRequest = new SearchRequest(index);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        if (queryBuilder != null) {
            searchSourceBuilder.query(queryBuilder);
        }
        if (start >= 0 && size >= 0) {
            searchSourceBuilder.from(start);
            searchSourceBuilder.size(size);
        }
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        SearchHits hits = searchResponse.getHits();
        Map<String, Object> resultMap = new HashMap<>();
        resultMap.put("total", hits.getTotalHits().value);
        SearchHit[] searchHits = hits.getHits();
        List<Map<String, Object>> mapList = new ArrayList<>(searchHits.length);
        for (SearchHit hit : searchHits) {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            // Attach the relevance score alongside the stored fields.
            sourceAsMap.put("score", hit.getScore());
            mapList.add(sourceAsMap);
        }
        resultMap.put("data", mapList);
        return resultMap;
    } catch (IOException e) {
        // Log instead of printStackTrace; callers must handle the null return.
        log.error("searchByQuery failed, _index=[{}]", index, e);
        return null;
    }
}
/**
 * NOTE(review): despite its name, this returns {@code true} when NO document with the
 * given sourceAddress exists (i.e. the address is new); it also returns true when the
 * query fails. Behaviour kept — callers depend on the inverted semantics.
 *
 * @param index         index name
 * @param sourceAddress exact source address (matched on sourceAddress.keyword)
 * @return true when absent or on failure; false when at least one document exists
 */
public boolean existBySourceAddress(String index, String sourceAddress) {
    SearchRequest searchRequest = new SearchRequest(index);
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
    boolQuery.must(QueryBuilders.termQuery("sourceAddress.keyword", sourceAddress));
    searchSourceBuilder.query(boolQuery);
    searchRequest.source(searchSourceBuilder);
    long count = 0;
    try {
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        count = searchResponse.getHits().getTotalHits().value;
    } catch (IOException e) {
        // Log instead of printStackTrace; a failed query leaves count at 0 (returns true).
        log.error("existBySourceAddress failed, _index=[{}]", index, e);
    }
    return count == 0;
}
/**
 * NOTE(review): despite its name, this returns {@code true} when NO document with the
 * given title exists; it also returns true when the query fails. Behaviour kept —
 * callers depend on the inverted semantics.
 *
 * @param index index name
 * @param title exact title (matched on title.keyword)
 * @return true when absent or on failure; false when at least one document exists
 */
public boolean existByTitle(String index, String title) {
    SearchRequest searchRequest = new SearchRequest(index);
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
    boolQuery.must(QueryBuilders.termQuery("title.keyword", title));
    searchSourceBuilder.query(boolQuery);
    searchRequest.source(searchSourceBuilder);
    long count = 0;
    try {
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        count = searchResponse.getHits().getTotalHits().value;
    } catch (IOException e) {
        // Log instead of printStackTrace; a failed query leaves count at 0 (returns true).
        log.error("existByTitle failed, _index=[{}]", index, e);
    }
    return count == 0;
}
/**
 * Update one field on every document matching the query.
 * The new value is passed as a painless script parameter instead of being concatenated
 * as a quoted literal, so values containing quotes no longer break (or inject into)
 * the script. The field name is still inlined and must come from trusted code.
 *
 * @param index            index to update
 * @param boolQuery        selection query
 * @param modifyColum      field name to set
 * @param modifyColumValue new field value
 * @throws IOException when the update-by-query request fails
 */
public void updataBatchByQuery(String index, BoolQueryBuilder boolQuery, String modifyColum, String modifyColumValue) throws IOException, InterruptedException {
    UpdateByQueryRequest request = new UpdateByQueryRequest(index);
    request.setQuery(boolQuery);
    Map<String, Object> params = new HashMap<>();
    params.put("value", modifyColumValue);
    request.setScript(new Script(ScriptType.INLINE, "painless",
            "ctx._source." + modifyColum + " = params.value", params));
    BulkByScrollResponse response = client.updateByQuery(request, RequestOptions.DEFAULT);
    long updated = response.getUpdated();
    log.info("更新条数{}", updated);
}
/**
 * Batch-update several fields on every document matching the query
 * (update-by-query with an inline painless script).
 *
 * @param index            target index
 * @param boolQuery        filter selecting the documents to update
 * @param modifyColumValue field name -> new value
 * @throws IOException on transport failure
 */
public void updataMoreColumBatchByQuery(String index, BoolQueryBuilder boolQuery, Map<String,String> modifyColumValue) throws IOException, InterruptedException {
    UpdateByQueryRequest request = new UpdateByQueryRequest(index);
    request.setQuery(boolQuery);
    // Fix: build "ctx._source.col = params.pN;" with a params map instead of
    // splicing raw values into the script source (quote-breaking /
    // script-injection risk, and defeats script compilation caching).
    StringBuilder source = new StringBuilder();
    java.util.Map<String, Object> params = new java.util.HashMap<>();
    int i = 0;
    for (Map.Entry<String, String> entry : modifyColumValue.entrySet()) {
        String paramName = "p" + i++;
        source.append("ctx._source.").append(entry.getKey())
              .append(" = params.").append(paramName).append(';');
        params.put(paramName, entry.getValue());
    }
    request.setScript(new Script(ScriptType.INLINE, "painless", source.toString(), params));
    BulkByScrollResponse response = client.updateByQuery(request, RequestOptions.DEFAULT);
    long updated = response.getUpdated();
    log.info("更新条数{}", updated);
}
/**
 * Render one painless assignment per entry, e.g. "ctx._source.col = 'v';",
 * concatenated into a single script source string.
 */
private String getIdOrCode(Map<String,String> columValue){
    StringBuilder script = new StringBuilder();
    for (Map.Entry<String, String> entry : columValue.entrySet()) {
        script.append("ctx._source.").append(entry.getKey())
              .append(" = '").append(entry.getValue()).append("';");
    }
    return script.toString();
}
/**
 * Return the largest value of {@code colum} among documents matching
 * {@code boolQuery}, implemented as a terms aggregation of size 1 ordered by
 * key descending. "Largest" follows the field's term ordering, so for
 * keyword fields this is lexicographic.
 *
 * @param index     index name
 * @param boolQuery optional filter (may be null/empty)
 * @param colum     field to aggregate on
 * @return the max key as a string, or null when there are no buckets or the
 *         search fails
 */
public String getColumMax(String index, BoolQueryBuilder boolQuery, String colum) {
    // size(0): we only need the aggregation, not the hits themselves
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0);
    if (ObjectUtil.isNotEmpty(boolQuery)) {
        sourceBuilder.query(boolQuery);
    }
    sourceBuilder.aggregation(AggregationBuilders.terms("groupByColum")
            .field(colum)
            .size(1)
            .order(BucketOrder.key(false)));
    SearchRequest request = new SearchRequest(index).source(sourceBuilder);
    try {
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        Terms terms = response.getAggregations().get("groupByColum");
        List<? extends Terms.Bucket> buckets = terms.getBuckets();
        if (CollectionUtil.isNotEmpty(buckets)) {
            return buckets.get(0).getKeyAsString();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Partial-update a single document by id.
 *
 * @param index target index
 * @param id    document id
 * @param args  field name -> new value; a null/empty map is a no-op
 * @throws IOException on transport failure
 */
public void updateByid(String index, String id, Map<String,String> args) throws IOException {
    if (CollectionUtil.isEmpty(args)) {
        return;
    }
    // 执行更新请求
    UpdateResponse response = client.update(createUpdateRequest(index, id, args), RequestOptions.DEFAULT);
    // Fix: the original had empty branches for UPDATED/NOOP and a
    // debug-garbage failure message; only real failures are logged now,
    // including the actual result for diagnosis.
    DocWriteResponse.Result result = response.getResult();
    if (result != DocWriteResponse.Result.UPDATED && result != DocWriteResponse.Result.NOOP) {
        log.info("文档{}更新失败,结果:{}", id, result);
    }
}
/**
 * Update several documents in one bulk call.
 *
 * @param index target index
 * @param batch document id -> (field name -> new value); empty map is a no-op
 * @throws IOException on transport failure
 */
public void bulkUpdateDocuments(String index, Map<String , Map<String,String> > batch) throws IOException {
    if (CollectionUtil.isEmpty(batch)) {
        return;
    }
    BulkRequest bulkRequest = new BulkRequest();
    // 添加批量更新的请求
    batch.forEach((id, args) -> {
        try {
            bulkRequest.add(createUpdateRequest(index, id, args));
        } catch (IOException e) {
            // keep best-effort semantics: skip this document, continue the batch
            log.info("添加更新请求异常,id:{}", id, e);
        }
    });
    // Fix: executing a bulk request with zero actions throws — possible when
    // every createUpdateRequest call above failed.
    if (bulkRequest.numberOfActions() == 0) {
        log.info("批量更新失败{}", batch);
        return;
    }
    // 执行批量更新请求
    BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
    // 处理批量更新的响应结果
    if (bulkResponse.hasFailures()) {
        log.info("批量更新失败{}", batch);
    } else {
        log.info("批量更新成功{}", batch);
    }
}
/**
 * Build an UpdateRequest whose partial document carries the given
 * field/value pairs.
 *
 * @param index      target index
 * @param documentId document to update
 * @param args       field name -> new value
 * @throws IOException when the JSON content cannot be built
 */
private UpdateRequest createUpdateRequest(String index, String documentId, Map<String,String> args) throws IOException {
    XContentBuilder content = XContentFactory.jsonBuilder();
    content.startObject();
    args.forEach((fieldName, fieldValue) -> {
        try {
            content.field(fieldName, fieldValue);
        } catch (IOException e) {
            // forEach lambdas cannot throw checked exceptions; preserve the
            // original RuntimeException wrapping
            throw new RuntimeException(e);
        }
    });
    content.endObject();
    return new UpdateRequest(index, documentId).doc(content);
}
/**
 * 根据id查询各类资讯详情 — fetch a single entity by its "id" field.
 *
 * @return the parsed entity, or null when no hit exists or the search fails
 */
public T getInfoByid(String index, String id, Class<T> entry) {
    try {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .query(QueryBuilders.termQuery("id", id));
        SearchRequest request = new SearchRequest(index).source(sourceBuilder);
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        SearchHit[] hits = response.getHits().getHits();
        return hits.length > 0 ? JSON.parseObject(hits[0].getSourceAsString(), entry) : null;
    } catch (IOException e) {
        log.info("查询异常{}", e.getMessage(), e);
        return null;
    }
}
/**
 * 通用查询 — run the given query and map every hit's source JSON onto
 * {@code entry}.
 *
 * @return matching entities; empty list when nothing matches or the search fails
 */
public List<T> queryList(String index, QueryBuilder queryBuilder, Class<T> entry) {
    List<T> results = new ArrayList<>();
    try {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().query(queryBuilder);
        SearchRequest request = new SearchRequest(index).source(sourceBuilder);
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        // 处理搜索结果
        for (SearchHit hit : response.getHits().getHits()) {
            results.add(JSON.parseObject(hit.getSourceAsString(), entry));
        }
    } catch (IOException e) {
        log.info("查询异常{}", e.getMessage(), e);
    }
    return results;
}
/**
 * 通用分页查询 — generic paged query, newest first.
 * NOTE(review): the sort field "publishDate" is hard-coded, so every index
 * queried through here must carry that field.
 *
 * @param pageNo 1-based page number
 * @return page with exact total (trackTotalHits lifts the default 10k cap);
 *         empty page on search failure
 */
public Page<T> queryPage(String index, QueryBuilder queryBuilder, Class<T> entry, Integer pageNo, Integer pageSize) {
    List<T> records = new ArrayList<>();
    Page<T> pageData = new Page<>(pageNo, pageSize);
    try {
        //设置分页参数; 默认最大数量是10000,设置为true后,显示准确数量
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
                .size(pageSize)
                .from((pageNo - 1) * pageSize)
                .trackTotalHits(true)
                .sort(SortBuilders.fieldSort("publishDate").order(SortOrder.DESC))
                .query(queryBuilder);
        SearchRequest request = new SearchRequest(index).source(sourceBuilder);
        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        // 处理搜索结果
        for (SearchHit hit : response.getHits().getHits()) {
            records.add(JSON.parseObject(hit.getSourceAsString(), entry));
        }
        pageData.setTotal(response.getHits().getTotalHits().value);
        pageData.setRecords(records);
    } catch (IOException e) {
        log.info("查询异常{}", e.getMessage(), e);
    }
    return pageData;
}
/**
 * 根据index,id索引文件 — fetch one document's source by index and id.
 *
 * @param index 索引名称
 * @param id    资讯id
 * @return the source as a map, or null when the lookup fails
 */
public Map<String, Object> searchDoc(String index, String id) {
    try {
        GetResponse response = client.get(new GetRequest(index, id), RequestOptions.DEFAULT);
        return response.getSourceAsMap();
    } catch (IOException e) {
        log.warn("查询doc异常,index=[{}],id=[{}], ex=[{}]", index, id, e.getMessage());
        return null;
    }
}
}
package com.zzsn.event.util;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.core.io.ClassPathResource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
/**
* @author 闫鑫
* @date 2022/6/21 12:01
*/
@Slf4j
public class FileUtil {
/**
* @Description: 读取resources 目录下的文件
* @param filePath 相对路径
* @Return: 文件实际地址
**/
public static String getFilePath(String filePath){
return Thread.currentThread().getContextClassLoader().getResource(filePath).getPath();
}
/**
* 读取classpath路径下的文本文件,返回list
* @param filePath
* @return
* @throws IOException
*/
public static List<String> readFileList(String filePath){
List<String> list = new ArrayList<>();
InputStream inputStream = null;
InputStreamReader inputStreamReader = null;
BufferedReader bufferedReader = null;
try {
// ClassPathResource类的构造方法接收路径名称,自动去classpath路径下找文件
ClassPathResource classPathResource = new ClassPathResource(filePath);
// 打成jar包后,只能获取输入流对象
inputStream = classPathResource.getInputStream();
inputStreamReader = new InputStreamReader(inputStream, "UTF-8");
bufferedReader = new BufferedReader(inputStreamReader);
String line = null;
while ((line = bufferedReader.readLine()) != null) {
if(oConvertUtils.isNotEmpty(line)){
list.add(line);
}
}
}catch (IOException e){
log.error("读取文件失败,filePath:{},e:{}",filePath,e);
}finally {
// 统一在finally中关闭流,防止发生异常的情况下,文件流未能正常关闭
try {
if (inputStream != null) {
inputStream.close();
}
if (inputStreamReader != null) {
inputStreamReader.close();
}
if (bufferedReader != null) {
bufferedReader.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
public static InputStream DownloadFileFormServer(String strUrl) {
InputStream inputStream = null;
HttpURLConnection conn = null;
try {
URL url = new URL(strUrl);
conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("GET");
conn.setConnectTimeout(20 * 1000);
// 模板文件名
// 模板文件路径
inputStream = conn.getInputStream();
} catch (Exception e) {
} finally {
}
return inputStream;
}
/**
* 获取resource下的文件内容
*
* @param path
* @return
*/
public static String getResource(String path) throws IOException {
if (StringUtils.isBlank(path)) {
return null;
}
ClassPathResource classPathResource = new ClassPathResource(path);
StringBuilder stringBuilder = new StringBuilder();
InputStream inputStream =null;
try {
inputStream = classPathResource.getInputStream();
byte[] bytes = new byte[1024];
int lenth;
while ((lenth = inputStream.read(bytes)) != -1) {
stringBuilder.append(new String(bytes, 0, lenth));
}
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException("读取文件错误");
}finally {
inputStream.close();
}
return stringBuilder.toString();
}
public static String getFileNameFormPath(String fileName){
if(fileName == null || StringUtils.isEmpty(fileName)){
return null;
}
fileName = fileName.trim();
fileName = fileName.replace("\\", "/");
while(fileName.endsWith("/")){
fileName = fileName.substring(0, fileName.length() -1);
}
if(fileName.contains("/")){
fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
}
return fileName;
}
/**
* 根据文件的字节大小计算显示大小
*
* @param size
* @return
*/
public static String readableFileSize(long size) {
if (size <= 0) return "0";
final String[] units = new String[]{"B", "KB", "MB", "GB", "TB"};
int digitGroups = (int) (Math.log10(size) / Math.log10(1024));
return new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + units[digitGroups];
}
}
package com.zzsn.event.util;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.mining.word.TfIdfCounter;
import com.hankcs.hanlp.seg.common.Term;
import org.springframework.util.StringUtils;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * HanLP-based keyword extraction utilities: phrase-extraction, TextRank and
 * TF-IDF variants for single texts and collections of texts.
 *
 * @author kongliufeng
 * @create 2020-08-31 18:50
 * @Version 1.0
 */
public class HanlpUtil {

    /** Matches any run of CJK unified ideographs; compiled once (hot path). */
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]+");

    /**
     * 单文本关键词 — single-text keywords via HanLP phrase extraction
     * (extractPhrase), stop words removed, ranked by in-text frequency.
     *
     * @param text    source text
     * @param limitNo maximum number of keywords returned
     * @return ranked (word, frequency) entries, or null when the text is
     *         empty or no phrase could be extracted
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByPhrase(String text, int limitNo) {
        if (StringUtils.isEmpty(text)) {
            return null;
        }
        // extract at most 3 candidate phrases per 100 characters
        List<String> phrases = HanLP.extractPhrase(text, text.length() * 3 / 100);
        if (phrases == null || phrases.isEmpty()) {
            return null;
        }
        //移除停用词
        StopWordsUtil.removeStopWords(phrases);
        return topByFrequency(phrases, text, limitNo);
    }

    /**
     * Like {@link #getKeywordListByPhrase(String, int)} but removes the words
     * in {@code blackList} instead of the built-in stop-word list.
     *
     * @param text      文本
     * @param limitNo   提取个数
     * @param blackList 黑名单
     * @author yanxin
     */
    public static List<Map.Entry<String, Integer>> getKeywordListExclude(String text, int limitNo, List<String> blackList) {
        if (StringUtils.isEmpty(text)) {
            return null;
        }
        List<String> phrases = HanLP.extractPhrase(text, text.length() * 3 / 100);
        if (phrases == null || phrases.isEmpty()) {
            return null;
        }
        phrases.removeIf(blackList::contains);
        return topByFrequency(phrases, text, limitNo);
    }

    /**
     * 多文本关键词 — multi-text keywords via phrase extraction; per-text
     * frequencies are summed before ranking.
     * NOTE(review): preserved behavior — a single empty text aborts the whole
     * batch and returns null.
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByPhrase(Collection<String> texts, int limitNo) {
        Map<String, Integer> totals = new HashMap<>();
        for (String text : texts) {
            if (StringUtils.isEmpty(text)) {
                return null;
            }
            List<String> phrases = HanLP.extractPhrase(text, text.length() * 3 / 100);
            if (phrases != null && !phrases.isEmpty()) {
                StopWordsUtil.removeStopWords(phrases);
                mergeCounts(totals, StringUtil.getHitWordsAndTimes(phrases, text));
            }
        }
        return limit(SortUtil.sortMap(totals), limitNo);
    }

    /**
     * Multi-text phrase keywords with one additional excluded word.
     * NOTE(review): preserved behavior — a single empty text returns null.
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByPhraseExclude(Collection<String> texts, int limitNo, String excludeWord) {
        Map<String, Integer> totals = new HashMap<>();
        for (String text : texts) {
            if (StringUtils.isEmpty(text)) {
                return null;
            }
            List<String> phrases = HanLP.extractPhrase(text, text.length() * 3 / 100);
            if (phrases != null && !phrases.isEmpty()) {
                StopWordsUtil.removeStopWords(phrases, excludeWord);
                mergeCounts(totals, StringUtil.getHitWordsAndTimes(phrases, text));
            }
        }
        return limit(SortUtil.sortMap(totals), limitNo);
    }

    /**
     * 单文本抽取关键词 — single-text keywords via HanLP TextRank
     * (extractKeyword), stop words removed, ranked by in-text frequency.
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByTextRank(String text, int limitNo) {
        if (StringUtils.isEmpty(text)) {
            return null;
        }
        List<String> words = HanLP.extractKeyword(text, text.length() * 3 / 100);
        if (words == null || words.isEmpty()) {
            return null;
        }
        StopWordsUtil.removeStopWords(words);
        return topByFrequency(words, text, limitNo);
    }

    /**
     * 多文本抽取关键词 — multi-text keywords via TextRank; per-text
     * frequencies are summed before ranking.
     * NOTE(review): preserved behavior — a single empty text returns null.
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByTextRank(Collection<String> texts, int limitNo) {
        Map<String, Integer> totals = new HashMap<>();
        for (String text : texts) {
            if (StringUtils.isEmpty(text)) {
                return null;
            }
            List<String> words = HanLP.extractKeyword(text, text.length() * 3 / 100);
            if (words != null && !words.isEmpty()) {
                StopWordsUtil.removeStopWords(words);
                mergeCounts(totals, StringUtil.getHitWordsAndTimes(words, text));
            }
        }
        return limit(SortUtil.sortMap(totals), limitNo);
    }

    /**
     * Single-text keywords ranked by TF-IDF term frequency, skipping stop words.
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByTFIDF(String text, int limitNo) {
        TfIdfCounter tfIdfCounter = new TfIdfCounter();
        tfIdfCounter.add(text);
        tfIdfCounter.compute();
        return firstNonStopWords(tfIdfCounter.sortedAllTfInt(), limitNo);
    }

    /**
     * Multi-text keywords ranked by TF-IDF term frequency, skipping stop words.
     */
    public static List<Map.Entry<String, Integer>> getKeywordListByTFIDF(Collection<String> texts, int limitNo) {
        TfIdfCounter tfIdfCounter = new TfIdfCounter();
        for (String text : texts) {
            tfIdfCounter.add(text);
        }
        tfIdfCounter.compute();
        return firstNonStopWords(tfIdfCounter.sortedAllTfInt(), limitNo);
    }

    /**
     * 单文本抽取关键词 — hot-word extraction with entity and part-of-speech
     * filtering: keeps Chinese words of length >= 3 that are not
     * person/place/organization names and whose (single-term) part of speech
     * is a noun ("n*") or "gi".
     *
     * @param text    文本
     * @param limitNo 热词数
     * @author lkg
     * @date 2024/1/9
     */
    public static List<Map.Entry<String, Integer>> extractKeyWordsByText(String text, int limitNo) {
        if (StringUtils.isEmpty(text)) {
            return null;
        }
        //每100字最多提取30个词
        List<String> candidates = HanLP.extractKeyword(text, text.length() * 30 / 100);
        if (candidates == null) {
            return null;
        }
        Iterator<String> iterator = candidates.iterator();
        while (iterator.hasNext()) {
            String word = iterator.next();
            //排除长度小于3的 和非中文关键词
            if (word.length() < 3 || !isChinese(word)) {
                iterator.remove();
                continue;
            }
            //如果是人名、地名、组织名 则去除
            List<String> entities = CompanyUtil.entityAll(word);
            if (entities.size() > 0) {
                iterator.remove();
                continue;
            }
            //判断词性,仅保留词性为gi和词性中包含n的词
            List<Term> terms = HanLP.segment(word);
            if (terms != null && terms.size() == 1 && terms.get(0).nature != null) {
                String nature = terms.get(0).nature.toString();
                if (nature == null || (!nature.contains("n") && !nature.equals("gi"))) {
                    iterator.remove();
                }
            }
        }
        //去重
        List<String> distinct = candidates.stream().distinct().collect(Collectors.toList());
        return topByFrequency(distinct, text, limitNo);
    }

    /*
     * 判断是否为中文
     */
    public static boolean isChinese(String str) {
        if (oConvertUtils.isEmpty(str)) {
            return false;
        }
        return CHINESE_PATTERN.matcher(str).find();
    }

    /**
     * 单文本抽取关键词 — hot-word extraction with black/white lists:
     * black-listed words are dropped, white-listed words are always counted.
     * Two-character words must have part of speech "nz"; longer words must be
     * nouns ("n*") or "gi".
     *
     * @param text    文本
     * @param limitNo 热词数
     * @author lkg
     * @date 2024/1/9
     */
    public static List<Map.Entry<String, Integer>> extractKeyWordsByText(String text, int limitNo, List<String> blackList, List<String> whiteList) {
        if (StringUtils.isEmpty(text)) {
            return null;
        }
        //每100字最多提取30个词
        List<String> candidates = HanLP.extractKeyword(text, text.length() * 30 / 100);
        if (candidates == null) {
            return null;
        }
        Iterator<String> iterator = candidates.iterator();
        while (iterator.hasNext()) {
            String word = iterator.next();
            //移除停用词
            if (blackList.contains(word)) {
                iterator.remove();
                continue;
            }
            //排除长度小于2的 和非中文关键词
            if (word.length() < 2 || !isChinese(word)) {
                iterator.remove();
                continue;
            }
            //如果是人名、地名、组织名 则去除
            List<String> entities = CompanyUtil.entityAll(word);
            if (entities != null && entities.size() > 0) {
                iterator.remove();
                continue;
            }
            //判断词性,词长度>2时,仅保留词性为gi和词性中包含n的词;词长度=2时,只保留nz属性的
            List<Term> terms = HanLP.segment(word);
            if (terms != null && terms.size() == 1 && terms.get(0).nature != null) {
                String nature = terms.get(0).nature.toString();
                if (word.length() == 2) {
                    if (nature == null || !nature.equals("nz")) {
                        iterator.remove();
                        continue;
                    }
                } else {
                    if (nature == null || (!nature.contains("n") && !nature.equals("gi"))) {
                        iterator.remove();
                        continue;
                    }
                }
            }
        }
        //合并白名单词
        candidates.addAll(whiteList);
        //去重
        List<String> distinct = candidates.stream().distinct().collect(Collectors.toList());
        return topByFrequency(distinct, text, limitNo);
    }

    /** Sum the counts of {@code delta} into {@code totals}, key by key. */
    private static void mergeCounts(Map<String, Integer> totals, Map<String, Integer> delta) {
        for (Map.Entry<String, Integer> entry : delta.entrySet()) {
            totals.merge(entry.getKey(), entry.getValue(), Integer::sum);
        }
    }

    /** Count each word's occurrences in {@code text}, rank by frequency, cap at {@code limitNo}. */
    private static List<Map.Entry<String, Integer>> topByFrequency(Collection<String> words, String text, int limitNo) {
        Map<String, Integer> counts = StringUtil.getHitWordsAndTimes(words, text);
        //根据频次排序
        return limit(SortUtil.sortMap(counts), limitNo);
    }

    /** First {@code limitNo} entries (the whole list when it is shorter). */
    private static List<Map.Entry<String, Integer>> limit(List<Map.Entry<String, Integer>> list, int limitNo) {
        return limitNo > list.size() ? list : list.subList(0, limitNo);
    }

    /** Entries whose key is not a stop word, up to {@code limitNo}. */
    private static List<Map.Entry<String, Integer>> firstNonStopWords(List<Map.Entry<String, Integer>> entries, int limitNo) {
        List<Map.Entry<String, Integer>> list = new ArrayList<>(limitNo);
        for (Map.Entry<String, Integer> entry : entries) {
            if (!StopWordsUtil.isStopWord(entry.getKey())) {
                list.add(entry);
            }
            if (list.size() == limitNo) {
                break;
            }
        }
        return list;
    }
}
package com.zzsn.event.util;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
/**
 * 自定义排序 — sorting helpers for keyword-frequency entries and
 * date-string comparison.
 *
 * @author kongliufeng
 * @create 2020-09-03 19:11
 * @Version 1.0
 */
public class SortUtil {
    public static final DateTimeFormatter dtf2 = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Sort map entries by value, descending.
     *
     * @param map word -> frequency (may be null)
     * @return entries sorted by descending value; empty list for null input
     */
    public static List<Map.Entry<String, Integer>> sortMap(Map<String,Integer> map){
        if (map == null) {
            return new ArrayList<>();
        }
        List<Map.Entry<String, Integer>> list = new ArrayList<>(map.entrySet());
        // Integer.compare avoids the overflow risk of "o2 - o1" subtraction.
        list.sort((o1, o2) -> Integer.compare(o2.getValue(), o1.getValue()));
        return list;
    }

    /**
     * Compare two date strings ("yyyy-MM-dd" or "yyyy-MM-dd HH:mm:ss";
     * date-only values are padded with " 00:00:00").
     *
     * @return 0 when equal, 1 when s1 is before s2, -1 otherwise
     *         (including parse failure)
     */
    public static int dateLg(String s1, String s2){
        if (s1.equals(s2)) {
            return 0;
        }
        if (s1.length() == 10) {
            s1 = s1 + " 00:00:00";
        }
        if (s2.length() == 10) {
            s2 = s2 + " 00:00:00";
        }
        try {
            LocalDateTime localDateTime1 = LocalDateTime.parse(s1, dtf2);
            LocalDateTime localDateTime2 = LocalDateTime.parse(s2, dtf2);
            // Fix: the original set temp=0 for equal datetimes but then
            // unconditionally overwrote it with the isBefore() result, so
            // "2020-10-22" vs "2020-10-22 00:00:00" wrongly returned -1.
            if (localDateTime1.isEqual(localDateTime2)) {
                return 0;
            }
            return localDateTime1.isBefore(localDateTime2) ? 1 : -1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    public static void main(String[] args) {
        String s1 = "2020-10-22";
        String s2 = "2020-10-23";
        System.out.println(s1.length());
        System.out.println(dateLg(s1, s2));
    }
}
package com.zzsn.event.util;
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import java.util.Collection;
import java.util.Iterator;
import java.util.TreeMap;
/**
 * Stop-word lookup backed by a double-array trie loaded once from
 * hanlp/cloudstopwords.txt on the classpath.
 *
 * @author kongliufeng
 * @create 2020-09-03 16:30
 * @Version 1.0
 */
public class StopWordsUtil {
    private static final String path = "hanlp/cloudstopwords.txt";
    private static DoubleArrayTrie<String> tree;
    static {
        // build the trie once at class-load time; DoubleArrayTrie needs a
        // sorted map, hence TreeMap
        TreeMap<String, String> map = new TreeMap<>();
        for (String word : FileUtil.readFileList(path)) {
            map.put(word, word);
        }
        tree = new DoubleArrayTrie<>(map);
    }

    /**
     * 判断一个词是否为停用词 — true when {@code word} is a stop word.
     */
    public static Boolean isStopWord(String word){
        return tree.containsKey(word);
    }

    /**
     * 对已有的集合移除停用词 — remove stop words from {@code collection}, plus
     * the optional {@code excludeWord} (null tolerated for both arguments).
     */
    public static void removeStopWords(Collection<String> collection, String excludeWord){
        if (collection == null) {
            return;
        }
        Iterator<String> iterator = collection.iterator();
        while (iterator.hasNext()) {
            String next = iterator.next();
            if (excludeWord != null && excludeWord.equals(next)) {
                iterator.remove();
                // Fix: the original fell through to the stop-word check and
                // could call remove() twice for a word that is both the
                // excludeWord and a stop word -> IllegalStateException.
                continue;
            }
            if (tree.containsKey(next)) {
                iterator.remove();
            }
        }
    }

    /** Remove all stop words from {@code collection} (null tolerated). */
    public static void removeStopWords(Collection<String> collection){
        removeStopWords(collection, null);
    }
}
package com.zzsn.event.util;
import org.springframework.util.StringUtils;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class StringUtil {
/**
 * Map "0"/"1" to false/true; any other value — including null — yields the
 * fallback {@code b}.
 */
public static boolean convertBoolean(String s, boolean b) {
    if ("0".equals(s)) {
        return false;
    }
    if ("1".equals(s)) {
        return true;
    }
    return b;
}
/** Inverse of {@link #convertBoolean}: true -> "1", false -> "0". */
public static String convertBooleanToString(boolean b) {
    return b ? "1" : "0";
}
/**
 * Replace every whitespace character with a plain space (via
 * {@link #replaceBlank}) and trim the ends.
 */
public static String trimWhiteSpace(String str) {
    return replaceBlank(str).trim();
}
/**
 * Normalize whitespace: every whitespace character (tab, newline, CR, …)
 * becomes an ordinary space. Length is preserved.
 */
public static String replaceBlank(String str) {
    StringBuilder out = new StringBuilder(str.length());
    for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        out.append(Character.isWhitespace(c) ? ' ' : c);
    }
    return out.toString();
}
/**
 * 获取分隔符之间的子串 — extract every substring enclosed by the given
 * delimiters, e.g. getSubStrs("aa[abc]bbb", "\\[", "\\]") -> ["abc"].
 * NOTE: start/end are spliced into the regex verbatim, so callers must
 * pre-escape regex metacharacters.
 */
public static List<String> getSubStrs(String str, String start, String end) {
    List<String> results = new ArrayList<>();
    if (str == null || str.trim().isEmpty()) {
        return results;
    }
    Pattern pattern = Pattern.compile(String.format("%s([^%s%s]+)%s", start, start, end, end));
    Matcher matcher = pattern.matcher(str);
    while (matcher.find()) {
        results.add(matcher.group(1));
    }
    return results;
}
/**
 * Given a template {@code origStr} containing placeholder {@code replaceStr},
 * recover the text occupying the placeholder position in {@code fromStr}.
 * Example: replaceStr="[xxx]", origStr="aaa[xxx]bb", fromStr="aaa123bb" -> "123".
 *
 * @return the matched fragment, or null when fromStr does not fit the template
 */
public static String getHomologousWord(String replaceStr,
                                       String origStr, String fromStr) {
    int pos = origStr.indexOf(replaceStr);
    if (pos == -1) {
        return null;
    }
    String prefix = origStr.substring(0, pos);
    String suffix = origStr.substring(pos + replaceStr.length());
    if (!prefix.isEmpty() && !fromStr.startsWith(prefix)) {
        return null;
    }
    if (!suffix.isEmpty() && !fromStr.endsWith(suffix)) {
        return null;
    }
    return fromStr.substring(prefix.length(), fromStr.length() - suffix.length());
}
/**
 * Strip one or more balanced bracket groups from the beginning of the
 * string, e.g. "【tag】title" -> "title". Both full-width and half-width
 * bracket pairs are recognized; stripping stops at the first character that
 * is not an opening bracket, or at an unmatched opening bracket.
 */
public static String trimBeginningBracket(String s) {
String ret = s;
if (s.length() == 0) {
return s;
}
// opening bracket -> matching closing bracket
// NOTE(review): two visually identical put('[', ']') lines below — likely
// full-width vs ASCII variants; confirm the file encoding before touching.
Map<Character, Character> braketPeers
= new HashMap<Character, Character>();
braketPeers.put('【', '】');
braketPeers.put('[', ']');
braketPeers.put('[', ']');
braketPeers.put('(', ')');
braketPeers.put('(', ')');
braketPeers.put('〔', '〕');
String searchStr = s;
while (searchStr.length() > 0) {
char beginc = searchStr.charAt(0);
Character value = braketPeers.get(beginc);
if (value == null) {
// first char is not an opening bracket -> nothing more to strip
break;
}
// find the matching closing bracket for the leading group
int endPos = -1;
for (int i = 1; i < searchStr.length(); i++) {
if (searchStr.charAt(i) == value) {
endPos = i;
break;
}
}
if (endPos >= 0) {
// drop the bracket group and keep scanning the remainder
ret = searchStr.substring(endPos + 1);
searchStr = ret;
} else {
// unmatched opening bracket: stop, keep the rest as-is
break;
}
}
return ret;
}
/**
 * Remove bracketed groups occurring anywhere in the string, e.g.
 * "a【x】b" -> "ab"; any leftover closing brackets are stripped at the end.
 * Handles both full-width and half-width bracket pairs.
 * NOTE(review): the loop mutates searchStr while scanning — intricate;
 * behavior verified only by reading, kept byte-identical.
 */
public static String trimMiddleBracket(String s) {
String ret = s;
if (s.length() == 0) {
return s;
}
// opening bracket -> matching closing bracket; brakets lists the closing
// forms removed unconditionally at the end
Map<Character, Character> braketPeers = new HashMap<Character, Character>();
String[] brakets = {"】", "]", "]", ")", ")", "〕"};
braketPeers.put('【', '】');
braketPeers.put('[', ']');
braketPeers.put('[', ']');
braketPeers.put('(', ')');
braketPeers.put('(', ')');
braketPeers.put('〔', '〕');
String searchStr = s;
int index = 0;
while (searchStr.length() > 0) {
// find the next opening bracket from position index
int startPos = -1;
Character value = null;
for (int i = index; i < searchStr.length(); i++) {
boolean findLeftBraket = false;
value = searchStr.charAt(i);
for (Character key : braketPeers.keySet()) {
if (value.equals(key)) {
startPos = i;
findLeftBraket = true;
break;
}
}
if (findLeftBraket) {
break;
}
}
// find the matching closing bracket after it
int endPos = -1;
for (int i = startPos + 1; i < searchStr.length(); i++) {
if (null != braketPeers.get(value) && searchStr.charAt(i) == braketPeers.get(value)) {
endPos = i;
break;
}
}
if (endPos >= startPos) {
if (startPos >= 0) {
// cut the whole bracketed group out of the string
searchStr = searchStr.substring(0, startPos) + searchStr.substring(endPos + 1, searchStr.length());
}
} else {
// unmatched opening bracket: delete that character and resume scanning
searchStr = searchStr.replace(value.toString(), "");
index = startPos;
}
if (startPos < 0) {
// no more opening brackets
ret = searchStr;
break;
}
}
// finally drop any stray closing brackets
for (String bs : brakets) {
ret = ret.replace(bs.toString(), "");
}
return ret;
}
/**
 * Strip one or more balanced bracket groups from the end of the string,
 * e.g. "title(note)" -> "title". The mapping is closing bracket ->
 * matching opening bracket (both full-width and half-width forms).
 */
public static String trimEnddingBracket(String s) {
String ret = s;
if (s.length() == 0) {
return s;
}
// closing bracket -> matching opening bracket
Map<Character, Character> braketPeers
= new HashMap<Character, Character>();
braketPeers.put('】', '【');
braketPeers.put(']', '[');
braketPeers.put(')', '(');
braketPeers.put(')', '(');
braketPeers.put('〕', '〔');
int endPos = s.length() - 1;
String searchStr = s;
while (endPos >= 0) {
char endc = searchStr.charAt(endPos);
Character value = braketPeers.get(endc);
if (value == null) {
// last char is not a closing bracket -> nothing more to strip
break;
}
// search backwards for the matching opening bracket
int startPos = -1;
for (int i = searchStr.length() - 2; i >= 0; i--) {
if (searchStr.charAt(i) == value) {
startPos = i;
break;
}
}
if (startPos >= 0) {
// drop the trailing bracket group and keep scanning
ret = searchStr.substring(0, startPos);
searchStr = ret;
}
endPos = startPos - 1;
}
return ret;
}
/** Keep only CJK unified ideographs (U+4E00..U+9FA5); drop everything else. */
public static String delCharNotChinese(String s) {
    StringBuilder out = new StringBuilder();
    for (char c : s.toCharArray()) {
        if (isChinese(c)) {
            out.append(c);
        }
    }
    return out.toString();
}
/** @return true when {@code c} is a CJK unified ideograph (U+4E00..U+9FA5) */
public static boolean isChinese(char c) {
    return c >= 0x4e00 && c <= 0x9fa5;
}
/**
 * Convert full-width characters to their half-width equivalents; the
 * ideographic space (U+3000) becomes an ASCII space. Null/empty input is
 * returned unchanged.
 */
public static String toBanjiao(String s) {
    if (s == null || s.isEmpty()) {
        return s;
    }
    StringBuilder out = new StringBuilder(s.length());
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        if (c >= 65281 && c <= 65374) {
            // full-width ASCII variants are offset from ASCII by 65248
            c = (char) (c - 65248);
        } else if (c == 12288) { // 空格 (ideographic space)
            c = (char) 32;
        }
        out.append(c);
    }
    return out.toString();
}
/**
 * Joins the list elements into a single ';'-separated string.
 * A null list yields "". Note: like the previous manual implementation,
 * a null element is rendered as the text "null".
 *
 * @param arr elements to join, may be null
 * @return the elements separated by ';', or "" for null/empty input
 */
public static String listToString(List<String> arr) {
    if (arr == null) {
        return "";
    }
    // String.join replaces the manual StringBuffer loop with identical output.
    return String.join(";", arr);
}
/**
 * Splits a ';'-separated string into a list, discarding segments that are
 * empty or whitespace-only. Kept segments are added as-is (not trimmed).
 *
 * @param str the ';'-separated input, may be null
 * @return a mutable list of the non-blank segments; empty list for null input
 */
public static List<String> stringToList(String str) {
    List<String> result = new ArrayList<String>();
    if (str == null) {
        return result;
    }
    for (String part : str.split(";")) {
        // skip blank segments; keep original (untrimmed) text otherwise
        if (!part.trim().isEmpty()) {
            result.add(part);
        }
    }
    return result;
}
/**
 * Decodes a fixed set of HTML character entities into their plain-text
 * equivalents and normalizes the katakana middle dot to '·'.
 * <p>
 * Fixes applied: the former second {@code &hellip;} replacement (to "…")
 * was dead code — the earlier pass had already rewritten every occurrence
 * to "..." — so it is removed with no behavior change; literal-pattern
 * {@code replaceAll} calls are replaced by non-regex {@code replace}.
 *
 * @param s input text containing HTML entities, never null
 * @return the text with the supported entities decoded
 */
public static String normalizeHtmlTransf(String s) {
    String ret = s.replace("&bull;", "·");
    ret = ret.replace("&middot;", "·");
    ret = ret.replace("&nbsp;", " ");
    ret = ret.replace("&quot;", "\"");
    ret = ret.replace("&amp;", "&");
    ret = ret.replace('・', '·'); // katakana middle dot -> middle dot
    ret = ret.replace("&ldquo;", "\"");
    ret = ret.replace("&rdquo;", "\"");
    ret = ret.replace("&hellip;", "...");
    ret = ret.replace("&lt;", "<");
    ret = ret.replace("&gt;", ">");
    ret = ret.replace("&mdash;", "—");
    ret = ret.replace("&ndash;", "–");
    ret = ret.replace("&tilde;", "~");
    ret = ret.replace("&lsquo;", "'");
    ret = ret.replace("&rsquo;", "'");
    ret = ret.replace("&sbquo;", ",");
    ret = ret.replace("&lsaquo;", "‹");
    ret = ret.replace("&rsaquo;", "›");
    return ret;
}
/**
 * Normalizes text for segmentation: the literal sequence "\r\n;" becomes a
 * space, remaining '\n' characters are dropped, and '|' becomes a space.
 * NOTE(review): the pattern is the three-character literal "\r\n;", not a
 * character class — presumably intentional; confirm with callers.
 *
 * @param s input text, never null
 * @return the normalized text
 */
public static String normalizeSegTransf(String s) {
    // literal replace — the original regex pattern had no metacharacters
    String ret = s.replace("\r\n;", " ");
    ret = ret.replace("\n", "");
    return ret.replace("|", " ");
}
/**
 * Collects, for each word of {@code srcList} that occurs in {@code text},
 * its number of (non-overlapping) occurrences.
 *
 * @param srcList candidate words, may be null
 * @param text    text to search, may be null/empty
 * @return map of word -> occurrence count for hits only; empty map when
 *         {@code srcList} is null or {@code text} is empty
 */
public static Map<String, Integer> getHitWordsAndTimes(Collection<String> srcList, String text) {
    Map<String, Integer> hits = new HashMap<>();
    if (srcList == null || StringUtils.isEmpty(text)) {
        return hits;
    }
    for (String word : srcList) {
        int times = countKeyWordInContent(word, text);
        if (times > 0) {
            hits.put(word, times);
        }
    }
    return hits;
}
/**
 * Reports whether {@code keyWord} occurs as a contiguous substring of
 * {@code text}.
 * <p>
 * Fix: the former hand-rolled backtracking scan threw
 * {@code StringIndexOutOfBoundsException} for an empty keyword with a
 * non-empty text ({@code keyWord.charAt(0)} on ""); an empty keyword now
 * returns false, consistent with {@code countKeyWordInContent}'s blank
 * guard. For non-empty keywords the scan was exactly naive substring
 * search, so it is replaced by {@link String#contains}.
 *
 * @param keyWord word to look for, may be null or empty (-> false)
 * @param text    text to search, may be null (-> false)
 * @return true iff the keyword is a non-empty substring of the text
 */
public static Boolean isKeyWordInText(String keyWord, String text) {
    if (keyWord == null || text == null || keyWord.isEmpty()) {
        return false;
    }
    return text.contains(keyWord);
}
/**
 * Counts the non-overlapping occurrences of {@code keyword} in
 * {@code srcContent} (after a full match, scanning resumes just past it —
 * e.g. "aa" occurs twice in "aaaa").
 * <p>
 * Fix: a null {@code srcContent} with a non-blank keyword previously threw
 * NPE at {@code srcContent.length()}; it now returns 0. The hand-rolled
 * backtracking scan is replaced by an equivalent {@link String#indexOf}
 * loop.
 *
 * @param keyword    word to count; null/blank -> 0
 * @param srcContent text to search; null -> 0
 * @return number of non-overlapping occurrences
 */
public static int countKeyWordInContent(String keyword, String srcContent) {
    if (keyword == null || keyword.trim().equals("") || srcContent == null) {
        return 0;
    }
    int count = 0;
    int from = 0;
    int hit;
    while ((hit = srcContent.indexOf(keyword, from)) >= 0) {
        count++;
        from = hit + keyword.length(); // resume after the match: non-overlapping
    }
    return count;
}
/**
 * Matches {@code content} against a compose-word expression and returns the
 * words that matched, comma-separated with a trailing comma, or null if no
 * group is fully satisfied.
 * <p>
 * Expression grammar: groups are separated by ';' (first satisfied group
 * wins); within a group, '+'-separated terms must ALL match; within a term,
 * parenthesized '|'-separated alternatives where ANY one suffices.
 * Example: "(产业链|供应链)+主席" matches content containing 主席 plus either
 * 产业链 or 供应链.
 *
 * @param content      text to test, may be null (-> null)
 * @param composeWords compose expression, may be null (-> null)
 * @return matched words like "a,b," for the first satisfied group, else null
 */
public static String matchComposeWords(String content, String composeWords) {
    if (content == null || composeWords == null) {
        return null;
    }
    for (String group : composeWords.split(";")) {
        StringBuilder matched = new StringBuilder();
        boolean groupHit = true;
        for (String term : group.split("\\+")) {
            // strip the grouping parentheses, then try each alternative
            boolean termHit = false;
            for (String alt : term.replaceAll("[()]", "").split("\\|")) {
                if (content.contains(alt)) {
                    matched.append(alt).append(",");
                    termHit = true;
                    break; // one alternative is enough for this term
                }
            }
            if (!termHit) {
                groupHit = false; // a required term failed -> group fails
                break;
            }
        }
        if (groupHit) {
            return matched.toString();
        }
    }
    return null;
}
}
...@@ -124,4 +124,5 @@ public class DisplayInfo { ...@@ -124,4 +124,5 @@ public class DisplayInfo {
* 是否是主条目,0为非主条目,1为主条目 * 是否是主条目,0为非主条目,1为主条目
*/ */
private Integer ismain; private Integer ismain;
private Integer similarNumber;
} }
package com.zzsn.event.vo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Value object pairing an article id with its duplicate-detection mark.
 * Lombok generates getters/setters/equals/hashCode/toString plus the
 * all-args and no-args constructors.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class RepeatHold {
// id of the article being checked for duplication
private String articleId;
// duplicate mark for the article — semantics set by the producer; TODO confirm
private String repeatMark;
}
...@@ -70,6 +70,7 @@ public class SpecialInformation { ...@@ -70,6 +70,7 @@ public class SpecialInformation {
//删除标记(1:删除;0:保留) //删除标记(1:删除;0:保留)
private Integer deleteFlag; private Integer deleteFlag;
private String subjectId; private String subjectId;
private List<String> subjectIdList;
//审核操作(0:未审核 1:审核通过 2:审核未通过 3:暂定 默认值为0) //审核操作(0:未审核 1:审核通过 2:审核未通过 3:暂定 默认值为0)
private Integer checkStatus; private Integer checkStatus;
//阅读数 //阅读数
......
package com.zzsn.event.vo;
import lombok.Data;
/**
 * Keyword-frequency statistics entry for a subject (topic).
 * Lombok @Data generates getters/setters/equals/hashCode/toString.
 */
@Data
public class StatisticsKeyWordVo {
// subject (topic) id
private String subjectId;
// keyword id
private String kid;
// term frequency (hit count)
private Integer value;
// keyword display name
private String name;
}
...@@ -115,8 +115,8 @@ mybatis-plus: ...@@ -115,8 +115,8 @@ mybatis-plus:
log-impl: org.apache.ibatis.logging.stdout.StdOutImpl log-impl: org.apache.ibatis.logging.stdout.StdOutImpl
map-underscore-to-camel-case: true map-underscore-to-camel-case: true
serviceProject: serviceProject:
# url: https://clb.ciglobal.cn/clb-api/datapull/ url: https://clb.ciglobal.cn/clb-api/datapull/
url: http://127.0.0.1:9988/datapull/ # url: http://127.0.0.1:9988/datapull/
#热词抽取地址 #热词抽取地址
hotWords: hotWords:
extractUrl: http://114.116.99.6:8055/task/dispose/extractKeyword extractUrl: http://114.116.99.6:8055/task/dispose/extractKeyword
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论