Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
E
event
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
陈世强
event
Commits
7b78e206
提交
7b78e206
authored
1月 23, 2025
作者:
925993793@qq.com
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
推荐关键词改用python模型
上级
0fa19054
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
127 行增加
和
28 行删除
+127
-28
SubjectManageController.java
...va/com/zzsn/event/controller/SubjectManageController.java
+70
-18
EsService.java
src/main/java/com/zzsn/event/es/EsService.java
+0
-2
HanlpUtil.java
src/main/java/com/zzsn/event/util/HanlpUtil.java
+1
-1
PythonUtil.java
src/main/java/com/zzsn/event/util/PythonUtil.java
+52
-0
StatisticsKeyWordVo.java
src/main/java/com/zzsn/event/vo/StatisticsKeyWordVo.java
+0
-7
DisplayInfo.java
src/main/java/com/zzsn/event/vo/es/DisplayInfo.java
+2
-0
SpecialInformation.java
src/main/java/com/zzsn/event/vo/es/SpecialInformation.java
+2
-0
没有找到文件。
src/main/java/com/zzsn/event/controller/SubjectManageController.java
浏览文件 @
7b78e206
package
com
.
zzsn
.
event
.
controller
;
package
com
.
zzsn
.
event
.
controller
;
import
cn.hutool.core.map.MapUtil
;
import
cn.hutool.core.util.ObjectUtil
;
import
cn.hutool.core.util.ObjectUtil
;
import
cn.hutool.core.util.StrUtil
;
import
cn.hutool.core.util.StrUtil
;
import
com.alibaba.fastjson2.JSONObject
;
import
com.alibaba.fastjson2.JSONObject
;
...
@@ -22,6 +23,9 @@ import com.zzsn.event.xxljob.service.IXxlJobInfoService;
...
@@ -22,6 +23,9 @@ import com.zzsn.event.xxljob.service.IXxlJobInfoService;
import
lombok.extern.slf4j.Slf4j
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.collections4.CollectionUtils
;
import
org.apache.commons.collections4.CollectionUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.poi.hwpf.HWPFDocument
;
import
org.apache.poi.hwpf.extractor.WordExtractor
;
import
org.apache.poi.xwpf.extractor.XWPFWordExtractor
;
import
org.apache.poi.xwpf.usermodel.XWPFDocument
;
import
org.apache.poi.xwpf.usermodel.XWPFDocument
;
import
org.apache.poi.xwpf.usermodel.XWPFParagraph
;
import
org.apache.poi.xwpf.usermodel.XWPFParagraph
;
import
org.springframework.beans.BeanUtils
;
import
org.springframework.beans.BeanUtils
;
...
@@ -85,6 +89,8 @@ public class SubjectManageController {
...
@@ -85,6 +89,8 @@ public class SubjectManageController {
private
ObsUtil
obsUtil
;
private
ObsUtil
obsUtil
;
@Resource
@Resource
private
KafkaTemplate
<
String
,
String
>
kafkaTemplate
;
private
KafkaTemplate
<
String
,
String
>
kafkaTemplate
;
@Autowired
private
PythonUtil
pythonUtil
;
@Value
(
"${kafka.topic.subject.run:}"
)
@Value
(
"${kafka.topic.subject.run:}"
)
...
@@ -171,7 +177,12 @@ public class SubjectManageController {
...
@@ -171,7 +177,12 @@ public class SubjectManageController {
if
(!
flag
)
{
if
(!
flag
)
{
return
Result
.
FAIL
(
"包含不支持的文件类型"
);
return
Result
.
FAIL
(
"包含不支持的文件类型"
);
}
}
List
<
StatisticsKeyWordVo
>
statisticsKeyWordVos
=
articleWords
(
files
);
List
<
StatisticsKeyWordVo
>
statisticsKeyWordVos
=
new
ArrayList
<>();
try
{
statisticsKeyWordVos
=
articleWords
(
files
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
jsonObject
.
put
(
"article"
,
statisticsKeyWordVos
);
jsonObject
.
put
(
"article"
,
statisticsKeyWordVos
);
}
}
if
(
StringUtils
.
isNotBlank
(
words
))
{
if
(
StringUtils
.
isNotBlank
(
words
))
{
...
@@ -642,14 +653,14 @@ public class SubjectManageController {
...
@@ -642,14 +653,14 @@ public class SubjectManageController {
*/
*/
@PostMapping
(
"/bindInfoSourceLabel"
)
@PostMapping
(
"/bindInfoSourceLabel"
)
public
Result
<?>
bindInfoSourceLabel
(
@RequestBody
SubjectSourceTagVO
subjectSourceTagVO
)
{
public
Result
<?>
bindInfoSourceLabel
(
@RequestBody
SubjectSourceTagVO
subjectSourceTagVO
)
{
List
<
InfoSourceLabelVO
>
labelList
=
subjectSourceTagVO
.
getLabelList
();
if
(
CollectionUtils
.
isEmpty
(
labelList
))
{
return
Result
.
OK
();
}
String
subjectId
=
subjectSourceTagVO
.
getSubjectId
();
String
subjectId
=
subjectSourceTagVO
.
getSubjectId
();
if
(
StringUtils
.
isEmpty
(
subjectId
))
{
if
(
StringUtils
.
isEmpty
(
subjectId
))
{
return
Result
.
FAIL
(
"专题id不能为空"
);
return
Result
.
FAIL
(
"专题id不能为空"
);
}
}
List
<
InfoSourceLabelVO
>
labelList
=
subjectSourceTagVO
.
getLabelList
();
if
(
CollectionUtils
.
isEmpty
(
labelList
))
{
return
Result
.
FAIL
(
"信息源标签数据不能为空"
);
}
List
<
SubjectInfoSourceMap
>
dataList
=
new
ArrayList
<>();
List
<
SubjectInfoSourceMap
>
dataList
=
new
ArrayList
<>();
for
(
InfoSourceLabelVO
infoSourceLabelVO
:
labelList
)
{
for
(
InfoSourceLabelVO
infoSourceLabelVO
:
labelList
)
{
List
<
InfoSourceLabelItemVO
>
infoSourceLabelItemList
=
infoSourceLabelVO
.
getInfoSourceLabelItemList
();
List
<
InfoSourceLabelItemVO
>
infoSourceLabelItemList
=
infoSourceLabelVO
.
getInfoSourceLabelItemList
();
...
@@ -1083,6 +1094,8 @@ public class SubjectManageController {
...
@@ -1083,6 +1094,8 @@ public class SubjectManageController {
searchWord
.
setSearchInfo
(
words
);
searchWord
.
setSearchInfo
(
words
);
searchWordList
.
add
(
searchWord
);
searchWordList
.
add
(
searchWord
);
searchCondition
.
setSearchWordList
(
searchWordList
);
searchCondition
.
setSearchWordList
(
searchWordList
);
searchCondition
.
setColumn
(
"score"
);
searchCondition
.
setOrder
(
"desc"
);
try
{
try
{
IPage
<
SpecialInformation
>
page
=
esService
.
pageListByCondition
(
searchCondition
,
null
);
IPage
<
SpecialInformation
>
page
=
esService
.
pageListByCondition
(
searchCondition
,
null
);
if
(
page
.
getTotal
()
>
0
)
{
if
(
page
.
getTotal
()
>
0
)
{
...
@@ -1091,7 +1104,9 @@ public class SubjectManageController {
...
@@ -1091,7 +1104,9 @@ public class SubjectManageController {
for
(
SpecialInformation
information
:
records
)
{
for
(
SpecialInformation
information
:
records
)
{
text
.
append
(
information
.
getTitle
()).
append
(
information
.
getContent
());
text
.
append
(
information
.
getTitle
()).
append
(
information
.
getContent
());
}
}
List
<
Map
.
Entry
<
String
,
Integer
>>
extractKeyWordsByText
=
HanlpUtil
.
extractKeyWordsByText
(
text
.
toString
(),
10
);
List
<
String
>
wordsList
=
pythonUtil
.
extractKeyword
(
text
.
toString
(),
10
);
wordList
=
formatWordInfo
(
text
.
toString
(),
wordsList
);
/*List<Map.Entry<String, Integer>> extractKeyWordsByText = HanlpUtil.extractKeyWordsByText(text.toString(), 10);
if (CollectionUtils.isNotEmpty(extractKeyWordsByText)) {
if (CollectionUtils.isNotEmpty(extractKeyWordsByText)) {
for (Map.Entry<String, Integer> entry : extractKeyWordsByText) {
for (Map.Entry<String, Integer> entry : extractKeyWordsByText) {
StatisticsKeyWordVo statisticsKeyWordVo = new StatisticsKeyWordVo();
StatisticsKeyWordVo statisticsKeyWordVo = new StatisticsKeyWordVo();
...
@@ -1099,12 +1114,12 @@ public class SubjectManageController {
...
@@ -1099,12 +1114,12 @@ public class SubjectManageController {
statisticsKeyWordVo.setValue(entry.getValue());
statisticsKeyWordVo.setValue(entry.getValue());
wordList.add(statisticsKeyWordVo);
wordList.add(statisticsKeyWordVo);
}
}
}
}
*/
}
}
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
return
wordList
;
return
wordList
.
stream
().
sorted
(
Comparator
.
comparing
(
StatisticsKeyWordVo:
:
getValue
).
reversed
()).
collect
(
Collectors
.
toList
())
;
}
}
/**
/**
...
@@ -1120,13 +1135,17 @@ public class SubjectManageController {
...
@@ -1120,13 +1135,17 @@ public class SubjectManageController {
String
originalFilename
=
file
.
getOriginalFilename
();
String
originalFilename
=
file
.
getOriginalFilename
();
if
(
originalFilename
.
endsWith
(
".txt"
))
{
if
(
originalFilename
.
endsWith
(
".txt"
))
{
parseTxt
(
text
,
file
);
parseTxt
(
text
,
file
);
}
else
{
}
else
if
(
originalFilename
.
endsWith
(
".docx"
)){
parseWord
(
text
,
file
);
parseWordDocx
(
text
,
file
);
}
else
if
(
originalFilename
.
endsWith
(
".doc"
))
{
parseWordDoc
(
text
,
file
);
}
}
}
}
List
<
StatisticsKeyWordVo
>
articleWordList
=
new
ArrayList
<>();
List
<
StatisticsKeyWordVo
>
articleWordList
=
new
ArrayList
<>();
if
(
StringUtils
.
isNotEmpty
(
text
))
{
if
(
StringUtils
.
isNotEmpty
(
text
))
{
List
<
Map
.
Entry
<
String
,
Integer
>>
keywordsList
=
HanlpUtil
.
extractKeyWordsByText
(
text
.
toString
(),
10
);
List
<
String
>
wordsList
=
pythonUtil
.
extractKeyword
(
text
.
toString
(),
10
);
articleWordList
=
formatWordInfo
(
text
.
toString
(),
wordsList
);
/*List<Map.Entry<String, Integer>> keywordsList = HanlpUtil.extractKeyWordsByText(text.toString(), 10);
if (CollectionUtils.isNotEmpty(keywordsList)) {
if (CollectionUtils.isNotEmpty(keywordsList)) {
for (Map.Entry<String, Integer> entry : keywordsList) {
for (Map.Entry<String, Integer> entry : keywordsList) {
StatisticsKeyWordVo statisticsKeyWordVo = new StatisticsKeyWordVo();
StatisticsKeyWordVo statisticsKeyWordVo = new StatisticsKeyWordVo();
...
@@ -1134,26 +1153,59 @@ public class SubjectManageController {
...
@@ -1134,26 +1153,59 @@ public class SubjectManageController {
statisticsKeyWordVo.setValue(entry.getValue());
statisticsKeyWordVo.setValue(entry.getValue());
articleWordList.add(statisticsKeyWordVo);
articleWordList.add(statisticsKeyWordVo);
}
}
}
}*/
}
return
articleWordList
.
stream
().
sorted
(
Comparator
.
comparing
(
StatisticsKeyWordVo:
:
getValue
).
reversed
()).
collect
(
Collectors
.
toList
());
}
private
List
<
StatisticsKeyWordVo
>
formatWordInfo
(
String
text
,
List
<
String
>
wordsList
){
List
<
StatisticsKeyWordVo
>
articleWordList
=
new
ArrayList
<>();
Map
<
String
,
Integer
>
hitWordsAndTimes
=
HanlpUtil
.
getHitWordsAndTimes
(
wordsList
,
text
);
for
(
Map
.
Entry
<
String
,
Integer
>
entry
:
hitWordsAndTimes
.
entrySet
())
{
StatisticsKeyWordVo
statisticsKeyWordVo
=
new
StatisticsKeyWordVo
();
statisticsKeyWordVo
.
setName
(
entry
.
getKey
());
statisticsKeyWordVo
.
setValue
(
entry
.
getValue
());
articleWordList
.
add
(
statisticsKeyWordVo
);
}
}
return
articleWordList
;
return
articleWordList
;
}
}
/**
/**
* 解析word文档,获取纯文本内容
* 解析word文档
,docx后缀
,获取纯文本内容
*
*
* @param text 内容
* @param text 内容
* @param file word文件
* @param file word文件
* @author lkg
* @author lkg
* @date 2025/1/3
* @date 2025/1/3
*/
*/
private
void
parseWord
(
StringBuilder
text
,
MultipartFile
file
)
{
private
void
parseWord
Docx
(
StringBuilder
text
,
MultipartFile
file
)
{
try
{
try
{
InputStream
inputStream
=
file
.
getInputStream
();
InputStream
inputStream
=
file
.
getInputStream
();
XWPFDocument
doc
=
new
XWPFDocument
(
inputStream
);
XWPFDocument
docx
=
new
XWPFDocument
(
inputStream
);
for
(
XWPFParagraph
paragraph
:
doc
.
getParagraphs
())
{
XWPFWordExtractor
extractor
=
new
XWPFWordExtractor
(
docx
);
text
.
append
(
paragraph
.
getText
());
text
.
append
(
extractor
.
getText
());
}
inputStream
.
close
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
}
/**
* 解析word文档,doc后缀,获取纯文本内容
*
* @param text 内容
* @param file word文件
* @author lkg
* @date 2025/1/3
*/
private
void
parseWordDoc
(
StringBuilder
text
,
MultipartFile
file
)
{
try
{
InputStream
inputStream
=
file
.
getInputStream
();
HWPFDocument
doc
=
new
HWPFDocument
(
inputStream
);
WordExtractor
wordExtractor
=
new
WordExtractor
(
doc
);
text
.
append
(
wordExtractor
.
getText
());
inputStream
.
close
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
...
src/main/java/com/zzsn/event/es/EsService.java
浏览文件 @
7b78e206
...
@@ -27,7 +27,6 @@ import org.elasticsearch.client.RestHighLevelClient;
...
@@ -27,7 +27,6 @@ import org.elasticsearch.client.RestHighLevelClient;
import
org.elasticsearch.index.query.*
;
import
org.elasticsearch.index.query.*
;
import
org.elasticsearch.search.SearchHit
;
import
org.elasticsearch.search.SearchHit
;
import
org.elasticsearch.search.SearchHits
;
import
org.elasticsearch.search.SearchHits
;
import
org.elasticsearch.search.aggregations.Aggregation
;
import
org.elasticsearch.search.aggregations.AggregationBuilders
;
import
org.elasticsearch.search.aggregations.AggregationBuilders
;
import
org.elasticsearch.search.aggregations.Aggregations
;
import
org.elasticsearch.search.aggregations.Aggregations
;
import
org.elasticsearch.search.aggregations.BucketOrder
;
import
org.elasticsearch.search.aggregations.BucketOrder
;
...
@@ -41,7 +40,6 @@ import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
...
@@ -41,7 +40,6 @@ import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
import
org.elasticsearch.search.aggregations.bucket.terms.Terms
;
import
org.elasticsearch.search.aggregations.bucket.terms.Terms
;
import
org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder
;
import
org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder
;
import
org.elasticsearch.search.aggregations.metrics.Cardinality
;
import
org.elasticsearch.search.aggregations.metrics.Cardinality
;
import
org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder
;
import
org.elasticsearch.search.builder.SearchSourceBuilder
;
import
org.elasticsearch.search.builder.SearchSourceBuilder
;
import
org.elasticsearch.search.collapse.CollapseBuilder
;
import
org.elasticsearch.search.collapse.CollapseBuilder
;
import
org.elasticsearch.search.sort.SortBuilders
;
import
org.elasticsearch.search.sort.SortBuilders
;
...
...
src/main/java/com/zzsn/event/util/HanlpUtil.java
浏览文件 @
7b78e206
...
@@ -93,7 +93,7 @@ public class HanlpUtil {
...
@@ -93,7 +93,7 @@ public class HanlpUtil {
* @创建时间 2020/9/3 18:41
* @创建时间 2020/9/3 18:41
* @Version 1.0
* @Version 1.0
*/
*/
p
rivate
static
Map
<
String
,
Integer
>
getHitWordsAndTimes
(
Collection
<
String
>
srcList
,
String
text
){
p
ublic
static
Map
<
String
,
Integer
>
getHitWordsAndTimes
(
Collection
<
String
>
srcList
,
String
text
){
Map
<
String
,
Integer
>
map
=
new
HashMap
<>();
Map
<
String
,
Integer
>
map
=
new
HashMap
<>();
if
(
srcList
==
null
||
StringUtils
.
isEmpty
(
text
)){
if
(
srcList
==
null
||
StringUtils
.
isEmpty
(
text
)){
return
map
;
return
map
;
...
...
src/main/java/com/zzsn/event/util/PythonUtil.java
0 → 100644
浏览文件 @
7b78e206
package
com
.
zzsn
.
event
.
util
;
import
com.alibaba.fastjson2.JSON
;
import
com.alibaba.fastjson2.JSONArray
;
import
com.alibaba.fastjson2.JSONObject
;
import
org.apache.commons.lang3.StringUtils
;
import
org.springframework.beans.factory.annotation.Value
;
import
org.springframework.stereotype.Service
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
/**
 * Client for the Python model service.
 *
 * <p>Currently exposes keyword extraction only. The endpoint is read from the
 * {@code python.keyWordsExtractUrl} property.
 *
 * @author lkg
 * @date 2025/1/23
 */
@Service
public class PythonUtil {

    /**
     * Timeout value handed to {@code HttpUtil.doPostForm}.
     * NOTE(review): presumably milliseconds (60 s) - confirm against HttpUtil.
     */
    private static final int KEYWORD_EXTRACT_TIMEOUT = 60000;

    @Value("${python.keyWordsExtractUrl}")
    private String keywordExtractUrl;

    /**
     * Extracts keywords from the given text by posting it to the Python keyword service.
     *
     * <p>The response is expected to look like {@code {"resultData": {"data": [...]}}}.
     * When the content is blank, the response body is empty, or {@code resultData} /
     * {@code data} is missing, an empty list is returned instead of failing with a
     * {@link NullPointerException}.
     *
     * @param content text to analyse; blank content yields an empty list without a remote call
     * @param number  number of keywords to request (sent as the {@code topK} form field);
     *                must not be {@code null}
     * @return the extracted keywords, or an empty mutable list when none are available
     * @author lkg
     * @date 2025/1/23
     */
    public List<String> extractKeyword(String content, Integer number) {
        List<String> wordsList = new ArrayList<>();
        if (StringUtils.isBlank(content)) {
            return wordsList;
        }

        Map<String, Object> params = new HashMap<>();
        params.put("text", content);
        params.put("name", "phrase");
        params.put("topK", number.toString());

        String result = HttpUtil.doPostForm(keywordExtractUrl, params, KEYWORD_EXTRACT_TIMEOUT);
        // An empty body would make parseObject return null and break the lookups below.
        if (StringUtils.isBlank(result)) {
            return wordsList;
        }

        JSONObject jsonObject = JSONObject.parseObject(result);
        if (jsonObject == null) {
            return wordsList;
        }
        JSONObject resultData = jsonObject.getJSONObject("resultData");
        if (resultData == null) {
            return wordsList;
        }
        JSONArray data = resultData.getJSONArray("data");
        // "data" may be absent even when "resultData" is present.
        if (data == null) {
            return wordsList;
        }

        List<String> parsed = JSON.parseArray(data.toString(), String.class);
        return parsed != null ? parsed : wordsList;
    }
}
src/main/java/com/zzsn/event/vo/StatisticsKeyWordVo.java
浏览文件 @
7b78e206
...
@@ -5,13 +5,6 @@ import lombok.Data;
...
@@ -5,13 +5,6 @@ import lombok.Data;
@Data
@Data
public
class
StatisticsKeyWordVo
{
public
class
StatisticsKeyWordVo
{
//专题id
private
String
subjectId
;
// 关键词id
private
String
kid
;
//词频
//词频
private
Integer
value
;
private
Integer
value
;
...
...
src/main/java/com/zzsn/event/vo/es/DisplayInfo.java
浏览文件 @
7b78e206
...
@@ -49,6 +49,8 @@ public class DisplayInfo {
...
@@ -49,6 +49,8 @@ public class DisplayInfo {
private
String
type
;
private
String
type
;
//标签信息
//标签信息
private
List
<
Label
>
labels
;
private
List
<
Label
>
labels
;
//模型打分信息
private
List
<
ModelScore
>
modelScores
;
//视频下载链接
//视频下载链接
private
String
downLoadUrl
;
private
String
downLoadUrl
;
//视频链接(原链接 网页版)
//视频链接(原链接 网页版)
...
...
src/main/java/com/zzsn/event/vo/es/SpecialInformation.java
浏览文件 @
7b78e206
...
@@ -52,6 +52,8 @@ public class SpecialInformation {
...
@@ -52,6 +52,8 @@ public class SpecialInformation {
private
String
type
;
private
String
type
;
//标签信息
//标签信息
private
List
<
Label
>
labels
;
private
List
<
Label
>
labels
;
//模型打分信息
private
List
<
ModelScore
>
modelScores
;
//视频下载链接
//视频下载链接
private
String
downLoadUrl
;
private
String
downLoadUrl
;
//视频链接(原链接 网页版)
//视频链接(原链接 网页版)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论