Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
K
know-base
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
张京坤
know-base
Commits
33915aac
提交
33915aac
authored
4月 29, 2024
作者:
ZhangJingKun
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
支持ppt格式
上级
7048d92a
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
64 行增加
和
4 行删除
+64
-4
KnowledgeServiceImpl.java
.../com/zzsn/knowbase/service/impl/KnowledgeServiceImpl.java
+64
-4
没有找到文件。
src/main/java/com/zzsn/knowbase/service/impl/KnowledgeServiceImpl.java
浏览文件 @
33915aac
...
@@ -27,7 +27,15 @@ import org.apache.pdfbox.pdmodel.PDDocument;
...
@@ -27,7 +27,15 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import
org.apache.pdfbox.pdmodel.PDPage
;
import
org.apache.pdfbox.pdmodel.PDPage
;
import
org.apache.pdfbox.text.PDFTextStripper
;
import
org.apache.pdfbox.text.PDFTextStripper
;
import
org.apache.pdfbox.text.TextPosition
;
import
org.apache.pdfbox.text.TextPosition
;
import
org.apache.poi.hslf.usermodel.HSLFShape
;
import
org.apache.poi.hslf.usermodel.HSLFSlide
;
import
org.apache.poi.hslf.usermodel.HSLFSlideShow
;
import
org.apache.poi.hslf.usermodel.HSLFTextShape
;
import
org.apache.poi.ss.usermodel.Workbook
;
import
org.apache.poi.ss.usermodel.Workbook
;
import
org.apache.poi.xslf.usermodel.XMLSlideShow
;
import
org.apache.poi.xslf.usermodel.XSLFShape
;
import
org.apache.poi.xslf.usermodel.XSLFSlide
;
import
org.apache.poi.xslf.usermodel.XSLFTextShape
;
import
org.apache.poi.xssf.usermodel.XSSFWorkbook
;
import
org.apache.poi.xssf.usermodel.XSSFWorkbook
;
import
org.elasticsearch.action.search.SearchRequest
;
import
org.elasticsearch.action.search.SearchRequest
;
import
org.elasticsearch.action.search.SearchResponse
;
import
org.elasticsearch.action.search.SearchResponse
;
...
@@ -175,19 +183,69 @@ class KnowledgeServiceImpl implements IKnowledgeService {
...
@@ -175,19 +183,69 @@ class KnowledgeServiceImpl implements IKnowledgeService {
pdfTextStripper
.
setStartPage
(
0
);
pdfTextStripper
.
setStartPage
(
0
);
pdfTextStripper
.
setEndPage
(
document
.
getNumberOfPages
());
pdfTextStripper
.
setEndPage
(
document
.
getNumberOfPages
());
String
text
=
pdfTextStripper
.
getText
(
document
);
String
text
=
pdfTextStripper
.
getText
(
document
);
knowledge
.
setContentAll
(
text
);
//
knowledge.setContentAll(text);
log
.
info
(
"allBuilder:{}"
,
allBuilder
);
//log.info("allBuilder:{}", allBuilder.toString().substring(1000)
);
contentStringList
=
Arrays
.
asList
(
allBuilder
.
toString
().
split
(
"😀"
));
contentStringList
=
Arrays
.
asList
(
allBuilder
.
toString
().
split
(
"😀"
));
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"parsing pdf error :{}"
,
e
.
getMessage
());
log
.
error
(
"parsing pdf error :{}"
,
e
.
getMessage
());
}
}
}
else
if
(
".pptx"
.
equals
(
knowFile
.
getFileType
())){
knowledge
.
setImportData
(
2
);
filePath
=
path
;
log
.
info
(
"pptx dealing filePath{}"
,
filePath
);
// 加载PPT文件
XMLSlideShow
ppt
=
new
XMLSlideShow
(
new
FileInputStream
(
filePath
));
// 遍历幻灯片
for
(
XSLFSlide
slide
:
ppt
.
getSlides
())
{
StringBuffer
sb
=
new
StringBuffer
();
// 遍历形状
List
<
XSLFShape
>
shapes
=
slide
.
getShapes
();
for
(
XSLFShape
shape
:
shapes
)
{
// 检查形状是否包含文本
if
(
shape
instanceof
XSLFTextShape
)
{
XSLFTextShape
textShape
=
(
XSLFTextShape
)
shape
;
String
text
=
textShape
.
getText
();
if
(
text
!=
null
)
{
sb
.
append
(
text
);
log
.
info
(
"text :{}"
,
text
);
}
}
}
contentStringList
.
add
(
sb
.
toString
());
}
ppt
.
close
();
// 关闭PPT
}
else
if
(
".ppt"
.
equals
(
knowFile
.
getFileType
())){
knowledge
.
setImportData
(
2
);
filePath
=
path
;
log
.
info
(
"pptx dealing filePath{}"
,
filePath
);
// 加载PPT文件
HSLFSlideShow
ppt
=
new
HSLFSlideShow
(
new
FileInputStream
(
filePath
));
// 遍历幻灯片
for
(
HSLFSlide
slide
:
ppt
.
getSlides
())
{
StringBuffer
sb
=
new
StringBuffer
();
// 遍历形状
List
<
HSLFShape
>
shapes
=
slide
.
getShapes
();
for
(
HSLFShape
shape
:
shapes
)
{
// 检查形状是否包含文本
if
(
shape
instanceof
HSLFTextShape
)
{
HSLFTextShape
textShape
=
(
HSLFTextShape
)
shape
;
String
text
=
textShape
.
getText
();
if
(
text
!=
null
)
{
sb
.
append
(
text
);
log
.
info
(
"text :{}"
,
text
);
}
}
}
contentStringList
.
add
(
sb
.
toString
());
}
ppt
.
close
();
// 关闭PPT
}
else
{
}
else
{
filePath
=
path
;
filePath
=
path
;
File
file
=
new
File
(
filePath
);
File
file
=
new
File
(
filePath
);
log
.
info
(
"word dealing filePath{}"
,
filePath
);
log
.
info
(
"word dealing filePath{}"
,
filePath
);
html
=
DocUtil
.
convertDocStream2Html
(
new
FileInputStream
(
file
));
html
=
DocUtil
.
convertDocStream2Html
(
new
FileInputStream
(
file
));
log
.
info
(
"html :{}"
,
html
);
//
log.info("html :{}", html);
String
htmlWithTable
=
html
.
replace
(
"</p>"
,
"######</p>"
);
String
htmlWithTable
=
html
.
replace
(
"</p>"
,
"######</p>"
);
htmlWithTable
=
htmlWithTable
.
replace
(
"</title>"
,
"######</title>"
);
htmlWithTable
=
htmlWithTable
.
replace
(
"</title>"
,
"######</title>"
);
htmlWithTable
=
htmlWithTable
.
replace
(
"</h1>"
,
"######</h1>"
);
htmlWithTable
=
htmlWithTable
.
replace
(
"</h1>"
,
"######</h1>"
);
...
@@ -264,7 +322,9 @@ class KnowledgeServiceImpl implements IKnowledgeService {
...
@@ -264,7 +322,9 @@ class KnowledgeServiceImpl implements IKnowledgeService {
knowledge
.
setKnowledgeProjectId
(
knowledgeVO
.
getKbKnowledgeId
());
knowledge
.
setKnowledgeProjectId
(
knowledgeVO
.
getKbKnowledgeId
());
}
}
deleteForPython
(
knowledge
.
getId
(),
knowledge
.
getKnowledgeProjectId
());
deleteForPython
(
knowledge
.
getId
(),
knowledge
.
getKnowledgeProjectId
());
log
.
info
(
"send message:{}"
,
knowledgeMessage
);
//log.info("send message:{}", knowledgeMessage);
String
msg
=
JSON
.
toJSONString
(
knowledgeMessage
);
log
.
info
(
msg
);
produceInfo
.
sendKnowledgeContents
(
knowledgeMessage
);
produceInfo
.
sendKnowledgeContents
(
knowledgeMessage
);
}
}
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论