Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
K
know-base
概览
概览
详情
活动
周期分析
版本库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
张京坤
know-base
Commits
4efa9699
提交
4efa9699
authored
2月 21, 2024
作者:
obcy
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/master'
上级
5ac547cc
69ff9ad7
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
91 行增加
和
19 行删除
+91
-19
pom.xml
pom.xml
+13
-7
KnowledgeServiceImpl.java
.../com/zzsn/knowbase/service/impl/KnowledgeServiceImpl.java
+78
-12
没有找到文件。
pom.xml
浏览文件 @
4efa9699
...
@@ -184,13 +184,18 @@
...
@@ -184,13 +184,18 @@
<artifactId>
spring-boot-starter-thymeleaf
</artifactId>
<artifactId>
spring-boot-starter-thymeleaf
</artifactId>
</dependency>
</dependency>
<!--word-html处理工具-->
<!--word-html处理工具-->
<!-- <dependency>-->
<dependency>
<!-- <groupId>com.aspose</groupId>-->
<groupId>
com.aspose
</groupId>
<!-- <artifactId>aspose-words</artifactId>-->
<artifactId>
aspose-words
</artifactId>
<!-- <version>15.12.0</version>-->
<version>
15.12.0
</version>
<!-- <scope>system</scope>-->
<!-- <systemPath>${basedir}/lib/aspose-words-15.12.0-jdk16.jar</systemPath>-->
</dependency>
<!-- </dependency>-->
<dependency>
<groupId>
org.apache.pdfbox
</groupId>
<artifactId>
pdfbox
</artifactId>
<version>
2.0.23
</version>
</dependency>
<dependency>
<dependency>
<groupId>
com.aspose
</groupId>
<groupId>
com.aspose
</groupId>
<artifactId>
aspose-words
</artifactId>
<artifactId>
aspose-words
</artifactId>
...
@@ -209,6 +214,7 @@
...
@@ -209,6 +214,7 @@
<version>
2.2.10
</version>
<version>
2.2.10
</version>
</dependency>
</dependency>
</dependencies>
</dependencies>
<build>
<build>
...
...
src/main/java/com/zzsn/knowbase/service/impl/KnowledgeServiceImpl.java
浏览文件 @
4efa9699
...
@@ -21,6 +21,10 @@ import lombok.extern.slf4j.Slf4j;
...
@@ -21,6 +21,10 @@ import lombok.extern.slf4j.Slf4j;
import
org.apache.commons.collections4.CollectionUtils
;
import
org.apache.commons.collections4.CollectionUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.lucene.search.join.ScoreMode
;
import
org.apache.lucene.search.join.ScoreMode
;
import
org.apache.pdfbox.pdmodel.PDDocument
;
import
org.apache.pdfbox.pdmodel.PDPage
;
import
org.apache.pdfbox.text.PDFTextStripper
;
import
org.apache.pdfbox.text.TextPosition
;
import
org.elasticsearch.action.search.SearchRequest
;
import
org.elasticsearch.action.search.SearchRequest
;
import
org.elasticsearch.action.search.SearchResponse
;
import
org.elasticsearch.action.search.SearchResponse
;
import
org.elasticsearch.client.RequestOptions
;
import
org.elasticsearch.client.RequestOptions
;
...
@@ -36,15 +40,13 @@ import org.jsoup.Jsoup;
...
@@ -36,15 +40,13 @@ import org.jsoup.Jsoup;
import
org.springframework.beans.BeanUtils
;
import
org.springframework.beans.BeanUtils
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.beans.factory.annotation.Value
;
import
org.springframework.beans.factory.annotation.Value
;
import
org.springframework.data.redis.core.StringRedisTemplate
;
import
org.springframework.stereotype.Service
;
import
org.springframework.stereotype.Service
;
import
org.springframework.web.multipart.MultipartFile
;
import
org.springframework.web.multipart.MultipartFile
;
import
org.springframework.web.multipart.MultipartHttpServletRequest
;
import
org.springframework.web.multipart.MultipartHttpServletRequest
;
import
javax.servlet.http.HttpServletRequest
;
import
javax.servlet.http.HttpServletRequest
;
import
java.io.ByteArrayInputStream
;
import
java.io.*
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.util.*
;
import
java.util.*
;
import
java.util.concurrent.CompletableFuture
;
import
java.util.concurrent.CompletableFuture
;
import
java.util.stream.Collectors
;
import
java.util.stream.Collectors
;
...
@@ -82,6 +84,10 @@ class KnowledgeServiceImpl implements IKnowledgeService {
...
@@ -82,6 +84,10 @@ class KnowledgeServiceImpl implements IKnowledgeService {
@Autowired
@Autowired
private
AsyncService
asyncService
;
private
AsyncService
asyncService
;
private
String
TEMP_PATH
=
"/storage/temp/"
;
@Autowired
private
StringRedisTemplate
stringRedisTemplate
;
@Override
@Override
public
void
addKnowledge
(
KnowFile
knowFile
,
Knowledge
knowledge
,
KbAuthorizedUser
userInfo
)
{
public
void
addKnowledge
(
KnowFile
knowFile
,
Knowledge
knowledge
,
KbAuthorizedUser
userInfo
)
{
...
@@ -104,12 +110,72 @@ class KnowledgeServiceImpl implements IKnowledgeService {
...
@@ -104,12 +110,72 @@ class KnowledgeServiceImpl implements IKnowledgeService {
List
<
String
>
contentStringList
=
new
ArrayList
<>();
List
<
String
>
contentStringList
=
new
ArrayList
<>();
String
html
=
null
;
String
html
=
null
;
try
{
try
{
File
file
=
new
File
(
filesStorage
+
knowledge
.
getFiles
().
get
(
0
).
getFilePath
());
String
path
=
filesStorage
+
knowledge
.
getFiles
().
get
(
0
).
getFilePath
();
html
=
DocUtil
.
convertDocStream2Html
(
new
FileInputStream
(
file
));
String
filePath
=
null
;
String
htmlWithTable
=
html
.
replace
(
"</p>"
,
"######</p>"
);
if
(
".pdf"
.
equals
(
knowFile
.
getFileType
()))
{
htmlWithTable
=
htmlWithTable
.
replace
(
"</title>"
,
"######</title>"
);
try
{
htmlWithTable
=
htmlWithTable
.
replace
(
"</h1>"
,
"######</h1>"
);
PDDocument
document
=
PDDocument
.
load
(
new
File
(
path
));
contentStringList
=
Arrays
.
asList
(
htmlWithTable
.
split
(
"######"
));
StringBuilder
allBuilder
=
new
StringBuilder
();
PDFTextStripper
pdfTextStripper
=
new
PDFTextStripper
(){
private
StringBuilder
paragraphBuilder
=
new
StringBuilder
();
@Override
protected
void
startPage
(
PDPage
page
)
throws
IOException
{
super
.
startPage
(
page
);
paragraphBuilder
.
setLength
(
0
);
//make paragraphBuilder empty
}
@Override
protected
void
writeLineSeparator
()
throws
IOException
{
super
.
writeLineSeparator
();
if
(
paragraphBuilder
.
toString
().
endsWith
(
"."
)||
paragraphBuilder
.
toString
().
endsWith
(
"。"
)||
paragraphBuilder
.
toString
().
endsWith
(
"!"
)||
paragraphBuilder
.
toString
().
endsWith
(
"!"
)
){
paragraphBuilder
.
append
(
"\n"
);
//mark paragraph
paragraphBuilder
.
append
(
"😀"
);
}
else
{
paragraphBuilder
.
append
(
"\n"
);
//mark paragraph
}
}
@Override
protected
void
writeString
(
String
string
,
List
<
TextPosition
>
textPositions
)
throws
IOException
{
super
.
writeString
(
string
,
textPositions
);
paragraphBuilder
.
append
(
string
);
//add text content
}
@Override
protected
void
endPage
(
PDPage
page
)
throws
IOException
{
super
.
endPage
(
page
);
String
paragraph
=
paragraphBuilder
.
toString
().
trim
();
//get paragraph
if
(!
paragraph
.
isEmpty
()){
allBuilder
.
append
(
paragraph
);
}
}
};
pdfTextStripper
.
setSortByPosition
(
true
);
pdfTextStripper
.
setStartPage
(
0
);
pdfTextStripper
.
setEndPage
(
document
.
getNumberOfPages
());
String
text
=
pdfTextStripper
.
getText
(
document
);
knowledge
.
setContentAll
(
text
);
log
.
info
(
"allBuilder:{}"
,
allBuilder
);
contentStringList
=
Arrays
.
asList
(
allBuilder
.
toString
().
split
(
"😀"
));
}
catch
(
Exception
e
)
{
log
.
error
(
"parsing pdf error :{}"
,
e
.
getMessage
());
}
}
else
{
filePath
=
path
;
File
file
=
new
File
(
filePath
);
html
=
DocUtil
.
convertDocStream2Html
(
new
FileInputStream
(
file
));
String
htmlWithTable
=
html
.
replace
(
"</p>"
,
"######</p>"
);
htmlWithTable
=
htmlWithTable
.
replace
(
"</title>"
,
"######</title>"
);
htmlWithTable
=
htmlWithTable
.
replace
(
"</h1>"
,
"######</h1>"
);
contentStringList
=
Arrays
.
asList
(
htmlWithTable
.
split
(
"######"
));
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
@@ -221,7 +287,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
...
@@ -221,7 +287,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
List
<
String
>
halfPermitList
=
entries
.
stream
().
filter
(
item
->
!
item
.
getValue
()).
map
(
Map
.
Entry
::
getKey
).
collect
(
Collectors
.
toList
());
List
<
String
>
halfPermitList
=
entries
.
stream
().
filter
(
item
->
!
item
.
getValue
()).
map
(
Map
.
Entry
::
getKey
).
collect
(
Collectors
.
toList
());
//know permit
//know permit
log
.
info
(
"halfPermitList=={}"
,
halfPermitList
.
toString
());
log
.
info
(
"halfPermitList=={}"
,
halfPermitList
.
toString
());
if
(!
halfPermitList
.
isEmpty
())
{
if
(!
halfPermitList
.
isEmpty
())
{
knowPermitList
=
authorizedUserService
.
getUserPermissionKnowsByIds
(
halfPermitList
,
userInfo
.
getId
());
knowPermitList
=
authorizedUserService
.
getUserPermissionKnowsByIds
(
halfPermitList
,
userInfo
.
getId
());
}
}
...
@@ -486,7 +552,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
...
@@ -486,7 +552,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
MultipartFile
multipartFile
=
fileMap
.
get
(
new
ArrayList
<
String
>(
fileMap
.
keySet
()).
get
(
0
));
MultipartFile
multipartFile
=
fileMap
.
get
(
new
ArrayList
<
String
>(
fileMap
.
keySet
()).
get
(
0
));
int
index
=
multipartFile
.
getOriginalFilename
().
lastIndexOf
(
"."
);
int
index
=
multipartFile
.
getOriginalFilename
().
lastIndexOf
(
"."
);
String
fileSuffix
=
multipartFile
.
getOriginalFilename
().
substring
(
index
+
1
);
String
fileSuffix
=
multipartFile
.
getOriginalFilename
().
substring
(
index
+
1
);
if
(
"
doc"
.
equals
(
fileSuffix
)
||
"docx"
.
equals
(
fileSuffix
)
||
"
xls"
.
equals
(
fileSuffix
)
||
"xlsx"
.
equals
(
fileSuffix
))
{
if
(
"xls"
.
equals
(
fileSuffix
)
||
"xlsx"
.
equals
(
fileSuffix
))
{
asyncService
.
doimport
(
request
,
fileSuffix
,
userId
);
asyncService
.
doimport
(
request
,
fileSuffix
,
userId
);
return
Result
.
OK
(
"已进行处理"
);
return
Result
.
OK
(
"已进行处理"
);
}
else
{
}
else
{
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论