提交 4efa9699 作者: obcy

Merge remote-tracking branch 'origin/master'

...@@ -184,13 +184,18 @@ ...@@ -184,13 +184,18 @@
<artifactId>spring-boot-starter-thymeleaf</artifactId> <artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency> </dependency>
<!--word-html处理工具--> <!--word-html处理工具-->
<!-- <dependency>--> <dependency>
<!-- <groupId>com.aspose</groupId>--> <groupId>com.aspose</groupId>
<!-- <artifactId>aspose-words</artifactId>--> <artifactId>aspose-words</artifactId>
<!-- <version>15.12.0</version>--> <version>15.12.0</version>
<!-- <scope>system</scope>-->
<!-- <systemPath>${basedir}/lib/aspose-words-15.12.0-jdk16.jar</systemPath>--> </dependency>
<!-- </dependency>-->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.23</version>
</dependency>
<dependency> <dependency>
<groupId>com.aspose</groupId> <groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId> <artifactId>aspose-words</artifactId>
...@@ -209,6 +214,7 @@ ...@@ -209,6 +214,7 @@
<version>2.2.10</version> <version>2.2.10</version>
</dependency> </dependency>
</dependencies> </dependencies>
<build> <build>
......
...@@ -21,6 +21,10 @@ import lombok.extern.slf4j.Slf4j; ...@@ -21,6 +21,10 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.search.join.ScoreMode;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RequestOptions;
...@@ -36,15 +40,13 @@ import org.jsoup.Jsoup; ...@@ -36,15 +40,13 @@ import org.jsoup.Jsoup;
import org.springframework.beans.BeanUtils; import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest; import org.springframework.web.multipart.MultipartHttpServletRequest;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import java.io.ByteArrayInputStream; import java.io.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors; import java.util.stream.Collectors;
...@@ -82,6 +84,10 @@ class KnowledgeServiceImpl implements IKnowledgeService { ...@@ -82,6 +84,10 @@ class KnowledgeServiceImpl implements IKnowledgeService {
@Autowired @Autowired
private AsyncService asyncService; private AsyncService asyncService;
private String TEMP_PATH="/storage/temp/";
@Autowired
private StringRedisTemplate stringRedisTemplate;
@Override @Override
public void addKnowledge(KnowFile knowFile, Knowledge knowledge, KbAuthorizedUser userInfo) { public void addKnowledge(KnowFile knowFile, Knowledge knowledge, KbAuthorizedUser userInfo) {
...@@ -104,12 +110,72 @@ class KnowledgeServiceImpl implements IKnowledgeService { ...@@ -104,12 +110,72 @@ class KnowledgeServiceImpl implements IKnowledgeService {
List<String> contentStringList = new ArrayList<>(); List<String> contentStringList = new ArrayList<>();
String html = null; String html = null;
try { try {
File file = new File(filesStorage + knowledge.getFiles().get(0).getFilePath()); String path = filesStorage + knowledge.getFiles().get(0).getFilePath();
html = DocUtil.convertDocStream2Html(new FileInputStream(file)); String filePath=null;
String htmlWithTable = html.replace("</p>", "######</p>"); if (".pdf".equals(knowFile.getFileType())) {
htmlWithTable = htmlWithTable.replace("</title>", "######</title>"); try {
htmlWithTable = htmlWithTable.replace("</h1>", "######</h1>"); PDDocument document = PDDocument.load(new File(path));
contentStringList = Arrays.asList(htmlWithTable.split("######")); StringBuilder allBuilder= new StringBuilder();
// Text stripper that still produces the regular extracted text (every override delegates
// to super), and additionally mirrors each page's text into a side buffer in which lines
// ending with sentence-final punctuation are followed by the "😀" sentinel. The finished
// page buffers are concatenated into the captured allBuilder.
PDFTextStripper pdfTextStripper = new PDFTextStripper() {
    /** Text of the page currently being processed, with paragraph sentinels inserted. */
    private final StringBuilder paragraphBuilder = new StringBuilder();

    /** Clears the per-page buffer before a new page is processed. */
    @Override
    protected void startPage(PDPage page) throws IOException {
        super.startPage(page);
        paragraphBuilder.setLength(0); // make paragraphBuilder empty
    }

    /**
     * Mirrors the line break into the page buffer and, when the line that just ended
     * finishes with sentence-final punctuation, appends the paragraph sentinel after it.
     */
    @Override
    protected void writeLineSeparator() throws IOException {
        super.writeLineSeparator();
        // Decide before appending the newline: the check looks at the last text character.
        boolean endsSentence = endsWithSentenceTerminator(paragraphBuilder);
        paragraphBuilder.append("\n");
        if (endsSentence) {
            paragraphBuilder.append("😀"); // mark paragraph
        }
    }

    /** Mirrors every written text run into the page buffer. */
    @Override
    protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
        super.writeString(string, textPositions);
        paragraphBuilder.append(string); // add text content
    }

    /** Appends the trimmed page buffer to allBuilder unless the page produced no text. */
    @Override
    protected void endPage(PDPage page) throws IOException {
        super.endPage(page);
        String paragraph = paragraphBuilder.toString().trim(); // get paragraph
        if (!paragraph.isEmpty()) {
            allBuilder.append(paragraph);
        }
    }

    /**
     * Returns true when the buffer's last character is an ASCII or full-width
     * full stop / exclamation mark. Inspects only the last character, so the
     * buffer is not copied into a new String on every line break.
     */
    private boolean endsWithSentenceTerminator(CharSequence text) {
        int length = text.length();
        if (length == 0) {
            return false;
        }
        char last = text.charAt(length - 1);
        // The original listed the ASCII '!' twice; the second entry is restored as the
        // full-width exclamation mark (U+FF01), matching the '.' / '。' pairing. It is
        // written as an escape so text normalisation cannot fold it back into ASCII.
        return last == '.' || last == '。' || last == '!' || last == '\uFF01';
    }
};
pdfTextStripper.setSortByPosition(true);
pdfTextStripper.setStartPage(0);
pdfTextStripper.setEndPage(document.getNumberOfPages());
String text = pdfTextStripper.getText(document);
knowledge.setContentAll(text);
log.info("allBuilder:{}",allBuilder);
contentStringList = Arrays.asList(allBuilder.toString().split("😀"));
} catch (Exception e) {
log.error("parsing pdf error :{}", e.getMessage());
}
}else {
filePath=path;
File file = new File(filePath);
html = DocUtil.convertDocStream2Html(new FileInputStream(file));
String htmlWithTable = html.replace("</p>", "######</p>");
htmlWithTable = htmlWithTable.replace("</title>", "######</title>");
htmlWithTable = htmlWithTable.replace("</h1>", "######</h1>");
contentStringList = Arrays.asList(htmlWithTable.split("######"));
}
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
...@@ -221,7 +287,7 @@ class KnowledgeServiceImpl implements IKnowledgeService { ...@@ -221,7 +287,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
List<String> halfPermitList = entries.stream().filter(item -> !item.getValue()).map(Map.Entry::getKey).collect(Collectors.toList()); List<String> halfPermitList = entries.stream().filter(item -> !item.getValue()).map(Map.Entry::getKey).collect(Collectors.toList());
//know permit //know permit
log.info("halfPermitList=={}", halfPermitList.toString()); log.info("halfPermitList=={}", halfPermitList.toString());
if(!halfPermitList.isEmpty()){ if (!halfPermitList.isEmpty()) {
knowPermitList = authorizedUserService.getUserPermissionKnowsByIds(halfPermitList, userInfo.getId()); knowPermitList = authorizedUserService.getUserPermissionKnowsByIds(halfPermitList, userInfo.getId());
} }
...@@ -486,7 +552,7 @@ class KnowledgeServiceImpl implements IKnowledgeService { ...@@ -486,7 +552,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
MultipartFile multipartFile = fileMap.get(new ArrayList<String>(fileMap.keySet()).get(0)); MultipartFile multipartFile = fileMap.get(new ArrayList<String>(fileMap.keySet()).get(0));
int index = multipartFile.getOriginalFilename().lastIndexOf("."); int index = multipartFile.getOriginalFilename().lastIndexOf(".");
String fileSuffix = multipartFile.getOriginalFilename().substring(index + 1); String fileSuffix = multipartFile.getOriginalFilename().substring(index + 1);
if ("doc".equals(fileSuffix) || "docx".equals(fileSuffix) || "xls".equals(fileSuffix) || "xlsx".equals(fileSuffix)) { if ("xls".equals(fileSuffix) || "xlsx".equals(fileSuffix)) {
asyncService.doimport(request, fileSuffix, userId); asyncService.doimport(request, fileSuffix, userId);
return Result.OK("已进行处理"); return Result.OK("已进行处理");
} else { } else {
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论