提交 c30ef121 作者: chenshiqiang

add pdf

上级 6bd5546f
......@@ -184,13 +184,18 @@
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!--word-html处理工具-->
<!-- <dependency>-->
<!-- <groupId>com.aspose</groupId>-->
<!-- <artifactId>aspose-words</artifactId>-->
<!-- <version>15.12.0</version>-->
<!-- <scope>system</scope>-->
<!-- <systemPath>${basedir}/lib/aspose-words-15.12.0-jdk16.jar</systemPath>-->
<!-- </dependency>-->
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>15.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.23</version>
</dependency>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
......@@ -209,6 +214,7 @@
<version>2.2.10</version>
</dependency>
</dependencies>
<build>
......
......@@ -21,6 +21,10 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
......@@ -36,15 +40,13 @@ import org.jsoup.Jsoup;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.MultipartHttpServletRequest;
import javax.servlet.http.HttpServletRequest;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.*;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
......@@ -82,6 +84,10 @@ class KnowledgeServiceImpl implements IKnowledgeService {
@Autowired
private AsyncService asyncService;
private String TEMP_PATH="/storage/temp/";
@Autowired
private StringRedisTemplate stringRedisTemplate;
@Override
public void addKnowledge(KnowFile knowFile, Knowledge knowledge, KbAuthorizedUser userInfo) {
......@@ -104,12 +110,72 @@ class KnowledgeServiceImpl implements IKnowledgeService {
List<String> contentStringList = new ArrayList<>();
String html = null;
try {
File file = new File(filesStorage + knowledge.getFiles().get(0).getFilePath());
html = DocUtil.convertDocStream2Html(new FileInputStream(file));
String htmlWithTable = html.replace("</p>", "######</p>");
htmlWithTable = htmlWithTable.replace("</title>", "######</title>");
htmlWithTable = htmlWithTable.replace("</h1>", "######</h1>");
contentStringList = Arrays.asList(htmlWithTable.split("######"));
String path = filesStorage + knowledge.getFiles().get(0).getFilePath();
String filePath=null;
if (".pdf".equals(knowFile.getFileType())) {
// Extract the PDF text and split it into sentence-delimited chunks.
// try-with-resources guarantees the PDDocument (and the file handle behind it)
// is closed even when parsing throws.
try (PDDocument document = PDDocument.load(new File(path))) {
    // Accumulates the marked-up text of every page; chunk boundaries are tagged with "😀".
    StringBuilder allBuilder = new StringBuilder();
    PDFTextStripper pdfTextStripper = new PDFTextStripper() {
        // Text of the page currently being processed, with chunk markers inserted.
        private final StringBuilder paragraphBuilder = new StringBuilder();

        /** Resets the per-page buffer before a new page is processed. */
        @Override
        protected void startPage(PDPage page) throws IOException {
            super.startPage(page);
            paragraphBuilder.setLength(0);
        }

        /**
         * Appends a line break to the page buffer and, when the text collected so far
         * ends with a sentence terminator, also appends the "😀" chunk marker.
         */
        @Override
        protected void writeLineSeparator() throws IOException {
            super.writeLineSeparator();
            // Inspect only the last character instead of materialising the whole buffer
            // several times per line. The terminators are the ASCII and full-width forms
            // of the period and the exclamation mark.
            int length = paragraphBuilder.length();
            boolean sentenceEnded = length > 0
                    && ".。!！".indexOf(paragraphBuilder.charAt(length - 1)) >= 0;
            paragraphBuilder.append("\n");
            if (sentenceEnded) {
                paragraphBuilder.append("😀");
            }
        }

        /** Mirrors every string the stripper emits into the page buffer. */
        @Override
        protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
            super.writeString(string, textPositions);
            paragraphBuilder.append(string);
        }

        /** Moves the trimmed page buffer into the document-wide buffer. */
        @Override
        protected void endPage(PDPage page) throws IOException {
            super.endPage(page);
            String paragraph = paragraphBuilder.toString().trim();
            if (!paragraph.isEmpty()) {
                allBuilder.append(paragraph);
            }
        }
    };
    pdfTextStripper.setSortByPosition(true);
    // PDFBox page numbers are 1-based, so 1 selects the first page.
    pdfTextStripper.setStartPage(1);
    pdfTextStripper.setEndPage(document.getNumberOfPages());
    // getText drives the callbacks above; its return value is the plain, unmarked text.
    String text = pdfTextStripper.getText(document);
    knowledge.setContentAll(text);
    log.info("allBuilder:{}", allBuilder);
    contentStringList = Arrays.asList(allBuilder.toString().split("😀"));
} catch (Exception e) {
    // Best-effort parsing: keep going with an empty content list, but pass the
    // exception itself so the stack trace is logged, not only the message.
    log.error("parsing pdf error :{}", e.getMessage(), e);
}
}else {
filePath=path;
File file = new File(filePath);
html = DocUtil.convertDocStream2Html(new FileInputStream(file));
String htmlWithTable = html.replace("</p>", "######</p>");
htmlWithTable = htmlWithTable.replace("</title>", "######</title>");
htmlWithTable = htmlWithTable.replace("</h1>", "######</h1>");
contentStringList = Arrays.asList(htmlWithTable.split("######"));
}
} catch (Exception e) {
e.printStackTrace();
}
......@@ -221,7 +287,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
List<String> halfPermitList = entries.stream().filter(item -> !item.getValue()).map(Map.Entry::getKey).collect(Collectors.toList());
//know permit
log.info("halfPermitList=={}", halfPermitList.toString());
if(!halfPermitList.isEmpty()){
if (!halfPermitList.isEmpty()) {
knowPermitList = authorizedUserService.getUserPermissionKnowsByIds(halfPermitList, userInfo.getId());
}
......@@ -486,7 +552,7 @@ class KnowledgeServiceImpl implements IKnowledgeService {
MultipartFile multipartFile = fileMap.get(new ArrayList<String>(fileMap.keySet()).get(0));
int index = multipartFile.getOriginalFilename().lastIndexOf(".");
String fileSuffix = multipartFile.getOriginalFilename().substring(index + 1);
if ("doc".equals(fileSuffix) || "docx".equals(fileSuffix) || "xls".equals(fileSuffix) || "xlsx".equals(fileSuffix)) {
if ("xls".equals(fileSuffix) || "xlsx".equals(fileSuffix)) {
asyncService.doimport(request, fileSuffix, userId);
return Result.OK("已进行处理");
} else {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论