提交 33915aac 作者: ZhangJingKun

支持 PPT 格式（.ppt 和 .pptx）

上级 7048d92a
...@@ -27,7 +27,15 @@ import org.apache.pdfbox.pdmodel.PDDocument; ...@@ -27,7 +27,15 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper; import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition; import org.apache.pdfbox.text.TextPosition;
import org.apache.poi.hslf.usermodel.HSLFShape;
import org.apache.poi.hslf.usermodel.HSLFSlide;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFTextShape;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
...@@ -175,19 +183,69 @@ class KnowledgeServiceImpl implements IKnowledgeService { ...@@ -175,19 +183,69 @@ class KnowledgeServiceImpl implements IKnowledgeService {
pdfTextStripper.setStartPage(0); pdfTextStripper.setStartPage(0);
pdfTextStripper.setEndPage(document.getNumberOfPages()); pdfTextStripper.setEndPage(document.getNumberOfPages());
String text = pdfTextStripper.getText(document); String text = pdfTextStripper.getText(document);
knowledge.setContentAll(text); //knowledge.setContentAll(text);
log.info("allBuilder:{}", allBuilder); //log.info("allBuilder:{}", allBuilder.toString().substring(1000));
contentStringList = Arrays.asList(allBuilder.toString().split("😀")); contentStringList = Arrays.asList(allBuilder.toString().split("😀"));
} catch (Exception e) { } catch (Exception e) {
log.error("parsing pdf error :{}", e.getMessage()); log.error("parsing pdf error :{}", e.getMessage());
} }
} else if(".pptx".equals(knowFile.getFileType())){
knowledge.setImportData(2);
filePath = path;
log.info("pptx dealing filePath{}", filePath);
// 加载PPT文件
XMLSlideShow ppt = new XMLSlideShow(new FileInputStream(filePath));
// 遍历幻灯片
for (XSLFSlide slide : ppt.getSlides()) {
StringBuffer sb = new StringBuffer();
// 遍历形状
List<XSLFShape> shapes = slide.getShapes();
for (XSLFShape shape : shapes) {
// 检查形状是否包含文本
if (shape instanceof XSLFTextShape) {
XSLFTextShape textShape = (XSLFTextShape) shape;
String text = textShape.getText();
if (text != null) {
sb.append(text);
log.info("text :{}", text);
}
}
}
contentStringList.add(sb.toString());
}
ppt.close(); // 关闭PPT
} else if(".ppt".equals(knowFile.getFileType())){
knowledge.setImportData(2);
filePath = path;
log.info("pptx dealing filePath{}", filePath);
// 加载PPT文件
HSLFSlideShow ppt = new HSLFSlideShow(new FileInputStream(filePath));
// 遍历幻灯片
for (HSLFSlide slide : ppt.getSlides()) {
StringBuffer sb = new StringBuffer();
// 遍历形状
List<HSLFShape> shapes = slide.getShapes();
for (HSLFShape shape : shapes) {
// 检查形状是否包含文本
if (shape instanceof HSLFTextShape) {
HSLFTextShape textShape = (HSLFTextShape) shape;
String text = textShape.getText();
if (text != null) {
sb.append(text);
log.info("text :{}", text);
}
}
}
contentStringList.add(sb.toString());
}
ppt.close(); // 关闭PPT
} else { } else {
filePath = path; filePath = path;
File file = new File(filePath); File file = new File(filePath);
log.info("word dealing filePath{}", filePath); log.info("word dealing filePath{}", filePath);
html = DocUtil.convertDocStream2Html(new FileInputStream(file)); html = DocUtil.convertDocStream2Html(new FileInputStream(file));
log.info("html :{}", html); //log.info("html :{}", html);
String htmlWithTable = html.replace("</p>", "######</p>"); String htmlWithTable = html.replace("</p>", "######</p>");
htmlWithTable = htmlWithTable.replace("</title>", "######</title>"); htmlWithTable = htmlWithTable.replace("</title>", "######</title>");
htmlWithTable = htmlWithTable.replace("</h1>", "######</h1>"); htmlWithTable = htmlWithTable.replace("</h1>", "######</h1>");
...@@ -264,7 +322,9 @@ class KnowledgeServiceImpl implements IKnowledgeService { ...@@ -264,7 +322,9 @@ class KnowledgeServiceImpl implements IKnowledgeService {
knowledge.setKnowledgeProjectId(knowledgeVO.getKbKnowledgeId()); knowledge.setKnowledgeProjectId(knowledgeVO.getKbKnowledgeId());
} }
deleteForPython(knowledge.getId(), knowledge.getKnowledgeProjectId()); deleteForPython(knowledge.getId(), knowledge.getKnowledgeProjectId());
log.info("send message:{}", knowledgeMessage); //log.info("send message:{}", knowledgeMessage);
String msg = JSON.toJSONString(knowledgeMessage);
log.info(msg);
produceInfo.sendKnowledgeContents(knowledgeMessage); produceInfo.sendKnowledgeContents(knowledgeMessage);
} }
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论