增加ppt内容的解析
This commit is contained in:
parent
9603bff042
commit
192672ab2c
|
|
@ -1,11 +1,27 @@
|
|||
package com.electromagnetic.industry.software.common.parse;
|
||||
|
||||
import cn.hutool.core.io.FileUtil;
|
||||
import com.electromagnetic.industry.software.common.util.OfficeFileUtil;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
@Slf4j
|
||||
public class PptParse extends FileParse {
|
||||
|
||||
@Override
|
||||
public String parseContent(InputStream stream, String fileType) {
|
||||
return "";
|
||||
String fileTmpPath = createFileTmpPath(fileType);
|
||||
String res = "";
|
||||
try {
|
||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||
if (fileType.endsWith("pptx")) {
|
||||
return OfficeFileUtil.parsePptx(fileTmpPath);
|
||||
}
|
||||
return OfficeFileUtil.parsePpt(fileTmpPath);
|
||||
} catch (Exception e) {
|
||||
log.error("解析{}格式的ppt错误,具体为{}",fileType, e.getMessage(), e);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,9 +8,13 @@ import com.documents4j.job.LocalConverter;
|
|||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.SlideShow;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRow;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
|
|
@ -149,4 +153,23 @@ public class OfficeFileUtil {
|
|||
}
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
public static String parsePpt(String path) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
InputStream input = Files.newInputStream(Paths.get(path));
|
||||
HSLFSlideShow hss = new HSLFSlideShow(input);
|
||||
String pptText = new SlideShowExtractor(hss).getText();
|
||||
stringBuilder.append(pptText);
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
|
||||
public static String parsePptx(String filePath) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
InputStream input = Files.newInputStream(Paths.get(filePath));
|
||||
XMLSlideShow xss = new XMLSlideShow(input);
|
||||
String pptText = new SlideShowExtractor(xss).getText();
|
||||
stringBuilder.append(pptText);
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue