增加ppt内容的解析

This commit is contained in:
chenxudong 2025-01-21 10:39:40 +08:00
parent 9603bff042
commit 192672ab2c
2 changed files with 40 additions and 1 deletions

View File

@ -1,11 +1,27 @@
package com.electromagnetic.industry.software.common.parse;
import cn.hutool.core.io.FileUtil;
import com.electromagnetic.industry.software.common.util.OfficeFileUtil;
import lombok.extern.slf4j.Slf4j;
import java.io.InputStream;
@Slf4j
public class PptParse extends FileParse {
@Override
public String parseContent(InputStream stream, String fileType) {
return "";
String fileTmpPath = createFileTmpPath(fileType);
String res = "";
try {
FileUtil.writeFromStream(stream, fileTmpPath);
if (fileType.endsWith("pptx")) {
return OfficeFileUtil.parsePptx(fileTmpPath);
}
return OfficeFileUtil.parsePpt(fileTmpPath);
} catch (Exception e) {
log.error("解析{}格式的ppt错误具体为{}",fileType, e.getMessage(), e);
}
return res;
}
}

View File

@ -8,9 +8,13 @@ import com.documents4j.job.LocalConverter;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
@ -149,4 +153,23 @@ public class OfficeFileUtil {
}
return stringBuilder.toString();
}
/**
 * Extracts all text from a legacy binary PowerPoint ({@code .ppt}) file.
 *
 * <p>The file stream and the slide show are closed before returning, even
 * when extraction fails, so no file handle is left open.
 *
 * @param path location of the {@code .ppt} file on disk
 * @return the text produced by {@link SlideShowExtractor#getText()}
 * @throws IOException if the file cannot be opened or read as a PPT document
 */
public static String parsePpt(String path) throws IOException {
    try (InputStream input = Files.newInputStream(Paths.get(path));
         HSLFSlideShow slideShow = new HSLFSlideShow(input)) {
        return new SlideShowExtractor<>(slideShow).getText();
    }
}
/**
 * Extracts all text from an OOXML PowerPoint ({@code .pptx}) file.
 *
 * <p>The file stream and the slide show are closed before returning, even
 * when extraction fails, so no file handle is left open.
 *
 * @param filePath location of the {@code .pptx} file on disk
 * @return the text produced by {@link SlideShowExtractor#getText()}
 * @throws IOException if the file cannot be opened or read as a PPTX document
 */
public static String parsePptx(String filePath) throws IOException {
    try (InputStream input = Files.newInputStream(Paths.get(filePath));
         XMLSlideShow slideShow = new XMLSlideShow(input)) {
        return new SlideShowExtractor<>(slideShow).getText();
    }
}
}