reformat code

This commit is contained in:
chenxudong 2025-01-21 12:11:50 +08:00
parent 192672ab2c
commit 9a2a99e073
6 changed files with 39 additions and 47 deletions

View File

@ -16,9 +16,9 @@ public class ExcelParse extends FileParse {
try {
FileUtil.writeFromStream(stream, fileTmpPath);
if (fileType.endsWith("xlsx")) {
return OfficeFileUtil.parseXlsx(fileTmpPath);
return OfficeFileUtil.parseXlsxAllText(fileTmpPath);
}
return OfficeFileUtil.parseXls(fileTmpPath);
return OfficeFileUtil.parseXlsAllText(fileTmpPath);
} catch (Exception e) {
log.error("解析{}格式的excel错误具体为{}",fileType, e.getMessage(), e);
}

View File

@ -13,7 +13,7 @@ public class PdfParse extends FileParse {
String res = "";
String fileTmpPath = createFileTmpPath(fileType);
try {
res = OfficeFileUtil.parsePdf(fileTmpPath);
res = OfficeFileUtil.parsePdfAllText(fileTmpPath);
} catch (Exception e) {
log.error("解析pdf文件失败{}", e.getMessage(), e);
} finally {

View File

@ -16,9 +16,9 @@ public class PptParse extends FileParse {
try {
FileUtil.writeFromStream(stream, fileTmpPath);
if (fileType.endsWith("pptx")) {
return OfficeFileUtil.parsePptx(fileTmpPath);
return OfficeFileUtil.parsePptxAllText(fileTmpPath);
}
return OfficeFileUtil.parsePpt(fileTmpPath);
return OfficeFileUtil.parsePptAllText(fileTmpPath);
} catch (Exception e) {
log.error("解析{}格式的ppt错误具体为{}",fileType, e.getMessage(), e);
}

View File

@ -16,9 +16,9 @@ public class WordParse extends FileParse {
try {
FileUtil.writeFromStream(stream, fileTmpPath);
if (fileType.endsWith("docx")) {
res = OfficeFileUtil.parseDocx(fileTmpPath);
res = OfficeFileUtil.parseDocxAllText(fileTmpPath);
} else {
res = OfficeFileUtil.parseDoc(fileTmpPath);
res = OfficeFileUtil.parseDocAllText(fileTmpPath);
}
} catch (Exception e) {
log.info("解析{}失败,原因{}", fileType, e.getMessage(), e);

View File

@ -81,7 +81,6 @@ public final class EleCommonUtil {
/**
 * Parses the content of a stream using the parser registered for the given file type.
 *
 * <p>The parser is looked up in {@code PARSE_MAP}; when no entry exists for
 * {@code fileType}, a {@code TextParse} instance is used instead.
 *
 * @param inputStream stream handed to the selected parser
 * @param fileType    key used for the parser lookup, also forwarded to the parser
 * @return whatever the selected parser's {@code parseContent} returns
 */
public static String parse(InputStream inputStream, String fileType) {
    return PARSE_MAP
            .getOrDefault(fileType, new TextParse())
            .parseContent(inputStream, fileType);
}

View File

@ -12,7 +12,6 @@ import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xssf.usermodel.XSSFRow;
@ -51,11 +50,9 @@ public class OfficeFileUtil {
} else {
}
}
public static String parseDocx(String wordPath) throws IOException {
public static String parseDocxAllText(String wordPath) throws IOException {
InputStream fis = Files.newInputStream(Paths.get(wordPath));
XWPFDocument document = new XWPFDocument();
StringBuilder stringBuilder = new StringBuilder();
@ -68,7 +65,7 @@ public class OfficeFileUtil {
return stringBuilder.toString();
}
public static String parsePdf(String path) throws IOException {
public static String parsePdfAllText(String path) throws IOException {
// 加载PDF文档
PDDocument document = PDDocument.load(new File(path));
// 创建PDFTextStripper对象来解析文本
@ -79,7 +76,7 @@ public class OfficeFileUtil {
return text;
}
public static String parseDoc(String path) throws IOException {
public static String parseDocAllText(String path) throws IOException {
FileInputStream fis = new FileInputStream(path);
HWPFDocument document = new HWPFDocument(fis);
WordExtractor wordExtractor = new WordExtractor(document);
@ -92,7 +89,7 @@ public class OfficeFileUtil {
return stringBuilder.toString();
}
public static String parseXlsx(String path) throws IOException {
public static String parseXlsxAllText(String path) throws IOException {
XSSFWorkbook excel = new XSSFWorkbook(FileUtil.getInputStream(path));
int activeSheetIndex = excel.getNumberOfSheets();
StringBuilder stringBuilder = new StringBuilder();
@ -112,6 +109,26 @@ public class OfficeFileUtil {
return stringBuilder.toString();
}
/**
 * Collects the text of every cell in every sheet of the workbook at {@code path}.
 *
 * <p>Each cell is converted with {@code getCellValue} and followed by a newline.
 * Rows are visited through the sheet's own iterator rather than by counting
 * {@code getPhysicalNumberOfRows()} and indexing from zero: the physical count is
 * the number of defined rows, not the highest row index, so index-based looping
 * silently drops trailing rows whenever the sheet contains blank-row gaps.
 *
 * <p>The input stream and the workbook are both closed before returning, including
 * when parsing fails.
 *
 * @param path file system path of the workbook to read
 * @return the concatenated cell values, one per line; empty when there are no cells
 * @throws IOException if the file cannot be opened or read
 */
public static String parseXlsAllText(String path) throws IOException {
    StringBuilder text = new StringBuilder();
    try (InputStream input = FileUtil.getInputStream(path);
         Workbook workbook = WorkbookFactory.create(input)) {
        int sheetCount = workbook.getNumberOfSheets();
        for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
            Sheet sheet = workbook.getSheetAt(sheetIndex);
            // Iterating the sheet yields only rows that actually exist, so no null check is needed.
            for (Row row : sheet) {
                for (Cell cell : row) {
                    text.append(getCellValue(cell)).append("\n");
                }
            }
        }
    }
    return text.toString();
}
private static String getCellValue(Cell cell) {
if (Objects.isNull(cell)) {
return "";
@ -133,42 +150,18 @@ public class OfficeFileUtil {
}
}
public static String parseXls(String path) throws IOException {
InputStream inputStream = FileUtil.getInputStream(path);
Workbook sheets = WorkbookFactory.create(inputStream);
int numberOfSheets = sheets.getNumberOfSheets();
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < numberOfSheets; i++) {
Sheet sheet = sheets.getSheetAt(i);
int allRows = sheet.getPhysicalNumberOfRows();
for (int j = 0; j < allRows; j++) {
Row row = sheet.getRow(j);
if (Objects.isNull(row)) {
continue;
}
for (Cell cell : row) {
stringBuilder.append(getCellValue(cell)).append("\n");
}
}
}
return stringBuilder.toString();
/**
 * Extracts the text of a PowerPoint file by delegating to
 * {@code handlePptFile} with {@code isPptx} set to {@code false}.
 *
 * @param filePath path of the presentation file to read
 * @return the text returned by {@code handlePptFile}
 * @throws IOException propagated from {@code handlePptFile}
 */
public static String parsePptAllText(String filePath) throws IOException {
return handlePptFile(filePath, false);
}
public static String parsePpt(String path) throws IOException {
/**
 * Extracts the text of a PowerPoint file by delegating to
 * {@code handlePptFile} with {@code isPptx} set to {@code true}.
 *
 * @param filePath path of the presentation file to read
 * @return the text returned by {@code handlePptFile}
 * @throws IOException propagated from {@code handlePptFile}
 */
public static String parsePptxAllText(String filePath) throws IOException {
return handlePptFile(filePath, true);
}
/**
 * Extracts all text from a PowerPoint presentation.
 *
 * <p>The format flag selects the slide show implementation: {@code XMLSlideShow}
 * for OOXML {@code .pptx} files and {@code HSLFSlideShow} for binary {@code .ppt}
 * files. The previous single-expression version had the two branches swapped, so
 * each format was opened with the reader for the other one.
 *
 * <p>The input stream and the slide show are closed before returning, including
 * when extraction fails.
 *
 * @param path   file system path of the presentation
 * @param isPptx {@code true} to read the file as {@code .pptx}, {@code false} as {@code .ppt}
 * @return the text produced by {@code SlideShowExtractor#getText()}
 * @throws IOException if the file cannot be opened or read
 */
private static String handlePptFile(String path, boolean isPptx) throws IOException {
    try (InputStream input = Files.newInputStream(Paths.get(path))) {
        if (isPptx) {
            try (XMLSlideShow slideShow = new XMLSlideShow(input)) {
                return new SlideShowExtractor<>(slideShow).getText();
            }
        }
        try (HSLFSlideShow slideShow = new HSLFSlideShow(input)) {
            return new SlideShowExtractor<>(slideShow).getText();
        }
    }
}