reformat code
This commit is contained in:
parent
192672ab2c
commit
9a2a99e073
|
|
@ -16,9 +16,9 @@ public class ExcelParse extends FileParse {
|
|||
try {
|
||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||
if (fileType.endsWith("xlsx")) {
|
||||
return OfficeFileUtil.parseXlsx(fileTmpPath);
|
||||
return OfficeFileUtil.parseXlsxAllText(fileTmpPath);
|
||||
}
|
||||
return OfficeFileUtil.parseXls(fileTmpPath);
|
||||
return OfficeFileUtil.parseXlsAllText(fileTmpPath);
|
||||
} catch (Exception e) {
|
||||
log.error("解析{}格式的excel错误,具体为{}",fileType, e.getMessage(), e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ public class PdfParse extends FileParse {
|
|||
String res = "";
|
||||
String fileTmpPath = createFileTmpPath(fileType);
|
||||
try {
|
||||
res = OfficeFileUtil.parsePdf(fileTmpPath);
|
||||
res = OfficeFileUtil.parsePdfAllText(fileTmpPath);
|
||||
} catch (Exception e) {
|
||||
log.error("解析pdf文件失败{}", e.getMessage(), e);
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -16,9 +16,9 @@ public class PptParse extends FileParse {
|
|||
try {
|
||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||
if (fileType.endsWith("pptx")) {
|
||||
return OfficeFileUtil.parsePptx(fileTmpPath);
|
||||
return OfficeFileUtil.parsePptxAllText(fileTmpPath);
|
||||
}
|
||||
return OfficeFileUtil.parsePpt(fileTmpPath);
|
||||
return OfficeFileUtil.parsePptAllText(fileTmpPath);
|
||||
} catch (Exception e) {
|
||||
log.error("解析{}格式的ppt错误,具体为{}",fileType, e.getMessage(), e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,9 +16,9 @@ public class WordParse extends FileParse {
|
|||
try {
|
||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||
if (fileType.endsWith("docx")) {
|
||||
res = OfficeFileUtil.parseDocx(fileTmpPath);
|
||||
res = OfficeFileUtil.parseDocxAllText(fileTmpPath);
|
||||
} else {
|
||||
res = OfficeFileUtil.parseDoc(fileTmpPath);
|
||||
res = OfficeFileUtil.parseDocAllText(fileTmpPath);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.info("解析{}失败,原因{}", fileType, e.getMessage(), e);
|
||||
|
|
|
|||
|
|
@ -81,7 +81,6 @@ public final class EleCommonUtil {
|
|||
public static String parse(InputStream inputStream, String fileType) {
|
||||
|
||||
FileParse fileParse = PARSE_MAP.getOrDefault(fileType, new TextParse());
|
||||
|
||||
return fileParse.parseContent(inputStream, fileType);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ import org.apache.poi.hslf.usermodel.HSLFSlideShow;
|
|||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.SlideShow;
|
||||
import org.apache.poi.ss.usermodel.*;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRow;
|
||||
|
|
@ -51,11 +50,9 @@ public class OfficeFileUtil {
|
|||
} else {
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
public static String parseDocx(String wordPath) throws IOException {
|
||||
public static String parseDocxAllText(String wordPath) throws IOException {
|
||||
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
||||
XWPFDocument document = new XWPFDocument();
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
|
|
@ -68,7 +65,7 @@ public class OfficeFileUtil {
|
|||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
public static String parsePdf(String path) throws IOException {
|
||||
public static String parsePdfAllText(String path) throws IOException {
|
||||
// 加载PDF文档
|
||||
PDDocument document = PDDocument.load(new File(path));
|
||||
// 创建PDFTextStripper对象来解析文本
|
||||
|
|
@ -79,7 +76,7 @@ public class OfficeFileUtil {
|
|||
return text;
|
||||
}
|
||||
|
||||
public static String parseDoc(String path) throws IOException {
|
||||
public static String parseDocAllText(String path) throws IOException {
|
||||
FileInputStream fis = new FileInputStream(path);
|
||||
HWPFDocument document = new HWPFDocument(fis);
|
||||
WordExtractor wordExtractor = new WordExtractor(document);
|
||||
|
|
@ -92,7 +89,7 @@ public class OfficeFileUtil {
|
|||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
public static String parseXlsx(String path) throws IOException {
|
||||
public static String parseXlsxAllText(String path) throws IOException {
|
||||
XSSFWorkbook excel = new XSSFWorkbook(FileUtil.getInputStream(path));
|
||||
int activeSheetIndex = excel.getNumberOfSheets();
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
|
|
@ -112,6 +109,26 @@ public class OfficeFileUtil {
|
|||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
public static String parseXlsAllText(String path) throws IOException {
|
||||
Workbook sheets = WorkbookFactory.create(FileUtil.getInputStream(path));
|
||||
int numberOfSheets = sheets.getNumberOfSheets();
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
for (int i = 0; i < numberOfSheets; i++) {
|
||||
Sheet sheet = sheets.getSheetAt(i);
|
||||
int allRows = sheet.getPhysicalNumberOfRows();
|
||||
for (int j = 0; j < allRows; j++) {
|
||||
Row row = sheet.getRow(j);
|
||||
if (Objects.isNull(row)) {
|
||||
continue;
|
||||
}
|
||||
for (Cell cell : row) {
|
||||
stringBuilder.append(getCellValue(cell)).append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
private static String getCellValue(Cell cell) {
|
||||
if (Objects.isNull(cell)) {
|
||||
return "";
|
||||
|
|
@ -133,42 +150,18 @@ public class OfficeFileUtil {
|
|||
}
|
||||
}
|
||||
|
||||
public static String parseXls(String path) throws IOException {
|
||||
InputStream inputStream = FileUtil.getInputStream(path);
|
||||
Workbook sheets = WorkbookFactory.create(inputStream);
|
||||
int numberOfSheets = sheets.getNumberOfSheets();
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
for (int i = 0; i < numberOfSheets; i++) {
|
||||
Sheet sheet = sheets.getSheetAt(i);
|
||||
int allRows = sheet.getPhysicalNumberOfRows();
|
||||
for (int j = 0; j < allRows; j++) {
|
||||
Row row = sheet.getRow(j);
|
||||
if (Objects.isNull(row)) {
|
||||
continue;
|
||||
}
|
||||
for (Cell cell : row) {
|
||||
stringBuilder.append(getCellValue(cell)).append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
public static String parsePptAllText(String filePath) throws IOException {
|
||||
return handlePptFile(filePath, false);
|
||||
}
|
||||
|
||||
public static String parsePpt(String path) throws IOException {
|
||||
public static String parsePptxAllText(String filePath) throws IOException {
|
||||
return handlePptFile(filePath, true);
|
||||
}
|
||||
|
||||
private static String handlePptFile(String path, boolean isPptx) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
InputStream input = Files.newInputStream(Paths.get(path));
|
||||
HSLFSlideShow hss = new HSLFSlideShow(input);
|
||||
String pptText = new SlideShowExtractor(hss).getText();
|
||||
stringBuilder.append(pptText);
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
|
||||
public static String parsePptx(String filePath) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
InputStream input = Files.newInputStream(Paths.get(filePath));
|
||||
XMLSlideShow xss = new XMLSlideShow(input);
|
||||
String pptText = new SlideShowExtractor(xss).getText();
|
||||
String pptText = isPptx ? new SlideShowExtractor(new HSLFSlideShow(input)).getText() : new SlideShowExtractor(new XMLSlideShow(input)).getText();
|
||||
stringBuilder.append(pptText);
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue