reformat code
This commit is contained in:
parent
192672ab2c
commit
9a2a99e073
|
|
@ -16,9 +16,9 @@ public class ExcelParse extends FileParse {
|
||||||
try {
|
try {
|
||||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||||
if (fileType.endsWith("xlsx")) {
|
if (fileType.endsWith("xlsx")) {
|
||||||
return OfficeFileUtil.parseXlsx(fileTmpPath);
|
return OfficeFileUtil.parseXlsxAllText(fileTmpPath);
|
||||||
}
|
}
|
||||||
return OfficeFileUtil.parseXls(fileTmpPath);
|
return OfficeFileUtil.parseXlsAllText(fileTmpPath);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("解析{}格式的excel错误,具体为{}",fileType, e.getMessage(), e);
|
log.error("解析{}格式的excel错误,具体为{}",fileType, e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ public class PdfParse extends FileParse {
|
||||||
String res = "";
|
String res = "";
|
||||||
String fileTmpPath = createFileTmpPath(fileType);
|
String fileTmpPath = createFileTmpPath(fileType);
|
||||||
try {
|
try {
|
||||||
res = OfficeFileUtil.parsePdf(fileTmpPath);
|
res = OfficeFileUtil.parsePdfAllText(fileTmpPath);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("解析pdf文件失败{}", e.getMessage(), e);
|
log.error("解析pdf文件失败{}", e.getMessage(), e);
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,9 @@ public class PptParse extends FileParse {
|
||||||
try {
|
try {
|
||||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||||
if (fileType.endsWith("pptx")) {
|
if (fileType.endsWith("pptx")) {
|
||||||
return OfficeFileUtil.parsePptx(fileTmpPath);
|
return OfficeFileUtil.parsePptxAllText(fileTmpPath);
|
||||||
}
|
}
|
||||||
return OfficeFileUtil.parsePpt(fileTmpPath);
|
return OfficeFileUtil.parsePptAllText(fileTmpPath);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("解析{}格式的ppt错误,具体为{}",fileType, e.getMessage(), e);
|
log.error("解析{}格式的ppt错误,具体为{}",fileType, e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,9 +16,9 @@ public class WordParse extends FileParse {
|
||||||
try {
|
try {
|
||||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||||
if (fileType.endsWith("docx")) {
|
if (fileType.endsWith("docx")) {
|
||||||
res = OfficeFileUtil.parseDocx(fileTmpPath);
|
res = OfficeFileUtil.parseDocxAllText(fileTmpPath);
|
||||||
} else {
|
} else {
|
||||||
res = OfficeFileUtil.parseDoc(fileTmpPath);
|
res = OfficeFileUtil.parseDocAllText(fileTmpPath);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.info("解析{}失败,原因{}", fileType, e.getMessage(), e);
|
log.info("解析{}失败,原因{}", fileType, e.getMessage(), e);
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,6 @@ public final class EleCommonUtil {
|
||||||
public static String parse(InputStream inputStream, String fileType) {
|
public static String parse(InputStream inputStream, String fileType) {
|
||||||
|
|
||||||
FileParse fileParse = PARSE_MAP.getOrDefault(fileType, new TextParse());
|
FileParse fileParse = PARSE_MAP.getOrDefault(fileType, new TextParse());
|
||||||
|
|
||||||
return fileParse.parseContent(inputStream, fileType);
|
return fileParse.parseContent(inputStream, fileType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ import org.apache.poi.hslf.usermodel.HSLFSlideShow;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.extractor.WordExtractor;
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.sl.usermodel.SlideShow;
|
|
||||||
import org.apache.poi.ss.usermodel.*;
|
import org.apache.poi.ss.usermodel.*;
|
||||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFRow;
|
import org.apache.poi.xssf.usermodel.XSSFRow;
|
||||||
|
|
@ -51,11 +50,9 @@ public class OfficeFileUtil {
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseDocx(String wordPath) throws IOException {
|
public static String parseDocxAllText(String wordPath) throws IOException {
|
||||||
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
||||||
XWPFDocument document = new XWPFDocument();
|
XWPFDocument document = new XWPFDocument();
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
|
@ -68,7 +65,7 @@ public class OfficeFileUtil {
|
||||||
return stringBuilder.toString();
|
return stringBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parsePdf(String path) throws IOException {
|
public static String parsePdfAllText(String path) throws IOException {
|
||||||
// 加载PDF文档
|
// 加载PDF文档
|
||||||
PDDocument document = PDDocument.load(new File(path));
|
PDDocument document = PDDocument.load(new File(path));
|
||||||
// 创建PDFTextStripper对象来解析文本
|
// 创建PDFTextStripper对象来解析文本
|
||||||
|
|
@ -79,7 +76,7 @@ public class OfficeFileUtil {
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseDoc(String path) throws IOException {
|
public static String parseDocAllText(String path) throws IOException {
|
||||||
FileInputStream fis = new FileInputStream(path);
|
FileInputStream fis = new FileInputStream(path);
|
||||||
HWPFDocument document = new HWPFDocument(fis);
|
HWPFDocument document = new HWPFDocument(fis);
|
||||||
WordExtractor wordExtractor = new WordExtractor(document);
|
WordExtractor wordExtractor = new WordExtractor(document);
|
||||||
|
|
@ -92,7 +89,7 @@ public class OfficeFileUtil {
|
||||||
return stringBuilder.toString();
|
return stringBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseXlsx(String path) throws IOException {
|
public static String parseXlsxAllText(String path) throws IOException {
|
||||||
XSSFWorkbook excel = new XSSFWorkbook(FileUtil.getInputStream(path));
|
XSSFWorkbook excel = new XSSFWorkbook(FileUtil.getInputStream(path));
|
||||||
int activeSheetIndex = excel.getNumberOfSheets();
|
int activeSheetIndex = excel.getNumberOfSheets();
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
|
@ -112,6 +109,26 @@ public class OfficeFileUtil {
|
||||||
return stringBuilder.toString();
|
return stringBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String parseXlsAllText(String path) throws IOException {
|
||||||
|
Workbook sheets = WorkbookFactory.create(FileUtil.getInputStream(path));
|
||||||
|
int numberOfSheets = sheets.getNumberOfSheets();
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
for (int i = 0; i < numberOfSheets; i++) {
|
||||||
|
Sheet sheet = sheets.getSheetAt(i);
|
||||||
|
int allRows = sheet.getPhysicalNumberOfRows();
|
||||||
|
for (int j = 0; j < allRows; j++) {
|
||||||
|
Row row = sheet.getRow(j);
|
||||||
|
if (Objects.isNull(row)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (Cell cell : row) {
|
||||||
|
stringBuilder.append(getCellValue(cell)).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return stringBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
private static String getCellValue(Cell cell) {
|
private static String getCellValue(Cell cell) {
|
||||||
if (Objects.isNull(cell)) {
|
if (Objects.isNull(cell)) {
|
||||||
return "";
|
return "";
|
||||||
|
|
@ -133,42 +150,18 @@ public class OfficeFileUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseXls(String path) throws IOException {
|
public static String parsePptAllText(String filePath) throws IOException {
|
||||||
InputStream inputStream = FileUtil.getInputStream(path);
|
return handlePptFile(filePath, false);
|
||||||
Workbook sheets = WorkbookFactory.create(inputStream);
|
|
||||||
int numberOfSheets = sheets.getNumberOfSheets();
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
|
||||||
for (int i = 0; i < numberOfSheets; i++) {
|
|
||||||
Sheet sheet = sheets.getSheetAt(i);
|
|
||||||
int allRows = sheet.getPhysicalNumberOfRows();
|
|
||||||
for (int j = 0; j < allRows; j++) {
|
|
||||||
Row row = sheet.getRow(j);
|
|
||||||
if (Objects.isNull(row)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (Cell cell : row) {
|
|
||||||
stringBuilder.append(getCellValue(cell)).append("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return stringBuilder.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parsePpt(String path) throws IOException {
|
public static String parsePptxAllText(String filePath) throws IOException {
|
||||||
|
return handlePptFile(filePath, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String handlePptFile(String path, boolean isPptx) throws IOException {
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
InputStream input = Files.newInputStream(Paths.get(path));
|
InputStream input = Files.newInputStream(Paths.get(path));
|
||||||
HSLFSlideShow hss = new HSLFSlideShow(input);
|
// XMLSlideShow (XSLF) reads OOXML .pptx files; HSLFSlideShow reads the legacy
// binary .ppt format. The branches were previously swapped.
String pptText = isPptx
        ? new SlideShowExtractor(new XMLSlideShow(input)).getText()
        : new SlideShowExtractor(new HSLFSlideShow(input)).getText();
|
||||||
String pptText = new SlideShowExtractor(hss).getText();
|
|
||||||
stringBuilder.append(pptText);
|
|
||||||
return stringBuilder.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static String parsePptx(String filePath) throws IOException {
|
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
|
||||||
InputStream input = Files.newInputStream(Paths.get(filePath));
|
|
||||||
XMLSlideShow xss = new XMLSlideShow(input);
|
|
||||||
String pptText = new SlideShowExtractor(xss).getText();
|
|
||||||
stringBuilder.append(pptText);
|
stringBuilder.append(pptText);
|
||||||
return stringBuilder.toString();
|
return stringBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue