调通解析Word文件
This commit is contained in:
parent
3c8d8b165e
commit
5b8dfe9577
|
|
@ -21,7 +21,7 @@ public class WordParse extends FileParse {
|
||||||
res = OfficeFileUtil.parseDocAllText(fileTmpPath);
|
res = OfficeFileUtil.parseDocAllText(fileTmpPath);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.info("解析{}失败,原因{}", fileType, e.getMessage(), e);
|
log.error("解析{}失败,原因{}", fileType, e.getMessage(), e);
|
||||||
} finally {
|
} finally {
|
||||||
FileUtil.del(fileTmpPath);
|
FileUtil.del(fileTmpPath);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFRow;
|
import org.apache.poi.xssf.usermodel.XSSFRow;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||||
|
|
||||||
|
|
@ -79,15 +80,12 @@ public class OfficeFileUtil {
|
||||||
public static String parseDocxAllText(String wordPath) throws IOException {
|
public static String parseDocxAllText(String wordPath) throws IOException {
|
||||||
log.info("Start parse docx file, path is {}", wordPath);
|
log.info("Start parse docx file, path is {}", wordPath);
|
||||||
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
||||||
XWPFDocument document = new XWPFDocument();
|
XWPFDocument document = new XWPFDocument(fis);
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
XWPFWordExtractor extractor = new XWPFWordExtractor(document);
|
||||||
List<XWPFParagraph> paragraphs = document.getParagraphs();
|
String text = extractor.getText();
|
||||||
for (XWPFParagraph paragraph : paragraphs) {
|
|
||||||
stringBuilder.append(paragraph.getText());
|
|
||||||
}
|
|
||||||
document.close();
|
document.close();
|
||||||
fis.close();
|
fis.close();
|
||||||
return stringBuilder.toString();
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parsePdfAllText(String path) throws IOException {
|
public static String parsePdfAllText(String path) throws IOException {
|
||||||
|
|
@ -102,18 +100,15 @@ public class OfficeFileUtil {
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseDocAllText(String path) throws IOException {
|
public static String parseDocAllText(String wordPath) throws IOException {
|
||||||
log.info("Start parse doc file, path is {}", path);
|
log.info("Start parse doc file, path is {}", wordPath);
|
||||||
FileInputStream fis = new FileInputStream(path);
|
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
||||||
HWPFDocument document = new HWPFDocument(fis);
|
HWPFDocument document = new HWPFDocument(fis);
|
||||||
WordExtractor wordExtractor = new WordExtractor(document);
|
WordExtractor wordExtractor = new WordExtractor(document);
|
||||||
String[] paragraphText = wordExtractor.getParagraphText();
|
String text = wordExtractor.getText();
|
||||||
StringBuilder stringBuilder = new StringBuilder();
|
document.close();
|
||||||
for (String paragraph : paragraphText) {
|
|
||||||
stringBuilder.append(paragraph);
|
|
||||||
}
|
|
||||||
fis.close();
|
fis.close();
|
||||||
return stringBuilder.toString();
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseXlsxAllText(String path) throws IOException {
|
public static String parseXlsxAllText(String path) throws IOException {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue