调通了pdf文件上传
This commit is contained in:
parent
5b8dfe9577
commit
f9a9fee343
|
|
@ -13,6 +13,7 @@ public class PdfParse extends FileParse {
|
|||
String res = "";
|
||||
String fileTmpPath = createFileTmpPath(fileType);
|
||||
try {
|
||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||
res = OfficeFileUtil.parsePdfAllText(fileTmpPath);
|
||||
} catch (Exception e) {
|
||||
log.error("解析pdf文件失败{}", e.getMessage(), e);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package com.electromagnetic.industry.software.common.parse;
|
||||
|
||||
import cn.hutool.core.io.FileUtil;
|
||||
import com.electromagnetic.industry.software.common.util.EleCommonUtil;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
|
@ -16,15 +17,7 @@ public class TextParse extends FileParse {
|
|||
try {
|
||||
FileUtil.writeFromStream(stream, fileTmpPath);
|
||||
res = FileUtil.readString(fileTmpPath, Charset.defaultCharset());
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
for (char c : res.toCharArray()) {
|
||||
if (c <= 176 && c >= 32) {
|
||||
stringBuilder.append(c);
|
||||
} else {
|
||||
stringBuilder.append(" ");
|
||||
}
|
||||
}
|
||||
res = stringBuilder.toString();
|
||||
res = EleCommonUtil.formateString(res);
|
||||
} catch (Exception e) {
|
||||
log.info("解析文件 {} 错误, 原因 {}", fileType, e.getMessage(), e);
|
||||
} finally {
|
||||
|
|
@ -32,4 +25,7 @@ public class TextParse extends FileParse {
|
|||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ public final class EleCommonUtil {
|
|||
PARSE_MAP.put("ppt", new PptParse());
|
||||
PARSE_MAP.put("pptx", new PptParse());
|
||||
PARSE_MAP.put("text", new TextParse());
|
||||
PARSE_MAP.put("pdf", new PdfParse());
|
||||
}
|
||||
|
||||
// 正则表达式模式,匹配中文字符、下划线、连字符、加号、数字和英文字符
|
||||
|
|
@ -96,4 +97,19 @@ public final class EleCommonUtil {
|
|||
return fileParse.parseAllText(inputStream, fileType);
|
||||
}
|
||||
|
||||
private static boolean between(int start, int end, char value) {
|
||||
return value >= start && value <= end;
|
||||
}
|
||||
|
||||
public static String formateString(String content) {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
for (char c : content.toCharArray()) {
|
||||
if (between(0, 31, c) || between(127, 127, c)) {
|
||||
stringBuilder.append(" ");
|
||||
} else {
|
||||
stringBuilder.append(c);
|
||||
}
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class OfficeFileUtil {
|
|||
String text = extractor.getText();
|
||||
document.close();
|
||||
fis.close();
|
||||
return text;
|
||||
return EleCommonUtil.formateString(text);
|
||||
}
|
||||
|
||||
public static String parsePdfAllText(String path) throws IOException {
|
||||
|
|
@ -97,7 +97,7 @@ public class OfficeFileUtil {
|
|||
// 提取文本
|
||||
String text = pdfStripper.getText(document);
|
||||
document.close();
|
||||
return text;
|
||||
return EleCommonUtil.formateString(text);
|
||||
}
|
||||
|
||||
public static String parseDocAllText(String wordPath) throws IOException {
|
||||
|
|
@ -108,7 +108,7 @@ public class OfficeFileUtil {
|
|||
String text = wordExtractor.getText();
|
||||
document.close();
|
||||
fis.close();
|
||||
return text;
|
||||
return EleCommonUtil.formateString(text);
|
||||
}
|
||||
|
||||
public static String parseXlsxAllText(String path) throws IOException {
|
||||
|
|
@ -129,7 +129,7 @@ public class OfficeFileUtil {
|
|||
}
|
||||
}
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
return EleCommonUtil.formateString(stringBuilder.toString());
|
||||
}
|
||||
|
||||
public static String parseXlsAllText(String path) throws IOException {
|
||||
|
|
@ -150,7 +150,7 @@ public class OfficeFileUtil {
|
|||
}
|
||||
}
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
return EleCommonUtil.formateString(stringBuilder.toString());
|
||||
}
|
||||
|
||||
private static String getCellValue(Cell cell) {
|
||||
|
|
@ -187,6 +187,7 @@ public class OfficeFileUtil {
|
|||
InputStream input = Files.newInputStream(Paths.get(path));
|
||||
String pptText = isPptx ? new SlideShowExtractor(new HSLFSlideShow(input)).getText() : new SlideShowExtractor(new XMLSlideShow(input)).getText();
|
||||
stringBuilder.append(pptText);
|
||||
return stringBuilder.toString();
|
||||
return EleCommonUtil.formateString(stringBuilder.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue