调通了pdf文件上传

This commit is contained in:
chenxudong 2025-02-18 09:42:13 +08:00
parent 5b8dfe9577
commit f9a9fee343
4 changed files with 29 additions and 15 deletions

View File

@ -13,6 +13,7 @@ public class PdfParse extends FileParse {
String res = ""; String res = "";
String fileTmpPath = createFileTmpPath(fileType); String fileTmpPath = createFileTmpPath(fileType);
try { try {
FileUtil.writeFromStream(stream, fileTmpPath);
res = OfficeFileUtil.parsePdfAllText(fileTmpPath); res = OfficeFileUtil.parsePdfAllText(fileTmpPath);
} catch (Exception e) { } catch (Exception e) {
log.error("解析pdf文件失败{}", e.getMessage(), e); log.error("解析pdf文件失败{}", e.getMessage(), e);

View File

@ -1,6 +1,7 @@
package com.electromagnetic.industry.software.common.parse; package com.electromagnetic.industry.software.common.parse;
import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.FileUtil;
import com.electromagnetic.industry.software.common.util.EleCommonUtil;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import java.io.InputStream; import java.io.InputStream;
@ -16,15 +17,7 @@ public class TextParse extends FileParse {
try { try {
FileUtil.writeFromStream(stream, fileTmpPath); FileUtil.writeFromStream(stream, fileTmpPath);
res = FileUtil.readString(fileTmpPath, Charset.defaultCharset()); res = FileUtil.readString(fileTmpPath, Charset.defaultCharset());
StringBuilder stringBuilder = new StringBuilder(); res = EleCommonUtil.formateString(res);
for (char c : res.toCharArray()) {
if (c <= 176 && c >= 32) {
stringBuilder.append(c);
} else {
stringBuilder.append(" ");
}
}
res = stringBuilder.toString();
} catch (Exception e) { } catch (Exception e) {
log.info("解析文件 {} 错误, 原因 {}", fileType, e.getMessage(), e); log.info("解析文件 {} 错误, 原因 {}", fileType, e.getMessage(), e);
} finally { } finally {
@ -32,4 +25,7 @@ public class TextParse extends FileParse {
} }
return res; return res;
} }
} }

View File

@ -29,6 +29,7 @@ public final class EleCommonUtil {
PARSE_MAP.put("ppt", new PptParse()); PARSE_MAP.put("ppt", new PptParse());
PARSE_MAP.put("pptx", new PptParse()); PARSE_MAP.put("pptx", new PptParse());
PARSE_MAP.put("text", new TextParse()); PARSE_MAP.put("text", new TextParse());
PARSE_MAP.put("pdf", new PdfParse());
} }
// 正则表达式模式匹配中文字符下划线连字符加号数字和英文字符 // 正则表达式模式匹配中文字符下划线连字符加号数字和英文字符
@ -96,4 +97,19 @@ public final class EleCommonUtil {
return fileParse.parseAllText(inputStream, fileType); return fileParse.parseAllText(inputStream, fileType);
} }
/**
 * Reports whether {@code value} lies inside the closed range {@code [start, end]}.
 *
 * @param start inclusive lower bound (compared as a code unit value)
 * @param end   inclusive upper bound (compared as a code unit value)
 * @param value character to test
 * @return {@code true} when {@code start <= value <= end}
 */
private static boolean between(int start, int end, char value) {
    return value >= start && value <= end;
}

/**
 * Replaces every ASCII control character in {@code content} with a single space.
 *
 * <p>The characters replaced are the code units 0-31 and 127 (DEL); this includes
 * tab, carriage return and line feed. Every other character, including non-ASCII
 * text, is copied through unchanged, so the result always has the same length as
 * the input.
 *
 * @param content text to sanitize; may be {@code null}
 * @return the sanitized text, or an empty string when {@code content} is
 *         {@code null} or empty
 */
public static String formateString(String content) {
    // Guard against a missing text so callers get an empty result instead of a
    // NullPointerException.
    if (content == null || content.isEmpty()) {
        return "";
    }
    // Output length always equals input length, so size the buffer up front.
    StringBuilder cleaned = new StringBuilder(content.length());
    for (int i = 0; i < content.length(); i++) {
        char c = content.charAt(i);
        boolean isControl = between(0, 31, c) || c == 127;
        cleaned.append(isControl ? ' ' : c);
    }
    return cleaned.toString();
}
} }

View File

@ -85,7 +85,7 @@ public class OfficeFileUtil {
String text = extractor.getText(); String text = extractor.getText();
document.close(); document.close();
fis.close(); fis.close();
return text; return EleCommonUtil.formateString(text);
} }
public static String parsePdfAllText(String path) throws IOException { public static String parsePdfAllText(String path) throws IOException {
@ -97,7 +97,7 @@ public class OfficeFileUtil {
// 提取文本 // 提取文本
String text = pdfStripper.getText(document); String text = pdfStripper.getText(document);
document.close(); document.close();
return text; return EleCommonUtil.formateString(text);
} }
public static String parseDocAllText(String wordPath) throws IOException { public static String parseDocAllText(String wordPath) throws IOException {
@ -108,7 +108,7 @@ public class OfficeFileUtil {
String text = wordExtractor.getText(); String text = wordExtractor.getText();
document.close(); document.close();
fis.close(); fis.close();
return text; return EleCommonUtil.formateString(text);
} }
public static String parseXlsxAllText(String path) throws IOException { public static String parseXlsxAllText(String path) throws IOException {
@ -129,7 +129,7 @@ public class OfficeFileUtil {
} }
} }
} }
return stringBuilder.toString(); return EleCommonUtil.formateString(stringBuilder.toString());
} }
public static String parseXlsAllText(String path) throws IOException { public static String parseXlsAllText(String path) throws IOException {
@ -150,7 +150,7 @@ public class OfficeFileUtil {
} }
} }
} }
return stringBuilder.toString(); return EleCommonUtil.formateString(stringBuilder.toString());
} }
private static String getCellValue(Cell cell) { private static String getCellValue(Cell cell) {
@ -187,6 +187,7 @@ public class OfficeFileUtil {
InputStream input = Files.newInputStream(Paths.get(path)); InputStream input = Files.newInputStream(Paths.get(path));
String pptText = isPptx ? new SlideShowExtractor(new HSLFSlideShow(input)).getText() : new SlideShowExtractor(new XMLSlideShow(input)).getText(); String pptText = isPptx ? new SlideShowExtractor(new HSLFSlideShow(input)).getText() : new SlideShowExtractor(new XMLSlideShow(input)).getText();
stringBuilder.append(pptText); stringBuilder.append(pptText);
return stringBuilder.toString(); return EleCommonUtil.formateString(stringBuilder.toString());
} }
} }