问支持word,ppt,excel,text,csv等上传
This commit is contained in:
parent
b0fe94d2ee
commit
46d68f8d4b
|
|
@ -1,17 +1,20 @@
|
|||
package com.electromagnetic.industry.software.manage.service.serviceimpl;
|
||||
|
||||
import cn.hutool.core.io.FileUtil;
|
||||
import cn.hutool.core.text.StrFormatter;
|
||||
import cn.hutool.core.util.IdUtil;
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.crypto.digest.DigestUtil;
|
||||
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
|
||||
import com.electromagnetic.industry.software.common.enums.EffectFlagEnum;
|
||||
import com.electromagnetic.industry.software.common.pojo.PageFile;
|
||||
import com.electromagnetic.industry.software.common.pojo.UserLoginInfo;
|
||||
import com.electromagnetic.industry.software.common.resp.ElectromagneticResult;
|
||||
import com.electromagnetic.industry.software.common.util.ElectromagneticResultUtil;
|
||||
import com.electromagnetic.industry.software.common.util.IdWorker;
|
||||
import com.electromagnetic.industry.software.common.util.OfficeFileUtil;
|
||||
import com.electromagnetic.industry.software.common.util.UserThreadLocal;
|
||||
import com.electromagnetic.industry.software.manage.config.ElePropertyConfig;
|
||||
import com.electromagnetic.industry.software.manage.mapper.AiFileUploadRecordMapper;
|
||||
import com.electromagnetic.industry.software.manage.mapper.AiQuestionRecordMapper;
|
||||
import com.electromagnetic.industry.software.manage.pojo.models.AiFileUploadRecord;
|
||||
|
|
@ -25,7 +28,9 @@ import org.springframework.ai.chat.client.advisor.QuestionAnswerAdvisor;
|
|||
import org.springframework.ai.chat.messages.UserMessage;
|
||||
import org.springframework.ai.chat.prompt.Prompt;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.document.DocumentReader;
|
||||
import org.springframework.ai.ollama.OllamaChatModel;
|
||||
import org.springframework.ai.reader.TextReader;
|
||||
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
|
||||
import org.springframework.ai.vectorstore.VectorStore;
|
||||
import org.springframework.ai.chat.model.ChatResponse;
|
||||
|
|
@ -34,12 +39,11 @@ import org.springframework.transaction.annotation.Transactional;
|
|||
import org.springframework.web.multipart.MultipartFile;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
|
|
@ -64,6 +68,11 @@ public class ChatService {
|
|||
@Resource
|
||||
private AiQuestionRecordMapper aiQuestionRecordMapper;
|
||||
|
||||
@Resource
|
||||
private ElePropertyConfig elePropertyConfig;
|
||||
|
||||
// File extensions accepted for upload; the extension of the uploaded file's original name is looked up in this list.
// NOTE(review): the per-type switch in the upload method also has a "text" case, but "text" is not listed here,
// so that case appears unreachable — confirm whether "text" should be added here or dropped from the switch.
private static final List<String> ALLOWS_FILE_TYPES = List.of("doc", "docx", "txt", "csv", "xls", "xlsx", "pdf", "ppt", "pptx");
|
||||
|
||||
public ElectromagneticResult<?> deleteDocument(List<String> ids) {
|
||||
List<String> vectorIds = aiFileUploadRecordMapper.selectList(Wrappers.lambdaQuery(AiFileUploadRecord.class)
|
||||
.in(AiFileUploadRecord::getId, ids))
|
||||
|
|
@ -88,10 +97,9 @@ public class ChatService {
|
|||
return ElectromagneticResultUtil.fail("-1", "文件为空");
|
||||
}
|
||||
|
||||
// 当前仅支持pdf文件
|
||||
String fileType = FileUtil.extName(file.getOriginalFilename());
|
||||
if (!StrUtil.equals(fileType, "pdf")) {
|
||||
return ElectromagneticResultUtil.fail("-1", "当前仅支持pdf格式文件");
|
||||
if (!ALLOWS_FILE_TYPES.contains(fileType)) {
|
||||
return ElectromagneticResultUtil.fail("-1", StrFormatter.format("当前格式 {} 不支持", fileType));
|
||||
}
|
||||
|
||||
// 通过md5值判断文件是否被上传过
|
||||
|
|
@ -101,11 +109,109 @@ public class ChatService {
|
|||
if (count > 0) {
|
||||
return ElectromagneticResultUtil.success(fileMd5);
|
||||
}
|
||||
List<Document> documents = new ArrayList<>();
|
||||
|
||||
switch (Objects.requireNonNull(fileType)) {
|
||||
case "txt", "csv", "text" -> {
|
||||
Path tempFile = saveUploadedFileToTemp(file);
|
||||
DocumentReader documentReader = new TextReader(String.valueOf(tempFile.toUri().toURL()));
|
||||
documents = documentReader.get();
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
case "xls" -> {
|
||||
String filePath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".xls";
|
||||
List<PageFile> pageInfo = OfficeFileUtil.parseXlsByPage(filePath);
|
||||
for (PageFile pageFile : pageInfo) {
|
||||
Map<String, Object> metaData = Map.of("page_number", pageFile.getPageNumber(), "file_name", file.getOriginalFilename());
|
||||
Document document = new Document(pageFile.getContent(), metaData);
|
||||
documents.add(document);
|
||||
}
|
||||
FileUtil.del(filePath);
|
||||
}
|
||||
case "xlsx" -> {
|
||||
String filePath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".xlsx";
|
||||
List<PageFile> pageInfo = OfficeFileUtil.parseXlsxByPage(filePath);
|
||||
for (PageFile pageFile : pageInfo) {
|
||||
Map<String, Object> metaData = Map.of("page_number", pageFile.getPageNumber(), "file_name", file.getOriginalFilename());
|
||||
Document document = new Document(pageFile.getContent(), metaData);
|
||||
documents.add(document);
|
||||
}
|
||||
FileUtil.del(filePath);
|
||||
}
|
||||
case "doc" -> {
|
||||
String wordPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".doc";
|
||||
String pdfPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".pdf";
|
||||
OfficeFileUtil.doc2pdf(wordPath, pdfPath);
|
||||
Path path = new File(pdfPath).toPath();
|
||||
PagePdfDocumentReader reader = new PagePdfDocumentReader(String.valueOf(path.toUri().toURL()));
|
||||
List<Document> tmp = reader.get();
|
||||
for (Document document : tmp) {
|
||||
Map<String, Object> metadata = document.getMetadata();
|
||||
Map<String, Object> metaData = Map.of("page_number", metadata.get("page_number"), "file_name", file.getOriginalFilename());
|
||||
Document doc = new Document(document.getContent(), metaData);
|
||||
documents.add(doc);
|
||||
}
|
||||
Files.deleteIfExists(path);
|
||||
FileUtil.del(wordPath);
|
||||
}
|
||||
case "docx" -> {
|
||||
String wordPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".docx";
|
||||
String pdfPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".pdf";
|
||||
OfficeFileUtil.doc2pdf(wordPath, pdfPath);
|
||||
Path path = new File(pdfPath).toPath();
|
||||
PagePdfDocumentReader reader = new PagePdfDocumentReader(String.valueOf(path.toUri().toURL()));
|
||||
List<Document> tmp = reader.get();
|
||||
for (Document document : tmp) {
|
||||
Map<String, Object> metadata = document.getMetadata();
|
||||
Map<String, Object> metaData = Map.of("page_number", metadata.get("page_number"), "file_name", file.getOriginalFilename());
|
||||
Document doc = new Document(document.getContent(), metaData);
|
||||
documents.add(doc);
|
||||
}
|
||||
Files.deleteIfExists(path);
|
||||
FileUtil.del(wordPath);
|
||||
}
|
||||
case "ppt" -> {
|
||||
String pptPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".ppt";
|
||||
String pdfPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".pdf";
|
||||
OfficeFileUtil.ppt2pdf(pptPath, pdfPath);
|
||||
Path path = new File(pdfPath).toPath();
|
||||
PagePdfDocumentReader reader = new PagePdfDocumentReader(String.valueOf(path.toUri().toURL()));
|
||||
List<Document> tmp = reader.get();
|
||||
for (Document document : tmp) {
|
||||
Map<String, Object> metadata = document.getMetadata();
|
||||
Map<String, Object> metaData = Map.of("page_number", metadata.get("page_number"), "file_name", file.getOriginalFilename());
|
||||
Document doc = new Document(document.getContent(), metaData);
|
||||
documents.add(doc);
|
||||
}
|
||||
Files.deleteIfExists(path);
|
||||
FileUtil.del(pptPath);
|
||||
}
|
||||
case "pptx" -> {
|
||||
String pptxPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".pptx";
|
||||
String pdfPath = elePropertyConfig.getEleTmpPath() + File.separator + fileMd5 + ".pdf";
|
||||
OfficeFileUtil.ppt2pdf(pptxPath, pdfPath);
|
||||
Path path = new File(pdfPath).toPath();
|
||||
PagePdfDocumentReader reader = new PagePdfDocumentReader(String.valueOf(path.toUri().toURL()));
|
||||
List<Document> tmp = reader.get();
|
||||
for (Document document : tmp) {
|
||||
Map<String, Object> metadata = document.getMetadata();
|
||||
Map<String, Object> metaData = Map.of("page_number", metadata.get("page_number"), "file_name", file.getOriginalFilename());
|
||||
Document doc = new Document(document.getContent(), metaData);
|
||||
documents.add(doc);
|
||||
}
|
||||
Files.deleteIfExists(path);
|
||||
FileUtil.del(pptxPath);
|
||||
}
|
||||
case "pdf" -> {
|
||||
Path tempFile = saveUploadedFileToTemp(file);
|
||||
PagePdfDocumentReader reader = new PagePdfDocumentReader(String.valueOf(tempFile.toUri().toURL()));
|
||||
List<Document> documents = reader.get();
|
||||
documents = reader.get();
|
||||
Files.deleteIfExists(tempFile);
|
||||
}
|
||||
default -> {
|
||||
return ElectromagneticResultUtil.fail("-1", StrFormatter.format("当前格式 {} 不支持", fileType));
|
||||
}
|
||||
}
|
||||
vectorStore.write(documents);
|
||||
for (Document document : documents) {
|
||||
aiFileUploadRecordMapper.insert(new AiFileUploadRecord().setId(IdWorker.getSnowFlakeIdString())
|
||||
|
|
|
|||
|
|
@ -0,0 +1,13 @@
|
|||
package com.electromagnetic.industry.software.common.pojo;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class PageFile {
|
||||
|
||||
private int pageNumber;
|
||||
|
||||
private String content;
|
||||
|
||||
private String fileName;
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ import com.documents4j.api.DocumentType;
|
|||
import com.documents4j.api.IConverter;
|
||||
import com.documents4j.job.LocalConverter;
|
||||
import com.electromagnetic.industry.software.common.exception.BizException;
|
||||
import com.electromagnetic.industry.software.common.pojo.PageFile;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
|
@ -30,6 +31,7 @@ import java.io.InputStream;
|
|||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@Slf4j
|
||||
|
|
@ -46,10 +48,18 @@ public class OfficeFileUtil {
|
|||
try (InputStream docxInputStream = Files.newInputStream(inputWord.toPath());
|
||||
OutputStream outputStream = Files.newOutputStream(outputFile.toPath())) {
|
||||
IConverter build = LocalConverter.builder().build();
|
||||
boolean execute = build.convert(docxInputStream)
|
||||
boolean execute;
|
||||
if (wordPath.endsWith(".docx")) {
|
||||
execute = build.convert(docxInputStream)
|
||||
.as(DocumentType.DOCX)
|
||||
.to(outputStream)
|
||||
.as(DocumentType.PDF).schedule().get();
|
||||
} else {
|
||||
execute = build.convert(docxInputStream)
|
||||
.as(DocumentType.DOC)
|
||||
.to(outputStream)
|
||||
.as(DocumentType.PDF).schedule().get();
|
||||
}
|
||||
Assert.isTrue(execute, "转换失败");
|
||||
log.info("转换完毕 targetPath = {}", outputFile.getAbsolutePath());
|
||||
build.shutDown();
|
||||
|
|
@ -76,6 +86,31 @@ public class OfficeFileUtil {
|
|||
}
|
||||
}
|
||||
|
||||
public static void ppt2pdf(String pptPath, String pdfPath) {
|
||||
if (FileUtil.exist(pdfPath)) {
|
||||
return;
|
||||
}
|
||||
log.info("Start convert ppt file to pdf, word path: {}, pdf path: {}", pptPath, pdfPath);
|
||||
if (EleCommonUtil.isWinOs()) {
|
||||
throw new BizException("windows平台暂不支持");
|
||||
} else {
|
||||
try {
|
||||
StringBuilder command = new StringBuilder();
|
||||
command.append("unoconv -f pdf -o").append(" ").append(pdfPath).append(" ").append(pptPath);
|
||||
log.info("convert word file to pdf, command: {}", command);
|
||||
Process process = RuntimeUtil.exec(command.toString());
|
||||
process.waitFor();
|
||||
if (process.exitValue() != 0) {
|
||||
String info = StrFormatter.format("ppt文档{}转换成pdf文档{}失败", pptPath, pdfPath);
|
||||
throw new BizException(info);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
throw new BizException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String parseDocxAllText(String wordPath) throws IOException {
|
||||
log.info("Start parse docx file, path is {}", wordPath);
|
||||
InputStream fis = Files.newInputStream(Paths.get(wordPath));
|
||||
|
|
@ -194,4 +229,14 @@ public class OfficeFileUtil {
|
|||
return EleCommonUtil.formateString(stringBuilder.toString());
|
||||
}
|
||||
|
||||
public static List<PageFile> parseXlsxByPage(String filePath) {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static List<PageFile> parseXlsByPage(String filePath) {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue