diff --git a/electrmangnetic/pom.xml b/electrmangnetic/pom.xml index 81c254e..18e7476 100644 --- a/electrmangnetic/pom.xml +++ b/electrmangnetic/pom.xml @@ -128,6 +128,10 @@ elasticsearch-java 8.13.4 + + org.springframework.ai + spring-ai-pdf-document-reader + diff --git a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread.java b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread1.java similarity index 90% rename from electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread.java rename to electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread1.java index bbc2e2b..c847062 100644 --- a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread.java +++ b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread1.java @@ -10,7 +10,7 @@ import java.util.concurrent.Callable; @AllArgsConstructor @NoArgsConstructor -public class ChatTaskThread implements Callable> { +public class ChatTaskThread1 implements Callable> { private ChatService chatService; private QueryDTO queryDTO; diff --git a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread2.java b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread2.java new file mode 100644 index 0000000..d139cdd --- /dev/null +++ b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/ai/ChatTaskThread2.java @@ -0,0 +1,23 @@ +package com.electromagnetic.industry.software.manage.ai; + +import com.electromagnetic.industry.software.manage.pojo.req.QueryDTO; +import com.electromagnetic.industry.software.manage.service.serviceimpl.ChatService; +import lombok.AllArgsConstructor; +import lombok.NoArgsConstructor; +import org.springframework.ai.chat.model.ChatResponse; +import reactor.core.publisher.Flux; + +import java.util.concurrent.Callable; + +@AllArgsConstructor +@NoArgsConstructor +public class ChatTaskThread2 implements Callable> { + + private ChatService chatService; + private QueryDTO queryDTO; + + @Override + public Flux call() throws Exception { + return chatService.chatStreamResponse(queryDTO); + } +} \ No newline at end of file diff --git a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/controller/AiController.java b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/controller/AiController.java index 5a53cd0..af9be4c 100644 --- a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/controller/AiController.java +++ b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/controller/AiController.java @@ -2,12 +2,14 @@ package com.electromagnetic.industry.software.manage.controller; import cn.hutool.core.util.StrUtil; import com.electromagnetic.industry.software.common.resp.ElectromagneticResult; -import com.electromagnetic.industry.software.manage.ai.ChatTaskThread; +import com.electromagnetic.industry.software.manage.ai.ChatTaskThread1; +import com.electromagnetic.industry.software.manage.ai.ChatTaskThread2; import com.electromagnetic.industry.software.manage.ai.ThreadUtil; import com.electromagnetic.industry.software.manage.pojo.req.QueryDTO; import com.electromagnetic.industry.software.manage.service.serviceimpl.ChatService; import jakarta.annotation.Resource; import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.chat.model.ChatResponse; import org.springframework.http.MediaType; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; @@ -44,19 +46,19 @@ public class AiController { if (StrUtil.isEmpty(queryDTO.getMsg())) { return Flux.empty(); } - ChatTaskThread chatTaskThread = new ChatTaskThread(chatService, queryDTO); + ChatTaskThread1 chatTaskThread = new ChatTaskThread1(chatService, queryDTO); Future> future = ThreadUtil.getThreadPool().submit(chatTaskThread); return future.get(); } -// @PostMapping(path = "/chatStreamResp", produces = MediaType.TEXT_EVENT_STREAM_VALUE) -// public Flux chatStreamResp(@RequestBody QueryDTO queryDTO) throws ExecutionException, InterruptedException { -// if (StrUtil.isEmpty(queryDTO.getMsg())) { -// return Flux.empty(); -// } -// ChatTaskThread> chatTaskThread = new ChatTaskThread<>(chatService, queryDTO); -// Future> future = ThreadUtil.getThreadPool().submit(chatTaskThread); -// return future.get(); -// } + @PostMapping(path = "/chatStreamResp", produces = MediaType.TEXT_EVENT_STREAM_VALUE) + public Flux chatStreamResp(@RequestBody QueryDTO queryDTO) throws ExecutionException, InterruptedException { + if (StrUtil.isEmpty(queryDTO.getMsg())) { + return Flux.empty(); + } + ChatTaskThread2 chatTaskThread2 = new ChatTaskThread2(chatService, queryDTO); + Future> future = ThreadUtil.getThreadPool().submit(chatTaskThread2); + return future.get(); + } } diff --git a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/service/serviceimpl/ChatService.java b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/service/serviceimpl/ChatService.java index 16a1a80..99f6838 100644 --- a/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/service/serviceimpl/ChatService.java +++ b/electrmangnetic/src/main/java/com/electromagnetic/industry/software/manage/service/serviceimpl/ChatService.java @@ -1,16 +1,16 @@ package com.electromagnetic.industry.software.manage.service.serviceimpl; -import cn.hutool.core.collection.ListUtil; import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.IdUtil; import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.StrUtil; import cn.hutool.crypto.digest.DigestUtil; import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.electromagnetic.industry.software.common.enums.EffectFlagEnum; import com.electromagnetic.industry.software.common.resp.ElectromagneticResult; -import com.electromagnetic.industry.software.common.util.EleCommonUtil; import com.electromagnetic.industry.software.common.util.ElectromagneticResultUtil; import com.electromagnetic.industry.software.common.util.IdWorker; +import com.electromagnetic.industry.software.manage.config.ElePropertyConfig; import com.electromagnetic.industry.software.manage.mapper.AiFileUploadRecordMapper; import com.electromagnetic.industry.software.manage.pojo.models.AiFileUploadRecord; import com.electromagnetic.industry.software.manage.pojo.req.QueryDTO; @@ -19,15 +19,24 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor; import org.springframework.ai.chat.client.advisor.QuestionAnswerAdvisor; +import org.springframework.ai.chat.messages.UserMessage; +import org.springframework.ai.chat.prompt.Prompt; import org.springframework.ai.document.Document; import org.springframework.ai.ollama.OllamaChatModel; +import org.springframework.ai.reader.pdf.PagePdfDocumentReader; +import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig; import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.ai.chat.model.ChatResponse; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.multipart.MultipartFile; import reactor.core.publisher.Flux; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -51,6 +60,9 @@ public class ChatService { @Resource private AiFileUploadRecordMapper aiFileUploadRecordMapper; + @Resource + private ElePropertyConfig elePropertyConfig; + public void add(String content) { List documents = Stream.of(content).map(Document::new).collect(Collectors.toList()); vectorStore.write(documents); @@ -75,53 +87,55 @@ public class ChatService { @Transactional(rollbackFor = Exception.class) public ElectromagneticResult addFromUpload(MultipartFile file) throws Exception { + // 文件是否为空 if (file.isEmpty()) { return ElectromagneticResultUtil.fail("-1", "文件为空"); } + // 当前仅支持pdf文件 String fileType = FileUtil.extName(file.getOriginalFilename()); if (!StrUtil.equals(fileType, "pdf")) { return ElectromagneticResultUtil.fail("-1", "当前仅支持pdf格式文件"); } - String fileMd5 = DigestUtil.md5Hex(file.getInputStream()); + // 通过md5值判断文件是否被上传过 + String fileMd5 = DigestUtil.md5Hex(file.getInputStream()); Long count = aiFileUploadRecordMapper.selectCount(Wrappers.lambdaQuery(AiFileUploadRecord.class) .eq(AiFileUploadRecord::getFileMd5, fileMd5)); if (count > 0) { return ElectromagneticResultUtil.success(fileMd5); } - String content = EleCommonUtil.parse(file.getInputStream(), "pdf"); - Document document = new Document(content); - vectorStore.write(ListUtil.of(document)); - aiFileUploadRecordMapper.insert(new AiFileUploadRecord().setId(IdWorker.getSnowFlakeIdString()) - .setVectorId(document.getId()) - .setFileSize(file.getSize()) - .setFileMd5(fileMd5) - .setFileName(file.getOriginalFilename())); - + Path tempFile = saveUploadedFileToTemp(file); + PdfDocumentReaderConfig config = PdfDocumentReaderConfig.builder().build(); + PagePdfDocumentReader reader = new PagePdfDocumentReader(String.valueOf(tempFile.toUri().toURL()), config); + List documents = reader.get(); + Files.deleteIfExists(tempFile); + vectorStore.write(documents); + for (Document document : documents) { + aiFileUploadRecordMapper.insert(new AiFileUploadRecord().setId(IdWorker.getSnowFlakeIdString()) + .setVectorId(document.getId()) + .setFileSize(file.getSize()) + .setFileMd5(fileMd5) + .setFileName(file.getOriginalFilename())); + } return ElectromagneticResultUtil.success(fileMd5); } -// public String chat(String msg) { -// -// log.info("Start call model to answer"); -// -// return ChatClient.builder(model).defaultAdvisors(messageChatMemoryAdvisor, questionAnswerAdvisor).build().prompt() -// .user(msg) -// .advisors(advisorSpec -> advisorSpec -//// .param(CHAT_MEMORY_CONVERSATION_ID_KEY, queryDTO.getUserId()) -// .param(AbstractChatMemoryAdvisor.CHAT_MEMORY_RETRIEVE_SIZE_KEY, 100)) -// .call() -// .content(); -// } -// -// public Flux chatStreamResponse(String msg) { -// ChatClient.StreamResponseSpec stream = ChatClient.builder(model).defaultAdvisors(messageChatMemoryAdvisor, questionAnswerAdvisor).build().prompt(new Prompt(new UserMessage(msg))).stream(); -// return stream.chatResponse(); -// } + private Path saveUploadedFileToTemp(MultipartFile file) throws IOException { + Path tempDir = Files.createTempDirectory(IdUtil.simpleUUID()); + Path tempFile = tempDir.resolve(Objects.requireNonNull(file.getOriginalFilename())); + file.transferTo(tempFile); + return tempFile; + } public Flux chatStreamStr(QueryDTO queryDTO) { return ChatClient.builder(model).defaultAdvisors(messageChatMemoryAdvisor, questionAnswerAdvisor).build().prompt(queryDTO.getMsg()).stream().content(); } + + public Flux chatStreamResponse(QueryDTO queryDTO) { + return ChatClient.builder(model).defaultAdvisors(messageChatMemoryAdvisor, questionAnswerAdvisor).build() + .prompt(new Prompt(new UserMessage(queryDTO.getMsg()))) + .stream().chatResponse(); + } } diff --git a/electrmangnetic/src/test/java/Test1.java b/electrmangnetic/src/test/java/Test1.java index deae521..28b9bf2 100644 --- a/electrmangnetic/src/test/java/Test1.java +++ b/electrmangnetic/src/test/java/Test1.java @@ -1,6 +1,8 @@ //import com.electromagnetic.industry.software.manage.Application; //import jakarta.annotation.Resource; //import org.junit.jupiter.api.Test; +//import org.springframework.ai.document.Document; +//import org.springframework.ai.reader.pdf.PagePdfDocumentReader; //import org.springframework.ai.vectorstore.VectorStore; //import org.springframework.boot.test.context.SpringBootTest; // @@ -14,8 +16,10 @@ // // @Test // public void testTree() { -// String id = "c32666b2-36a5-40b5-9048-11349f090cd7"; -// vectorStore.delete(List.of(id)); +// String path = "D:/wjj.pdf"; +// PagePdfDocumentReader pagePdfDocumentReader = new PagePdfDocumentReader(path); +// List read = pagePdfDocumentReader.read(); +// System.out.println("read = " + read); // } // //} diff --git a/electromagnetic-common/pom.xml b/electromagnetic-common/pom.xml index 622aad7..cee461c 100644 --- a/electromagnetic-common/pom.xml +++ b/electromagnetic-common/pom.xml @@ -89,7 +89,7 @@ org.apache.pdfbox pdfbox - 2.0.24 + 3.0.3 javax.xml.bind diff --git a/electromagnetic-common/src/main/java/com/electromagnetic/industry/software/common/util/OfficeFileUtil.java b/electromagnetic-common/src/main/java/com/electromagnetic/industry/software/common/util/OfficeFileUtil.java index c186a91..c731f12 100644 --- a/electromagnetic-common/src/main/java/com/electromagnetic/industry/software/common/util/OfficeFileUtil.java +++ b/electromagnetic-common/src/main/java/com/electromagnetic/industry/software/common/util/OfficeFileUtil.java @@ -9,6 +9,7 @@ import com.documents4j.api.IConverter; import com.documents4j.job.LocalConverter; import com.electromagnetic.industry.software.common.exception.BizException; import lombok.extern.slf4j.Slf4j; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.poi.hslf.usermodel.HSLFSlideShow; @@ -89,7 +90,7 @@ public class OfficeFileUtil { public static String parsePdfAllText(String path) throws IOException { log.info("Start parse pdf file, path is {}", path); // 加载PDF文档 - PDDocument document = PDDocument.load(new File(path)); + PDDocument document = Loader.loadPDF(new File(path)); // 创建PDFTextStripper对象来解析文本 PDFTextStripper pdfStripper = new PDFTextStripper(); // 提取文本