# Docker Compose definition for the local PostgreSQL 17 + pgvector instance
# that backs the vector store.
services:
  postgres:
    image: pgvector/pgvector:pg17
    environment:
      POSTGRES_DB: vectordb
      # Development-only credentials. They must match the spring.datasource.*
      # settings of the application that connects to this database.
      POSTGRES_USER: vector-admin
      POSTGRES_PASSWORD: vector-admin
    ports:
      # host:container - quoted so the mapping is always read as a string.
      - "5434:5432"
    volumes:
      # Named volume so the database files survive container re-creation.
      - postgres_data:/var/lib/postgresql/data

volumes:
  postgres_data: {}
<dependencies>
    <!-- Spring MVC / embedded web server -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Lombok annotation processing -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
    </dependency>

    <!-- pgvector vector store -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
    </dependency>

    <!-- Question-answer advisor backed by the vector store -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-advisors-vector-store</artifactId>
    </dependency>

    <!-- Tika document reader (used by the ETL step) -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-tika-document-reader</artifactId>
    </dependency>

    <!-- OpenAI-compatible model starter (pointed at SiliconFlow) -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-starter-model-openai</artifactId>
    </dependency>

    <!-- Apache Commons Lang utilities -->
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.16.0</version>
    </dependency>
</dependencies>
# Spring Boot application configuration for the RAG demo.
server:
  port: 8080

spring:
  application:
    name: spring-ai-rag
  ai:
    openai:
      # Read the key from the environment instead of committing it to source.
      # The key that was previously hard-coded here should be treated as
      # leaked and revoked.
      api-key: "${SILICONFLOW_API_KEY}"
      # SiliconFlow's OpenAI-compatible endpoint. Host only: no Markdown
      # angle brackets and no trailing slash.
      base-url: https://api.siliconflow.cn
      embedding:
        options:
          model: netease-youdao/bce-embedding-base_v1
      chat:
        options:
          model: Qwen/Qwen3-8B
    vectorstore:
      pgvector:
        index-type: HNSW
        distance-type: COSINE_DISTANCE
        # NOTE(review): must equal the embedding model's output size - confirm
        # against the model's documentation.
        dimensions: 768
        # Create the vector table/schema automatically at startup.
        initialize-schema: true
        table-name: vector_store
  datasource:
    # Port and credentials match the Docker Compose postgres service.
    url: jdbc:postgresql://localhost:5434/vectordb
    username: vector-admin
    password: vector-admin
    driver-class-name: org.postgresql.Driver

logging:
  level:
    org:
      springframework:
        ai: DEBUG
读取文档
/**
 * Reads the bundled {@code sample1.pdf} classpath resource into Spring AI
 * {@code Document}s using {@code TikaDocumentReader}.
 */
@Component
public class DefaultTikaDocumentReader {

    /** Source file to read; injected from {@code classpath:/sample1.pdf}. */
    private final Resource resource;

    DefaultTikaDocumentReader(@Value("classpath:/sample1.pdf") Resource resource) {
        this.resource = resource;
    }

    /**
     * Creates a new Tika reader over the injected resource and reads it.
     *
     * @return the documents produced by {@code TikaDocumentReader#read()}
     */
    List<Document> loadText() {
        return new TikaDocumentReader(this.resource).read();
    }
}
转换文档
/**
 * Splits documents into smaller chunks with Spring AI's
 * {@code TokenTextSplitter}.
 */
@Component
public class DocumentTransformer {

    // Arguments for the customized splitter, named after the corresponding
    // TokenTextSplitter constructor parameters (in constructor order).
    private static final int CHUNK_SIZE = 400;
    private static final int MIN_CHUNK_SIZE_CHARS = 200;
    private static final int MIN_CHUNK_LENGTH_TO_EMBED = 10;
    private static final int MAX_NUM_CHUNKS = 5000;
    private static final boolean KEEP_SEPARATOR = true;

    /**
     * Splits the documents using a default-constructed splitter.
     *
     * @param documents documents to split
     * @return the chunks returned by the splitter
     */
    public List<Document> splitDocuments(List<Document> documents) {
        return new TokenTextSplitter().apply(documents);
    }

    /**
     * Splits the documents using the customized splitter settings above.
     *
     * @param documents documents to split
     * @return the chunks returned by the splitter
     */
    public List<Document> splitCustomized(List<Document> documents) {
        TokenTextSplitter customSplitter = new TokenTextSplitter(
                CHUNK_SIZE,
                MIN_CHUNK_SIZE_CHARS,
                MIN_CHUNK_LENGTH_TO_EMBED,
                MAX_NUM_CHUNKS,
                KEEP_SEPARATOR);
        return customSplitter.apply(documents);
    }
}
加载文档
/**
 * ETL entry point: reads the source document, splits it into chunks, and adds
 * the chunks to the vector store.
 */
@Component
@RequiredArgsConstructor
public class EtlLoader {

    private final VectorStore vectorStore;
    private final DefaultTikaDocumentReader reader;
    private final DocumentTransformer transformer;

    /**
     * Runs the read -> split -> store pipeline once.
     *
     * <p>No {@code @Override} here: this class declares no interface, so this
     * method is not a framework callback and has to be invoked explicitly.
     *
     * @param args unused
     */
    public void run(String... args) {
        List<Document> chunks = transformer.splitCustomized(reader.loadText());
        // The vector store embeds the chunks and persists them to pgvector.
        vectorStore.add(chunks);
        System.out.println("ETL 完成:文档已加载到 pgVector。");
    }
}
/**
 * REST endpoint that answers a fixed demo question using retrieval-augmented
 * generation over the vector store.
 */
@Slf4j
@RestController
@RequestMapping("/rag/chat")
@RequiredArgsConstructor
public class ChatController {

    private final VectorStore vectorStore;
    private final ChatModel chatModel;

    /**
     * Handles {@code GET /rag/chat/main}. Builds a {@code ChatClient}, attaches
     * a {@code QuestionAnswerAdvisor} over the vector store, sends the
     * hard-coded question, and returns the complete reply in one response
     * (blocking {@code call()}, not a stream).
     *
     * @return the model's answer text
     */
    @GetMapping("/main")
    public String chat() {
        ChatClient client = ChatClient.builder(chatModel).build();
        QuestionAnswerAdvisor ragAdvisor = new QuestionAnswerAdvisor(vectorStore);
        return client.prompt()
                .advisors(ragAdvisor)
                .user("什么时候会使用内部临时表")
                .call()
                .content();
    }
}