1 安装向量数据库(使用 Docker Compose 启动 pgvector)

# Docker Compose definition for a single PostgreSQL 17 container with the
# pgvector extension image, used as the vector database.
services:
  postgres:
    image: 'pgvector/pgvector:pg17'
    # Database name and credentials the container is initialised with.
    environment:
      POSTGRES_DB: 'vectordb'
      POSTGRES_USER: 'vector-admin'
      POSTGRES_PASSWORD: 'vector-admin'
    # Host port 5434 is mapped to the container's PostgreSQL port 5432.
    ports:
      - '5434:5432'
    # Keep the database files in a named volume.
    volumes:
      - 'postgres_data:/var/lib/postgresql/data'

# Named volume referenced by the postgres service above.
volumes:
  postgres_data: {}

2 添加 Maven 依赖(pom.xml)

<!--
  Maven dependencies for the RAG demo.
  NOTE(review): most entries declare no <version>; presumably versions are
  managed by a parent POM / BOM that is not shown here - confirm.
-->
<dependencies>
    <!-- Spring Boot web starter -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Lombok annotations -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
    </dependency>

    <!-- pgVector vector store -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
    </dependency>

    <!-- Question-answer advisor backed by the vector store -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-advisors-vector-store</artifactId>
    </dependency>

    <!-- Tika document reader (used by the ETL step) -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-tika-document-reader</artifactId>
    </dependency>

    <!-- OpenAI model starter; used here against SiliconFlow -->
    <dependency>
        <groupId>org.springframework.ai</groupId>
        <artifactId>spring-ai-starter-model-openai</artifactId>
    </dependency>

    <!-- Apache Commons Lang, pinned to an explicit version -->
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-lang3</artifactId>
        <version>3.16.0</version>
    </dependency>

</dependencies>

3 修改application.yaml

# Spring Boot configuration for the RAG demo: HTTP port, the OpenAI-compatible
# model endpoint, the pgvector store and the PostgreSQL datasource.
server:
  port: 8080
spring:
  application:
    name: spring-ai-rag
  ai:
    openai:
      # Never commit a real key. The value is resolved from the
      # SILICONFLOW_API_KEY environment variable at startup.
      api-key: "${SILICONFLOW_API_KEY}"
      # SiliconFlow API endpoint. Plain URL only: markdown-style <...> brackets
      # would become part of the value. No trailing slash.
      # NOTE(review): assumes the client appends its own "/v1/..." path - confirm.
      base-url: "https://api.siliconflow.cn"
      embedding:
        options:
          model: netease-youdao/bce-embedding-base_v1
      chat:
        options:
          model: Qwen/Qwen3-8B
    vectorstore:
      pgvector:
        index-type: HNSW
        distance-type: COSINE_DISTANCE
        # NOTE(review): presumably must match the output size of the embedding
        # model configured above - confirm 768 for that model.
        dimensions: 768
        initialize-schema: true  # create the schema automatically
        table-name: vector_store
  datasource:
    # Port, database and credentials match the Docker Compose service above.
    url: jdbc:postgresql://localhost:5434/vectordb
    username: vector-admin
    password: vector-admin
    driver-class-name: org.postgresql.Driver

logging:
  level:
    org:
      springframework:
        ai: DEBUG

4 添加ETL

读取文档

/**
 * Extract step of the ETL pipeline: reads the injected classpath resource
 * ({@code classpath:/sample1.pdf}) through a {@link TikaDocumentReader}.
 */
@Component
public class DefaultTikaDocumentReader {

    /** Source document injected by Spring from the classpath. */
    private final Resource resource;

    DefaultTikaDocumentReader(@Value("classpath:/sample1.pdf") Resource resource) {
        this.resource = resource;
    }

    /**
     * Reads the configured resource.
     *
     * @return the documents produced by the Tika reader
     */
    List<Document> loadText() {
        // A new reader is created on every call, exactly as before.
        return new TikaDocumentReader(this.resource).read();
    }
}

转换文档

/**
 * Transform step of the ETL pipeline: splits documents into smaller chunks
 * with a {@link TokenTextSplitter}.
 */
@Component
public class DocumentTransformer {

    // Arguments passed to the customized splitter. The names follow the
    // TokenTextSplitter constructor parameters as documented by Spring AI.
    private static final int CHUNK_SIZE = 400;
    private static final int MIN_CHUNK_SIZE_CHARS = 200;
    private static final int MIN_CHUNK_LENGTH_TO_EMBED = 10;
    private static final int MAX_NUM_CHUNKS = 5000;
    private static final boolean KEEP_SEPARATOR = true;

    /**
     * Splits the documents with a default-configured splitter.
     *
     * @param documents documents to split
     * @return the chunks produced by the splitter
     */
    public List<Document> splitDocuments(List<Document> documents) {
        return new TokenTextSplitter().apply(documents);
    }

    /**
     * Splits the documents with the customized settings defined above.
     *
     * @param documents documents to split
     * @return the chunks produced by the splitter
     */
    public List<Document> splitCustomized(List<Document> documents) {
        TokenTextSplitter customSplitter = new TokenTextSplitter(
                CHUNK_SIZE,
                MIN_CHUNK_SIZE_CHARS,
                MIN_CHUNK_LENGTH_TO_EMBED,
                MAX_NUM_CHUNKS,
                KEEP_SEPARATOR);
        return customSplitter.apply(documents);
    }

}

加载文档

/**
 * Load step of the ETL pipeline: reads the source document, splits it with
 * the customized splitter and adds the chunks to the {@link VectorStore}.
 */
@Component
@RequiredArgsConstructor
public class EtlLoader {
    private final VectorStore vectorStore;
    private final DefaultTikaDocumentReader reader;
    private final DocumentTransformer transformer;

    // NOTE(review): this class implements no interface and the @Override that
    // used to sit here is disabled, so nothing visible in this file calls
    // run() automatically - presumably intentional; confirm.
    /**
     * Runs read -> split -> store once and prints a completion message.
     *
     * @param args not used by this method
     */
    public void run(String... args) {
        List<Document> chunks = transformer.splitCustomized(reader.loadText());
        // Embedding and persistence into pgVector are delegated to the vector store.
        vectorStore.add(chunks);
        System.out.println("ETL 完成:文档已加载到 pgVector。");
    }
}

5 添加聊天接口

/**
 * REST controller exposing a RAG chat endpoint under {@code /rag/chat}.
 */
@Slf4j
@RestController
@RequestMapping("/rag/chat")
@RequiredArgsConstructor
public class ChatController {

    private final VectorStore vectorStore;

    private final ChatModel chatModel;

    /**
     * Sends a fixed question to the chat model with a
     * {@link QuestionAnswerAdvisor} built on the vector store, and returns the
     * answer text.
     *
     * <p>This is a blocking {@code call()}, not a streaming response, so no
     * SSE media type is declared on the mapping.
     *
     * @return the content of the model's reply
     */
    @GetMapping("/main")
    public String chat() {
        // A new client is built from the injected model on every request.
        ChatClient chatClient = ChatClient.builder(chatModel).build();
        // NOTE(review): the question is hard-coded; callers cannot supply their own.
        return chatClient.prompt()
                .advisors(new QuestionAnswerAdvisor(vectorStore))
                .user("什么时候会使用内部临时表")
                .call()
                .content();
    }

}