clone项目,自行打包docker(docker hub 镜像更新落后)
git clone https://github.com/chroma-core/chroma.git
生成随机Token(令牌)
import secrets

# Build a random, URL-safe token from 32 random bytes and show it so it can
# be copied into the server configuration.
token = secrets.token_urlsafe(nbytes=32)
print(token)
创建环境变量文件 .chroma_env,写入下面的内容
CHROMA_SERVER_AUTHN_CREDENTIALS="your-token"
CHROMA_SERVER_AUTHN_PROVIDER="chromadb.auth.token_authn.TokenAuthenticationServerProvider"
启动
docker compose up -d
连接
import chromadb

# Connect to the Chroma server over HTTP using token authentication.
# The credential below must be the same token that is stored in
# CHROMA_SERVER_AUTHN_CREDENTIALS in the .chroma_env file.
client = chromadb.HttpClient(
    host="127.0.0.1",
    port=8000,
    settings=chromadb.Settings(
        chroma_client_auth_provider="chromadb.auth.token_authn.TokenAuthClientProvider",
        chroma_client_auth_credentials="your-token",
    ),
)
创建collection,并使用兼容langchain的embedding
embedding 部署见Embedding+Rerank部署
from chromadb.utils import embedding_functions
from langchain_openai import OpenAIEmbeddings

# Wrap the LangChain embedding model so it can be passed to Chroma as the
# collection's embedding function.
langchain_model = OpenAIEmbeddings()
embedding = embedding_functions.create_langchain_embedding(langchain_model)

# Reuse the "test" collection when it already exists, otherwise create it.
collection = client.get_or_create_collection(
    name="test",
    embedding_function=embedding,
)
添加数据
# Insert a single record: one id with its document text and metadata.
collection.add(
    ids=["1"],
    documents=["test"],
    metadatas=[{"author": "admin"}],
)
查询collection数据量
collection.count()  # query how many records the collection currently holds
简单查询collection
# Open the existing "test" collection with the embedding function, then ask
# for the single best match for the query text.
collection = client.get_collection(name="test", embedding_function=embedding)
r = collection.query(query_texts=["test"], n_results=1)