Qdrant向量数据库操作指南:查询搜索


1.查询数据入口

# Query entry point: the raw text to look up, stored under the 'text' key.
query = {"text": "腾讯财报利润"}

2.大文本必须切分,根据 embedding 模型的大小和实际需要设定 chunk 大小

def to_splitter(content, *, chunk_size=256, chunk_overlap=0):
    """Split the text stored under ``content['text']`` into chunks.

    Args:
        content: Mapping whose ``'text'`` entry holds the string to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 256, the value that used to be
            hard-coded here.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 0, the value that used to be
            hard-coded here.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns
        (presumably a list of strings -- confirm against the LangChain docs).

    Raises:
        KeyError: If ``content`` is a dict without a ``'text'`` key.
    """
    doc_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return doc_splitter.split_text(content["text"])

3.embedding化

def to_embeddings(items, *, model_name="GanymedeNil/text2vec-large-chinese"):
    """Embed *items* with a HuggingFace embedding model.

    Args:
        items: Texts to embed; passed unchanged to ``embed_documents``.
        model_name: Model identifier handed to ``HuggingFaceEmbeddings``.
            The default is the model this function originally hard-coded.
            Two alternatives were previously listed here as commented-out
            code: ``'sentence-transformers/all-mpnet-base-v2'`` and
            ``'shibing624/text2vec-base-chinese'``.

    Returns:
        The result of ``HuggingFaceEmbeddings.embed_documents(items)``.
    """
    # NOTE(review): the embedding wrapper is constructed on every call;
    # presumably that reloads the model each time -- consider caching it if
    # this function is called repeatedly.
    sentence_embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return sentence_embeddings.embed_documents(items)

4.向量检索

def search(query, *, query_filter=None):
    """Run a batch similarity search in Qdrant and merge the hit texts.

    One ``SearchRequest`` is built per entry of *query* and all of them are
    sent in a single ``search_batch`` call against ``"test_collection"``.
    The ``"text"`` payload of every hit is truncated to 512 characters and
    the pieces are concatenated into one string.

    Args:
        query: Iterable of query vectors, one per search request
            (presumably the output of ``to_embeddings`` -- verify against
            the caller).
        query_filter: Optional Qdrant ``Filter`` applied to every request.
            Defaults to ``None`` (no filtering), which matches the previous
            behaviour where the filter was built but never used. Example
            restricting hits to news items::

                Filter(must=[FieldCondition(
                    key="collect_type",
                    match=MatchValue(value="news"),
                )])

    Returns:
        A one-element list ``[{"text": combined_text}]``. ``combined_text``
        is the empty string when there are no hits.

    Raises:
        KeyError: If a hit's payload has no ``"text"`` entry.
    """
    # Keep the downstream (front-end) prompt within its length limit.
    max_chars_per_hit = 512

    client = QdrantClient("*****", port=6333)
    collection_name = "test_collection"

    # Batch similarity search: one request per query vector. A single vector
    # could be searched with ``client.search(..., query_vector=query[0])``.
    search_queries = [
        SearchRequest(
            vector=vector,
            filter=query_filter,
            limit=3,
            with_payload=True,
            search_params={"exact": False, "hnsw_ef": 128},
        )
        for vector in query
    ]
    search_result = client.search_batch(
        collection_name=collection_name,
        requests=search_queries,
    )
    # Kept from the original: dumps the raw results to stdout.
    print(search_result)

    # Slicing already handles texts shorter than the limit, so no explicit
    # length check is needed. Pieces are joined once at the end instead of
    # growing a string with ``+=`` inside the nested loop.
    pieces = []
    for hits in search_result:
        pieces.extend(hit.payload["text"][:max_chars_per_hit] for hit in hits)

    # NOTE(review): the original indentation was lost; this assumes a single
    # answer is appended after all hits have been merged -- confirm.
    return [{"text": "".join(pieces)}]

展开阅读全文

页面更新:2024-02-29

标签:向量   切分   腾讯   长度   入口   文本   利润   大小   提示   条件   数据库

1 2 3 4 5

上滑加载更多 ↓
推荐阅读:
友情链接:
更多:

本站资料均由网友自行发布提供,仅用于学习交流。如有版权问题,请与我联系,QQ:4156828  

© CopyRight 2008-2024 All Rights Reserved. Powered By bs178.com 闽ICP备11008920号-3
闽公网安备35020302034844号

Top