Oracle AI Vector Search是为人工智能(AI)工作负载设计的,允许您基于语义而不是关键词查询数据。它最大的优势在于可以将对非结构化数据的语义搜索与对业务数据的关系型搜索结合在一个系统中。这不仅非常强大,而且显著提高了效率,因为您不需要添加专门的向量数据库,从而消除了多个系统之间的数据碎片化问题。
此外,您的向量还可以利用Oracle数据库的所有强大功能,例如分区支持、真实应用集群(RAC)可扩展性、Exadata智能扫描、跨地理分布数据库的分片处理、事务、并行SQL、灾难恢复和安全性等。
使用Oracle AI Vector Search时,文本数据先由嵌入模型转换为向量,再存储在Oracle数据库中。然后,您可以使用不同的距离策略(如点积、余弦距离、欧几里得距离)对这些向量进行相似性搜索。通过这种方式,不仅可以支持复杂的查询,还可以利用Oracle数据库的强大功能来优化性能和安全性。
让我们通过一段完整的示例代码来展示如何将LangChain与Oracle AI Vector Search集成并使用。
首先,我们需要安装必要的包:
# Vector store integration and the Oracle database driver.
pip install langchain-community
pip install oracledb
# Required by the HuggingFaceEmbeddings import and the sentence-transformers model.
pip install langchain-huggingface sentence-transformers
import oracledb
# Connection settings: replace these placeholders with real credentials.
username = "your-username"
password = "your-password"
dsn = "your-dsn"

try:
    connection = oracledb.connect(user=username, password=password, dsn=dsn)
except Exception as e:
    print("Connection failed!", e)
    # Stop here: every later step uses `connection`, which would otherwise be
    # undefined and surface as an unrelated NameError further down.
    raise
else:
    print("Connection successful!")
from langchain_community.vectorstores import oraclevs
from langchain_community.vectorstores.oraclevs import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
# Raw source records: each entry carries an id, the passage text, and a link.
documents_json_list = [
    {
        "id": "cncpt_15.5.3.2.2_P4",
        "text": "If the answer to any preceding questions is yes, then the database stops the search and allocates space from the specified tablespace; otherwise, space is allocated from the database default shared temporary tablespace.",
        "link": "https://docs.oracle.com/en/database/oracle/oracle-database/23/cncpt/logical-storage-structures.html#GUID-5387D7B2-C0CA-4C1E-811B-C7EB9B636442",
    },
    # More documents...
]

# Wrap every record in a Document: the text becomes the page content, while
# the id and link are carried along as metadata.
documents_langchain = [
    Document(
        page_content=record["text"],
        metadata={"id": record["id"], "link": record["link"]},
    )
    for record in documents_json_list
]
# Embedding model handed to every vector store below.
model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# One store per distance strategy, each backed by its own table. The stores
# are built in the listed order: dot product, cosine, Euclidean.
vector_store_dot, vector_store_cosine, vector_store_euclidean = (
    OracleVS.from_documents(
        documents_langchain,
        model,
        client=connection,
        table_name=store_table,
        distance_strategy=strategy,
    )
    for store_table, strategy in (
        ("Documents_DOT", DistanceStrategy.DOT_PRODUCT),
        ("Documents_COSINE", DistanceStrategy.COSINE),
        ("Documents_EUCLIDEAN", DistanceStrategy.EUCLIDEAN_DISTANCE),
    )
)
def manage_texts(vector_stores):
    """Add, delete, and search sample texts in each vector store.

    For every store in ``vector_stores`` (numbered from 1 in the output) this
    adds two sample texts with their metadata, calls ``delete`` with the first
    sample id (``"100"``), and prints the top-2 similarity search results for
    a fixed query.

    A failure while adding texts is reported and then skipped, so the delete
    and search steps still run for that store.

    Args:
        vector_stores: Iterable of objects exposing ``add_texts``, ``delete``
            and ``similarity_search``.

    Returns:
        None. Progress and results are printed to standard output.
    """
    texts = ["Rohan", "Shailendra"]
    metadata = [
        {"id": "100", "link": "Document Example Test 1"},
        {"id": "101", "link": "Document Example Test 2"},
    ]

    for i, vs in enumerate(vector_stores, start=1):
        try:
            vs.add_texts(texts, metadata)
            print(f"Add texts complete for vector store {i}")
        except Exception as ex:
            # Best-effort demo step: keep going, but show the actual error so
            # an unrelated failure is not mistaken for a duplicate insert.
            print(f"Expected error on duplicate add for vector store {i}: {ex}")

        vs.delete([metadata[0]["id"]])
        print(f"Delete texts complete for vector store {i}")

        results = vs.similarity_search("How are LOBS stored in Oracle Database", 2)
        print(f"Similarity search results for vector store {i}: {results}")
# All three stores, in the order used for the numbered output below.
vector_store_list = [vector_store_dot, vector_store_cosine, vector_store_euclidean]

# Run the add / delete / search walkthrough against every store.
manage_texts(vector_store_list)
def create_search_indices(connection):
    """Create one HNSW index per module-level vector store.

    The dot-product store gets an index with only a name and type; the cosine
    store additionally sets ``accuracy`` and ``parallel``; the Euclidean store
    sets ``neighbors`` and ``efConstruction``. Indexes are created in that
    order, and a completion message is printed afterwards.

    Args:
        connection: Database connection passed through to
            ``oraclevs.create_index``.

    Returns:
        None.
    """
    index_plan = (
        (vector_store_dot, {"idx_name": "hnsw_idx1", "idx_type": "HNSW"}),
        (
            vector_store_cosine,
            {
                "idx_name": "hnsw_idx2",
                "idx_type": "HNSW",
                "accuracy": 97,
                "parallel": 16,
            },
        ),
        (
            vector_store_euclidean,
            {
                "idx_name": "hnsw_idx3",
                "idx_type": "HNSW",
                "neighbors": 64,
                "efConstruction": 100,
            },
        ),
    )

    for store, index_params in index_plan:
        oraclevs.create_index(connection, store, params=index_params)

    print("Index creation complete.")
# Build the HNSW indexes on the three vector stores using the open connection.
create_search_indices(connection)
def conduct_advanced_searches(vector_stores):
    """Run and print a fixed series of searches against each vector store.

    For every store (numbered from 1 in the output) this prints a banner and
    then six labelled results: plain similarity search, similarity search with
    score, and max marginal relevance search, each once without and once with
    a metadata filter on id ``"101"``. Every search asks for 2 results.

    Args:
        vector_stores: Iterable of objects exposing ``similarity_search``,
            ``similarity_search_with_score`` and
            ``max_marginal_relevance_search``.

    Returns:
        None. All results are printed to standard output.
    """
    query = "How are LOBS stored in Oracle Database"
    filter_criteria = {"id": ["101"]}

    # (heading, method name, extra keyword arguments), in output order.
    search_plan = (
        ("Similarity search results without filter:", "similarity_search", {}),
        (
            "Similarity search results with filter:",
            "similarity_search",
            {"filter": filter_criteria},
        ),
        (
            "Similarity search with relevance score:",
            "similarity_search_with_score",
            {},
        ),
        (
            "Similarity search with relevance score with filter:",
            "similarity_search_with_score",
            {"filter": filter_criteria},
        ),
        (
            "Max marginal relevance search results:",
            "max_marginal_relevance_search",
            {"fetch_k": 20, "lambda_mult": 0.5},
        ),
        (
            "Max marginal relevance search results with filter:",
            "max_marginal_relevance_search",
            {"fetch_k": 20, "lambda_mult": 0.5, "filter": filter_criteria},
        ),
    )

    for store_number, store in enumerate(vector_stores, start=1):
        print(f"\n--- Vector Store {store_number} Advanced Searches ---")
        for heading, method_name, extra_kwargs in search_plan:
            print(heading)
            print(getattr(store, method_name)(query, 2, **extra_kwargs))
# Run the filtered, scored, and max-marginal-relevance searches on every store.
conduct_advanced_searches(vector_store_list)
Oracle AI Vector Search在处理各种需要高效语义搜索的场景中非常有用。例如,企业可以在客户服务中使用它来快速查找相关支持文档;开发人员可以用它来检索相似的代码片段;研究人员可以利用它在大量学术文章中搜索相关内容。
如果遇到问题欢迎在评论区交流。