LangChain是一个用于构建大语言模型(LLM)应用的框架,其核心组件构成了开发AI应用的基础架构。以下是LangChain的核心组件及其功能的详细说明:
功能:处理与语言模型的交互
# BaseLanguageModel is imported for illustration only (the common base
# interface); it is not used below.
from langchain_core.language_models import BaseLanguageModel
from langchain_openai import ChatOpenAI
# Initialize the chat model (expects OPENAI_API_KEY in the environment)
llm = ChatOpenAI(model="gpt-4-turbo")
# Basic call: pass a plain prompt string, receive an AIMessage back
response = llm.invoke("解释量子力学基础")
包含子组件:
Prompts:提示词模板管理
from langchain_core.prompts import ChatPromptTemplate
# Template with three placeholders: {role}, {style}, {question}
prompt = ChatPromptTemplate.from_template(
    "你是一位{role},请用{style}风格回答:{question}"
)
# format() substitutes every placeholder and returns the rendered text
formatted = prompt.format(
    role="物理学家",
    style="通俗易懂",
    question="什么是波粒二象性?"
)
Output Parsers:输出解析器
from langchain_core.output_parsers import StrOutputParser
# StrOutputParser reduces a chat-model message to its plain string content
parser = StrOutputParser()
# `response` is the AIMessage produced by the model call in the
# earlier snippet; .content is its text payload
parsed = parser.parse(response.content)
功能:实现知识增强的检索功能
典型实现:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
# Document loading and processing: fetch the page, then split it into
# overlapping chunks small enough to embed
loader = WebBaseLoader("https://example.com")
docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
splits = splitter.split_documents(docs)
# Vector store: embed every chunk with OpenAI embeddings and index
# them in a local FAISS index
vectorstore = FAISS.from_documents(
    splits,
    OpenAIEmbeddings()
)
# Retrieval: wrap the store as a retriever for use inside chains
retriever = vectorstore.as_retriever()
功能:管理对话历史状态
# FIX: ConversationBufferMemory lives in the `langchain` package;
# `langchain_core.memory` only defines the BaseMemory interface, so the
# original import fails.
from langchain.memory import ConversationBufferMemory

# In-memory buffer that keeps the raw conversation turns verbatim
memory = ConversationBufferMemory()
# Record one user/assistant exchange
memory.save_context(
    {"input": "你好"},
    {"output": "你好!我是AI助手"}
)
# Prints the accumulated history under the "history" key
print(memory.load_memory_variables({}))
记忆类型:
功能:组合多个组件形成工作流
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# FIX: the map step below produces the keys {"context", "question"},
# but the earlier `prompt` expects {role}/{style}/{question} and would
# raise a KeyError. Give the RAG chain a prompt whose variables match.
rag_prompt = ChatPromptTemplate.from_template(
    "根据以下上下文回答问题:\n{context}\n\n问题:{question}"
)
# Build the RAG chain: retrieve context for the question, render the
# prompt, call the model, and parse the reply to a plain string.
retrieval_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | parser
)
# The chain is invoked with the bare question string; RunnablePassthrough
# forwards it unchanged into the "question" slot.
result = retrieval_chain.invoke("LangChain是什么?")
高级链类型:
功能:动态决策和工具使用
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools import Tool
def search_api(query):
    """Stub search backend for the agent demo; ignores *query* and
    returns a canned result string."""
    canned_result = "搜索结果..."
    return canned_result
# Expose the stub search function to the agent as a single named tool
tools = [
    Tool(
        name="Search",
        func=search_api,
        description="用于搜索信息"
    )
]
# NOTE(review): create_tool_calling_agent requires the prompt to contain
# an {agent_scratchpad} placeholder; the `prompt` defined earlier does
# not include one — confirm a dedicated agent prompt is intended here.
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools)
# The executor loops: the model chooses a tool, the tool runs, and the
# observation is fed back until the model emits a final answer.
response = agent_executor.invoke(
    {"input": "找关于LangChain的最新信息"}
)
功能:监控和记录执行过程
from langchain_core.callbacks import FileCallbackHandler

# Append every chain/LLM event to a log file for later inspection
handler = FileCallbackHandler("logs.json")
# FIX: the original invoked the undefined name `chain`; the chain built
# earlier in this document is `retrieval_chain`, and (per its earlier
# usage) it takes the bare question string, not a dict.
# Callbacks are passed per-invocation via the config argument.
result = retrieval_chain.invoke(
    "解释神经网络",
    {"callbacks": [handler]}
)
支持格式:
# A sample of the many loader integrations (Docx2txtLoader and
# SeleniumURLLoader are imported for illustration only)
from langchain_community.document_loaders import (
    PDFMinerLoader,
    Docx2txtLoader,
    SeleniumURLLoader
)
# PDF loading example: extracts text via pdfminer into Document objects
loader = PDFMinerLoader("file.pdf")
documents = loader.load()
分割策略:
from langchain_text_splitters import (
    CharacterTextSplitter,
    TokenTextSplitter,
)
# FIX: SemanticChunker is not part of langchain_text_splitters; it ships
# in the langchain_experimental package.
from langchain_experimental.text_splitter import SemanticChunker

# Semantic chunking: split at points where embedding similarity between
# adjacent sentences drops, instead of at fixed character counts
splitter = SemanticChunker(OpenAIEmbeddings())
chunks = splitter.split_documents(documents)
主流选项:
| 存储方案 | 特点 | 适用场景 |
| --- | --- | --- |
| FAISS | 本地运行,高效 | 中小规模数据集 |
| Pinecone | 全托管云服务 | 生产环境大规模应用 |
| Chroma | 开源轻量级 | 快速原型开发 |
| Weaviate | 支持混合搜索 | 复杂检索需求 |
扩展能力:
from langchain_community.tools import (
    WikipediaQueryRun,
    ArxivQueryRun,
    YouTubeSearchTool
)
from langchain_community.utilities import ArxivAPIWrapper, WikipediaAPIWrapper

# FIX: WikipediaQueryRun (and ArxivQueryRun) wrap an API client; they
# must be constructed with an api_wrapper instance — the original
# zero-argument construction fails validation.
tools = [
    WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
    ArxivQueryRun(api_wrapper=ArxivAPIWrapper()),
    YouTubeSearchTool()
]
复杂流程编排:
from langgraph.graph import Graph

# Two-step pipeline: research first, then write using its output.
# NOTE(review): `research_chain` and `writing_chain` are not defined in
# this document — presumably runnables built elsewhere; a compiled graph
# also normally needs entry/finish points configured — confirm.
workflow = Graph()
workflow.add_node("research", research_chain)
workflow.add_node("write", writing_chain)
workflow.add_edge("research", "write")
# compile() turns the graph definition into an invocable runnable
app = workflow.compile()
性能优化:
# FIX: SQLiteCache now lives in langchain_community.cache, and setting
# the module attribute `langchain.llm_cache` directly is deprecated in
# favor of the set_llm_cache() helper.
from langchain_community.cache import SQLiteCache
from langchain.globals import set_llm_cache

# Cache identical LLM calls in a local SQLite file to cut cost and latency
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
组件选型原则:
性能优化组合:
# Example of an efficient production configuration
# NOTE(review): the community Pinecone vectorstore has been superseded
# by the langchain_pinecone partner package in recent releases — confirm
# which version this document targets.
from langchain_community.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
# Attach to an index that already exists in Pinecone (no re-ingestion)
vectorstore = Pinecone.from_existing_index(
    "my-index",
    OpenAIEmbeddings()
)
# QA chain that also reports which source documents were used;
# temperature=0 for deterministic answers, "stuff" packs all retrieved
# chunks into a single prompt
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
扩展开发模式:
# 自定义组件示例
from langchain_core.runnables import RunnableLambda
def custom_processor(text: str) -> str:
    """Return *text* converted to uppercase (demo processing step)."""
    return str.upper(text)
chain = RunnableLambda(custom_processor) | llm
LangChain通过这组模块化组件,使开发者能够:
最新版本的LangChain更强调: