Pydantic 是一个强大的 Python 库,主要用于数据验证和设置管理。它通过 Python 类型注解来定义数据结构,并自动提供数据验证、序列化和文档生成功能。本教程将带你从基础到高级全面掌握 Pydantic。
Pydantic 的核心是模型(Model),它类似于 Python 的数据类(dataclass),但提供了更多功能:
类型验证:自动验证输入数据的类型
数据转换:自动将输入数据转换为正确的类型
错误处理:提供清晰的错误信息
序列化:轻松转换为字典或 JSON
设置管理:非常适合管理应用程序配置
pip3 install pydantic
对于最新功能,可以安装预发布版本:
pip3 install pydantic --pre
from datetime import datetime
from typing import Optional, List
from pydantic import BaseModel, ValidationError
class User(BaseModel):
    """User record used to demonstrate type coercion and validation."""

    id: int  # required field
    name: str = "John Doe"  # optional: falls back to this default
    signup_ts: Optional[datetime] = None
    friends: List[int] = []  # every element must be an int or convertible to int


# Build an instance from loosely typed input.
user_data = {
    "id": "123",  # note: a string, converted to int
    "signup_ts": "2025-05-09 15:00",  # converted to a datetime
    "friends": [1, 2, "3"],  # "3" is converted to the integer 3
}
user = User(**user_data)
print(user)
# Output: id=123 name='John Doe' signup_ts=datetime.datetime(2025, 5, 9, 15, 0) friends=[1, 2, 3]

# Handling a validation failure: "not number" cannot be parsed as an int.
try:
    User(id="123", friends=[1, 2, "not number"])
except ValidationError as e:
    print(e.json())
输出:
[{
"type": "int_parsing",
"loc": ["friends", 2],
"msg": "Input should be a valid integer, unable to parse string as an integer",
"input": "not number",
"url": "https://errors.pydantic.dev/2.11/v/int_parsing"
}]
# Convert the model to a plain dict.
user_dict = user.model_dump()
print("转换为字典:", user_dict)
# 转换为字典: {'id': 123, 'name': 'John Doe', 'signup_ts': datetime.datetime(2025, 5, 9, 15, 0), 'friends': [1, 2, 3]}

# Convert the model to a JSON string.
user_json = user.model_dump_json()
print("转换为JSON:", user_json)
# 转换为JSON: {"id":123,"name":"John Doe","signup_ts":"2025-05-09T15:00:00","friends":[1,2,3]}

# Load the model back from JSON; the round trip yields an equal model.
user_from_json = User.model_validate_json(user_json)
print(user_from_json == user)  # True
Pydantic 支持所有标准 Python 类型:
from typing import List, Set, Dict, Optional, Union
from datetime import datetime, date, time, timedelta
from uuid import UUID
from pydantic import BaseModel
# Showcase model: one field for each kind of standard Python type.
class Model(BaseModel):
    # Scalars
    int_field: int
    float_field: float
    str_field: str
    bool_field: bool

    # Containers
    list_field: List[int]
    set_field: Set[str]
    dict_field: Dict[str, float]

    # Optional field (defaults to None)
    optional_field: Optional[str] = None

    # Union of several accepted types
    union_field: Union[int, str]

    # Dates and times
    date_field: date
    datetime_field: datetime
    time_field: time
    timedelta_field: timedelta

    # UUID
    uuid_field: UUID

    # Raw bytes
    bytes_field: bytes
Pydantic 提供了许多有用的特殊字段类型:
from pydantic import (
BaseModel,
EmailStr,
HttpUrl,
IPvAnyAddress,
PositiveInt,
NegativeInt,
conint,
Field
)
# EmailStr requires the email-validator package: pip3 install email-validator
class SpecialTypesModel(BaseModel):
    """Showcase of Pydantic's special-purpose and constrained field types."""

    # E-mail address validation
    email: EmailStr

    # URL validation
    website: HttpUrl

    # IP address validation
    ip_address: IPvAnyAddress

    # Numeric constraints
    positive: PositiveInt
    negative: NegativeInt
    constrained_int: conint(gt=10, lt=100)

    # String constraints: length bounds plus a regular expression.
    constrained_str: str = Field(
        min_length=2,
        max_length=10,
        pattern=r'^[a-z]+$',  # raw-string regex: lowercase ASCII letters only
    )
from pydantic import BaseModel, field_validator


class UserModel(BaseModel):
    """Sign-up form with a per-field check and a cross-field password check."""

    username: str
    password: str
    password2: str

    @field_validator('username')
    @classmethod
    def username_must_contain_letter(cls, v):
        """Reject usernames that contain no alphabetic character."""
        if not any(c.isalpha() for c in v):
            raise ValueError('必须包含至少一个字母')
        return v

    @field_validator('password2')
    @classmethod
    def passwords_match(cls, v, info):
        """Reject ``password2`` when it differs from ``password``."""
        # info.data holds the fields validated before this one; 'password'
        # is missing from it when that field itself failed validation.
        if 'password' in info.data and v != info.data['password']:
            raise ValueError('密码不匹配')
        return v


# Demo: both validators fail for this input.
try:
    UserModel(username='123', password='abc', password2='abcd')
except ValueError as e:
    print(e)
# Prints a report of 2 validation errors for UserModel:
#   username  -> 必须包含至少一个字母
#   password2 -> 密码不匹配
from pydantic import BaseModel, model_validator


class Model(BaseModel):
    """Pair of integers where ``field1`` must be strictly less than ``field2``."""

    field1: int
    field2: int

    @model_validator(mode='after')
    def check_fields(self):
        """Validate the relationship between the two fields.

        Runs after both fields have been validated, so they are plain ints here.
        """
        if self.field1 >= self.field2:
            raise ValueError('field1必须小于field2')
        return self


# Demo: 5 is not less than 3, so model-level validation fails.
try:
    Model(field1=5, field2=3)
except ValueError as e:
    print(e)
# Prints a report of 1 validation error for Model carrying the message
# "field1必须小于field2"; the error belongs to the model as a whole rather
# than to a single field.
from pydantic import BaseModel
class ConfigModel(BaseModel):
    """Model demonstrating commonly used ``model_config`` options."""

    model_config = {
        # Keep any extra fields passed to the model.
        # Other values: 'forbid' rejects extra fields, 'ignore' (the default)
        # silently drops them.
        'extra': 'allow',
        # Allow populating a field by its name as well as by its alias.
        'populate_by_name': True,
        # Re-validate values when attributes are assigned.
        'validate_assignment': True,
        # Store enum values instead of enum members.
        'use_enum_values': True,
        # Allow arbitrary (non-Pydantic) types as field types.
        'arbitrary_types_allowed': True,
        # Custom JSON encoders.
        # NOTE(review): json_encoders is reported as deprecated in Pydantic v2;
        # confirm whether a field serializer should be used instead.
        'json_encoders': {
            datetime: lambda v: v.timestamp(),
        },
    }

    name: str
    age: int


# Usage: extra_field is accepted because extra='allow'.
model = ConfigModel(name='Alice', age=25, extra_field='test')
print(model.model_dump())  # the dump includes extra_field
from pydantic import BaseSettings
# NOTE(review): in Pydantic v2, BaseSettings is provided by the separate
# pydantic-settings package rather than by pydantic itself; confirm which
# version this snippet targets before running it.
class Settings(BaseSettings):
    """Application settings read from the environment and a .env file."""

    app_name: str = "Awesome API"
    admin_email: str  # required: no default
    items_per_user: int = 50

    class Config:
        env_file = ".env"
        env_file_encoding = 'utf-8'


# Example .env file contents:
# ADMIN_EMAIL=admin@example.com
# ITEMS_PER_USER=30
settings = Settings()
print(settings)
# Output: app_name='Awesome API' admin_email='admin@example.com' items_per_user=30
from typing import List
from pydantic import BaseModel
# A single purchasable item.
class Item(BaseModel):
    name: str
    price: float


# A user owning a list of nested Item models.
class User(BaseModel):
    name: str
    items: List[Item]


# Usage: the plain dicts under "items" are validated into Item instances.
user = User(
    name="Alice",
    items=[
        {"name": "Laptop", "price": 999.99},
        {"name": "Mouse", "price": 25.50},
    ],
)
print(user)
from typing import Generic, TypeVar, List
from pydantic import BaseModel

T = TypeVar('T')


class Response(BaseModel, Generic[T]):
    """Generic response envelope whose ``data`` payload has type ``T``.

    Pydantic v2 supports generics directly on BaseModel, so the v1-era
    ``pydantic.generics.GenericModel`` base class is not needed.
    """

    success: bool
    data: T
    message: str = ""


# Usage: parametrize the envelope with the payload type.
user_data = {"name": "Alice", "age": 25}
response = Response[dict](success=True, data=user_data)
print(response)
from pydantic import BaseModel, create_model
# Build a model class at runtime; each field is a (type, default) tuple.
DynamicModel = create_model(
    'DynamicModel',
    field1=(str, ...),  # required: Ellipsis means "no default"
    field2=(int, 0),  # optional, defaults to 0
)

model = DynamicModel(field1="hello")
print(model)  # field1='hello' field2=0
strict 模式
from pydantic import BaseModel, StrictInt, StrictStr
# Model whose fields use the strict int/str types.
class StrictModel(BaseModel):
    int_field: StrictInt
    str_field: StrictStr


# This fails because '123' is a string, not an integer.
try:
    StrictModel(int_field='123', str_field='abc')
except ValueError as exc:
    print(exc)
parse_obj_as 进行批量验证
from typing import List
from pydantic import BaseModel, TypeAdapter


class Item(BaseModel):
    """A named item with a price."""

    name: str
    price: float


# Validate a whole list in one call. TypeAdapter is the Pydantic v2
# replacement for the deprecated parse_obj_as helper.
items = TypeAdapter(List[Item]).validate_python(
    [{'name': 'Apple', 'price': 1.2}, {'name': 'Banana', 'price': 0.8}]
)
print(items)
validate_arguments 装饰器
from pydantic import validate_arguments
# validate_call is the Pydantic v2 replacement for the deprecated
# validate_arguments decorator.
from pydantic import validate_call


@validate_call
def calculate_price(quantity: int, price_per_unit: float, discount: float = 0.0) -> float:
    """Return the total price after applying a discount.

    Args:
        quantity: Number of units.
        price_per_unit: Price of a single unit.
        discount: Fraction taken off the total (0.1 means 10% off).

    Returns:
        ``quantity * price_per_unit * (1 - discount)``.
    """
    return quantity * price_per_unit * (1 - discount)


# Arguments are validated automatically against the annotations.
print(calculate_price(3, 10.5, 0.1))  # 28.35

# Neither "three" nor "ten" can be parsed as a number, so validation fails.
try:
    calculate_price("three", "ten")
except ValueError as e:
    print(e)
当模型相互引用时,需要使用 ForwardRef:
from typing import ForwardRef
from pydantic import BaseModel
# Self-referencing model: friends is a list of other User instances.
class User(BaseModel):
    name: str
    friends: list[ForwardRef('User')] = []


# Resolve the forward reference now that User is fully defined.
User.model_rebuild()

user1 = User(name="Alice")
user2 = User(name="Bob", friends=[user1])
print(user2)
对于高性能场景,考虑使用 strict 模式减少类型转换开销
大型模型解析可能会消耗较多内存,考虑分批处理
复杂验证器会增加处理时间
from pydantic import BaseModel
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class UserDB(Base):
    """SQLAlchemy ORM class mapped to the ``users`` table."""

    __tablename__ = "users"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    email = Column(String)


class UserModel(BaseModel):
    """Pydantic view of a ``UserDB`` row."""

    # from_attributes lets model_validate read field values from object
    # attributes; it is the Pydantic v2 name for v1's orm_mode.
    model_config = {'from_attributes': True}

    id: int
    name: str
    email: str


# Create the Pydantic model from an ORM instance
# (model_validate replaces v1's from_orm).
db_user = UserDB(id=1, name="Alice", email="alice@example.com")
user_model = UserModel.model_validate(db_user)
print(user_model)
Pydantic v1 和 v2 有重大变化,注意版本选择
使用 pydantic.v1 可以同时兼容两个版本
# 兼容性导入
try:
from pydantic.v1 import BaseModel
except ImportError:
from pydantic import BaseModel
from typing import Optional, List
from pydantic import BaseModel, EmailStr, Field
class UserBase(BaseModel):
    """Fields shared by every user schema."""

    email: EmailStr
    username: str = Field(..., min_length=3, max_length=20)


class UserCreate(UserBase):
    """Payload for creating a user; adds the password."""

    password: str = Field(..., min_length=8)


class UserUpdate(BaseModel):
    """Payload for a partial update; every field is optional."""

    password: Optional[str] = Field(None, min_length=8)
    username: Optional[str] = Field(None, min_length=3, max_length=20)


class UserInDB(UserBase):
    """User as stored in the database, including the items it owns."""

    # Allow building this schema from ORM objects (v1 called this orm_mode).
    model_config = {'from_attributes': True}

    id: int
    is_active: bool
    # Forward reference: ItemInDB is defined further down.
    items: List['ItemInDB'] = []


class ItemBase(BaseModel):
    """Fields shared by every item schema."""

    title: str
    description: Optional[str] = None


class ItemCreate(ItemBase):
    """Payload for creating an item; no fields beyond the base."""


class ItemInDB(ItemBase):
    """Item as stored in the database."""

    # Allow building this schema from ORM objects (v1 called this orm_mode).
    model_config = {'from_attributes': True}

    id: int
    owner_id: int


# Resolve the 'ItemInDB' forward reference used by UserInDB.items.
UserInDB.model_rebuild()
from pydantic import BaseSettings, PostgresDsn, RedisDsn
# Application configuration loaded from the environment and a .env file.
# NOTE(review): in Pydantic v2, BaseSettings is provided by the separate
# pydantic-settings package rather than by pydantic itself; confirm which
# version this snippet targets before running it.
class Settings(BaseSettings):
    app_name: str = "My App"
    debug: bool = False

    # Database configuration
    postgres_dsn: PostgresDsn = "postgres://user:pass@localhost:5432/db"
    redis_dsn: RedisDsn = "redis://localhost:6379/0"

    # Authentication configuration
    secret_key: str  # required: no default
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 30

    class Config:
        env_file = ".env"
        env_prefix = "APP_"


settings = Settings()
from datetime import datetime
from typing import Optional
from pydantic import BaseModel, field_validator, model_validator


class Event(BaseModel):
    """Scheduled event with a non-blank name and a valid future time window."""

    name: str
    start_time: datetime
    end_time: datetime
    location: str
    max_attendees: int = 100
    description: Optional[str] = None

    @field_validator('name')
    @classmethod
    def name_must_not_be_empty(cls, v):
        """Strip surrounding whitespace and reject names that are then empty."""
        stripped = v.strip()
        if not stripped:
            raise ValueError('名称不能为空')
        return stripped

    @model_validator(mode='after')
    def check_times(self):
        """Require the end to follow the start and the start not to be in the past.

        Runs only after every field has validated successfully, so both
        timestamps are datetime objects here.
        """
        if self.end_time <= self.start_time:
            raise ValueError('结束时间必须晚于开始时间')
        # NOTE(review): datetime.now() is naive; comparing it with a
        # timezone-aware start_time would raise TypeError. Confirm inputs.
        if self.start_time < datetime.now():
            raise ValueError('开始时间不能是过去时间')
        return self


# Usage
try:
    event = Event(
        name=" ",  # whitespace-only name is rejected by the name validator
        start_time="2023-01-01 10:00",
        end_time="2023-01-01 09:00",  # end time is earlier than start time
        location="Conference Room",
    )
except ValueError as e:
    # Only the name error is reported here: the 'after' model validator is
    # skipped once a field has failed validation.
    print(e)
Pydantic 是一个功能强大且灵活的数据验证和设置管理库,它通过 Python 类型注解提供了优雅的解决方案。关键要点:
模型优先:从定义模型开始,让 Pydantic 处理验证和转换
类型提示:充分利用 Python 的类型系统
灵活验证:使用验证器处理复杂业务规则
配置管理:非常适合应用程序设置和环境变量
性能考虑:在需要高性能的场景中使用严格模式
通过本教程,你应该已经掌握了 Pydantic 的核心概念和高级用法。现在可以开始在项目中应用这些知识,构建更健壮的数据处理流程了!