Pydantic 保姆级教程:Python 数据验证与设置管理的终极指南

        Pydantic 是一个强大的 Python 库,主要用于数据验证和设置管理。它通过 Python 类型注解来定义数据结构,并自动提供数据验证、序列化和文档生成功能。本教程将带你从基础到高级全面掌握 Pydantic。

核心概念

Pydantic 的核心是模型(Model),它类似于 Python 的数据类(dataclass),但提供了更多功能:

  • 类型验证:自动验证输入数据的类型

  • 数据转换:自动将输入数据转换为正确的类型

  • 错误处理:提供清晰的错误信息

  • 序列化:轻松转换为字典或 JSON

  • 设置管理:非常适合管理应用程序配置

基础使用

安装 Pydantic

pip3 install pydantic

对于最新功能,可以安装预发布版本:

pip3 install pydantic --pre

第一个 Pydantic 模型

from datetime import datetime
from typing import Optional, List
from pydantic import BaseModel, ValidationError


class User(BaseModel):
    id: int  # required field
    name: str = "John Doe"  # has a default value, so it is optional
    signup_ts: Optional[datetime] = None
    friends: List[int] = []  # each element must be an int or directly convertible to int


# Create an instance
user_data = {
    "id": "123",  # note: this is a string
    "signup_ts": "2025-05-09 15:00",  # converted to a datetime automatically
    "friends": [1, 2, "3"],  # "3" is converted to an integer automatically
}

user = User(**user_data)

print(user)
# Output: id=123 name='John Doe' signup_ts=datetime.datetime(2025, 5, 9, 15, 0) friends=[1, 2, 3]

校验失败处理:

# Handle a validation failure: the third "friends" entry is not an integer.
bad_payload = {"id": "123", "friends": [1, 2, "not number"]}
try:
    User(**bad_payload)
except ValidationError as exc:
    # Print the error report as a JSON string.
    print(exc.json())

输出:

[{
	"type": "int_parsing",
	"loc": ["friends", 2],
	"msg": "Input should be a valid integer, unable to parse string as an integer",
	"input": "not number",
	"url": "https://errors.pydantic.dev/2.11/v/int_parsing"
}]

模型方法

# Convert to a dict
user_dict = user.model_dump()
print(
    "转换为字典:", user_dict
)  # prints: 转换为字典: {'id': 123, 'name': 'John Doe', 'signup_ts': datetime.datetime(2025, 5, 9, 15, 0), 'friends': [1, 2, 3]}

# Convert to JSON
user_json = user.model_dump_json()
print(
    "转换为JSON:", user_json
)  # prints: 转换为JSON: {"id":123,"name":"John Doe","signup_ts":"2025-05-09T15:00:00","friends":[1,2,3]}


# Load back from JSON
user_from_json = User.model_validate_json(user_json)
print(user_from_json == user)  # True

模型定义与字段类型

基本字段类型

Pydantic 支持绝大多数常用的标准 Python 类型:

from typing import List, Set, Dict, Optional, Union
from datetime import datetime, date, time, timedelta
from uuid import UUID
from pydantic import BaseModel

class Model(BaseModel):
    # Basic types
    int_field: int
    float_field: float
    str_field: str
    bool_field: bool

    # Container types
    list_field: List[int]
    set_field: Set[str]
    dict_field: Dict[str, float]

    # Optional field (defaults to None)
    optional_field: Optional[str] = None

    # Union type
    union_field: Union[int, str]

    # Date and time types
    date_field: date
    datetime_field: datetime
    time_field: time
    timedelta_field: timedelta

    # UUID
    uuid_field: UUID

    # Raw bytes
    bytes_field: bytes

特殊字段类型

Pydantic 提供了许多有用的特殊字段类型:

from pydantic import (
    BaseModel,
    EmailStr,
    HttpUrl,
    IPvAnyAddress,
    PositiveInt,
    NegativeInt,
    conint,
    Field
)
# EmailStr needs the email-validator package to be installed explicitly: pip3 install email-validator


class SpecialTypesModel(BaseModel):
    # E-mail address validation
    email: EmailStr

    # URL validation
    website: HttpUrl

    # IP address validation
    ip_address: IPvAnyAddress

    # Numeric constraints
    positive: PositiveInt
    negative: NegativeInt
    constrained_int: conint(gt=10, lt=100)  # strictly between 10 and 100

    # String constrained by length and by a regular expression
    constrained_str: str = Field(
        min_length=2,
        max_length=10,
        pattern=r'^[a-z]+$'  # raw-string regex: lowercase ASCII letters only
    )

数据验证

自定义验证器

from pydantic import BaseModel, ValidationError, ValidationInfo, field_validator


class UserModel(BaseModel):
    """Sign-up payload: a username plus a password entered twice."""

    username: str
    password: str
    password2: str

    # Pydantic v2 replaces the v1 ``@validator`` decorator with
    # ``@field_validator``; the explicit ``@classmethod`` goes underneath it.
    @field_validator('username')
    @classmethod
    def username_must_contain_letter(cls, v: str) -> str:
        """Reject a username that contains no alphabetic character."""
        if not any(c.isalpha() for c in v):
            raise ValueError('必须包含至少一个字母')
        return v

    @field_validator('password2')
    @classmethod
    def passwords_match(cls, v: str, info: ValidationInfo) -> str:
        """Require ``password2`` to be equal to ``password``."""
        # ``info.data`` (the v2 counterpart of the v1 ``values`` argument)
        # only holds fields that were declared earlier and validated
        # successfully, so ``password`` may be missing from it.
        if 'password' in info.data and v != info.data['password']:
            raise ValueError('密码不匹配')
        return v


# Test
try:
    UserModel(username='123', password='abc', password2='abcd')
except ValidationError as e:
    print(e)
    # Prints a report with 2 validation errors for UserModel:
    #   username  -> 必须包含至少一个字母   ('123' has no letter)
    #   password2 -> 密码不匹配            ('abcd' != 'abc')

根验证器

from pydantic import BaseModel, ValidationError, model_validator


class Model(BaseModel):
    """Two integers where ``field1`` must be strictly smaller than ``field2``."""

    field1: int
    field2: int

    # Pydantic v2 replaces the v1 ``@root_validator`` with ``@model_validator``.
    # A bare ``@root_validator`` is rejected by v2 at class-definition time.
    # In "after" mode the validator receives the already-validated instance,
    # so both fields are guaranteed to be present.
    @model_validator(mode='after')
    def check_fields(self) -> 'Model':
        """Validate the relationship between the two fields."""
        if self.field1 >= self.field2:
            raise ValueError('field1必须小于field2')
        return self


# Test
try:
    Model(field1=5, field2=3)
except ValidationError as e:
    print(e)
    # Prints a report with 1 validation error for Model whose message
    # contains: field1必须小于field2

模型配置

模型配置选项

from datetime import datetime

from pydantic import BaseModel, ConfigDict


class ConfigModel(BaseModel):
    """Model demonstrating common ``model_config`` options (Pydantic v2 names)."""

    model_config = ConfigDict(
        # How to treat input keys that are not declared fields:
        #   'allow'  - keep them on the instance (used here)
        #   'ignore' - silently drop them (this is the default)
        #   'forbid' - raise a validation error
        extra='allow',

        # Allow populating a field by its name as well as by its alias
        # (this key was called ``allow_population_by_field_name`` in v1).
        populate_by_name=True,

        # Re-validate values when attributes are assigned after creation.
        validate_assignment=True,

        # Store enum members as their ``.value``.
        use_enum_values=True,

        # Accept field types Pydantic has no built-in validator for.
        arbitrary_types_allowed=True,

        # Custom JSON encoders. Deprecated in v2 but still accepted;
        # ``@field_serializer`` is the recommended replacement.
        json_encoders={
            datetime: lambda v: v.timestamp(),
        },
    )

    name: str
    age: int


# Usage
model = ConfigModel(name='Alice', age=25, extra_field='test')  # extra field is accepted
print(model.model_dump())  # the dump includes the extra field

环境变量配置

# NOTE(review): importing BaseSettings from ``pydantic`` only works on
# Pydantic v1. In Pydantic v2 it lives in the separate ``pydantic-settings``
# package (``from pydantic_settings import BaseSettings``) - confirm which
# version this snippet targets.
from pydantic import BaseSettings

class Settings(BaseSettings):
    app_name: str = "Awesome API"
    admin_email: str  # no default, so a value must be supplied
    items_per_user: int = 50

    class Config:
        # Read values from a .env file, decoded as UTF-8
        env_file = ".env"
        env_file_encoding = 'utf-8'

# .env file contents (the address is a placeholder):
# ADMIN_EMAIL=admin@example.com
# ITEMS_PER_USER=30

settings = Settings()
print(settings)
# Output: app_name='Awesome API' admin_email='admin@example.com' items_per_user=30

高级特性

递归模型

from typing import List
from pydantic import BaseModel

# A single purchasable item.
class Item(BaseModel):
    name: str
    price: float


# A user that owns a list of nested Item models.
class User(BaseModel):
    name: str
    items: List[Item]


# Usage: the items are supplied as plain dicts matching the Item fields.
raw_items = [
    {"name": "Laptop", "price": 999.99},
    {"name": "Mouse", "price": 25.50},
]
user = User(name="Alice", items=raw_items)
print(user)

泛型模型

from typing import Generic, TypeVar, List
from pydantic import BaseModel

T = TypeVar('T')


# Pydantic v2 removed ``pydantic.generics.GenericModel``; a generic model now
# inherits from ``BaseModel`` and ``Generic[T]`` directly.
class Response(BaseModel, Generic[T]):
    """Generic response envelope whose payload type is the parameter ``T``."""

    success: bool
    data: T
    message: str = ""


# Usage: parametrise the envelope with the payload type.
user_data = {"name": "Alice", "age": 25}
response = Response[dict](success=True, data=user_data)
print(response)

动态模型创建

from pydantic import BaseModel, create_model

# Field specifications as (type, default) tuples; ``...`` marks a field as required.
dynamic_fields = {
    'field1': (str, ...),  # required
    'field2': (int, 0),    # optional, defaults to 0
}
DynamicModel = create_model('DynamicModel', **dynamic_fields)

model = DynamicModel(field1="hello")
print(model)  # field1='hello' field2=0

性能优化

使用 strict 模式

from pydantic import BaseModel, StrictInt, StrictStr

# Model whose fields use the strict int / str types.
class StrictModel(BaseModel):
    int_field: StrictInt
    str_field: StrictStr


# This fails because '123' is a string rather than an integer.
strict_input = {"int_field": '123', "str_field": 'abc'}
try:
    StrictModel(**strict_input)
except ValueError as err:
    print(err)

使用 parse_obj_as 进行批量验证(Pydantic v2 中已弃用,推荐改用 TypeAdapter)

from typing import List
from pydantic import BaseModel, TypeAdapter


class Item(BaseModel):
    """An item with a name and a price."""

    name: str
    price: float


# Validate a whole list in one call. ``TypeAdapter`` is the Pydantic v2
# replacement for the deprecated ``parse_obj_as`` helper.
items = TypeAdapter(List[Item]).validate_python(
    [{'name': 'Apple', 'price': 1.2}, {'name': 'Banana', 'price': 0.8}]
)
print(items)

使用 validate_arguments 装饰器(Pydantic v2 中已弃用,由 validate_call 取代)

from pydantic import ValidationError, validate_call


# ``validate_call`` is the Pydantic v2 replacement for the deprecated
# ``validate_arguments`` decorator: arguments are validated against the
# function's type annotations on every call.
@validate_call
def calculate_price(quantity: int, price_per_unit: float, discount: float = 0.0) -> float:
    """Return the total price after applying the discount.

    Args:
        quantity: Number of units.
        price_per_unit: Price of a single unit.
        discount: Fraction taken off the total (0.1 means 10% off).
    """
    return quantity * price_per_unit * (1 - discount)


# Arguments are validated automatically
print(calculate_price(3, 10.5, 0.1))  # 28.35
try:
    calculate_price("three", "ten")  # neither string can be parsed as a number
except ValidationError as e:
    print(e)

常见问题与注意事项

1. 循环引用问题

当模型相互引用(或引用自身)时,可以使用 ForwardRef 或字符串形式的类型注解(例如 list['User']),并在所有模型定义完成后调用 model_rebuild()

from typing import ForwardRef
from pydantic import BaseModel

class User(BaseModel):
    name: str
    # The element type refers to User itself through a forward reference.
    friends: list[ForwardRef('User')] = []


# Rebuild the model so the forward reference is resolved.
User.model_rebuild()

alice = User(name="Alice")
bob = User(name="Bob", friends=[alice])
print(bob)

2. 性能考虑

  • 对于高性能场景,考虑使用 strict 模式减少类型转换开销

  • 大型模型解析可能会消耗较多内存,考虑分批处理

  • 复杂验证器会增加处理时间

3. 与 ORM 集成

from pydantic import BaseModel, ConfigDict
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class UserDB(Base):
    """SQLAlchemy ORM mapping for the ``users`` table."""

    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    name = Column(String)
    email = Column(String)


class UserModel(BaseModel):
    """Pydantic view of a user that can be built from an ORM object."""

    # ``from_attributes`` is the Pydantic v2 name of the v1 ``orm_mode``
    # option: it lets validation read values from object attributes.
    model_config = ConfigDict(from_attributes=True)

    id: int
    name: str
    email: str


# Build the Pydantic model from an ORM instance. ``model_validate`` replaces
# the v1 ``from_orm`` class method.
db_user = UserDB(id=1, name="Alice", email="alice@example.com")
user_model = UserModel.model_validate(db_user)
print(user_model)

4. 版本兼容性

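  • Pydantic v1 和 v2 之间存在重大的不兼容变更(例如 validator → field_validator、root_validator → model_validator、orm_mode → from_attributes、BaseSettings 移至独立的 pydantic-settings 包),请注意版本选择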

  • 安装 Pydantic v2 后,仍可通过 pydantic.v1 命名空间继续使用 v1 的 API,便于逐步迁移、同时兼容两个版本

# 兼容性导入
# Prefer the v1-compatible BaseModel exposed under ``pydantic.v1``.
try:
    from pydantic.v1 import BaseModel
except ImportError:
    # ``pydantic.v1`` could not be imported: fall back to the top-level BaseModel.
    from pydantic import BaseModel

实战案例

案例1:API 请求/响应模型

from typing import Optional, List
from pydantic import BaseModel, ConfigDict, EmailStr, Field


class UserBase(BaseModel):
    """Fields shared by every user schema."""

    email: EmailStr
    username: str = Field(..., min_length=3, max_length=20)


class UserCreate(UserBase):
    """Request body for creating a user."""

    password: str = Field(..., min_length=8)


class UserUpdate(BaseModel):
    """Request body for a partial update; every field is optional."""

    password: Optional[str] = Field(None, min_length=8)
    username: Optional[str] = Field(None, min_length=3, max_length=20)


class UserInDB(UserBase):
    """User as stored in the database, including the items it owns."""

    # ``from_attributes`` is the Pydantic v2 name of the v1 ``orm_mode`` option.
    model_config = ConfigDict(from_attributes=True)

    id: int
    is_active: bool
    # Forward reference to ItemInDB, which is defined further down.
    items: List['ItemInDB'] = []


class ItemBase(BaseModel):
    """Fields shared by every item schema."""

    title: str
    description: Optional[str] = None


class ItemCreate(ItemBase):
    """Request body for creating an item (no extra fields)."""


class ItemInDB(ItemBase):
    """Item as stored in the database."""

    model_config = ConfigDict(from_attributes=True)

    id: int
    owner_id: int


# Resolve the 'ItemInDB' forward reference now that the class exists.
UserInDB.model_rebuild()

案例2:配置管理

# NOTE(review): importing BaseSettings from ``pydantic`` only works on
# Pydantic v1. In Pydantic v2 it lives in the separate ``pydantic-settings``
# package (``from pydantic_settings import BaseSettings``) - confirm which
# version this snippet targets.
from pydantic import BaseSettings, PostgresDsn, RedisDsn

class Settings(BaseSettings):
    app_name: str = "My App"
    debug: bool = False

    # Database configuration
    postgres_dsn: PostgresDsn = "postgres://user:pass@localhost:5432/db"
    redis_dsn: RedisDsn = "redis://localhost:6379/0"

    # Authentication configuration
    secret_key: str  # no default, so a value must be supplied
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 30

    class Config:
        # Read values from a .env file; environment variable names carry the APP_ prefix
        env_file = ".env"
        env_prefix = "APP_"

settings = Settings()

案例3:复杂数据验证

from datetime import datetime
from typing import Optional
from pydantic import BaseModel, ValidationError, field_validator, model_validator


class Event(BaseModel):
    """An event with a non-empty name and a start/end time window."""

    name: str
    start_time: datetime
    end_time: datetime
    location: str
    max_attendees: int = 100
    description: Optional[str] = None

    # Pydantic v2 replaces the v1 ``@validator`` with ``@field_validator``.
    @field_validator('name')
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Reject whitespace-only names and return the stripped name."""
        if not v.strip():
            raise ValueError('名称不能为空')
        return v.strip()

    # Pydantic v2 replaces the v1 ``@root_validator`` with ``@model_validator``.
    # A bare ``@root_validator`` is rejected by v2 at class-definition time.
    # In "after" mode this runs on the validated instance, and only when
    # every field passed its own validation.
    @model_validator(mode='after')
    def check_times(self) -> 'Event':
        """Require the event to end after it starts and to start in the future."""
        if self.end_time <= self.start_time:
            raise ValueError('结束时间必须晚于开始时间')

        # Compared against the local naive clock, as in the original example.
        if self.start_time < datetime.now():
            raise ValueError('开始时间不能是过去时间')

        return self


# Usage
try:
    event = Event(
        name="  ",  # whitespace-only: rejected by the name validator
        start_time="2023-01-01 10:00",
        end_time="2023-01-01 09:00",  # the end time is earlier than the start time
        location="Conference Room"
    )
except ValidationError as e:
    # Only the ``name`` error is reported here: the model-level time check
    # is skipped once a field has failed validation.
    print(e)

总结

Pydantic 是一个功能强大且灵活的数据验证和设置管理库,它通过 Python 类型注解提供了优雅的解决方案。关键要点:

  1. 模型优先:从定义模型开始,让 Pydantic 处理验证和转换

  2. 类型提示:充分利用 Python 的类型系统

  3. 灵活验证:使用验证器处理复杂业务规则

  4. 配置管理:非常适合应用程序设置和环境变量

  5. 性能考虑:在需要高性能的场景中使用严格模式

通过本教程,你应该已经掌握了 Pydantic 的核心概念和高级用法。现在可以开始在项目中应用这些知识,构建更健壮的数据处理流程了!

你可能感兴趣的:(基础知识(Python),python,Pydantic,数据验证,设置管理库)