基于 OpenCV 的人体姿态估计

这是一个基于 OpenCV 的人体姿态估计系统,能够从摄像头视频流中实时检测人体关键点,并通过简化算法重建 3D 姿态,最后在 3D 空间中进行仿真展示。系统主要包含 2D 姿态检测、3D 姿态重建和 3D 仿真三个核心模块。

模块导入与环境准备

python

运行

import cv2
import numpy as np
import os
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create every output directory up front; exist_ok makes reruns harmless.
for _output_dir in ("results/2d_poses", "results/3d_poses", "results/simulations"):
    os.makedirs(_output_dir, exist_ok=True)

  • 导入必要的库:计算机视觉 (cv2)、数值计算 (numpy)、文件操作 (os)、时间测量 (time) 和绘图工具 (matplotlib)
  • 创建结果保存目录,exist_ok=True 确保目录存在时不会报错

常量定义

python

运行

# Skeleton edges between slots of the 33-keypoint array, grouped by body
# region. Each pair is (start index, end index).
_SKELETON_GROUPS = (
    # head
    ((0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (6, 7)),
    # torso
    ((0, 11), (0, 12), (11, 12)),
    # left arm
    ((11, 13), (13, 15), (15, 17), (17, 19), (19, 21)),
    # right arm
    ((12, 14), (14, 16), (16, 18), (18, 20), (20, 22)),
    # pelvis
    ((11, 23), (12, 24), (23, 24)),
    # left leg
    ((23, 25), (25, 27), (27, 29), (29, 31)),
    # right leg
    ((24, 26), (26, 28), (28, 30), (30, 32)),
)
JOINT_CONNECTIONS = [edge for group in _SKELETON_GROUPS for edge in group]

  • 定义 33 个人体关键点的连接关系,用于后续绘制骨架

2D 姿态估计类

python

运行

class HumanPoseEstimator:
    def __init__(
        self,
        proto_file="pose_deploy_linevec_faster_4_stages.prototxt",
        weights_file="pose_iter_160000.caffemodel",
    ):
        """Load the Caffe pose network through OpenCV DNN if its files exist.

        Args:
            proto_file: Path to the ``.prototxt`` network definition.
            weights_file: Path to the matching ``.caffemodel`` weights.

        When either file is missing, a warning is printed and ``self.net`` is
        set to ``None`` instead of raising.
        """
        # NOTE(review): the default file names look like the OpenPose MPI
        # model (15 keypoints), while n_points and coco_to_mp below assume the
        # 18-point COCO layout -- confirm which model is intended.
        self.proto_file = proto_file
        self.weights_file = weights_file
        self.n_points = 18

        if os.path.exists(self.proto_file) and os.path.exists(self.weights_file):
            self.net = cv2.dnn.readNetFromCaffe(self.proto_file, self.weights_file)
        else:
            print("警告: 找不到OpenCV姿态估计模型文件")
            print("请从https://github.com/CMU-Perceptual-Computing-Lab/openpose下载模型文件")
            self.net = None

        # Model keypoint index -> slot in the 33-point array.
        # NOTE(review): slot numbers for neck and ears do not obviously match
        # the MediaPipe landmark numbering -- verify before relying on them.
        self.coco_to_mp = {
            0: 0,    # nose
            1: 1,    # neck
            2: 12,   # right shoulder
            3: 14,   # right elbow
            4: 16,   # right wrist
            5: 11,   # left shoulder
            6: 13,   # left elbow
            7: 15,   # left wrist
            8: 24,   # right hip
            9: 26,   # right knee
            10: 28,  # right ankle
            11: 23,  # left hip
            12: 25,  # left knee
            13: 27,  # left ankle
            14: 5,   # right eye
            15: 2,   # left eye
            16: 7,   # right ear
            17: 4    # left ear
        }

  • 类初始化:加载 OpenCV 预训练的 Caffe 模型
  • 关键点映射表:将 COCO 数据集的 18 个关键点映射到 MediaPipe 的 33 点格式

python

运行

    def detect_keypoints(self, image):
        """
        从图像中检测人体关键点
        
        返回:
            keypoints_2d: 二维关键点坐标 [33, 3] (x, y, confidence)
            annotated_image: 标注后的图像
        """
        if self.net is None:
            print("错误: 姿态估计模型未正确加载")
            return None, image
            
        # 准备输入
        blob = cv2.dnn.blobFromImage(
            image, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False
        )
        self.net.setInput(blob)
        
        # 前向传播
        output = self.net.forward()
        
        # 获取图像尺寸
        h, w = image.shape[:2]
        
        # 初始化33个关键点的数组
        keypoints_2d = np.zeros((33, 3))
        
        # 处理检测结果
        points = []
        for i in range(self.n_points):
            # 查找关键点的置信度图
            prob_map = output[0, i, :, :]
            min_val, prob, min_loc, point = cv2.minMaxLoc(prob_map)
            
            # 缩放坐标
            x = (w * point[0]) / output.shape[3]
            y = (h * point[1]) / output.shape[2]
            
            if prob > 0.1:  # 置信度阈值
                points.append((int(x), int(y)))
                
                # 映射到33点格式
                if i in self.coco_to_mp:
                    mp_idx = self.coco_to_mp[i]
                    keypoints_2d[mp_idx] = [x / w, y / h, prob]
            else:
                points.append(None)
        
        # 可视化关键点
        annotated_image = image.copy()
        for i, p in enumerate(points):
            if p is not None:
                cv2.circle(annotated_image, p, 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.putText(annotated_image, f"{i}", p, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
        
        # 绘制骨架连接
        skeleton_pairs = [
            (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
            (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13),
            (1, 0), (0, 14), (14, 16), (0, 15), (15, 17)
        ]
        
        for pair in skeleton_pairs:
            part_a, part_b = pair
            if points[part_a] and points[part_b]:
                cv2.line(annotated_image, points[part_a], points[part_b], (0, 255, 0), 2)
        
        return keypoints_2d, annotated_image

  • 图像预处理:将输入图像转换为网络可接受的格式 (368x368)
  • 模型推理:通过前向传播获取关键点的置信度图
  • 后处理:从置信度图中提取关键点坐标,应用阈值过滤低置信度点
  • 可视化:在原图上绘制关键点和骨架连接,返回标准化的关键点坐标和可视化后的图像

3D 姿态估计类

python

运行

class Simple3DPoseEstimator:
    def __init__(self):
        """Simple 3D pose estimator that relies on fixed body proportions."""
        # Average length of each body segment, in metres.
        self.body_proportions = dict(
            head=0.25,
            torso=0.5,
            upper_arm=0.3,
            forearm=0.25,
            hand=0.1,
            upper_leg=0.5,
            lower_leg=0.5,
            foot=0.2,
        )

        # Figure with a single 3D axes, used for plotting.
        figure = plt.figure(figsize=(10, 8))
        self.fig = figure
        self.ax = figure.add_subplot(111, projection='3d')

  • 初始化:定义人体各部分的标准比例(单位:米)
  • 创建 3D 绘图环境用于可视化 3D 姿态

python

运行

    def estimate_3d_pose(self, keypoints_2d, image_shape, visualize=False):
        """
        简单估计3D姿态
        
        参数:
            keypoints_2d: 二维关键点 [33, 3]
            image_shape: 图像形状 (h, w)
            visualize: 是否可视化3D姿态
            
        返回:
            keypoints_3d: 3D关键点 numpy数组 [33, 3]
        """
        if keypoints_2d is None:
            return None
            
        h, w = image_shape[:2]
        
        # 创建3D关键点数组
        keypoints_3d = np.zeros((33, 3))
        
        # 提取有效关键点
        valid_mask = keypoints_2d[:, 2] > 0.3
        if not np.any(valid_mask):
            return None
            
        # 将2D坐标转换为图像坐标系
        kp_2d_img = keypoints_2d.copy()
        kp_2d_img[:, 0] *= w
        kp_2d_img[:, 1] *= h
        
        # 计算人体中心
        center = np.mean(kp_2d_img[valid_mask, :2], axis=0)
        
        # 估计人体尺寸
        # 这里简化为使用肩宽作为参考
        if valid_mask[11] and valid_mask[12]:  # 左右肩
            shoulder_width = np.linalg.norm(kp_2d_img[11, :2] - kp_2d_img[12, :2])
            scale = 0.4 / shoulder_width  # 假设平均肩宽为0.4米
        else:
            scale = 0.001  # 默认缩放比例
            
        # 基于2D关键点和人体比例估计3D位置
        # 这里使用简化模型,主要基于深度感知和人体比例
        for i in range(33):
            if valid_mask[i]:
                x, y = kp_2d_img[i, :2]
                
                # 计算相对中心的位置
                rel_x = (x - center[0]) * scale
                rel_y = (y - center[1]) * scale
                
                # 估计深度(z轴)
                # 这里使用简化方法:离图像中心越远的点假设越远
                depth_factor = np.sqrt(rel_x**2 + rel_y**2) / max(w, h) * 0.5
                
                # 设置3D坐标
                keypoints_3d[i] = [rel_x, rel_y, depth_factor]
        
        # 可视化
        if visualize:
            self.visualize_3d_pose(keypoints_3d)
            
        return keypoints_3d

  • 3D 姿态估计:基于 2D 关键点和人体比例关系计算 3D 坐标
  • 坐标缩放:使用肩宽作为参考来估计人体尺寸比例
  • 深度估计:使用各关键点到人体中心(有效关键点坐标的均值)的距离来粗略估计深度信息(z 轴)

python

运行

    def visualize_3d_pose(self, keypoints_3d, frame_id=None):
        """Plot a 3D pose on this estimator's own figure.

        Args:
            keypoints_3d: ``[N, 3]`` array of (x, y, z) keypoints.
            frame_id: When given, the figure is saved to
                ``results/3d_poses/3d_pose_frame_{frame_id}.png``; when
                ``None``, the figure is shown briefly via ``plt.pause``.
        """
        self.ax.clear()

        # Symmetric limits around the origin. An all-zero (or non-finite)
        # pose would give degenerate limits, so fall back to a unit cube.
        max_range = float(np.max(np.abs(keypoints_3d)))
        if not np.isfinite(max_range) or max_range <= 0:
            max_range = 1.0
        self.ax.set_xlim(-max_range, max_range)
        self.ax.set_ylim(-max_range, max_range)
        self.ax.set_zlim(-max_range, max_range)

        self.ax.set_xlabel('X')
        self.ax.set_ylabel('Y')
        self.ax.set_zlabel('Z')

        # Keypoints
        self.ax.scatter(keypoints_3d[:, 0], keypoints_3d[:, 1], keypoints_3d[:, 2], c='r', s=50)

        # Skeleton edges; pairs that index past the array are skipped.
        n_joints = len(keypoints_3d)
        for start_idx, end_idx in JOINT_CONNECTIONS:
            if start_idx < n_joints and end_idx < n_joints:
                self.ax.plot(
                    [keypoints_3d[start_idx, 0], keypoints_3d[end_idx, 0]],
                    [keypoints_3d[start_idx, 1], keypoints_3d[end_idx, 1]],
                    [keypoints_3d[start_idx, 2], keypoints_3d[end_idx, 2]],
                    c='b', linewidth=2
                )

        # View along the z axis.
        self.ax.view_init(elev=-90, azim=90)

        if frame_id is not None:
            # Save through self.fig: plt.savefig writes whichever figure is
            # current, which may be a figure created later by other code.
            self.fig.savefig(f"results/3d_poses/3d_pose_frame_{frame_id}.png", dpi=300, bbox_inches='tight')
        else:
            self.fig.canvas.draw_idle()
            plt.pause(0.01)

  • 3D 姿态可视化:在 3D 空间中绘制关键点和骨架连接
  • 视角设置:默认使用俯视视角 (-90 度仰角,90 度方位角)
  • 图像保存:根据需要保存 3D 姿态图像

3D 仿真器类

python

运行

class SimpleSimulator:
    def __init__(self, use_gui=True):
        """Minimal 3D simulator that renders with matplotlib."""
        self.use_gui = use_gui

        # Figure with a single 3D axes for the scene.
        self.fig = plt.figure(figsize=(10, 8))
        axes = self.fig.add_subplot(111, projection='3d')
        self.ax = axes

        # Fixed scene extents.
        axes.set_xlim(-1.5, 1.5)
        axes.set_ylim(-1.5, 1.5)
        axes.set_zlim(0, 2)

        axes.set_xlabel('X')
        axes.set_ylabel('Y')
        axes.set_zlabel('Z')

        # Ground: a translucent green surface at z = 0.
        ticks = np.linspace(-1.5, 1.5, 100)
        ground_x, ground_y = np.meshgrid(ticks, ticks)
        axes.plot_surface(ground_x, ground_y, np.zeros_like(ground_x), alpha=0.3, color='g')

        print("使用简单的3D可视化模拟器")

  • 初始化:创建 3D 绘图环境和固定大小的场景
  • 绘制地面平面:使用绿色半透明平面表示地面

python

运行

    def update_pose(self, keypoints_3d):
        """
        根据3D姿态更新仿真模型
        
        参数:
            keypoints_3d: 3D关键点 [33, 3]
        """
        if keypoints_3d is None:
            return
            
        self.ax.clear()
        
        # 设置坐标轴范围
        self.ax.set_xlim(-1.5, 1.5)
        self.ax.set_ylim(-1.5, 1.5)
        self.ax.set_zlim(0, 2)
        
        # 设置坐标轴标签
        self.ax.set_xlabel('X')
        self.ax.set_ylabel('Y')
        self.ax.set_zlabel('Z')
        
        # 绘制地面
        x = np.linspace(-1.5, 1.5, 100)
        y = np.linspace(-1.5, 1.5, 100)
        X, Y = np.meshgrid(x, y)
        Z = np.zeros_like(X)
        self.ax.plot_surface(X, Y, Z, alpha=0.3, color='g')
        
        # 绘制关键点
        self.ax.scatter(keypoints_3d[:, 0], keypoints_3d[:, 1], keypoints_3d[:, 2], c='r', s=50)
        
        # 绘制连接关系
        for connection in JOINT_CONNECTIONS:
            start_idx, end_idx = connection
            if start_idx < len(keypoints_3d) and end_idx < len(keypoints_3d):
                self.ax.plot(
                    [keypoints_3d[start_idx, 0], keypoints_3d[end_idx, 0]],
                    [keypoints_3d[start_idx, 1], keypoints_3d[end_idx, 1]],
                    [keypoints_3d[start_idx, 2], keypoints_3d[end_idx, 2]],
                    c='b', linewidth=2
                )
        
        # 设置视角
        self.ax.view_init(elev=30, azim=45)  # 侧视视角
        
        if self.use_gui:
            plt.pause(0.01)
    
    def render_scene(self, frame_id):
        """
        Save the simulator's figure as a PNG.

        Args:
            frame_id: Frame ID, used in the output file name
                ``results/simulations/simulation_frame_{frame_id}.png``.
        """
        # Save through self.fig rather than plt.savefig, which writes
        # whichever figure happens to be current.
        self.fig.savefig(f"results/simulations/simulation_frame_{frame_id}.png", dpi=300, bbox_inches='tight')

  • 更新姿态:根据新的 3D 关键点数据更新场景
  • 固定视角:使用侧视视角 (30 度仰角,45 度方位角)
  • 场景渲染:将当前场景保存为图像

主函数

python

运行

def main(camera_id=0, use_gui=True):
    """
    Run the full pipeline: camera capture -> 2D pose -> 3D pose -> simulation.

    Each frame is mirrored, annotated, shown in an OpenCV window and saved
    under ``results/``. Press ESC in the OpenCV window to stop.

    Args:
        camera_id: Camera ID passed to ``cv2.VideoCapture``; 0 is the default
            camera.
        use_gui: Forwarded to ``SimpleSimulator``.
    """
    # 1. Build the processing modules.
    pose_estimator = HumanPoseEstimator()
    pose_3d_estimator = Simple3DPoseEstimator()
    simulator = SimpleSimulator(use_gui=use_gui)

    # 2. Open the camera.
    cap = cv2.VideoCapture(camera_id)
    frame_id = 0

    # try/finally so the camera, windows and figures are released even when
    # processing a frame raises.
    try:
        if not cap.isOpened():
            print(f"无法打开摄像头 {camera_id}")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"摄像头参数: {width}x{height}, 帧率: {fps}")

        cv2.namedWindow("2D Pose Estimation", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("2D Pose Estimation", 800, 600)

        # 3. Process camera frames until capture fails or ESC is pressed.
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取帧,退出...")
                break

            # Flip horizontally for a mirror effect.
            frame = cv2.flip(frame, 1)

            print(f"处理第{frame_id}帧...")

            # 3.1 2D pose detection
            start_time = time.time()
            keypoints_2d, vis_frame = pose_estimator.detect_keypoints(frame)

            cv2.imshow("2D Pose Estimation", vis_frame)
            cv2.imwrite(f"results/2d_poses/2d_pose_frame_{frame_id}.png", vis_frame)

            # 3.2 3D pose reconstruction
            keypoints_3d = pose_3d_estimator.estimate_3d_pose(
                keypoints_2d, frame.shape, visualize=False
            )
            if keypoints_3d is not None:
                pose_3d_estimator.visualize_3d_pose(keypoints_3d, frame_id)

            # 3.3 / 3.4 Update and save the simulation scene.
            simulator.update_pose(keypoints_3d)
            simulator.render_scene(frame_id)

            process_time = time.time() - start_time
            print(f"处理时间: {process_time:.3f}秒")

            frame_id += 1

            # Mask to the low byte: some platforms return extra high bits,
            # which would make the comparison with 27 (ESC) fail.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # 4. Release resources.
        cap.release()
        cv2.destroyAllWindows()
        plt.close(pose_3d_estimator.fig)
        plt.close(simulator.fig)

    print(f"处理完成,共{frame_id}帧,结果保存在results目录")

  • 初始化所有模块:2D 姿态估计器、3D 姿态估计器和 3D 仿真器
  • 打开摄像头并获取视频流参数
  • 主循环处理每一帧:
    1. 读取摄像头帧并翻转
    2. 进行 2D 姿态检测
    3. 基于 2D 结果进行 3D 姿态重建
    4. 更新 3D 仿真场景
    5. 保存所有处理结果
    6. 计算处理时间
  • 资源释放:关闭摄像头和窗口

程序入口

python

运行

if __name__ == "__main__":
    # Launch the pipeline on the default camera (ID 0) with GUI mode on.
    main(camera_id=0, use_gui=True)

  • 程序入口点,调用 main 函数启动整个系统
  • 可以通过修改参数来调整系统行为

总结

这段代码实现了一个完整的人体姿态估计和 3D 仿真系统,主要特点包括:

  1. 使用 OpenCV 预训练模型进行 2D 姿态检测
  2. 基于人体比例关系的简化 3D 姿态重建方法
  3. 使用 matplotlib 进行 3D 姿态可视化和仿真
  4. 实时处理摄像头视频流
  5. 保存所有处理结果到指定目录

该系统可以用于姿势分析、运动跟踪、人机交互等多种应用场景,并且提供了良好的扩展性,可以根据需求进一步优化 3D 姿态估计算法或添加更多功能。

完整代码

import cv2
import numpy as np
import os
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create every output directory up front; exist_ok makes reruns harmless.
for _output_dir in ("results/2d_poses", "results/3d_poses", "results/simulations"):
    os.makedirs(_output_dir, exist_ok=True)

# Skeleton edges between slots of the 33-keypoint array, grouped by body
# region. Each pair is (start index, end index).
_SKELETON_GROUPS = (
    # head
    ((0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (6, 7)),
    # torso
    ((0, 11), (0, 12), (11, 12)),
    # left arm
    ((11, 13), (13, 15), (15, 17), (17, 19), (19, 21)),
    # right arm
    ((12, 14), (14, 16), (16, 18), (18, 20), (20, 22)),
    # pelvis
    ((11, 23), (12, 24), (23, 24)),
    # left leg
    ((23, 25), (25, 27), (27, 29), (29, 31)),
    # right leg
    ((24, 26), (26, 28), (28, 30), (30, 32)),
)
JOINT_CONNECTIONS = [edge for group in _SKELETON_GROUPS for edge in group]

class HumanPoseEstimator:
    """2D pose estimator that runs a Caffe pose network through OpenCV DNN."""

    def __init__(
        self,
        proto_file="pose_deploy_linevec_faster_4_stages.prototxt",
        weights_file="pose_iter_160000.caffemodel",
    ):
        """Load the pose network if its model files exist.

        Args:
            proto_file: Path to the ``.prototxt`` network definition.
            weights_file: Path to the matching ``.caffemodel`` weights.

        When either file is missing, a warning is printed and ``self.net`` is
        set to ``None`` instead of raising.
        """
        # NOTE(review): the default file names look like the OpenPose MPI
        # model (15 keypoints), while n_points and coco_to_mp below assume the
        # 18-point COCO layout -- confirm which model is intended.
        self.proto_file = proto_file
        self.weights_file = weights_file
        self.n_points = 18

        if os.path.exists(self.proto_file) and os.path.exists(self.weights_file):
            self.net = cv2.dnn.readNetFromCaffe(self.proto_file, self.weights_file)
        else:
            print("警告: 找不到OpenCV姿态估计模型文件")
            print("请从https://github.com/CMU-Perceptual-Computing-Lab/openpose下载模型文件")
            self.net = None

        # Model keypoint index -> slot in the 33-point array.
        # NOTE(review): slot numbers for neck and ears do not obviously match
        # the MediaPipe landmark numbering -- verify before relying on them.
        self.coco_to_mp = {
            0: 0,    # nose
            1: 1,    # neck
            2: 12,   # right shoulder
            3: 14,   # right elbow
            4: 16,   # right wrist
            5: 11,   # left shoulder
            6: 13,   # left elbow
            7: 15,   # left wrist
            8: 24,   # right hip
            9: 26,   # right knee
            10: 28,  # right ankle
            11: 23,  # left hip
            12: 25,  # left knee
            13: 27,  # left ankle
            14: 5,   # right eye
            15: 2,   # left eye
            16: 7,   # right ear
            17: 4    # left ear
        }

    def detect_keypoints(self, image):
        """
        Detect body keypoints in one image and draw them on a copy.

        Args:
            image: Image array passed to ``cv2.dnn.blobFromImage``; its first
                two shape entries are read as (height, width).

        Returns:
            keypoints_2d: ``[33, 3]`` array of (x, y, confidence), with x and
                y divided by the image width and height. Slots that received
                no detection stay zero. ``None`` when no network is loaded.
            annotated_image: Copy of ``image`` with keypoints, their indices
                and the skeleton drawn on it; ``image`` itself when no
                network is loaded.
        """
        if self.net is None:
            print("错误: 姿态估计模型未正确加载")
            return None, image

        # Scale pixel values by 1/255 and resize to the 368x368 network input.
        blob = cv2.dnn.blobFromImage(
            image, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False
        )
        self.net.setInput(blob)
        output = self.net.forward()

        h, w = image.shape[:2]

        keypoints_2d = np.zeros((33, 3))
        points = []  # one entry per model keypoint: (x, y) pixels or None
        for i in range(self.n_points):
            # Strongest response of this keypoint's confidence map.
            _, prob, _, point = cv2.minMaxLoc(output[0, i, :, :])

            # Rescale the peak from heatmap resolution to image resolution.
            x = (w * point[0]) / output.shape[3]
            y = (h * point[1]) / output.shape[2]

            if prob > 0.1:  # confidence threshold
                points.append((int(x), int(y)))
                if i in self.coco_to_mp:
                    keypoints_2d[self.coco_to_mp[i]] = [x / w, y / h, prob]
            else:
                points.append(None)

        # Draw each detected keypoint together with its model index.
        annotated_image = image.copy()
        for i, p in enumerate(points):
            if p is not None:
                cv2.circle(annotated_image, p, 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.putText(annotated_image, f"{i}", p, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)

        # Skeleton edges, expressed in model keypoint indices.
        skeleton_pairs = [
            (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
            (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13),
            (1, 0), (0, 14), (14, 16), (0, 15), (15, 17)
        ]
        for part_a, part_b in skeleton_pairs:
            # Only connect joints that were both detected.
            if points[part_a] is not None and points[part_b] is not None:
                cv2.line(annotated_image, points[part_a], points[part_b], (0, 255, 0), 2)

        return keypoints_2d, annotated_image

class Simple3DPoseEstimator:
    """Heuristic 2D-to-3D pose lifter with its own matplotlib 3D figure."""

    def __init__(self):
        """Set up the body-proportion table and the figure used for plotting."""
        # Average length of each body segment, in metres.
        self.body_proportions = {
            "head": 0.25,
            "torso": 0.5,
            "upper_arm": 0.3,
            "forearm": 0.25,
            "hand": 0.1,
            "upper_leg": 0.5,
            "lower_leg": 0.5,
            "foot": 0.2
        }

        # Figure with a single 3D axes, used by visualize_3d_pose.
        self.fig = plt.figure(figsize=(10, 8))
        self.ax = self.fig.add_subplot(111, projection='3d')

    def estimate_3d_pose(self, keypoints_2d, image_shape, visualize=False):
        """
        Lift 2D keypoints to a rough 3D pose with a fixed heuristic.

        Args:
            keypoints_2d: ``[33, 3]`` array of (x, y, confidence) with x and y
                normalised by image width/height, or ``None``.
            image_shape: Image shape; the first two entries are (h, w).
            visualize: When true, plot the result via ``visualize_3d_pose``.

        Returns:
            ``[33, 3]`` array of (x, y, z). Keypoints whose confidence is not
            above 0.3 stay at zero. ``None`` when ``keypoints_2d`` is ``None``
            or no keypoint passes the threshold.
        """
        if keypoints_2d is None:
            return None

        h, w = image_shape[:2]

        keypoints_3d = np.zeros((33, 3))

        valid_mask = keypoints_2d[:, 2] > 0.3
        if not np.any(valid_mask):
            return None

        # Convert normalised coordinates back to pixels on a copy, so the
        # caller's array is left untouched.
        kp_2d_img = keypoints_2d.copy()
        kp_2d_img[:, 0] *= w
        kp_2d_img[:, 1] *= h

        # Body centre = mean pixel position of the valid keypoints.
        center = np.mean(kp_2d_img[valid_mask, :2], axis=0)

        # Metres-per-pixel scale, taken from the shoulder width (assumed to
        # be 0.4 m) when both shoulders are valid; otherwise a fixed default.
        scale = 0.001
        if valid_mask[11] and valid_mask[12]:
            shoulder_width = np.linalg.norm(kp_2d_img[11, :2] - kp_2d_img[12, :2])
            # Coincident shoulders would give an infinite scale and turn every
            # coordinate into NaN, so keep the default in that case.
            if shoulder_width > 1e-6:
                scale = 0.4 / shoulder_width

        for i in range(33):
            if not valid_mask[i]:
                continue
            x, y = kp_2d_img[i, :2]

            # Position relative to the body centre, in scaled units.
            rel_x = (x - center[0]) * scale
            rel_y = (y - center[1]) * scale

            # Crude depth: grows with the keypoint's distance from the body
            # centre.
            depth_factor = np.sqrt(rel_x**2 + rel_y**2) / max(w, h) * 0.5

            keypoints_3d[i] = [rel_x, rel_y, depth_factor]

        if visualize:
            self.visualize_3d_pose(keypoints_3d)

        return keypoints_3d

    def visualize_3d_pose(self, keypoints_3d, frame_id=None):
        """Plot a 3D pose on this estimator's own figure.

        Args:
            keypoints_3d: ``[N, 3]`` array of (x, y, z) keypoints.
            frame_id: When given, the figure is saved to
                ``results/3d_poses/3d_pose_frame_{frame_id}.png``; when
                ``None``, the figure is shown briefly via ``plt.pause``.
        """
        self.ax.clear()

        # Symmetric limits around the origin. An all-zero (or non-finite)
        # pose would give degenerate limits, so fall back to a unit cube.
        max_range = float(np.max(np.abs(keypoints_3d)))
        if not np.isfinite(max_range) or max_range <= 0:
            max_range = 1.0
        self.ax.set_xlim(-max_range, max_range)
        self.ax.set_ylim(-max_range, max_range)
        self.ax.set_zlim(-max_range, max_range)

        self.ax.set_xlabel('X')
        self.ax.set_ylabel('Y')
        self.ax.set_zlabel('Z')

        # Keypoints
        self.ax.scatter(keypoints_3d[:, 0], keypoints_3d[:, 1], keypoints_3d[:, 2], c='r', s=50)

        # Skeleton edges; pairs that index past the array are skipped.
        n_joints = len(keypoints_3d)
        for start_idx, end_idx in JOINT_CONNECTIONS:
            if start_idx < n_joints and end_idx < n_joints:
                self.ax.plot(
                    [keypoints_3d[start_idx, 0], keypoints_3d[end_idx, 0]],
                    [keypoints_3d[start_idx, 1], keypoints_3d[end_idx, 1]],
                    [keypoints_3d[start_idx, 2], keypoints_3d[end_idx, 2]],
                    c='b', linewidth=2
                )

        # View along the z axis.
        self.ax.view_init(elev=-90, azim=90)

        if frame_id is not None:
            # Save through self.fig: plt.savefig writes whichever figure is
            # current, which may be a figure created later by other code.
            self.fig.savefig(f"results/3d_poses/3d_pose_frame_{frame_id}.png", dpi=300, bbox_inches='tight')
        else:
            self.fig.canvas.draw_idle()
            plt.pause(0.01)

class SimpleSimulator:
    """Minimal 3D scene that draws the estimated skeleton with matplotlib."""

    def __init__(self, use_gui=True):
        """Create the figure and draw the empty scene.

        Args:
            use_gui: When true, ``update_pose`` calls ``plt.pause`` after
                drawing.
        """
        self.use_gui = use_gui

        self.fig = plt.figure(figsize=(10, 8))
        self.ax = self.fig.add_subplot(111, projection='3d')

        self._draw_scene()

        print("使用简单的3D可视化模拟器")

    def _draw_scene(self):
        """Apply the fixed axis limits and labels and draw the ground plane."""
        self.ax.set_xlim(-1.5, 1.5)
        self.ax.set_ylim(-1.5, 1.5)
        self.ax.set_zlim(0, 2)

        self.ax.set_xlabel('X')
        self.ax.set_ylabel('Y')
        self.ax.set_zlabel('Z')

        # Ground: a translucent green surface at z = 0.
        x = np.linspace(-1.5, 1.5, 100)
        y = np.linspace(-1.5, 1.5, 100)
        X, Y = np.meshgrid(x, y)
        self.ax.plot_surface(X, Y, np.zeros_like(X), alpha=0.3, color='g')

    def update_pose(self, keypoints_3d):
        """
        Redraw the simulation scene for a new 3D pose.

        Args:
            keypoints_3d: 3D keypoints ``[33, 3]``; ``None`` leaves the
                current scene untouched.
        """
        if keypoints_3d is None:
            return

        # Clearing the axes drops limits, labels and the ground, so the
        # static scene is rebuilt before the pose is drawn.
        self.ax.clear()
        self._draw_scene()

        # Keypoints
        self.ax.scatter(keypoints_3d[:, 0], keypoints_3d[:, 1], keypoints_3d[:, 2], c='r', s=50)

        # Skeleton edges; pairs that index past the array are skipped.
        n_joints = len(keypoints_3d)
        for start_idx, end_idx in JOINT_CONNECTIONS:
            if start_idx < n_joints and end_idx < n_joints:
                self.ax.plot(
                    [keypoints_3d[start_idx, 0], keypoints_3d[end_idx, 0]],
                    [keypoints_3d[start_idx, 1], keypoints_3d[end_idx, 1]],
                    [keypoints_3d[start_idx, 2], keypoints_3d[end_idx, 2]],
                    c='b', linewidth=2
                )

        # Fixed oblique viewpoint.
        self.ax.view_init(elev=30, azim=45)

        if self.use_gui:
            plt.pause(0.01)

    def render_scene(self, frame_id):
        """
        Save the simulator's figure as a PNG.

        Args:
            frame_id: Frame ID, used in the output file name
                ``results/simulations/simulation_frame_{frame_id}.png``.
        """
        # Save through self.fig rather than plt.savefig, which writes
        # whichever figure happens to be current.
        self.fig.savefig(f"results/simulations/simulation_frame_{frame_id}.png", dpi=300, bbox_inches='tight')

def main(camera_id=0, use_gui=True):
    """
    Run the full pipeline: camera capture -> 2D pose -> 3D pose -> simulation.

    Each frame is mirrored, annotated, shown in an OpenCV window and saved
    under ``results/``. Press ESC in the OpenCV window to stop.

    Args:
        camera_id: Camera ID passed to ``cv2.VideoCapture``; 0 is the default
            camera.
        use_gui: Forwarded to ``SimpleSimulator``.
    """
    # 1. Build the processing modules.
    pose_estimator = HumanPoseEstimator()
    pose_3d_estimator = Simple3DPoseEstimator()
    simulator = SimpleSimulator(use_gui=use_gui)

    # 2. Open the camera.
    cap = cv2.VideoCapture(camera_id)
    frame_id = 0

    # try/finally so the camera, windows and figures are released even when
    # processing a frame raises.
    try:
        if not cap.isOpened():
            print(f"无法打开摄像头 {camera_id}")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"摄像头参数: {width}x{height}, 帧率: {fps}")

        cv2.namedWindow("2D Pose Estimation", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("2D Pose Estimation", 800, 600)

        # 3. Process camera frames until capture fails or ESC is pressed.
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取帧,退出...")
                break

            # Flip horizontally for a mirror effect.
            frame = cv2.flip(frame, 1)

            print(f"处理第{frame_id}帧...")

            # 3.1 2D pose detection
            start_time = time.time()
            keypoints_2d, vis_frame = pose_estimator.detect_keypoints(frame)

            cv2.imshow("2D Pose Estimation", vis_frame)
            cv2.imwrite(f"results/2d_poses/2d_pose_frame_{frame_id}.png", vis_frame)

            # 3.2 3D pose reconstruction
            keypoints_3d = pose_3d_estimator.estimate_3d_pose(
                keypoints_2d, frame.shape, visualize=False
            )
            if keypoints_3d is not None:
                pose_3d_estimator.visualize_3d_pose(keypoints_3d, frame_id)

            # 3.3 / 3.4 Update and save the simulation scene.
            simulator.update_pose(keypoints_3d)
            simulator.render_scene(frame_id)

            process_time = time.time() - start_time
            print(f"处理时间: {process_time:.3f}秒")

            frame_id += 1

            # Mask to the low byte: some platforms return extra high bits,
            # which would make the comparison with 27 (ESC) fail.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # 4. Release resources.
        cap.release()
        cv2.destroyAllWindows()
        plt.close(pose_3d_estimator.fig)
        plt.close(simulator.fig)

    print(f"处理完成,共{frame_id}帧,结果保存在results目录")

if __name__ == "__main__":
    # Launch the pipeline on the default camera (ID 0) with GUI mode on.
    main(camera_id=0, use_gui=True)

你可能感兴趣的:(3d)