Keypoint Detection: Real-Time Hand Gesture Recognition (implemented with MediaPipe and Keras)

  • 0. Demo
  • 1. How the project works
  • 2. Dataset collection script
  • 3. Detect hand keypoints in the collected images with MediaPipe and save the results locally
  • 4. Train a baseline random forest classifier
  • 5. Train a better fully connected classifier with Keras
  • 6. Real-time hand gesture recognition test

Project code download link
For more on this topic, see:
Pose Estimation: using MediaPipe for hand gesture, body pose, and facial motion estimation

0. Demo

1. How the project works

This project recognizes a person's hand gestures in real time. The gestures to recognize fall into the following nine example classes:
[Figure 1: the nine gesture classes]
The main idea of the project:
Write a script to collect 100 images of each of the 9 gesture classes as the training set. Use the mediapipe library to detect the hand in each image and extract the coordinates of its 21 keypoints. After removing contaminated samples, use the keypoint coordinates as training data for a Keras fully connected classifier that recognizes the gesture.
Two classifiers are trained for comparison, a random forest with scikit-learn and a fully connected network with Keras; the latter turns out to work noticeably better.
[Figure 2]
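Before the scripts, here is a minimal sketch of the feature construction used throughout: each of the 21 detected landmarks contributes a min-shifted x and y, giving a 42-dimensional vector. The helper name landmarks_to_features is ours for illustration; the scripts below inline this logic rather than calling a function.

# Minimal sketch of the feature construction the scripts below inline.
# landmarks_to_features is a hypothetical helper, not part of the project code.
def landmarks_to_features(hand_landmarks):
    xs = [lm.x for lm in hand_landmarks.landmark]   # 21 normalized x coords
    ys = [lm.y for lm in hand_landmarks.landmark]   # 21 normalized y coords
    features = []
    for x, y in zip(xs, ys):
        # Shift by the hand's top-left corner so the vector encodes only
        # the hand's shape, not where it sits in the frame.
        features.append(x - min(xs))
        features.append(y - min(ys))
    return features  # 21 landmarks * 2 coords = 42 values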

opencv-python==4.7.0.68
mediapipe==0.9.0.1
scikit-learn==1.0.2
Keras==2.0.2
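The pinned versions above can be installed with pip, for example (assuming a pip-based environment):

pip install opencv-python==4.7.0.68 mediapipe==0.9.0.1 scikit-learn==1.0.2 Keras==2.0.2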

2. Dataset collection script

import cv2, os

DATA_DIR = "./data"
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

number_of_classes = 1   # set this to the number of classes to collect (9 for the full project)
dataset_size = 100      # images to collect per class

cap = cv2.VideoCapture(0)  # on macOS use 1; pass a file path instead of 0 to read a local video
print(cap.isOpened())      # True if the camera opened successfully

for num in range(number_of_classes):
    class_dir = os.path.join(DATA_DIR, str(num))
    if not os.path.exists(class_dir):
        os.makedirs(class_dir)

    print("Collecting data for class {}".format(num))
    counter = 0
    while counter < dataset_size:
        ret, frame = cap.read()
        if not ret:
            break

        # Draw the prompt on a copy so the saved training image stays clean.
        display = frame.copy()
        cv2.putText(display, "Press 'w' to save, 'q' to quit", (100, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
        cv2.imshow("frame", display)

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
        if key == ord('w'):
            cv2.imwrite(os.path.join(class_dir, '{}.jpg'.format(counter)), frame)
            print("saved image", counter, "for class", num)
            counter += 1

cap.release()
cv2.destroyAllWindows()

3. Detect hand keypoints in the collected images with MediaPipe and save the results locally

import os
import pickle

import mediapipe as mp
import cv2


mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './data'

data = []
labels = []
for dir_ in os.listdir(DATA_DIR):
    for img_path in os.listdir(os.path.join(DATA_DIR, dir_)):
        data_aux = []

        x_ = []
        y_ = []

        img = cv2.imread(os.path.join(DATA_DIR, dir_, img_path))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if results.multi_hand_landmarks:
            # NOTE: if more than one hand is detected, data_aux grows past 42
            # values; those contaminated samples are filtered out in steps 4 and 5.
            for hand_landmarks in results.multi_hand_landmarks:
                # First pass: collect the raw coordinates to find the minima.
                for lm in hand_landmarks.landmark:
                    x_.append(lm.x)
                    y_.append(lm.y)

                # Second pass: store coordinates relative to the hand's top-left.
                for lm in hand_landmarks.landmark:
                    data_aux.append(lm.x - min(x_))
                    data_aux.append(lm.y - min(y_))

            data.append(data_aux)
            labels.append(dir_)

with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)
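A quick sanity check we add for illustration: count how many contaminated samples made it into data.pickle. Any feature vector longer than 42 means MediaPipe found more than one hand in that image.

import pickle

with open('data.pickle', 'rb') as f:
    dataset = pickle.load(f)

# Count samples whose feature vector is not exactly 42 values long.
bad = sum(1 for sample in dataset['data'] if len(sample) != 42)
print(len(dataset['data']), 'samples,', bad, 'with feature length != 42')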

4. Train a baseline random forest classifier

import pickle

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np


data_dict = pickle.load(open('./data.pickle', 'rb'))
data_arr = data_dict['data']
labels = data_dict['labels']
print(len(data_arr), len(labels))

# Drop contaminated samples (feature length != 42, i.e. more than one hand).
# NOTE: deleting from a list while enumerating it skips elements, so filter instead.
keep = [i for i, d in enumerate(data_arr) if len(d) == 42]
data_arr = [data_arr[i] for i in keep]
labels = [labels[i] for i in keep]

print(len(data_arr), len(labels))

x_train, x_test, y_train, y_test = train_test_split(data_arr, labels, test_size=0.1, shuffle=True, stratify=labels)

model = RandomForestClassifier(n_estimators=500, random_state=0)

model.fit(x_train, y_train)

y_predict = model.predict(x_test)

score = accuracy_score(y_test, y_predict)

print('{}% of samples were classified correctly !'.format(score * 100))

with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)
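A minimal usage sketch for the saved model. The zero vector is just a placeholder input; any 42-dimensional feature vector built as in step 3 works.

import pickle
import numpy as np

with open('model.p', 'rb') as f:
    model = pickle.load(f)['model']

sample = np.zeros((1, 42))    # placeholder 42-dim feature vector
print(model.predict(sample))  # prints the predicted class label (a string)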

5. Train a better fully connected classifier with Keras

import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from keras.regularizers import l2
import matplotlib.pyplot as plt


data_dict = pickle.load(open('./data.pickle', 'rb'))

data = data_dict['data']
labels = data_dict['labels']

# Drop contaminated samples (feature length != 42), filtering rather than
# deleting while iterating, which would skip elements.
keep = [i for i, d in enumerate(data) if len(d) == 42]
data = [data[i] for i in keep]
labels = [labels[i] for i in keep]

data = np.vstack(data)
print('data shape:', data.shape)

# The labels are the directory names ('0'..'8'); convert them to integers
# so they can be one-hot encoded.
labels = np.asarray(labels, dtype=int)

x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

print(x_train.shape)


# One-hot encode the labels
y_train = np_utils.to_categorical(y_train, num_classes=9)
y_test = np_utils.to_categorical(y_test, num_classes=9)


# Build the network: 42 input features, 9-way softmax output
model = Sequential([
    Dense(units=200, input_dim=42, bias_initializer="one", activation="relu", kernel_regularizer=l2(0.0003)),
    # Dropout(0.5),

    Dense(units=500, bias_initializer="one", activation="relu", kernel_regularizer=l2(0.0003)),
    Dense(units=100, bias_initializer="one", activation="relu", kernel_regularizer=l2(0.0003)),
    # Dropout(0.5),
    Dense(units=9, bias_initializer="one", activation="softmax", kernel_regularizer=l2(0.0003))
])

# Define the optimizer
sgd = SGD(lr=0.01)
# Compile the model: optimizer, loss function, and accuracy tracking during training
model.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=["accuracy"])

# Train
history = model.fit(x_train, y_train, batch_size=32, epochs=200, validation_data=(x_test, y_test))
model.save("classmodel.h5")  # saves an HDF5 file (pip install h5py)
# This save format keeps both the model architecture and its weights.

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print("test loss:", loss, "accuracy:", accuracy)



history_dict = history.history
print(history_dict.keys())


"""******** Plot training vs. validation loss ********"""

# y-axis: the values to plot
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']

# x-axis: epoch numbers
epochs = range(1, len(loss_values) + 1)

# Plot the curves
plt.plot(epochs, loss_values, 'bo', label='Training loss')
plt.plot(epochs, val_loss_values, 'b', label='Validation loss')

# Title
plt.title('Training and validation loss')

# Axis labels
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Legend, auto-positioned
plt.legend()

# Show the figure
plt.show()


"""********************绘制训练精度与验证精度的训练结果********************"""
# 清除图像
plt.clf()

acc = history_dict['acc']
val_acc = history_dict['val_acc']

plt.plot(epochs, val_acc, 'bo', label='Training acc')
plt.plot(epochs, acc, 'b', label='Validation acc')

plt.title('Training and validation acc')

plt.xlabel('Epochs')
plt.ylabel('Acc')

plt.legend()

plt.show()



# Load the saved model and run it on the test set
from keras.models import load_model
model = load_model('classmodel.h5')    # requires keras==2.0.4
result = model.predict(x_test)
print(result)
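predict returns a row of softmax probabilities per sample, so mapping a row back to a gesture name takes an argmax plus the same labels_dict used in step 6 below (a small sketch):

# Map one softmax row back to a gesture name (labels_dict matches step 6).
labels_dict = {0: 'A', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: 'B', 7: 'C', 8: 'L'}
print(labels_dict[int(np.argmax(result[0]))])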

6. Real-time hand gesture recognition test

Testing the random forest version

import pickle

import cv2
import mediapipe as mp
import numpy as np

model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']

cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

labels_dict = {0: 'A', 1: '1', 2: '2',3:'3',4:'4',5:'5',6:'B',7:'C',8:'L'}
while True:

    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()
    if not ret:
        break
    # frame = cv2.flip(frame, 1)

    H, W, _ = frame.shape


    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  # image to draw
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

        for hand_landmarks in results.multi_hand_landmarks:
            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y

                x_.append(x)
                y_.append(y)

            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x - min(x_))
                data_aux.append(y - min(y_))

        # Bounding box around the hand, padded by 10 pixels on each side
        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10

        x2 = int(max(x_) * W) + 10
        y2 = int(max(y_) * H) + 10

        prediction = model.predict([np.asarray(data_aux)])

        predicted_character = labels_dict[int(prediction[0])]

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                    cv2.LINE_AA)

    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):  # press 'q' to quit
        break


cap.release()
cv2.destroyAllWindows()
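One design note: both real-time scripts build Hands with static_image_mode=True, which re-runs full palm detection on every frame. For a live stream, MediaPipe's tracking mode is usually faster, and limiting detection to a single hand also guarantees the 42-value feature shape the classifiers expect. A hedged alternative configuration, not what the original uses:

# Alternative Hands configuration for live video (not used by the original):
# static_image_mode=False tracks the hand between frames instead of
# re-detecting it from scratch, and max_num_hands=1 keeps the feature
# vector at exactly 42 values.
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.3,
                       min_tracking_confidence=0.3)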

Testing the Keras fully connected classifier version

import cv2
import mediapipe as mp
from keras.models import load_model
import numpy as np

model_dense = load_model('classmodel.h5')    # requires keras==2.0.4

cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

labels_dict = {0: 'A', 1: '1', 2: '2',3:'3',4:'4',5:'5',6:'B',7:'C',8:'L'}
while True:

    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()
    if not ret:
        break
    # frame = cv2.flip(frame, 1)

    H, W, _ = frame.shape


    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  # image to draw
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

        for hand_landmarks in results.multi_hand_landmarks:
            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y

                x_.append(x)
                y_.append(y)

            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x - min(x_))
                data_aux.append(y - min(y_))

        # Bounding box around the hand, padded by 10 pixels on each side
        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10

        x2 = int(max(x_) * W) + 10
        y2 = int(max(y_) * H) + 10

        # Turn the 42 features into a batch of one sample for Keras
        data = np.asarray(data_aux).reshape(1, -1)
        prediction = model_dense.predict(data)
        print("******",np.argmax(prediction[0]))
        predicted_character = labels_dict[np.argmax(prediction[0])]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                    cv2.LINE_AA)

    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == ord('q'):  # press 'q' to quit
        break


cap.release()
cv2.destroyAllWindows()
