OpenCV_mediapipe手势识别
概述
MediaPipe 是一款由 Google Research 开发并开源的多媒体机器学习模型应用框架,提供面部识别、手势识别的开源解决方案,支持python和java等语言
手部识别会返回 21 个手部关键点的坐标,具体如下图所示
对于mediapipe模块具体见官网 https://google.github.io/mediapipe/solutions/hands
代码
手势识别模块
文件名:HandTrackingModule.py
代码语言:python
# -*- coding:utf-8 -*-
import cv2
import mediapipe as mp
class HandDetector:
    """Detect hands with MediaPipe and expose landmark utilities.

    Wraps ``mp.solutions.hands``: finds up to ``maxHands`` hands in a BGR
    frame, optionally draws the 21 landmarks, and derives pixel positions,
    a bounding box, per-finger up/down state and handedness.
    """

    def __init__(self, mode=False, maxHands=2, modelComplexity=1,
                 detectionCon=0.5, minTrackCon=0.5):
        """Configure the underlying MediaPipe Hands solution.

        :param mode: True treats inputs as static images; False as a video stream.
        :param maxHands: maximum number of hands to detect.
        :param modelComplexity: model complexity (0 or 1); higher is more
            accurate but slower.
        :param detectionCon: minimum detection confidence threshold.
        :param minTrackCon: minimum tracking confidence threshold.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon
        self.modelComplex = modelComplexity
        self.mpHands = mp.solutions.hands  # MediaPipe hand-tracking module
        # Positional arguments must follow mp.solutions.hands.Hands.__init__
        # order: (static_image_mode, max_num_hands, model_complexity,
        # min_detection_confidence, min_tracking_confidence).
        self.hands = self.mpHands.Hands(self.mode, self.maxHands,
                                        self.modelComplex,
                                        self.detectionCon, self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils  # landmark drawing helper
        self.tipIds = [4, 8, 12, 16, 20]  # landmark ids of the five fingertips
        self.fingers = []
        self.lmList = []

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR image and optionally draw landmarks.

        Stores the raw MediaPipe output in ``self.results`` for the other
        methods to consume.

        :param img: BGR image (as read by OpenCV); drawn on in place.
        :param draw: draw the 21 landmarks and their connections when True.
        :return: the (possibly annotated) image.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # MediaPipe needs RGB
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Convert the landmarks of one hand to pixel coordinates.

        Must be called after :meth:`findHands` (reads ``self.results``).

        :param img: image the landmarks refer to (drawn on in place).
        :param handNo: index of the hand to read from the results.
        :param draw: draw landmark dots and the bounding box when True.
        :return: ``(lmList, bboxInfo)`` where ``lmList`` is a list of
            ``[x, y]`` pixel points and ``bboxInfo`` is a dict with keys
            ``"id"``, ``"bbox"`` (x, y, w, h) and ``"center"`` — both empty
            when no hand was detected.
        """
        xList = []
        yList = []
        bbox = []
        bboxInfo = []
        self.lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            h, w, _ = img.shape  # hoisted: frame shape is loop-invariant
            for lmId, lm in enumerate(myHand.landmark):
                # Landmarks are normalized [0,1]; scale to pixel coordinates.
                px, py = int(lm.x * w), int(lm.y * h)
                xList.append(px)
                yList.append(py)
                self.lmList.append([px, py])
                if draw:
                    cv2.circle(img, (px, py), 5, (255, 0, 255), cv2.FILLED)
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            boxW, boxH = xmax - xmin, ymax - ymin
            bbox = xmin, ymin, boxW, boxH
            # BUGFIX: the '+' operators were lost in the original listing
            # ("bbox[0] (bbox[2] // 2)"), which is a syntax error.
            cx = bbox[0] + bbox[2] // 2
            cy = bbox[1] + bbox[3] // 2
            bboxInfo = {"id": lmId, "bbox": bbox, "center": (cx, cy)}
            if draw:
                # Rectangle padded by 20 px around the detected hand.
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                              (0, 255, 0), 2)
        return self.lmList, bboxInfo

    def fingersUp(self):
        """Return ``[thumb, index, middle, ring, pinky]`` as 1 (up) / 0 (down).

        Requires :meth:`findPosition` to have populated ``self.lmList``.
        Returns None when no hand is present (matching the original contract).
        """
        if self.results.multi_hand_landmarks:
            myHandType = self.handType()
            fingers = []
            # Thumb: compare tip (4) vs the joint next to it (3) on the x
            # axis; the open direction depends on which hand it is.
            if myHandType == "Right":
                fingers.append(
                    1 if self.lmList[self.tipIds[0]][0] >
                    self.lmList[self.tipIds[0] - 1][0] else 0)
            else:
                fingers.append(
                    1 if self.lmList[self.tipIds[0]][0] <
                    self.lmList[self.tipIds[0] - 1][0] else 0)
            # Remaining four fingers: the tip being above (smaller y than)
            # the joint two landmarks below means the finger is raised.
            for fingerId in range(1, 5):
                fingers.append(
                    1 if self.lmList[self.tipIds[fingerId]][1] <
                    self.lmList[self.tipIds[fingerId] - 2][1] else 0)
            return fingers

    def handType(self):
        """Classify the detected hand as "Right" or "Left".

        Heuristic: compares the x positions of the pinky base (17) and the
        index base (5). Assumes the palm faces the camera — TODO confirm
        against mirrored input. Returns None when no hand is present.
        """
        if self.results.multi_hand_landmarks:
            if self.lmList[17][0] < self.lmList[5][0]:
                return "Right"
            else:
                return "Left"
主函数
文件名:main.py
代码语言:python
# -*- coding:utf-8 -*-
import cv2
from HandTrackingModule import HandDetector
class Main:
    """Webcam loop that overlays a label for simple finger-count gestures."""

    # Finger pattern (thumb, index, middle, ring, pinky; 1 = raised) -> label.
    # A dict lookup replaces the original if/elif chain, whose conditions
    # like "and (x1 == 0, x3 == 0, ...)" compared against an always-truthy
    # tuple and therefore never actually checked those fingers.
    GESTURE_LABELS = {
        (0, 1, 0, 0, 0): "1_ONE",
        (0, 1, 1, 0, 0): "2_TWO",
        (0, 1, 1, 1, 0): "3_THREE",
        (0, 1, 1, 1, 1): "4_FOUR",
        (1, 1, 1, 1, 1): "5_FIVE",
        (1, 0, 0, 0, 0): "GOOD!",
    }

    def __init__(self):
        # CAP_DSHOW: DirectShow backend (Windows) — NOTE(review): makes this
        # Windows-specific; confirm before running elsewhere.
        self.camera = cv2.VideoCapture(0, cv2.CAP_DSHOW)
        self.camera.set(3, 1280)  # property 3 = frame width
        self.camera.set(4, 720)   # property 4 = frame height

    def Gesture_recognition(self):
        """Read frames, detect one hand and draw the recognized gesture.

        Runs until the window is closed, 'q' is pressed, or the camera
        stops delivering frames. Releases the camera on exit.
        """
        # BUGFIX: build the detector once — the original re-created
        # HandDetector() (and the MediaPipe graph) on every frame.
        self.detector = HandDetector()
        while True:
            success, img = self.camera.read()
            if not success:  # BUGFIX: the original ignored the read status
                break
            img = self.detector.findHands(img)
            lmList, bbox = self.detector.findPosition(img)
            if lmList:  # a hand was detected
                x_1, y_1 = bbox["bbox"][0], bbox["bbox"][1]
                label = self.GESTURE_LABELS.get(tuple(self.detector.fingersUp()))
                if label:
                    cv2.putText(img, label, (x_1, y_1),
                                cv2.FONT_HERSHEY_PLAIN, 3, (0, 0, 255), 3)
            cv2.imshow("camera", img)
            # Exit when the user closes the window ...
            if cv2.getWindowProperty('camera', cv2.WND_PROP_VISIBLE) < 1:
                break
            # ... or presses lowercase 'q'.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
        # BUGFIX: release the capture device and destroy the window on exit.
        self.camera.release()
        cv2.destroyAllWindows()
# Script entry point: start the webcam gesture-recognition loop.
if __name__ == '__main__':
    Main().Gesture_recognition()
效果展示
结束语
后续多研究下这个包
再见2021
love&peace