Created
July 19, 2021 11:33
-
-
Save Mikadun/7ee616d34415949d09ea7ee53e52fca5 to your computer and use it in GitHub Desktop.
Gesture-based volume control with Python and OpenCV — an upgraded version of the code from Murtaza's workshop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Gesture-based master-volume control.

Tracks a single hand with MediaPipe Hands; the distance between the thumb
tip and the index-finger tip — normalized by a rough per-frame hand-size
estimate — nudges the Windows master volume up or down on every frame.
Press Esc in the preview window to quit.
"""
import math
from ctypes import cast, POINTER

import cv2
import mediapipe as mp
import numpy as np
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

np.set_printoptions(precision=3)

# Maximum volume change per processed frame, as a fraction of the full
# scalar range.  Hoisted to module level: the original re-assigned it
# inside the per-hand loop on every iteration.
VOLUME_SPEED = 0.1
# The right-triangle estimate of the maximum tip-to-tip distance is
# generous, so it is scaled down (factor kept from the original heuristic).
MAX_DISTANCE_FACTOR = 0.7

# Short aliases for the MediaPipe helpers.
mpDrawing = mp.solutions.drawing_utils
mpHands = mp.solutions.hands
LMS = mpHands.HandLandmark

# pycaw boilerplate: acquire the default audio endpoint for master-volume control.
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# Capture from the default camera (id = 0); track at most one hand.
capture = cv2.VideoCapture(0)
hands = mpHands.Hands(min_detection_confidence=0.7, max_num_hands=1)

try:
    while capture.isOpened():
        success, image = capture.read()
        if not success:
            # Transient empty frame: skip it and try again.
            continue

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        imageRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        processedHands = hands.process(imageRGB)

        # Only proceed if at least one hand was detected in this frame.
        if processedHands.multi_hand_landmarks is not None:
            for handLandmarks in processedHands.multi_hand_landmarks:
                # Use the mpDrawing alias consistently (the original mixed
                # the alias with the fully-qualified module path).
                mpDrawing.draw_landmarks(image, handLandmarks, mpHands.HAND_CONNECTIONS)

                # Thumb tip, index-finger tip and wrist, in MediaPipe's
                # normalized image coordinates.
                lm = handLandmarks.landmark
                thumb = np.array([lm[LMS.THUMB_TIP].x, lm[LMS.THUMB_TIP].y])
                index = np.array([lm[LMS.INDEX_FINGER_TIP].x, lm[LMS.INDEX_FINGER_TIP].y])
                wrist = np.array([lm[LMS.WRIST].x, lm[LMS.WRIST].y])

                # Rough finger lengths (wrist -> tip).  Treating the two
                # fingers as the legs of a right triangle bounds how far the
                # tips can separate, which normalizes the gesture so it is
                # independent of the hand's distance from the camera.
                indexLength = np.linalg.norm(index - wrist)
                thumbLength = np.linalg.norm(thumb - wrist)
                maxDistance = MAX_DISTANCE_FACTOR * math.hypot(indexLength, thumbLength)

                # Map the current tip-to-tip distance onto a signed step in
                # [-VOLUME_SPEED, VOLUME_SPEED]; np.interp clamps outside values.
                length = np.linalg.norm(index - thumb)
                diff = np.interp(length, (0.0, maxDistance), [-VOLUME_SPEED, VOLUME_SPEED])

                # Nudge the master volume (scalar 0.0-1.0, not decibels),
                # clamped to the valid range.
                currentVolume = volume.GetMasterVolumeLevelScalar()
                newVolume = min(max(currentVolume + diff, 0.0), 1.0)
                volume.SetMasterVolumeLevelScalar(newVolume, None)

        cv2.imshow('Image', image)
        # Esc (keycode 27) quits.
        if cv2.waitKey(5) & 0xFF == 27:
            break
finally:
    # Release the camera, the MediaPipe graph, and the OpenCV windows even
    # when the loop exits via an exception — the original leaked all three
    # in that case and never closed the Hands solution at all.
    hands.close()
    capture.release()
    cv2.destroyAllWindows()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment