Commit 62fa3225 authored by A.H.L.R Weerasinghe

emotion detector

parent d6c0db09
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import pprint\n",
"import pathlib\n",
"import cv2 as cv\n",
"import numpy as np\n",
"import cv2, dlib, math\n",
"import mediapipe as mp\n",
"from fastai.vision.all import *\n",
"\n",
"temp = pathlib.PosixPath\n",
"pathlib.PosixPath = pathlib.WindowsPath"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"mp_face_mesh = mp.solutions.face_mesh\n",
"face_mesh = mp_face_mesh.FaceMesh(\n",
" min_detection_confidence=0.5, \n",
" min_tracking_confidence=0.5\n",
" )\n",
"\n",
"face_detector = dlib.get_frontal_face_detector()\n",
"shape_predictor = dlib.shape_predictor(\"weights/shape_predictor_68_face_landmarks.dat\")\n",
"\n",
"learn_emotion = load_learner('weights/emotions_rf.pkl')\n",
"learn_emotion_labels = learn_emotion.dls.vocab"
]
},
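{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# If load_learner or dlib.shape_predictor fails with a file-not-found error, a\n",
"# quick check like this (a sketch using the file names assumed above) confirms\n",
"# the expected weight files are present in the weights/ folder.\n",
"for weight_file in [\"weights/shape_predictor_68_face_landmarks.dat\", \"weights/emotions_rf.pkl\"]:\n",
"    print(weight_file, \"found\" if os.path.exists(weight_file) else \"MISSING\")"
]
},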
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"head_pose_dict = {\n",
" \"Looking Left\" : 0,\n",
" \"Looking Right\" : 1,\n",
" \"Looking Up\" : 2,\n",
" \"Looking Down\" : 3,\n",
" \"Looking Forward\" : 4\n",
" } \n",
"\n",
"drowsiness_dict = {\n",
" \"Sleepy\" : 0,\n",
" \"Not Sleepy\" : 1\n",
" } \n",
"\n",
"emotion_dict = {\n",
" \"angry\" : 0, \n",
" \"disgust\" : 1,\n",
" \"fear\" : 2,\n",
" \"happy\" : 3,\n",
" \"neutral\" : 4,\n",
" \"sad\" : 5,\n",
" \"surprise\" : 6\n",
" }"
]
},
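{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional convenience (not used by the pipeline below): the detectors return\n",
"# lists of the numeric codes above, so inverse lookup tables make those lists\n",
"# readable when debugging, e.g. head_pose_labels[4] -> 'Looking Forward'.\n",
"head_pose_labels = {v: k for k, v in head_pose_dict.items()}\n",
"drowsiness_labels = {v: k for k, v in drowsiness_dict.items()}\n",
"emotion_labels = {v: k for k, v in emotion_dict.items()}"
]
},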
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def mid(p1 ,p2):\n",
" return int((p1.x + p2.x)/2), int((p1.y + p2.y)/2)\n",
"\n",
"def eye_aspect_ratio(eye_landmark, face_roi_landmark):\n",
" left_point = (face_roi_landmark.part(eye_landmark[0]).x, face_roi_landmark.part(eye_landmark[0]).y)\n",
" right_point = (face_roi_landmark.part(eye_landmark[3]).x, face_roi_landmark.part(eye_landmark[3]).y)\n",
"\n",
" center_top = mid(face_roi_landmark.part(eye_landmark[1]), face_roi_landmark.part(eye_landmark[2]))\n",
" center_bottom = mid(face_roi_landmark.part(eye_landmark[5]), face_roi_landmark.part(eye_landmark[4]))\n",
"\n",
" hor_line_length = math.hypot((left_point[0] - right_point[0]), (left_point[1] - right_point[1]))\n",
" ver_line_length = math.hypot((center_top[0] - center_bottom[0]), (center_top[1] - center_bottom[1]))\n",
"\n",
" ratio = hor_line_length / ver_line_length\n",
" return ratio\n",
"\n",
"def mouth_aspect_ratio(lips_landmark, face_roi_landmark):\n",
" left_point = (face_roi_landmark.part(lips_landmark[0]).x, face_roi_landmark.part(lips_landmark[0]).y)\n",
" right_point = (face_roi_landmark.part(lips_landmark[2]).x, face_roi_landmark.part(lips_landmark[2]).y)\n",
"\n",
" center_top = (face_roi_landmark.part(lips_landmark[1]).x, face_roi_landmark.part(lips_landmark[1]).y)\n",
" center_bottom = (face_roi_landmark.part(lips_landmark[3]).x, face_roi_landmark.part(lips_landmark[3]).y)\n",
"\n",
" hor_line_length = math.hypot((left_point[0] - right_point[0]), (left_point[1] - right_point[1]))\n",
" ver_line_length = math.hypot((center_top[0] - center_bottom[0]), (center_top[1] - center_bottom[1]))\n",
" if hor_line_length == 0:\n",
" return ver_line_length\n",
" ratio = ver_line_length / hor_line_length\n",
" return ratio\n",
"\n",
"def predict_emotion(img_path):\n",
" img = cv.imread(img_path)\n",
" img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)\n",
" img = cv.resize(img, (48, 48))\n",
" img = PILImage.create(img)\n",
" probs_emotion = learn_emotion.predict(img)[-1]\n",
" emotions = {learn_emotion_labels[i]: float(probs_emotion[i]) for i in range(len(learn_emotion_labels))}\n",
" return emotions"
]
},
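{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Minimal sketch of calling predict_emotion on a single frame. The image path\n",
"# below is a placeholder (not part of this repo), so the prediction only runs\n",
"# if such a file actually exists.\n",
"sample_face = 'images/sample_face.jpg'\n",
"if os.path.exists(sample_face):\n",
"    sample_emotions = predict_emotion(sample_face)\n",
"    print(max(sample_emotions, key=sample_emotions.get), sample_emotions)"
]
},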
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def head_pose_estimation(\n",
" video_path=None,\n",
" is_visualize=False\n",
" ): \n",
" if video_path is not None:\n",
" cap = cv2.VideoCapture(video_path)\n",
" else: \n",
" cap = cv2.VideoCapture(0)\n",
"\n",
" head_pose_state = []\n",
"\n",
" while cap.isOpened():\n",
" _, image = cap.read()\n",
" if image is None:\n",
" break\n",
" \n",
" image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)\n",
" image.flags.writeable = False\n",
" results = face_mesh.process(image)\n",
" image.flags.writeable = True\n",
" \n",
" image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n",
" img_h, img_w, _ = image.shape\n",
" face_3d = []\n",
" face_2d = []\n",
"\n",
" if results.multi_face_landmarks:\n",
" for face_landmarks in results.multi_face_landmarks:\n",
" for idx, lm in enumerate(face_landmarks.landmark):\n",
" if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:\n",
" if idx == 1:\n",
" nose_2d = (lm.x * img_w, lm.y * img_h)\n",
" nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 8000)\n",
"\n",
" x, y = int(lm.x * img_w), int(lm.y * img_h)\n",
"\n",
" face_2d.append([x, y])\n",
" face_3d.append([x, y, lm.z]) \n",
" \n",
" face_2d = np.array(face_2d, dtype=np.float64)\n",
" face_3d = np.array(face_3d, dtype=np.float64)\n",
" focal_length = 1 * img_w\n",
"\n",
" cam_matrix = np.array([ [focal_length, 0, img_h / 2],\n",
" [0, focal_length, img_w / 2],\n",
" [0, 0, 1]])\n",
"\n",
"\n",
" dist_matrix = np.zeros((4, 1), dtype=np.float64)\n",
" success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)\n",
"\n",
" rmat, jac = cv2.Rodrigues(rot_vec)\n",
" angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)\n",
"\n",
" x = angles[0] * 360\n",
" y = angles[1] * 360\n",
"\n",
" if y < -10:\n",
" text = \"Looking Left\"\n",
" elif y > 10:\n",
" text = \"Looking Right\"\n",
" elif x < -10:\n",
" text = \"Looking Down\"\n",
" elif x > 10:\n",
" text = \"Looking Up\"\n",
" else:\n",
" text = \"Looking Forward\"\n",
" head_pose_state.append(head_pose_dict[text])\n",
"\n",
" if is_visualize:\n",
" nose_3d_projection, _ = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)\n",
"\n",
" p1 = (int(nose_2d[0]), int(nose_2d[1]))\n",
" p2 = (int(nose_3d_projection[0][0][0]), int(nose_3d_projection[0][0][1]))\n",
" cv2.line(image, p1, p2, (255, 0, 0), 2)\n",
" cv2.putText(image, text, (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)\n",
"\n",
" if is_visualize:\n",
" cv2.imshow('Head Pose Estimation', image)\n",
" if cv2.waitKey(30) & 0xFF == 27:\n",
" break\n",
"\n",
" cap.release()\n",
" return head_pose_state\n",
"\n",
"\n",
"def drowsiness_detection(\n",
" video_path=None,\n",
" is_visualize=False,\n",
" font = cv2.FONT_HERSHEY_TRIPLEX\n",
" ): \n",
" if video_path is not None:\n",
" cap = cv2.VideoCapture(video_path)\n",
" else:\n",
" cap = cv2.VideoCapture(0)\n",
"\n",
" count = 0\n",
" drowsiness_state = []\n",
" while True:\n",
" _, img = cap.read()\n",
" if img is None:\n",
" break\n",
"\n",
" img = cv2.flip(img,1)\n",
" gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
" faces = face_detector(gray)\n",
"\n",
" for face_roi in faces:\n",
"\n",
" landmark_list = shape_predictor(gray, face_roi)\n",
"\n",
" left_eye_ratio = eye_aspect_ratio([36, 37, 38, 39, 40, 41], landmark_list)\n",
" right_eye_ratio = eye_aspect_ratio([42, 43, 44, 45, 46, 47], landmark_list)\n",
" eye_open_ratio = (left_eye_ratio + right_eye_ratio) / 2\n",
" if is_visualize:\n",
" cv2.putText(img, str(eye_open_ratio), (0, 13), font, 0.5, (100, 100, 100))\n",
"\n",
" inner_lip_ratio = mouth_aspect_ratio([60,62,64,66], landmark_list)\n",
" outter_lip_ratio = mouth_aspect_ratio([48,51,54,57], landmark_list)\n",
" mouth_open_ratio = (inner_lip_ratio + outter_lip_ratio) / 2;\n",
" if is_visualize:\n",
" cv2.putText(img, str(mouth_open_ratio), (448, 13), font, 0.5, (100, 100, 100))\n",
"\n",
" if mouth_open_ratio > 0.380 and eye_open_ratio > 4.0 or eye_open_ratio > 4.30:\n",
" count +=1\n",
" else:\n",
" count = 0\n",
" x,y = face_roi.left(), face_roi.top()\n",
" x1,y1 = face_roi.right(), face_roi.bottom()\n",
" if count>10:\n",
" if is_visualize:\n",
" cv2.rectangle(img, (x,y), (x1,y1), (0, 0, 255), 2)\n",
" cv2.putText(img, \"Sleepy\", (x, y-5), font, 0.5, (0, 0, 255))\n",
" drowsiness_state.append(drowsiness_dict[\"Sleepy\"])\n",
" \n",
" else:\n",
" if is_visualize:\n",
" cv2.rectangle(img, (x,y), (x1,y1), (0, 255, 0), 2)\n",
" drowsiness_state.append(drowsiness_dict[\"Not Sleepy\"])\n",
"\n",
" if is_visualize:\n",
" cv2.imshow(\"Drowsiness Detection\", img)\n",
" if cv2.waitKey(30) & 0xFF == 27:\n",
" break\n",
"\n",
" cap.release()\n",
" cv2.destroyAllWindows()\n",
" return drowsiness_state\n",
"\n",
"def emotion_detection(\n",
" video_path=None,\n",
" is_visualize=False\n",
" ):\n",
" if video_path is not None:\n",
" cap = cv2.VideoCapture(video_path)\n",
" else:\n",
" cap = cv2.VideoCapture(0)\n",
"\n",
" emotion_state = []\n",
" while True:\n",
" _, img = cap.read()\n",
" if img is None:\n",
" break\n",
"\n",
" img = cv2.flip(img,1)\n",
" gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
" faces = face_detector(gray)\n",
"\n",
" for face_roi in faces:\n",
" x,y = face_roi.left(), face_roi.top()\n",
" x1,y1 = face_roi.right(), face_roi.bottom()\n",
" cv2.rectangle(img, (x,y), (x1,y1), (0, 255, 0), 2)\n",
" roi_gray = gray[y:y1,x:x1]\n",
" roi_color = img[y:y1,x:x1]\n",
" roi_gray = cv2.resize(roi_gray, (48, 48))\n",
" roi_gray = PILImage.create(roi_gray)\n",
" probs_emotion = learn_emotion.predict(roi_gray)[-1]\n",
" emotions = {learn_emotion_labels[i]: float(probs_emotion[i]) for i in range(len(learn_emotion_labels))}\n",
" emotion = max(emotions, key=emotions.get)\n",
" if is_visualize:\n",
" cv2.putText(img, emotion, (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))\n",
" emotion_state.append(emotion_dict[emotion])\n",
"\n",
" if is_visualize:\n",
" cv2.imshow(\"Emotion Detection\", img)\n",
" if cv2.waitKey(1) & 0xFF == 27:\n",
" break\n",
"\n",
" cap.release()\n",
" cv2.destroyAllWindows()\n",
" return emotion_state"
]
},
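{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick visual sanity check of a single detector (a sketch, not part of the\n",
"# pipeline below): with no video_path it falls back to the default webcam,\n",
"# draws the overlays, and stops when Esc is pressed. Uncomment to try it.\n",
"# head_pose_estimation(is_visualize=True)"
]
},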
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def inference_attention_analyzer(video_path):\n",
" head_pose_state = head_pose_estimation(video_path)\n",
" drowsiness_state = drowsiness_detection(video_path)\n",
" emotion_state = emotion_detection(video_path)\n",
"\n",
" head_pose_distribution = np.bincount(head_pose_state)\n",
" drowsiness_distribution = np.bincount(drowsiness_state)\n",
" emotion_distribution = np.bincount(emotion_state)\n",
"\n",
" head_pose_percentage = head_pose_distribution / len(head_pose_state) * 100\n",
" drowsiness_percentage = drowsiness_distribution / len(drowsiness_state) * 100\n",
" emotion_percentage = emotion_distribution / len(emotion_state) * 100\n",
"\n",
" head_pose_response = {key: f\"{round(value, 2)} %\" for key, value in zip(head_pose_dict.keys(), head_pose_percentage)}\n",
" drowsiness_response = {key: f\"{round(value, 2)} %\" for key, value in zip(drowsiness_dict.keys(), drowsiness_percentage)}\n",
" emotion_response = {key: f\"{round(value, 2)} %\" for key, value in zip(emotion_dict.keys(), emotion_percentage)}\n",
"\n",
" return head_pose_response, drowsiness_response, emotion_response"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"video_path = 'videos/111.mp4'\n",
"head_pose_response, drowsiness_response, emotion_response = inference_attention_analyzer(video_path)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Head Pose Estimation\n",
"{'Looking Down': '0.0 %',\n",
" 'Looking Forward': '66.5 %',\n",
" 'Looking Left': '3.38 %',\n",
" 'Looking Right': '5.84 %',\n",
" 'Looking Up': '24.28 %'}\n",
"\n",
"Drowsiness Detection\n",
"{'Not Sleepy': '86.89 %', 'Sleepy': '13.11 %'}\n",
"\n",
"Emotion Detection\n",
"{'angry': '16.6 %',\n",
" 'disgust': '1.12 %',\n",
" 'fear': '2.93 %',\n",
" 'happy': '0.28 %',\n",
" 'neutral': '60.95 %',\n",
" 'sad': '17.99 %',\n",
" 'surprise': '0.14 %'}\n"
]
}
],
"source": [
"print(\"Head Pose Estimation\")\n",
"pprint.pprint(head_pose_response)\n",
"\n",
"print(\"\\nDrowsiness Detection\")\n",
"pprint.pprint(drowsiness_response)\n",
"\n",
"print(\"\\nEmotion Detection\")\n",
"pprint.pprint(emotion_response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "tf210",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}