Commit 62fa3225 authored by A.H.L.R Weerasinghe

emotion detector

parent d6c0db09
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import pprint\n",
"import pathlib\n",
"import cv2 as cv\n",
"import numpy as np\n",
"import cv2, dlib, math\n",
"import mediapipe as mp\n",
"from fastai.vision.all import *\n",
"\n",
"temp = pathlib.PosixPath\n",
"pathlib.PosixPath = pathlib.WindowsPath"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"mp_face_mesh = mp.solutions.face_mesh\n",
"face_mesh = mp_face_mesh.FaceMesh(\n",
" min_detection_confidence=0.5, \n",
" min_tracking_confidence=0.5\n",
" )\n",
"\n",
"face_detector = dlib.get_frontal_face_detector()\n",
"shape_predictor = dlib.shape_predictor(\"weights/shape_predictor_68_face_landmarks.dat\")\n",
"\n",
"learn_emotion = load_learner('weights/emotions_rf.pkl')\n",
"learn_emotion_labels = learn_emotion.dls.vocab"
]
},
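{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# If load_learner or dlib.shape_predictor fails with a file-not-found error, a\n",
"# quick check like this (a sketch using the file names assumed above) confirms\n",
"# the expected weight files are present in the weights/ folder.\n",
"for weight_file in [\"weights/shape_predictor_68_face_landmarks.dat\", \"weights/emotions_rf.pkl\"]:\n",
"    print(weight_file, \"found\" if os.path.exists(weight_file) else \"MISSING\")"
]
},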
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"head_pose_dict = {\n",
" \"Looking Left\" : 0,\n",
" \"Looking Right\" : 1,\n",
" \"Looking Up\" : 2,\n",
" \"Looking Down\" : 3,\n",
" \"Looking Forward\" : 4\n",
" } \n",
"\n",
"drowsiness_dict = {\n",
" \"Sleepy\" : 0,\n",
" \"Not Sleepy\" : 1\n",
" } \n",
"\n",
"emotion_dict = {\n",
" \"angry\" : 0, \n",
" \"disgust\" : 1,\n",
" \"fear\" : 2,\n",
" \"happy\" : 3,\n",
" \"neutral\" : 4,\n",
" \"sad\" : 5,\n",
" \"surprise\" : 6\n",
" }"
]
},
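{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional convenience (not used by the pipeline below): the detectors return\n",
"# lists of the numeric codes above, so inverse lookup tables make those lists\n",
"# readable when debugging, e.g. head_pose_labels[4] -> 'Looking Forward'.\n",
"head_pose_labels = {v: k for k, v in head_pose_dict.items()}\n",
"drowsiness_labels = {v: k for k, v in drowsiness_dict.items()}\n",
"emotion_labels = {v: k for k, v in emotion_dict.items()}"
]
},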
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def mid(p1 ,p2):\n",
" return int((p1.x + p2.x)/2), int((p1.y + p2.y)/2)\n",
"\n",
"def eye_aspect_ratio(eye_landmark, face_roi_landmark):\n",
" left_point = (face_roi_landmark.part(eye_landmark[0]).x, face_roi_landmark.part(eye_landmark[0]).y)\n",
" right_point = (face_roi_landmark.part(eye_landmark[3]).x, face_roi_landmark.part(eye_landmark[3]).y)\n",
"\n",
" center_top = mid(face_roi_landmark.part(eye_landmark[1]), face_roi_landmark.part(eye_landmark[2]))\n",
" center_bottom = mid(face_roi_landmark.part(eye_landmark[5]), face_roi_landmark.part(eye_landmark[4]))\n",
"\n",
" hor_line_length = math.hypot((left_point[0] - right_point[0]), (left_point[1] - right_point[1]))\n",
" ver_line_length = math.hypot((center_top[0] - center_bottom[0]), (center_top[1] - center_bottom[1]))\n",
"\n",
" ratio = hor_line_length / ver_line_length\n",
" return ratio\n",
"\n",
"def mouth_aspect_ratio(lips_landmark, face_roi_landmark):\n",
" left_point = (face_roi_landmark.part(lips_landmark[0]).x, face_roi_landmark.part(lips_landmark[0]).y)\n",
" right_point = (face_roi_landmark.part(lips_landmark[2]).x, face_roi_landmark.part(lips_landmark[2]).y)\n",
"\n",
" center_top = (face_roi_landmark.part(lips_landmark[1]).x, face_roi_landmark.part(lips_landmark[1]).y)\n",
" center_bottom = (face_roi_landmark.part(lips_landmark[3]).x, face_roi_landmark.part(lips_landmark[3]).y)\n",
"\n",
" hor_line_length = math.hypot((left_point[0] - right_point[0]), (left_point[1] - right_point[1]))\n",
" ver_line_length = math.hypot((center_top[0] - center_bottom[0]), (center_top[1] - center_bottom[1]))\n",
" if hor_line_length == 0:\n",
" return ver_line_length\n",
" ratio = ver_line_length / hor_line_length\n",
" return ratio\n",
"\n",
"def predict_emotion(img_path):\n",
" img = cv.imread(img_path)\n",
" img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)\n",
" img = cv.resize(img, (48, 48))\n",
" img = PILImage.create(img)\n",
" probs_emotion = learn_emotion.predict(img)[-1]\n",
" emotions = {learn_emotion_labels[i]: float(probs_emotion[i]) for i in range(len(learn_emotion_labels))}\n",
" return emotions"
]
},
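{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Minimal sketch of calling predict_emotion on a single frame. The image path\n",
"# below is a placeholder (not part of this repo), so the prediction only runs\n",
"# if such a file actually exists.\n",
"sample_face = 'images/sample_face.jpg'\n",
"if os.path.exists(sample_face):\n",
"    sample_emotions = predict_emotion(sample_face)\n",
"    print(max(sample_emotions, key=sample_emotions.get), sample_emotions)"
]
},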
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def head_pose_estimation(\n",
" video_path=None,\n",
" is_visualize=False\n",
" ): \n",
" if video_path is not None:\n",
" cap = cv2.VideoCapture(video_path)\n",
" else: \n",
" cap = cv2.VideoCapture(0)\n",
"\n",
" head_pose_state = []\n",
"\n",
" while cap.isOpened():\n",
" _, image = cap.read()\n",
" if image is None:\n",
" break\n",
" \n",
" image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)\n",
" image.flags.writeable = False\n",
" results = face_mesh.process(image)\n",
" image.flags.writeable = True\n",
" \n",
" image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n",
" img_h, img_w, _ = image.shape\n",
" face_3d = []\n",
" face_2d = []\n",
"\n",
" if results.multi_face_landmarks:\n",
" for face_landmarks in results.multi_face_landmarks:\n",
" for idx, lm in enumerate(face_landmarks.landmark):\n",
" if idx == 33 or idx == 263 or idx == 1 or idx == 61 or idx == 291 or idx == 199:\n",
" if idx == 1:\n",
" nose_2d = (lm.x * img_w, lm.y * img_h)\n",
" nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 8000)\n",
"\n",
" x, y = int(lm.x * img_w), int(lm.y * img_h)\n",
"\n",
" face_2d.append([x, y])\n",
" face_3d.append([x, y, lm.z]) \n",
" \n",
" face_2d = np.array(face_2d, dtype=np.float64)\n",
" face_3d = np.array(face_3d, dtype=np.float64)\n",
" focal_length = 1 * img_w\n",
"\n",
" cam_matrix = np.array([ [focal_length, 0, img_h / 2],\n",
" [0, focal_length, img_w / 2],\n",
" [0, 0, 1]])\n",
"\n",
"\n",
" dist_matrix = np.zeros((4, 1), dtype=np.float64)\n",
" success, rot_vec, trans_vec = cv2.solvePnP(face_3d, face_2d, cam_matrix, dist_matrix)\n",
"\n",
" rmat, jac = cv2.Rodrigues(rot_vec)\n",
" angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)\n",
"\n",
" x = angles[0] * 360\n",
" y = angles[1] * 360\n",
"\n",
" if y < -10:\n",
" text = \"Looking Left\"\n",
" elif y > 10:\n",
" text = \"Looking Right\"\n",
" elif x < -10:\n",
" text = \"Looking Down\"\n",
" elif x > 10:\n",
" text = \"Looking Up\"\n",
" else:\n",
" text = \"Looking Forward\"\n",
" head_pose_state.append(head_pose_dict[text])\n",
"\n",
" if is_visualize:\n",
" nose_3d_projection, _ = cv2.projectPoints(nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)\n",
"\n",
" p1 = (int(nose_2d[0]), int(nose_2d[1]))\n",
" p2 = (int(nose_3d_projection[0][0][0]), int(nose_3d_projection[0][0][1]))\n",
" cv2.line(image, p1, p2, (255, 0, 0), 2)\n",
" cv2.putText(image, text, (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)\n",
"\n",
" if is_visualize:\n",
" cv2.imshow('Head Pose Estimation', image)\n",
" if cv2.waitKey(30) & 0xFF == 27:\n",
" break\n",
"\n",
" cap.release()\n",
" return head_pose_state\n",
"\n",
"\n",
"def drowsiness_detection(\n",
" video_path=None,\n",
" is_visualize=False,\n",
" font = cv2.FONT_HERSHEY_TRIPLEX\n",
" ): \n",
" if video_path is not None:\n",
" cap = cv2.VideoCapture(video_path)\n",
" else:\n",
" cap = cv2.VideoCapture(0)\n",
"\n",
" count = 0\n",
" drowsiness_state = []\n",
" while True:\n",
" _, img = cap.read()\n",
" if img is None:\n",
" break\n",
"\n",
" img = cv2.flip(img,1)\n",
" gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
" faces = face_detector(gray)\n",
"\n",
" for face_roi in faces:\n",
"\n",
" landmark_list = shape_predictor(gray, face_roi)\n",
"\n",
" left_eye_ratio = eye_aspect_ratio([36, 37, 38, 39, 40, 41], landmark_list)\n",
" right_eye_ratio = eye_aspect_ratio([42, 43, 44, 45, 46, 47], landmark_list)\n",
" eye_open_ratio = (left_eye_ratio + right_eye_ratio) / 2\n",
" if is_visualize:\n",
" cv2.putText(img, str(eye_open_ratio), (0, 13), font, 0.5, (100, 100, 100))\n",
"\n",
" inner_lip_ratio = mouth_aspect_ratio([60,62,64,66], landmark_list)\n",
" outter_lip_ratio = mouth_aspect_ratio([48,51,54,57], landmark_list)\n",
" mouth_open_ratio = (inner_lip_ratio + outter_lip_ratio) / 2;\n",
" if is_visualize:\n",
" cv2.putText(img, str(mouth_open_ratio), (448, 13), font, 0.5, (100, 100, 100))\n",
"\n",
" if mouth_open_ratio > 0.380 and eye_open_ratio > 4.0 or eye_open_ratio > 4.30:\n",
" count +=1\n",
" else:\n",
" count = 0\n",
" x,y = face_roi.left(), face_roi.top()\n",
" x1,y1 = face_roi.right(), face_roi.bottom()\n",
" if count>10:\n",
" if is_visualize:\n",
" cv2.rectangle(img, (x,y), (x1,y1), (0, 0, 255), 2)\n",
" cv2.putText(img, \"Sleepy\", (x, y-5), font, 0.5, (0, 0, 255))\n",
" drowsiness_state.append(drowsiness_dict[\"Sleepy\"])\n",
" \n",
" else:\n",
" if is_visualize:\n",
" cv2.rectangle(img, (x,y), (x1,y1), (0, 255, 0), 2)\n",
" drowsiness_state.append(drowsiness_dict[\"Not Sleepy\"])\n",
"\n",
" if is_visualize:\n",
" cv2.imshow(\"Drowsiness Detection\", img)\n",
" if cv2.waitKey(30) & 0xFF == 27:\n",
" break\n",
"\n",
" cap.release()\n",
" cv2.destroyAllWindows()\n",
" return drowsiness_state\n",
"\n",
"def emotion_detection(\n",
" video_path=None,\n",
" is_visualize=False\n",
" ):\n",
" if video_path is not None:\n",
" cap = cv2.VideoCapture(video_path)\n",
" else:\n",
" cap = cv2.VideoCapture(0)\n",
"\n",
" emotion_state = []\n",
" while True:\n",
" _, img = cap.read()\n",
" if img is None:\n",
" break\n",
"\n",
" img = cv2.flip(img,1)\n",
" gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
" faces = face_detector(gray)\n",
"\n",
" for face_roi in faces:\n",
" x,y = face_roi.left(), face_roi.top()\n",
" x1,y1 = face_roi.right(), face_roi.bottom()\n",
" cv2.rectangle(img, (x,y), (x1,y1), (0, 255, 0), 2)\n",
" roi_gray = gray[y:y1,x:x1]\n",
" roi_color = img[y:y1,x:x1]\n",
" roi_gray = cv2.resize(roi_gray, (48, 48))\n",
" roi_gray = PILImage.create(roi_gray)\n",
" probs_emotion = learn_emotion.predict(roi_gray)[-1]\n",
" emotions = {learn_emotion_labels[i]: float(probs_emotion[i]) for i in range(len(learn_emotion_labels))}\n",
" emotion = max(emotions, key=emotions.get)\n",
" if is_visualize:\n",
" cv2.putText(img, emotion, (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))\n",
" emotion_state.append(emotion_dict[emotion])\n",
"\n",
" if is_visualize:\n",
" cv2.imshow(\"Emotion Detection\", img)\n",
" if cv2.waitKey(1) & 0xFF == 27:\n",
" break\n",
"\n",
" cap.release()\n",
" cv2.destroyAllWindows()\n",
" return emotion_state"
]
},
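{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick visual sanity check of a single detector (a sketch, not part of the\n",
"# pipeline below): with no video_path it falls back to the default webcam,\n",
"# draws the overlays, and stops when Esc is pressed. Uncomment to try it.\n",
"# head_pose_estimation(is_visualize=True)"
]
},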
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def inference_attention_analyzer(video_path):\n",
" head_pose_state = head_pose_estimation(video_path)\n",
" drowsiness_state = drowsiness_detection(video_path)\n",
" emotion_state = emotion_detection(video_path)\n",
"\n",
" head_pose_distribution = np.bincount(head_pose_state)\n",
" drowsiness_distribution = np.bincount(drowsiness_state)\n",
" emotion_distribution = np.bincount(emotion_state)\n",
"\n",
" head_pose_percentage = head_pose_distribution / len(head_pose_state) * 100\n",
" drowsiness_percentage = drowsiness_distribution / len(drowsiness_state) * 100\n",
" emotion_percentage = emotion_distribution / len(emotion_state) * 100\n",
"\n",
" head_pose_response = {key: f\"{round(value, 2)} %\" for key, value in zip(head_pose_dict.keys(), head_pose_percentage)}\n",
" drowsiness_response = {key: f\"{round(value, 2)} %\" for key, value in zip(drowsiness_dict.keys(), drowsiness_percentage)}\n",
" emotion_response = {key: f\"{round(value, 2)} %\" for key, value in zip(emotion_dict.keys(), emotion_percentage)}\n",
"\n",
" return head_pose_response, drowsiness_response, emotion_response"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"video_path = 'videos/111.mp4'\n",
"head_pose_response, drowsiness_response, emotion_response = inference_attention_analyzer(video_path)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Head Pose Estimation\n",
"{'Looking Down': '0.0 %',\n",
" 'Looking Forward': '66.5 %',\n",
" 'Looking Left': '3.38 %',\n",
" 'Looking Right': '5.84 %',\n",
" 'Looking Up': '24.28 %'}\n",
"\n",
"Drowsiness Detection\n",
"{'Not Sleepy': '86.89 %', 'Sleepy': '13.11 %'}\n",
"\n",
"Emotion Detection\n",
"{'angry': '16.6 %',\n",
" 'disgust': '1.12 %',\n",
" 'fear': '2.93 %',\n",
" 'happy': '0.28 %',\n",
" 'neutral': '60.95 %',\n",
" 'sad': '17.99 %',\n",
" 'surprise': '0.14 %'}\n"
]
}
],
"source": [
"print(\"Head Pose Estimation\")\n",
"pprint.pprint(head_pose_response)\n",
"\n",
"print(\"\\nDrowsiness Detection\")\n",
"pprint.pprint(drowsiness_response)\n",
"\n",
"print(\"\\nEmotion Detection\")\n",
"pprint.pprint(emotion_response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "tf210",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}