Added Speech Recognition Setup

parent 6ec6ec1b
......@@ -2,48 +2,10 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "90e3f5ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: mediapipe in c:\\users\\isuri\\anaconda3\\lib\\site-packages (0.8.9.1)\n",
"Requirement already satisfied: OpenCV-python in c:\\users\\isuri\\anaconda3\\lib\\site-packages (4.5.5.64)\n",
"Requirement already satisfied: ffmpeg in c:\\users\\isuri\\anaconda3\\lib\\site-packages (1.4)\n",
"Requirement already satisfied: moviepy in c:\\users\\isuri\\anaconda3\\lib\\site-packages (1.0.3)\n",
"Requirement already satisfied: varname in c:\\users\\isuri\\anaconda3\\lib\\site-packages (0.8.3)\n",
"Requirement already satisfied: opencv-contrib-python in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from mediapipe) (4.5.5.64)\n",
"Requirement already satisfied: attrs>=19.1.0 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from mediapipe) (21.2.0)\n",
"Requirement already satisfied: protobuf>=3.11.4 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from mediapipe) (3.20.1)\n",
"Requirement already satisfied: matplotlib in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from mediapipe) (3.4.3)\n",
"Requirement already satisfied: numpy in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from mediapipe) (1.20.3)\n",
"Requirement already satisfied: absl-py in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from mediapipe) (1.0.0)\n",
"Requirement already satisfied: imageio<3.0,>=2.5 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from moviepy) (2.9.0)\n",
"Requirement already satisfied: requests<3.0,>=2.8.1 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from moviepy) (2.26.0)\n",
"Requirement already satisfied: proglog<=1.0.0 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from moviepy) (0.1.10)\n",
"Requirement already satisfied: decorator<5.0,>=4.0.2 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from moviepy) (4.4.2)\n",
"Requirement already satisfied: imageio-ffmpeg>=0.2.0 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from moviepy) (0.4.7)\n",
"Requirement already satisfied: tqdm<5.0,>=4.11.2 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from moviepy) (4.62.3)\n",
"Requirement already satisfied: asttokens<3.0.0,>=2.0.0 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from varname) (2.0.5)\n",
"Requirement already satisfied: pure_eval<1.0.0 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from varname) (0.2.2)\n",
"Requirement already satisfied: executing<0.9.0,>=0.8.3 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from varname) (0.8.3)\n",
"Requirement already satisfied: six in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from asttokens<3.0.0,>=2.0.0->varname) (1.16.0)\n",
"Requirement already satisfied: pillow in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from imageio<3.0,>=2.5->moviepy) (8.4.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (2021.10.8)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (2.0.4)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (1.26.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from requests<3.0,>=2.8.1->moviepy) (3.2)\n",
"Requirement already satisfied: colorama in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from tqdm<5.0,>=4.11.2->moviepy) (0.4.4)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from matplotlib->mediapipe) (2.8.2)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from matplotlib->mediapipe) (1.3.1)\n",
"Requirement already satisfied: pyparsing>=2.2.1 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from matplotlib->mediapipe) (3.0.4)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from matplotlib->mediapipe) (0.10.0)\n"
]
}
],
"outputs": [],
"source": [
"!pip install mediapipe OpenCV-python ffmpeg moviepy varname\n",
"\n",
......@@ -52,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "b6068954",
"metadata": {},
"outputs": [],
......@@ -75,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "17b488db",
"metadata": {},
"outputs": [],
......@@ -85,8 +47,8 @@
"def obtain_coordinates():\n",
"\n",
" print(f\"[INFO] Processing {video_name}...\")\n",
" VIDEO_STREAM_OUT = os.path.join(output_folder , video_name + '_MP.mp4')\n",
" graph_plot = os.path.join(output_folder , video_name+'_MP.png')\n",
" #VIDEO_STREAM_OUT = os.path.join(output_folder , video_name + '_MP.mp4')\n",
" #graph_plot = os.path.join(output_folder , video_name+'_MP.png')\n",
" cap = cv2.VideoCapture(VIDEO_STREAM)\n",
"\n",
" fourcc = cv2.VideoWriter_fourcc(*\"XVID\")\n",
......@@ -143,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"id": "f8e035e9",
"metadata": {},
"outputs": [],
......@@ -164,7 +126,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "75dcebc6",
"metadata": {},
"outputs": [],
......@@ -202,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": null,
"id": "334fd6af",
"metadata": {},
"outputs": [],
......@@ -230,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 95,
"execution_count": null,
"id": "292d05f8",
"metadata": {},
"outputs": [],
......@@ -256,7 +218,7 @@
},
{
"cell_type": "code",
"execution_count": 96,
"execution_count": null,
"id": "9986f31c",
"metadata": {},
"outputs": [],
......@@ -280,7 +242,7 @@
},
{
"cell_type": "code",
"execution_count": 97,
"execution_count": null,
"id": "20fb4bd8",
"metadata": {},
"outputs": [],
......@@ -322,895 +284,259 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": null,
"id": "d3784870",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[INFO] Processing LRH_video_01...\n",
"MoviePy - Writing audio in result_analysis/LRH_video_01/my_result.mp3\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MoviePy - Done.\n",
"MoviePy - Writing audio in result_analysis/LRH_video_01/my_result_sr.wav\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MoviePy - Done.\n",
"[-0.3013699948787689]\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"outputs": [],
"source": [
"###MAIN PROGRAM\n",
"#Runs the pipeline for one video: landmark extraction, audio extraction,\n",
"#CSV export and graph plotting. The helpers called here are defined elsewhere in the notebook.\n",
"\n",
"#variables\n",
"#one coordinate list per tracked landmark\n",
"#(presumably filled by obtain_coordinates() - TODO confirm)\n",
"NOSE, LEFT_EYE_INNER, LEFT_EYE, LEFT_EYE_OUTER, RIGHT_EYE_INNER, RIGHT_EYE, RIGHT_EYE_OUTER, LEFT_MOUTH, RIGHT_MOUTH = [], [], [], [], [], [], [], [], []\n",
"\n",
"#the list objects themselves, in the same order as the names in landmark_list\n",
"landmark_list_desc = [NOSE, LEFT_EYE_INNER, LEFT_EYE, LEFT_EYE_OUTER, RIGHT_EYE_INNER, RIGHT_EYE, RIGHT_EYE_OUTER, LEFT_MOUTH, RIGHT_MOUTH]\n",
"\n",
"landmark_list = ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_MOUTH', 'RIGHT_MOUTH']\n",
"\n",
"\n",
"#video details\n",
"VIDEO_STREAM = 'test_videos/LRH_video_01.mp4'\n",
"video_name = 'LRH_video_01'\n",
"\n",
"#output locations: everything for this video lives under output_folder\n",
"output_folder = 'result_analysis/' + video_name\n",
"#storage_location = output_folder + '/coordinates.csv' --for one file\n",
"storage_location = output_folder + '/datasheets'\n",
"output_folder_graphs = output_folder + '/graphs'\n",
"\n",
"#create every output folder BEFORE processing, so that steps which write into\n",
"#output_folder do not fail on a fresh checkout; exist_ok=True makes re-runs safe\n",
"#and replaces the separate isdir() checks\n",
"for folder in (output_folder, storage_location, output_folder_graphs):\n",
"    os.makedirs(folder, mode = 0o777, exist_ok = True)\n",
"\n",
"#processing\n",
"obtain_coordinates()\n",
"print('[INFO] REMOVING AUDIO')\n",
"extract_audio()\n",
"\n",
"#generating yaw,pitch, roll\n",
"\n",
"#saving generated data\n",
"#write data to csv\n",
"write_to_csv(storage_location)\n",
"\n",
"#plotting and saving graphs\n",
"plot_coordinates()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"cell_type": "code",
"execution_count": 12,
"id": "da44965f",
"metadata": {},
"output_type": "display_data"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"Requirement already satisfied: SpeechRecognition in c:\\users\\isuri\\anaconda3\\lib\\site-packages (3.8.1)\n",
"Requirement already satisfied: pydub in c:\\users\\isuri\\anaconda3\\lib\\site-packages (0.25.1)\n",
"Requirement already satisfied: ffmpeg in c:\\users\\isuri\\anaconda3\\lib\\site-packages (1.4)\n",
"Requirement already satisfied: imageio in c:\\users\\isuri\\anaconda3\\lib\\site-packages (2.9.0)\n",
"Requirement already satisfied: imageio-ffmpeg in c:\\users\\isuri\\anaconda3\\lib\\site-packages (0.4.7)\n",
"Requirement already satisfied: pillow in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from imageio) (8.4.0)\n",
"Requirement already satisfied: numpy in c:\\users\\isuri\\anaconda3\\lib\\site-packages (from imageio) (1.20.3)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707]\n"
}
],
"source": [
"#%pip (not bare pip / !pip) installs into the environment of the running kernel\n",
"%pip install SpeechRecognition pydub ffmpeg imageio imageio-ffmpeg"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"cell_type": "code",
"execution_count": 13,
"id": "cf3503dc",
"metadata": {},
"output_type": "display_data"
},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"'3.8.1'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282]\n"
"output_type": "execute_result"
}
],
"source": [
"#to convert mp3 to wav\n",
"from pydub import AudioSegment\n",
"\n",
"#to clip an audio\n",
"from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip\n",
"\n",
"import speech_recognition as sr\n",
"sr.__version__"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"cell_type": "code",
"execution_count": 4,
"id": "c3d89dae",
"metadata": {},
"output_type": "display_data"
},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"'the still smell of old beer drinkers it takes hi to bring out the order I called it restore selfinvest a salt a kotess find the M tacos Al pastor are my favourite is just for food is Bihar cross bun'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684]\n"
"output_type": "execute_result"
}
],
"source": [
"#recognizing speech using a Recognizer instance\n",
"#(this instance is reused by the recognition cells further down)\n",
"recognizer = sr.Recognizer()\n",
"\n",
"#processing an audio\n",
"#setting up the audio file\n",
"harvard = sr.AudioFile('test_audio/harvard.wav')\n",
"\n",
"#open the file as a source and record its whole content into an AudioData instance\n",
"with harvard as source:\n",
"    audio = recognizer.record(source)\n",
"\n",
"#using the Google Speech Recognition\n",
"#needs an active internet connection\n",
"recognizer.recognize_google(audio)"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"cell_type": "code",
"execution_count": 5,
"id": "9a17f2ba",
"metadata": {},
"output_type": "display_data"
},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"'Bangkok ke teacher song'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266]\n"
"output_type": "execute_result"
}
],
"source": [
"#test two of speech recognition - Mandarin\n",
"\n",
"chinese = sr.AudioFile('test_audio/mandarin_one.wav')\n",
"\n",
"#record the whole file into an AudioData instance\n",
"with chinese as source:\n",
"    audio = recognizer.record(source)\n",
"\n",
"#using the Google Speech Recognition\n",
"#needs an active internet connection\n",
"#recognize_google() defaults to language='en-US'; without an explicit Mandarin tag\n",
"#the clip is decoded as English and the transcript is meaningless\n",
"recognizer.recognize_google(audio, language='zh-CN')"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"cell_type": "code",
"execution_count": 14,
"id": "1d2ffcec",
"metadata": {},
"output_type": "display_data"
},
"outputs": [
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"'yah kya hai'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"output_type": "execute_result"
}
],
"source": [
"#test three of speech recognition - full song\n",
"\n",
"#sr.AudioFile is given a WAV file here; the mp3 original was presumably converted\n",
"#once with pydub, kept below for reference:\n",
"#song_mp3 = sr.AudioFile('test_audio/avril_song.mp3')\n",
"#audSeg = AudioSegment.from_mp3('test_audio/avril_song.mp3')\n",
"#audSeg.export('test_audio/avril_song_wav', format='wav')\n",
"\n",
"song_wav_path = 'test_audio/avril_song.wav'\n",
"song = sr.AudioFile(song_wav_path)\n",
"\n",
"#record the complete file into an AudioData instance\n",
"with song as source:\n",
"    audio = recognizer.record(source)\n",
"\n",
"#Google Speech Recognition - requires an active internet connection\n",
"recognizer.recognize_google(audio)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"cell_type": "code",
"execution_count": 15,
"id": "dae059b9",
"metadata": {},
"output_type": "display_data"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549]\n"
"Moviepy - Running:\n",
">>> \"+ \" \".join(cmd)\n",
"Moviepy - Command successful\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
"'yah size mein kya hai'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897, -0.3831891119480133]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897, -0.3831891119480133, -0.39194944500923157]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897, -0.3831891119480133, -0.39194944500923157, -0.3952670693397522]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897, -0.3831891119480133, -0.39194944500923157, -0.3952670693397522, -0.39360523223876953]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897, -0.3831891119480133, -0.39194944500923157, -0.3952670693397522, -0.39360523223876953, -0.39427807927131653]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-0.3013699948787689, -0.4105592668056488, -0.42823660373687744, -0.5199782252311707, -0.4891508221626282, -0.4548543095588684, -0.44156742095947266, -0.4435288608074188, -0.4358169138431549, -0.46769434213638306, -0.4507959187030792, -0.45984163880348206, -0.48630818724632263, -0.43616288900375366, -0.4268682897090912, -0.4363146126270294, -0.4334081709384918, -0.3856501579284668, -0.4038098454475403, -0.4110352098941803, -0.4072643518447876, -0.4114300608634949, -0.4271574020385742, -0.4275912344455719, -0.3884345293045044, -0.37125125527381897, -0.3831891119480133, -0.39194944500923157, -0.3952670693397522, -0.39360523223876953, -0.39427807927131653, -0.4196490943431854]\n"
]
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Figure size 432x288 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"###MAIN PROGRAM\n",
"\n",
"#variables\n",
" #storing landmark details\n",
"NOSE, LEFT_EYE_INNER, LEFT_EYE, LEFT_EYE_OUTER, RIGHT_EYE_INNER, RIGHT_EYE, RIGHT_EYE_OUTER, LEFT_MOUTH, RIGHT_MOUTH = [], [], [], [], [], [], [], [], []\n",
"\n",
"landmark_list_desc = [NOSE, LEFT_EYE_INNER, LEFT_EYE, LEFT_EYE_OUTER, RIGHT_EYE_INNER, RIGHT_EYE, RIGHT_EYE_OUTER, LEFT_MOUTH, RIGHT_MOUTH]\n",
"\n",
"landmark_list = ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_MOUTH', 'RIGHT_MOUTH']\n",
"\n",
"\n",
" #video details\n",
"VIDEO_STREAM = 'test_videos/LRH_video_01.mp4'\n",
"video_name = 'LRH_video_01'\n",
"output_folder = 'result_analysis/' + video_name\n",
"\n",
"#processing\n",
"obtain_coordinates()\n",
"extract_audio()\n",
"\n",
"#generating yaw,pitch, roll\n",
"\n",
"#saving generated data\n",
" #check if specified path exists, if not create it\n",
"if not (os.path.isdir(output_folder)):\n",
" os.makedirs(output_folder, mode = 0o777, exist_ok = False)\n",
"\n",
"#storage_location = output_folder + '/coordinates.csv' --for one file\n",
"storage_location = output_folder + '/datasheets'\n",
"#check if specified path exists, if not create it\n",
"if not (os.path.isdir(storage_location)):\n",
" os.makedirs(storage_location, mode = 0o777, exist_ok = False)\n",
"\n",
" #write data to csv\n",
"write_to_csv(storage_location) \n",
"\n",
" #plotting and saving graphs\n",
"output_folder_graphs = output_folder + '/graphs'\n",
" #check if specified path for graphs exists, if not create it\n",
"if not (os.path.isdir(output_folder_graphs)):\n",
" os.makedirs(output_folder_graphs, mode = 0o777, exist_ok = False)\n",
"\n",
"plot_coordinates(output_folder_graphs)"
"output_type": "execute_result"
}
],
"source": [
"#speech recognition on a short clip cut out of the song\n",
"\n",
"full_song_path = 'test_audio/avril_song.wav'\n",
"clip_path = 'test_audio/avril_song_clip.wav'\n",
"clip_start, clip_end = 6, 18\n",
"\n",
"#cut the section between clip_start and clip_end out of the full song into clip_path\n",
"ffmpeg_extract_subclip(full_song_path, clip_start, clip_end, targetname=clip_path)\n",
"\n",
"song = sr.AudioFile(clip_path)\n",
"\n",
"#record the complete clip into an AudioData instance\n",
"with song as source:\n",
"    audio = recognizer.record(source)\n",
"\n",
"#Google Speech Recognition - requires an active internet connection\n",
"recognizer.recognize_google(audio)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da44965f",
"id": "821c9454",
"metadata": {},
"outputs": [],
"source": []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment