Commit bd153756 authored by Paranagama R.P.S.D.

changes with large file changes

parent 610ab229
models/*
!models/
\ No newline at end of file
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "ade37944",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import os\n",
"import cv2\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"import mediapipe as mp"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "16176bf6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['A',\n",
" 'Aah',\n",
" 'Ae',\n",
" 'Aeh',\n",
" 'Ah',\n",
" 'Ee',\n",
" 'Eeh',\n",
" 'Ig',\n",
" 'K',\n",
" 'O',\n",
" 'Ohh',\n",
" 'T',\n",
" 'Uh',\n",
" 'Uhh']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"IMG_SIZE = 224 # image size\n",
"BATCH_SIZE = 32 # batch size\n",
"EPOCHS = 10 # number of epochs\n",
"CLASSES = os.listdir('D:/RP/data/training') # list of classes\n",
"NUM_CLASSES = len(CLASSES) # number of classes\n",
"\n",
"CLASSES"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8f7b1301",
"metadata": {},
"outputs": [],
"source": [
"# Dictionary to map English letters to Sinhala letters\n",
"letter_mapping = {\n",
" 'Ah': 'අ',\n",
" 'Aah': 'ආ',\n",
" 'Aeh': 'ඇ',\n",
" 'Ee': 'ඉ',\n",
" 'Eeh': 'ඊ',\n",
" 'Uh': 'උ',\n",
" 'Uhh': 'ඌ',\n",
" 'A': 'එ',\n",
" 'Ae': 'ඒ',\n",
" 'O': 'ඔ',\n",
" 'Ohh': 'ඕ',\n",
" 'K': 'ක්',\n",
" 'Ig': 'ග්',\n",
" 'T': 'ටී'\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c9034cbe",
"metadata": {},
"outputs": [],
"source": [
"def load_dataset(dataset_path):\n",
" data = []\n",
" labels = []\n",
" for class_name in CLASSES:\n",
" class_path = os.path.join(dataset_path, class_name)\n",
" for img_name in os.listdir(class_path):\n",
" try:\n",
" \n",
" img_path = os.path.join(class_path, img_name)\n",
" img = cv2.imread(img_path)\n",
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert color space\n",
" img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) # resize image\n",
" data.append(img)\n",
" labels.append(CLASSES.index(class_name))\n",
"\n",
" \n",
" except Exception as e:\n",
" print(f\"Error loading image {img_path}: {e}\")\n",
" data = np.array(data, dtype=np.float32) / 255.0 # normalize pixel values\n",
" labels = tf.keras.utils.to_categorical(labels, num_classes=NUM_CLASSES) # one-hot encode labels\n",
" return data, labels\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7adb379e",
"metadata": {},
"outputs": [],
"source": [
"data, labels = load_dataset('D:/RP/data/training')\n",
"train_data, val_data, train_labels, val_labels = train_test_split(data, labels, test_size=0.2, random_state=42)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d44f7806",
"metadata": {},
"outputs": [],
"source": [
"model = tf.keras.Sequential([\n",
" tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)),\n",
" tf.keras.layers.MaxPooling2D((2, 2)),\n",
" tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),\n",
" tf.keras.layers.MaxPooling2D((2, 2)),\n",
" tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),\n",
" tf.keras.layers.MaxPooling2D((2, 2)),\n",
" tf.keras.layers.Flatten(),\n",
" tf.keras.layers.Dense(128, activation='relu'),\n",
" tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')\n",
"])\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ff4f0d06",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 3.0344 - accuracy: 0.0708 - val_loss: 2.4118 - val_accuracy: 0.1034\n",
"Epoch 2/10\n",
"4/4 [==============================] - 4s 1s/step - loss: 2.3133 - accuracy: 0.3274 - val_loss: 1.6620 - val_accuracy: 0.9310\n",
"Epoch 3/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 1.2560 - accuracy: 0.9558 - val_loss: 0.4894 - val_accuracy: 0.9655\n",
"Epoch 4/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 0.2415 - accuracy: 0.9912 - val_loss: 0.0362 - val_accuracy: 1.0000\n",
"Epoch 5/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 0.0340 - accuracy: 0.9912 - val_loss: 0.0024 - val_accuracy: 1.0000\n",
"Epoch 6/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 0.0021 - accuracy: 1.0000 - val_loss: 0.0127 - val_accuracy: 1.0000\n",
"Epoch 7/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 0.0040 - accuracy: 1.0000 - val_loss: 3.6882e-05 - val_accuracy: 1.0000\n",
"Epoch 8/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 9.9268e-05 - accuracy: 1.0000 - val_loss: 2.7212e-06 - val_accuracy: 1.0000\n",
"Epoch 9/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 5.2195e-05 - accuracy: 1.0000 - val_loss: 6.4126e-07 - val_accuracy: 1.0000\n",
"Epoch 10/10\n",
"4/4 [==============================] - 5s 1s/step - loss: 1.3251e-05 - accuracy: 1.0000 - val_loss: 2.7130e-07 - val_accuracy: 1.0000\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x2d653970160>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n",
"model.fit(train_data, train_labels, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(val_data, val_labels))\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "61d6a8d8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 3 of 3). These functions will not be directly callable after loading.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: ./models/model\\assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: ./models/model\\assets\n"
]
}
],
"source": [
"model.save('./models/model')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "dc610fdb",
"metadata": {},
"outputs": [],
"source": [
"def load_test_dataset(dataset_path):\n",
" test_data = []\n",
" test_labels = []\n",
" for class_name in CLASSES:\n",
" class_path = os.path.join(dataset_path, class_name)\n",
" for img_name in os.listdir(class_path):\n",
" try:\n",
" img_path = os.path.join(class_path, img_name)\n",
" img = cv2.imread(img_path)\n",
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert color space\n",
" img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) # resize image\n",
" test_data.append(img)\n",
" test_labels.append(CLASSES.index(class_name))\n",
" except Exception as e:\n",
" print(f\"Error loading image {img_path}: {e}\")\n",
" test_data = np.array(test_data, dtype=np.float32) / 255.0 # normalize pixel values\n",
" test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=NUM_CLASSES) # one-hot encode labels\n",
" return test_data, test_labels\n",
"\n",
"test_data, test_labels = load_test_dataset('D:/RP/data/validation')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "6b6d20d2",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'test_data' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17676\\3131021014.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpredictions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mpredicted_classes\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpredictions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'test_data' is not defined"
]
}
],
"source": [
"predictions = model.predict(test_data)\n",
"predicted_classes = np.argmax(predictions, axis=1)\n",
"\n",
"accuracy = np.sum(predicted_classes == np.argmax(test_labels, axis=1)) / len(test_labels)\n",
"print(\"Test Accuracy:\", accuracy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2bd77ac5",
"metadata": {},
"outputs": [],
"source": [
"# Save the trained model\n",
"model.save('./models/sign_language_model.h5')\n",
"\n",
"# Load the saved model\n",
"loaded_model = tf.keras.models.load_model('./models/sign_language_model.h5')\n",
"\n",
"# Use the loaded model for predictions\n",
"predictions = loaded_model.predict(test_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "885678c5",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"img = cv2.imread('./scene00548.png')\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))\n",
"img = np.array([img], dtype=np.float32) / 255.0\n",
"prediction = model.predict(img)\n",
"class_index = np.argmax(prediction)\n",
"class_name = CLASSES[class_index]\n",
"sinhala_letter = letter_mapping.get(class_name, 'Unknown')\n",
"print(sinhala_letter)"
]
},
{
"cell_type": "markdown",
"id": "69b66fc1",
"metadata": {},
"source": [
"### Load saved model\n"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "50944c95",
"metadata": {},
"outputs": [],
"source": [
"# Load the saved model\n",
"model = tf.keras.models.load_model('./models/model')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "75cef5e3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1/1 [==============================] - 0s 100ms/step\n",
"Ohh\n",
"ඕ\n"
]
}
],
"source": [
"img = cv2.imread('./scene00001.png')\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))\n",
"img = np.array([img], dtype=np.float32) / 255.0\n",
"prediction = model.predict(img)\n",
"class_index = np.argmax(prediction)\n",
"class_name = CLASSES[class_index]\n",
"print(class_name)\n",
"sinhala_letter = letter_mapping.get(class_name, 'Unknown')\n",
"print(sinhala_letter)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 12,
"id": "ade37944", "id": "ade37944",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
...@@ -17,22 +17,73 @@ ...@@ -17,22 +17,73 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 13,
"id": "16176bf6", "id": "16176bf6",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"['A',\n",
" 'Aah',\n",
" 'Ae',\n",
" 'Aeh',\n",
" 'Ah',\n",
" 'Ee',\n",
" 'Eeh',\n",
" 'Ig',\n",
" 'K',\n",
" 'O',\n",
" 'Ohh',\n",
" 'T',\n",
" 'Uh',\n",
" 'Uhh']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"IMG_SIZE = 224 # image size\n", "IMG_SIZE = 224 # image size\n",
"BATCH_SIZE = 32 # batch size\n", "BATCH_SIZE = 32 # batch size\n",
"EPOCHS = 2 # number of epochs\n", "EPOCHS = 10 # number of epochs\n",
"CLASSES = os.listdir('./data/Sn_sign_language_dataset') # list of classes\n", "CLASSES = os.listdir('D:/RP/data/training') # list of classes\n",
"NUM_CLASSES = len(CLASSES) # number of classes\n", "NUM_CLASSES = len(CLASSES) # number of classes\n",
"\n" "\n",
"CLASSES"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "8f7b1301",
"metadata": {},
"outputs": [],
"source": [
"# Dictionary to map English letters to Sinhala letters\n",
"letter_mapping = {\n",
" 'Ah': 'අ',\n",
" 'Aah': 'ආ',\n",
" 'Aeh': 'ඇ',\n",
" 'Ee': 'ඉ',\n",
" 'Eeh': 'ඊ',\n",
" 'Uh': 'උ',\n",
" 'Uhh': 'ඌ',\n",
" 'A': 'එ',\n",
" 'Ae': 'ඒ',\n",
" 'O': 'ඔ',\n",
" 'Ohh': 'ඕ',\n",
" 'K': 'ක්',\n",
" 'Ig': 'ග්',\n",
" 'T': 'ටී'\n",
"}"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 15,
"id": "c9034cbe", "id": "c9034cbe",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
...@@ -62,91 +113,18 @@ ...@@ -62,91 +113,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 16,
"id": "7adb379e", "id": "7adb379e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\eight: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\eleven_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\eleven_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\fifty_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\fifty_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\fifty_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\five: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\four & fourteen_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\fourteen_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\fourteen_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\nine: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\one & ten_2 & eleven_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\README.md: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\seven: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\six: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\ten_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\ten_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\thirteen_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\thirteen_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\thirty_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\thirty_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\thirty_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\three & thirteen_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\twenty_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\twenty_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\twenty_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\two: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\what: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\when_1: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\when_2: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\when_3: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\who: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n",
"Error loading image ./data/Sn_sign_language_dataset\\Sn_sign_language_dataset\\why: OpenCV(4.7.0) D:\\a\\opencv-python\\opencv-python\\opencv\\modules\\imgproc\\src\\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'\n",
"\n"
]
}
],
"source": [ "source": [
"data, labels = load_dataset('./data/Sn_sign_language_dataset')\n", "data, labels = load_dataset('D:/RP/data/training')\n",
"train_data, val_data, train_labels, val_labels = train_test_split(data, labels, test_size=0.2, random_state=42)\n" "train_data, val_data, train_labels, val_labels = train_test_split(data, labels, test_size=0.2, random_state=42)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 17,
"id": "d44f7806", "id": "d44f7806",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
...@@ -166,27 +144,45 @@ ...@@ -166,27 +144,45 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 18,
"id": "ff4f0d06", "id": "ff4f0d06",
"metadata": {}, "metadata": {
"scrolled": true
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Epoch 1/2\n", "Epoch 1/10\n",
"287/287 [==============================] - 464s 2s/step - loss: 0.5357 - accuracy: 0.8555 - val_loss: 0.0319 - val_accuracy: 0.9917\n", "152/152 [==============================] - 217s 1s/step - loss: 0.8329 - accuracy: 0.7585 - val_loss: 0.0838 - val_accuracy: 0.9860\n",
"Epoch 2/2\n", "Epoch 2/10\n",
"287/287 [==============================] - 540s 2s/step - loss: 0.0248 - accuracy: 0.9915 - val_loss: 0.0313 - val_accuracy: 0.9900\n" "152/152 [==============================] - 205s 1s/step - loss: 0.0374 - accuracy: 0.9913 - val_loss: 0.0139 - val_accuracy: 0.9942\n",
"Epoch 3/10\n",
"152/152 [==============================] - 212s 1s/step - loss: 0.0022 - accuracy: 0.9998 - val_loss: 0.0106 - val_accuracy: 0.9959\n",
"Epoch 4/10\n",
"152/152 [==============================] - 211s 1s/step - loss: 0.0147 - accuracy: 0.9955 - val_loss: 0.0418 - val_accuracy: 0.9818\n",
"Epoch 5/10\n",
"152/152 [==============================] - 205s 1s/step - loss: 0.0190 - accuracy: 0.9955 - val_loss: 0.0273 - val_accuracy: 0.9917\n",
"Epoch 6/10\n",
"152/152 [==============================] - 205s 1s/step - loss: 0.0142 - accuracy: 0.9967 - val_loss: 0.0509 - val_accuracy: 0.9942\n",
"Epoch 7/10\n",
"152/152 [==============================] - 214s 1s/step - loss: 0.0037 - accuracy: 0.9990 - val_loss: 0.0027 - val_accuracy: 0.9992\n",
"Epoch 8/10\n",
"152/152 [==============================] - 230s 2s/step - loss: 0.0110 - accuracy: 0.9969 - val_loss: 0.0188 - val_accuracy: 0.9967\n",
"Epoch 9/10\n",
"152/152 [==============================] - 220s 1s/step - loss: 1.7629e-04 - accuracy: 1.0000 - val_loss: 0.0190 - val_accuracy: 0.9967\n",
"Epoch 10/10\n",
"152/152 [==============================] - 208s 1s/step - loss: 1.5000e-05 - accuracy: 1.0000 - val_loss: 0.0197 - val_accuracy: 0.9967\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<keras.callbacks.History at 0x166153865b0>" "<keras.callbacks.History at 0x2d6000ebeb0>"
] ]
}, },
"execution_count": 14, "execution_count": 18,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -198,7 +194,7 @@ ...@@ -198,7 +194,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 19,
"id": "61d6a8d8", "id": "61d6a8d8",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
...@@ -213,257 +209,165 @@ ...@@ -213,257 +209,165 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"INFO:tensorflow:Assets written to: ./models/model-new\\assets\n" "INFO:tensorflow:Assets written to: ./models/model\\assets\n"
] ]
}, },
{ {
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"INFO:tensorflow:Assets written to: ./models/model-new\\assets\n" "INFO:tensorflow:Assets written to: ./models/model\\assets\n"
] ]
} }
], ],
"source": [ "source": [
"model.save('./models/model-new')" "model.save('./models/model')"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 20,
"id": "885678c5", "id": "fdc9bfe6",
"metadata": {},
"outputs": [],
"source": [
"def load_test_dataset(dataset_path):\n",
" test_data = []\n",
" test_labels = []\n",
" for class_name in CLASSES:\n",
" class_path = os.path.join(dataset_path, class_name)\n",
" for img_name in os.listdir(class_path):\n",
" try:\n",
" img_path = os.path.join(class_path, img_name)\n",
" img = cv2.imread(img_path)\n",
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert color space\n",
" img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) # resize image\n",
" test_data.append(img)\n",
" test_labels.append(CLASSES.index(class_name))\n",
" except Exception as e:\n",
" print(f\"Error loading image {img_path}: {e}\")\n",
" test_data = np.array(test_data, dtype=np.float32) / 255.0 # normalize pixel values\n",
" test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=NUM_CLASSES) # one-hot encode labels\n",
" return test_data, test_labels\n",
"\n",
"test_data, test_labels = load_test_dataset('D:/RP/data/validation')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "297e3e3c",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"1/1 [==============================] - 0s 155ms/step\n", "5/5 [==============================] - 2s 297ms/step\n",
"nine\n" "Test Accuracy: 0.9225352112676056\n"
] ]
} }
], ],
"source": [ "source": [
"img = cv2.imread('./IMG_1132.jpeg')\n", "predictions = model.predict(test_data)\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", "predicted_classes = np.argmax(predictions, axis=1)\n",
"img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))\n", "\n",
"img = np.array([img], dtype=np.float32) / 255.0\n", "accuracy = np.sum(predicted_classes == np.argmax(test_labels, axis=1)) / len(test_labels)\n",
"prediction = model.predict(img)\n", "print(\"Test Accuracy:\", accuracy)"
"class_name = CLASSES[np.argmax(prediction)]\n",
"print(class_name)"
]
},
{
"cell_type": "markdown",
"id": "69b66fc1",
"metadata": {},
"source": [
"### Load saved model\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 22,
"id": "50944c95", "id": "e22211b0",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5/5 [==============================] - 2s 299ms/step\n"
]
}
],
"source": [ "source": [
"# Save the trained model\n",
"model.save('./models/sign_language_model.h5')\n",
"\n",
"# Load the saved model\n", "# Load the saved model\n",
"model = tf.keras.models.load_model('./models/model')" "loaded_model = tf.keras.models.load_model('./models/sign_language_model.h5')\n",
"\n",
"# Use the loaded model for predictions\n",
"predictions = loaded_model.predict(test_data)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "75cef5e3", "id": "885678c5",
"metadata": {}, "metadata": {
"scrolled": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"img = cv2.imread('./IMG_14.jpg')\n", "img = cv2.imread('./scene00548.png')\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", "img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))\n", "img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))\n",
"img = np.array([img], dtype=np.float32) / 255.0\n", "img = np.array([img], dtype=np.float32) / 255.0\n",
"prediction = model.predict(img)\n", "prediction = model.predict(img)\n",
"class_name = CLASSES[np.argmax(prediction)]\n", "class_index = np.argmax(prediction)\n",
"print(class_name)" "class_name = CLASSES[class_index]\n",
"sinhala_letter = letter_mapping.get(class_name, 'Unknown')\n",
"print(sinhala_letter)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": null, "id": "69b66fc1",
"id": "cea24baa",
"metadata": {}, "metadata": {},
"outputs": [],
"source": [ "source": [
"import cv2\n", "### Load saved model\n"
"import mediapipe as mp\n",
"import numpy as np\n",
"\n",
"# Initialize the MediaPipe hand detection object\n",
"mp_hands = mp.solutions.hands.Hands()\n",
"\n",
"# Define the lower and upper boundaries of the skin color in the HSV color space\n",
"lower_skin = np.array([0, 20, 70], dtype=np.uint8)\n",
"upper_skin = np.array([20, 255, 255], dtype=np.uint8)\n",
"\n",
"# Initialize the video capture object\n",
"cap = cv2.VideoCapture(0)\n",
"\n",
"while True:\n",
" # Capture a frame from the video feed\n",
" ret, frame = cap.read()\n",
"\n",
" # Convert the frame to the RGB color space\n",
" frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n",
"\n",
" # Detect hands in the frame using MediaPipe\n",
" results = mp_hands.process(frame)\n",
"\n",
" # Check if hands were detected\n",
" if results.multi_hand_landmarks:\n",
" # Iterate over the detected hands\n",
" for hand_landmarks in results.multi_hand_landmarks:\n",
" # Convert the landmarks to pixel coordinates\n",
" landmarks = [[int(l.x * frame.shape[1]), int(l.y * frame.shape[0])] for l in hand_landmarks.landmark]\n",
"\n",
" # Get the bounding rectangle of the hand\n",
" x, y, w, h = cv2.boundingRect(np.array(landmarks))\n",
"\n",
" # Scale the bounding rectangle\n",
" scale_factor = 1.5\n",
" w = int(scale_factor * w)\n",
" h = int(scale_factor * h)\n",
" x = int(x - (scale_factor - 1) / 2 * w)\n",
" y = int(y - (scale_factor - 1) / 2 * h)\n",
"\n",
" # Crop the region of interest (ROI) containing the hand\n",
" roi = frame[y:y+h, x:x+w]\n",
"\n",
" # Convert the ROI to the HSV color space\n",
" hsv = cv2.cvtColor(roi, cv2.COLOR_RGB2HSV)\n",
"\n",
" # Threshold the ROI to only keep the skin color\n",
" mask = cv2.inRange(hsv, lower_skin, upper_skin)\n",
"\n",
" # Find contours in the mask\n",
" contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n",
"\n",
" # Iterate over the contours to find the hand\n",
" for contour in contours:\n",
" # Get the bounding rectangle of the contour\n",
" cx, cy, cw, ch = cv2.boundingRect(contour)\n",
"\n",
" # Display a rectangle around the hand\n",
" cv2.rectangle(frame, (x+cx, y+cy), (x+cx+cw, y+cy+ch), (0, 255, 0), 2)\n",
"\n",
" # If the 'c' key is pressed, capture the image containing the hand\n",
" if cv2.waitKey(1) & 0xFF == ord('c'):\n",
" # Apply histogram equalization to the ROI to enhance its quality\n",
" roi = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)\n",
" roi = cv2.equalizeHist(roi)\n",
" roi = cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)\n",
"\n",
" # Save the image containing the hand in its original color format\n",
" cv2.imwrite('hand_image.jpg', roi)\n",
"\n",
" # Convert the frame back to the BGR color space\n",
" frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)\n",
"\n",
" # Display the video feed\n",
" cv2.imshow('Hand Detection', frame)\n",
"\n",
" # If the 'q' key is pressed, exit the loop\n",
" if cv2.waitKey(1) & 0xFF == ord('q'):\n",
" break\n",
"\n",
"# Release the video capture object and close all windows\n",
"cap.release()\n",
"cv2.destroyAllWindows()\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 29,
"id": "eb44c84e", "id": "50944c95",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import cv2\n", "# Load the saved model\n",
"import mediapipe as mp\n", "model = tf.keras.models.load_model('./models/model')"
"import numpy as np\n",
"\n",
"# Initialize Mediapipe hand detection model\n",
"mp_hands = mp.solutions.hands\n",
"\n",
"# Initialize drawing utilities for visualizing hand landmarks\n",
"mp_drawing = mp.solutions.drawing_utils\n",
"\n",
"# Initialize camera capture\n",
"capture = cv2.VideoCapture(0)\n",
"\n",
"# Continuously capture frames from camera\n",
"while True:\n",
" # Capture a frame from the camera\n",
" ret, image = capture.read()\n",
"\n",
" # Convert image to RGB format\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
"\n",
" # Detect hands in image\n",
" with mp_hands.Hands(\n",
" static_image_mode=True,\n",
" max_num_hands=1,\n",
" min_detection_confidence=0.5) as hands:\n",
"\n",
" # Process image\n",
" results = hands.process(image)\n",
"\n",
" # Get landmarks of hand\n",
" if results.multi_hand_landmarks:\n",
" landmarks = results.multi_hand_landmarks[0]\n",
"\n",
" # Convert landmarks to numpy array\n",
" landmarks_array = np.array([(lm.x, lm.y) for lm in landmarks.landmark])\n",
"\n",
" # Get bounding box of hand and arm\n",
" x, y, w, h = cv2.boundingRect(landmarks_array.astype(np.int))\n",
"\n",
" # Crop image to bounding box of hand and arm\n",
" cropped_image = image[y:y+h, x:x+w]\n",
"\n",
" # Draw landmarks on original image\n",
" mp_drawing.draw_landmarks(\n",
" image, landmarks, mp_hands.HAND_CONNECTIONS)\n",
"\n",
" # Display cropped image\n",
" cv2.imshow(\"Hand Image\", cropped_image)\n",
"\n",
" # Display original image\n",
" cv2.imshow(\"Hand Detection\", cv2.cvtColor(image, cv2.COLOR_RGB2BGR))\n",
"\n",
" # Save cropped image to file on key press\n",
" key = cv2.waitKey(1) & 0xFF\n",
" if key == ord('s'):\n",
" cv2.imwrite(\"output.jpg\", cropped_image)\n",
"\n",
" # Exit on 'q' key\n",
" elif key == ord('q'):\n",
" break\n",
"\n",
"# Release camera capture\n",
"capture.release()\n",
"\n",
"# Close all windows\n",
"cv2.destroyAllWindows()\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 30,
"id": "d0fe6fff", "id": "75cef5e3",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"name": "stdout",
"output_type": "stream",
"text": [
"1/1 [==============================] - 0s 100ms/step\n",
"Ohh\n",
"ඕ\n"
]
}
],
"source": [
"img = cv2.imread('./scene00001.png')\n",
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
"img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))\n",
"img = np.array([img], dtype=np.float32) / 255.0\n",
"prediction = model.predict(img)\n",
"class_index = np.argmax(prediction)\n",
"class_name = CLASSES[class_index]\n",
"print(class_name)\n",
"sinhala_letter = letter_mapping.get(class_name, 'Unknown')\n",
"print(sinhala_letter)"
]
} }
], ],
"metadata": { "metadata": {
......
...@@ -15,3 +15,181 @@
files/*
!files/
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/python
\ No newline at end of file
# TMP-23-029
SLIIT Final Year Project
Python Environment - Python 3.10.11
Commands
Install libraries
pip install -r requirements.txt
Run application
uvicorn main:app --reload
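Example request (a minimal sketch, not part of the commit): assuming the API is running locally at uvicorn's default address http://127.0.0.1:8000 and a sample image hand.jpg is available, the image prediction route added in this change can be exercised with the Python requests library. The route path and the image_request form field come from the router defined in this commit; the address and file name are assumptions.

import requests

# Hedged usage sketch: the local address and hand.jpg are assumptions;
# the route and the image_request field name match the router in this commit.
with open("hand.jpg", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:8000/predict-sign-language/image",
        files={"image_request": ("hand.jpg", f, "image/jpeg")},
    )
print(resp.json())  # expected on success: {"prediction": "<Sinhala letter>"}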
...@@ -2,3 +2,5 @@
2023-05-19 00:32:23,385 - INFO - Received request at root endpoint.
2023-05-19 00:32:48,522 - ERROR - Received request at root endpoint.
2023-05-19 00:32:48,522 - ERROR - Received request at root endpoint.
2023-05-19 23:09:38,565 - INFO - Failed to make predictions. name 'CLASSES' is not defined
2023-05-19 23:09:38,565 - INFO - Failed to make predictions. name 'CLASSES' is not defined
...@@ -7,6 +7,8 @@ from pydantic import BaseModel ...@@ -7,6 +7,8 @@ from pydantic import BaseModel
import tensorflow as tf import tensorflow as tf
from core import setup_logger from core import setup_logger
from services.translate_service import SignLanguagePredictionService
from utils import mappings
router = APIRouter() router = APIRouter()
...@@ -16,11 +18,15 @@ class ImageRequest(BaseModel): ...@@ -16,11 +18,15 @@ class ImageRequest(BaseModel):
image: UploadFile image: UploadFile
# Load your Keras model # Load your Keras model
model = tf.keras.models.load_model('D:\RP\SL-Detection-Action-Recognition\models\model') model = tf.keras.models.load_model('../ML_Models/sign_language_to_text/models/sign_language_model.h5')
CLASSES = os.listdir('D:\RP\SL-Detection-Action-Recognition\data\Sn_sign_language_dataset') # list of classes CLASSES = mappings.classes
NUM_CLASSES = len(CLASSES) # number of classes NUM_CLASSES = len(mappings.classes) # number of classes
IMG_SIZE = 224 # image size IMG_SIZE = 224 # image size
# Instantiate the service class
prediction_service = SignLanguagePredictionService(model, CLASSES, mappings)
@router.post("/upload/video") @router.post("/upload/video")
async def upload_video(video: UploadFile = File(...)): async def upload_video(video: UploadFile = File(...)):
try: try:
...@@ -37,27 +43,23 @@ async def upload_video(video: UploadFile = File(...)): ...@@ -37,27 +43,23 @@ async def upload_video(video: UploadFile = File(...)):
detail="Failed to upload the video" detail="Failed to upload the video"
) )
@router.post('/predict-sign-language') @router.post('/predict-sign-language/image')
def predict(image_request: UploadFile = File(...)): def predict_using_image(image_request: UploadFile = File(...)):
try: try:
file_location = f"files/{image_request.filename}" return prediction_service.predict_sign_language(image_request)
with open(file_location, "wb") as file: except Exception as e:
file.write(image_request.file.read()) logger.info(f"Error. {e}")
raise HTTPException(
# Load the saved image using OpenCV status_code=500,
img = cv2.imread(file_location) detail="Request Failed."
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) )
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) @router.post('/predict-sign-language/video')
img = np.array([img], dtype=np.float32) / 255.0 def predict_using_video(video_request: UploadFile = File(...)):
try:
# Make prediction return prediction_service.predict_sign_language_video(video_request)
prediction = model.predict(img)
class_name = CLASSES[np.argmax(prediction)]
return {'class_name': class_name}
except Exception as e: except Exception as e:
logger.info(f"Failed to make predictions. {e}") logger.info(f"Error. {e}")
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail="Failed to make predictions" detail="Request Failed."
) )
\ No newline at end of file
...@@ -2,6 +2,10 @@ from fastapi import APIRouter ...@@ -2,6 +2,10 @@ from fastapi import APIRouter
router = APIRouter() router = APIRouter()
@router.get("/ping")
def test():
# Your code here
return {"pong"}
@router.get("/users") @router.get("/users")
def get_users(): def get_users():
......
import os
import cv2
import numpy as np
from fastapi import HTTPException, UploadFile
from typing import Dict
import tensorflow as tf
from core import setup_logger
from utils import mappings
logger = setup_logger()
IMG_SIZE = 224 # image size
class SignLanguagePredictionService:
def __init__(self, model, classes, mappings):
self.model = model
self.classes = classes
self.mappings = mappings
def predict_sign_language(self, image_request: UploadFile) -> Dict[str, str]:
try:
file_location = f"files/{image_request.filename}"
with open(file_location, "wb") as file:
file.write(image_request.file.read())
img = cv2.imread(file_location)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
img = np.array([img], dtype=np.float32) / 255.0
prediction = self.model.predict(img)
class_index = np.argmax(prediction)
class_name = self.classes[class_index]
sinhala_letter = self.mappings.letter_mapping.get(class_name, 'Unknown')
# Delete the image file
os.remove(file_location)
return {'prediction': sinhala_letter}
except Exception as e:
logger.info(f"Failed to make predictions. {e}")
raise HTTPException(
status_code=500,
detail="Failed to make predictions"
)
def predict_sign_language_video(self, video_request: UploadFile) -> Dict[str, str]:
try:
# Create a temporary file to save the video
video_location = f"files/{video_request.filename}"
with open(video_location, "wb") as file:
file.write(video_request.file.read())
# Read the video using OpenCV
video = cv2.VideoCapture(video_location)
predictions = []
frame_count = 0
# Loop through the frames of the video
while frame_count < 20:
success, frame = video.read()
if not success:
break
# Preprocess the frame
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
frame = np.array([frame], dtype=np.float32) / 255.0
# Make prediction
prediction = self.model.predict(frame)
class_index = np.argmax(prediction)
class_name = self.classes[class_index]
sinhala_letter = self.mappings.letter_mapping.get(class_name, 'Unknown')
# Store the prediction for the frame
predictions.append(sinhala_letter)
frame_count += 1
video.release()
# Delete the video file
os.remove(video_location)
return {'frame_count': frame_count, 'predictions': predictions}
except Exception as e:
logger.info(f"Failed to make predictions. {e}")
raise HTTPException(
status_code=500,
detail="Failed to make predictions"
)
letter_mapping = {
'Ah': 'අ',
'Aah': 'ආ',
'Aeh': 'ඇ',
'Ee': 'ඉ',
'Eeh': 'ඊ',
'Uh': 'උ',
'Uhh': 'ඌ',
'A': 'එ',
'Ae': 'ඒ',
'O': 'ඔ',
'Ohh': 'ඕ',
'K': 'ක්',
'Ig': 'ග්',
'T': 'ටී'
}
classes = ['A',
'Aah',
'Ae',
'Aeh',
'Ah',
'Ee',
'Eeh',
'Ig',
'K',
'O',
'Ohh',
'T',
'Uh',
'Uhh']
\ No newline at end of file