feat: create sign language translation python code

parent d95d1752
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "4a663198",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from PIL import Image\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"from keras.models import load_model"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6a893211",
"metadata": {},
"outputs": [],
"source": [
"# Set the dataset path and image size\n",
"dataset_path = \"C:/Users/himashara/Documents/signlanguage/training/\"\n",
"image_size = (480, 480) # Adjust the image size as per your requirements\n",
"\n",
"# Load and preprocess the dataset\n",
"def load_dataset():\n",
" labels = []\n",
" images = []\n",
" for label in os.listdir(dataset_path):\n",
" label_path = os.path.join(dataset_path, label)\n",
" if os.path.isdir(label_path):\n",
" for image_file in os.listdir(label_path):\n",
" image_path = os.path.join(label_path, image_file)\n",
" if os.path.isfile(image_path):\n",
" image = Image.open(image_path)\n",
" image = image.resize(image_size)\n",
" image = np.array(image)\n",
" images.append(image)\n",
" labels.append(label)\n",
" images = np.array(images)\n",
" labels = np.array(labels)\n",
" return images, labels\n",
"\n",
"images, labels = load_dataset()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ef0e4281",
"metadata": {},
"outputs": [],
"source": [
"# Encode the labels into numerical values\n",
"label_encoder = LabelEncoder()\n",
"labels = label_encoder.fit_transform(labels)\n",
"\n",
"# Split the dataset into training and testing sets\n",
"X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)\n",
"\n",
"# Preprocess the input data\n",
"X_train = X_train.astype(\"float32\") / 255.0\n",
"X_test = X_test.astype(\"float32\") / 255.0\n",
"num_classes = len(np.unique(labels))\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "555ffeee",
"metadata": {},
"outputs": [],
"source": [
"# Define the model architecture\n",
"model = keras.Sequential([\n",
" layers.Conv2D(32, (3, 3), activation=\"relu\", input_shape=(image_size[0], image_size[1], 3)),\n",
" layers.MaxPooling2D(pool_size=(2, 2)),\n",
" layers.Flatten(),\n",
" layers.Dense(64, activation=\"relu\"),\n",
" layers.Dense(num_classes, activation=\"softmax\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "eb07946c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"22/22 [==============================] - 58s 3s/step - loss: 58.0361 - accuracy: 0.1509 - val_loss: 2.3127 - val_accuracy: 0.2059\n",
"Epoch 2/10\n",
"22/22 [==============================] - 59s 3s/step - loss: 2.3554 - accuracy: 0.2278 - val_loss: 2.0756 - val_accuracy: 0.1824\n",
"Epoch 3/10\n",
"22/22 [==============================] - 67s 3s/step - loss: 2.0049 - accuracy: 0.3195 - val_loss: 2.0513 - val_accuracy: 0.2765\n",
"Epoch 4/10\n",
"22/22 [==============================] - 65s 3s/step - loss: 1.9182 - accuracy: 0.3580 - val_loss: 1.8208 - val_accuracy: 0.3235\n",
"Epoch 5/10\n",
"22/22 [==============================] - 87s 4s/step - loss: 1.7040 - accuracy: 0.3905 - val_loss: 1.7384 - val_accuracy: 0.3294\n",
"Epoch 6/10\n",
"22/22 [==============================] - 70s 3s/step - loss: 1.6378 - accuracy: 0.4024 - val_loss: 1.9270 - val_accuracy: 0.3059\n",
"Epoch 7/10\n",
"22/22 [==============================] - 71s 3s/step - loss: 1.6423 - accuracy: 0.3905 - val_loss: 1.7039 - val_accuracy: 0.3529\n",
"Epoch 8/10\n",
"22/22 [==============================] - 66s 3s/step - loss: 1.5635 - accuracy: 0.4083 - val_loss: 1.6209 - val_accuracy: 0.3294\n",
"Epoch 9/10\n",
"22/22 [==============================] - 67s 3s/step - loss: 1.4948 - accuracy: 0.4038 - val_loss: 1.5666 - val_accuracy: 0.3765\n",
"Epoch 10/10\n",
"22/22 [==============================] - 74s 3s/step - loss: 1.4630 - accuracy: 0.4615 - val_loss: 1.5821 - val_accuracy: 0.3588\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x26f794884c0>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compile and train the model\n",
"model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
"model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "81b6613d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6/6 [==============================] - 7s 1s/step - loss: 1.5821 - accuracy: 0.3588\n",
"Test Loss: 1.5821244716644287\n",
"Test Accuracy: 0.3588235378265381\n"
]
}
],
"source": [
"# Evaluate the model on the test dataset\n",
"loss, accuracy = model.evaluate(X_test, y_test)\n",
"print(\"Test Loss:\", loss)\n",
"print(\"Test Accuracy:\", accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "65ccc47b",
"metadata": {},
"outputs": [],
"source": [
"# Save the trained model\n",
"model.save(\"letter_classification_model.h5\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0db64f31",
"metadata": {},
"outputs": [],
"source": [
"# ---------------new updated ---------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0ce9c60",
"metadata": {},
"outputs": [],
"source": [
"# import os\n",
"# from PIL import Image\n",
"# import numpy as np\n",
"# from keras.models import load_model\n",
"# from sinling import SinhalaTokenizer\n",
"# from IPython.display import Image as DisplayImage, display\n",
"\n",
"# image_size = (480, 480)\n",
"\n",
"# # Dataset path\n",
"# dataset_path = \"C:/Users/himashara/Documents/signlanguage/training/\"\n",
"\n",
"# # Translation mapping from Sinhala Unicode to Singlish\n",
"# sinhala_to_singlish = {\n",
"# \"අ\": \"A\",\n",
"# \"ආ\": \"Aah\",\n",
"# \"ඇ\": \"Aeh\",\n",
"# \"ඉ\": \"Ee\",\n",
"# \"ඊ\": \"Eeh\",\n",
"# \"උ\": \"Uh\",\n",
"# \"ඌ\": \"Uhh\",\n",
"# \"එ\": \"A\",\n",
"# \"ඒ\": \"Ae\",\n",
"# \"ඔ\": \"O\",\n",
"# \"ඕ\": \"Ohh\",\n",
"# \"ක්\": \"K\",\n",
"# \"ග්\": \"Ig\",\n",
"# \"ටී\": \"Tee\",\n",
"# \"ට\": \"T\"\n",
"# }\n",
"\n",
"# # Function to retrieve the letter image\n",
"# def get_letter_image(letter):\n",
"# if os.path.exists(os.path.join(dataset_path, letter)):\n",
"# letter_path = os.path.join(dataset_path, letter)\n",
"# image_files = os.listdir(letter_path)\n",
"# image_path = os.path.join(letter_path, image_files[0]) # Assuming only one image per letter\n",
"# return Image.open(image_path)\n",
"# return None\n",
"\n",
"# # Load the pre-trained model\n",
"# model = load_model(\"letter_classification_model.h5\")\n",
"\n",
"# # Sinhala tokenizer\n",
"# tokenizer = SinhalaTokenizer()\n",
"\n",
"# # Function to preprocess Sinhala text\n",
"# def preprocess_sinhala_text(text):\n",
"# tokens = tokenizer.tokenize(text)\n",
"# return ' '.join(tokens)\n",
"\n",
"# # User input\n",
"# unicode_input = input(\"Enter a Sinhala Unicode string: \")\n",
"\n",
"# # Remove spaces from the user input\n",
"# unicode_input = unicode_input.replace(\" \", \"\")\n",
"\n",
"# # Translate Sinhala Unicode to Singlish\n",
"# singlish_input = ''.join([sinhala_to_singlish.get(c, c) for c in unicode_input])\n",
"\n",
"# # Print the translated Singlish string\n",
"# print(\"Translated Singlish string:\", singlish_input)\n",
"\n",
"# # Retrieve letter images for the Singlish input\n",
"# letter_images = []\n",
"# combined_string = \"\"\n",
"# for letter in singlish_input:\n",
"# combined_string += letter\n",
"# letter_image = get_letter_image(combined_string)\n",
"# if letter_image is not None:\n",
"# letter_image = letter_image.resize(image_size) # Adjust the size if needed\n",
"# letter_image = np.array(letter_image) / 255.0 # Normalize the image if needed\n",
"# letter_images.append(letter_image)\n",
"# combined_string = \"\"\n",
"\n",
"# # Combine the letter images into a GIF\n",
"# gif_images = [Image.fromarray(np.uint8(letter_image * 255)) for letter_image in letter_images]\n",
"\n",
"# # Save the combined images as a GIF\n",
"# output_path = \"C:/Users/himashara/Documents/signlanguage/generated-gif/generated.gif\"\n",
"# gif_images[0].save(output_path, save_all=True, append_images=gif_images[1:], loop=0, duration=1200)\n",
"\n",
"# # Display the generated GIF\n",
"# display(DisplayImage(filename=output_path))\n",
"# print(\"GIF saved successfully!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea87d3ca",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 67,
"id": "fc22ffc8",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import tkinter as tk\n",
"from tkinter import ttk\n",
"from PIL import Image, ImageTk, ImageSequence\n",
"import numpy as np\n",
"from keras.models import load_model\n",
"from sinling import SinhalaTokenizer\n",
"\n",
"# Create Tkinter window\n",
"window = tk.Tk()\n",
"window.title(\"Sinhala to Singlish Translator\")\n",
"\n",
"# Set up the layout using Tkinter's grid system\n",
"# Step 1: Text area to enter Sinhala Unicode string\n",
"unicode_label = ttk.Label(window, text=\"Enter a Sinhala Unicode string:\")\n",
"unicode_label.grid(row=0, column=0, sticky=tk.W)\n",
"unicode_entry = ttk.Entry(window, width=30)\n",
"unicode_entry.grid(row=0, column=1, columnspan=2, pady=10)\n",
"\n",
"# Step 2: Convert button\n",
"def convert_unicode_to_singlish():\n",
" # Retrieve the Unicode input from the text entry\n",
" unicode_input = unicode_entry.get()\n",
"\n",
" # Remove spaces from the user input\n",
" unicode_input = unicode_input.replace(\" \", \"\")\n",
"\n",
" # Translate Sinhala Unicode to Singlish\n",
" singlish_input = ''.join([sinhala_to_singlish.get(c, c) for c in unicode_input])\n",
"\n",
" # Update the Singlish text display\n",
" singlish_text.delete(1.0, tk.END)\n",
" singlish_text.insert(tk.END, singlish_input)\n",
"\n",
" # Retrieve letter images for the Singlish input\n",
" letter_images = []\n",
" combined_string = \"\"\n",
" for letter in singlish_input:\n",
" combined_string += letter\n",
" letter_image = get_letter_image(combined_string)\n",
" if letter_image is not None:\n",
" letter_image = letter_image.resize(image_size) # Adjust the size if needed\n",
" letter_image = np.array(letter_image) / 255.0 # Normalize the image if needed\n",
" letter_images.append(letter_image)\n",
" combined_string = \"\"\n",
"\n",
" # Combine the letter images into a GIF\n",
" gif_images = []\n",
" for letter_image in letter_images:\n",
" image_pil = Image.fromarray((letter_image * 255).astype(np.uint8))\n",
" gif_images.append(image_pil)\n",
"\n",
" # Save the combined images as a GIF\n",
" output_path = \"C:/Users/himashara/Documents/signlanguage/generated-gif/generated.gif\"\n",
" gif_images[0].save(output_path, save_all=True, append_images=gif_images[1:], loop=0, duration=1200)\n",
"\n",
" # Display the generated GIF\n",
" display_gif(output_path)\n",
"\n",
"def clear_results():\n",
" # Clear the entered Unicode string\n",
" unicode_entry.delete(0, tk.END)\n",
"\n",
" # Clear the Singlish text display\n",
" singlish_text.delete(1.0, tk.END)\n",
"\n",
" # Clear the GIF display\n",
" gif_label.configure(image=None)\n",
" gif_label.image = None\n",
"\n",
" # Cancel the pending after() calls\n",
" window.after_cancel(gif_animation_id)\n",
"\n",
"def display_gif(output_path):\n",
" # Load the GIF and extract frames using ImageSequence module\n",
" gif_image = Image.open(output_path)\n",
" frames = [frame.copy() for frame in ImageSequence.Iterator(gif_image)]\n",
"\n",
" # Create a Tkinter label widget to display the GIF frames\n",
" global gif_label\n",
" gif_label = ttk.Label(window)\n",
" gif_label.grid(row=3, column=1, columnspan=2, pady=10)\n",
"\n",
" # Recursive function to animate the GIF frames\n",
" def animate_frame(frame_index):\n",
" # Display the current frame on the label\n",
" frame = frames[frame_index]\n",
" frame_image = ImageTk.PhotoImage(frame)\n",
" gif_label.configure(image=frame_image)\n",
" gif_label.image = frame_image\n",
"\n",
" # Calculate the index of the next frame\n",
" next_frame_index = (frame_index + 1) % len(frames)\n",
"\n",
" # Call the function again after a certain delay\n",
" global gif_animation_id\n",
" gif_animation_id = window.after(1200, animate_frame, next_frame_index)\n",
"\n",
" # Start the animation by calling the recursive function with the first frame index\n",
" animate_frame(0)\n",
"\n",
"convert_button = ttk.Button(window, text=\"Convert\", command=convert_unicode_to_singlish)\n",
"convert_button.grid(row=1, column=0, pady=10)\n",
"\n",
"clear_button = ttk.Button(window, text=\"Clear Results\", command=clear_results)\n",
"clear_button.grid(row=1, column=1, pady=10)\n",
"\n",
"# Step 3: View area to display Translated Singlish string\n",
"singlish_label = ttk.Label(window, text=\"Translated Singlish string:\")\n",
"singlish_label.grid(row=2, column=0, sticky=tk.W)\n",
"singlish_text = tk.Text(window, height=1, width=30)\n",
"singlish_text.grid(row=2, column=1, columnspan=2, pady=10)\n",
"\n",
"# Dataset path\n",
"dataset_path = \"C:/Users/himashara/Documents/signlanguage/training/\"\n",
"\n",
"# Translation mapping from Sinhala Unicode to Singlish\n",
"sinhala_to_singlish = {\n",
" \"අ\": \"A\",\n",
" \"ආ\": \"Aah\",\n",
" \"ඇ\": \"Aeh\",\n",
" \"ඉ\": \"Ee\",\n",
" \"ඊ\": \"Eeh\",\n",
" \"උ\": \"Uh\",\n",
" \"ඌ\": \"Uhh\",\n",
" \"එ\": \"A\",\n",
" \"ඒ\": \"Ae\",\n",
" \"ඔ\": \"O\",\n",
" \"ඕ\": \"Ohh\",\n",
" \"ක්\": \"K\",\n",
" \"ග්\": \"Ig\",\n",
" \"ටී\": \"Tee\",\n",
" \"ට\": \"T\"\n",
"}\n",
"\n",
"# Image size\n",
"image_size = (480, 480)\n",
"\n",
"# Function to retrieve the letter image\n",
"def get_letter_image(letter):\n",
" if os.path.exists(os.path.join(dataset_path, letter)):\n",
" letter_path = os.path.join(dataset_path, letter)\n",
" image_files = os.listdir(letter_path)\n",
" image_path = os.path.join(letter_path, image_files[0]) # Assuming only one image per letter\n",
" return Image.open(image_path)\n",
" return None\n",
"\n",
"# Load the pre-trained model\n",
"model = load_model(\"letter_classification_model.h5\")\n",
"\n",
"# Sinhala tokenizer\n",
"tokenizer = SinhalaTokenizer()\n",
"\n",
"# Function to preprocess Sinhala text\n",
"def preprocess_sinhala_text(text):\n",
" tokens = tokenizer.tokenize(text)\n",
" return ' '.join(tokens)\n",
"\n",
"# Start the Tkinter event loop\n",
"window.mainloop()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2efcfc9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e25256c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1028b834",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
......@@ -28,15 +28,40 @@ The main objective of this project is to develop a Sign Language Translation Sys
### Member: Ranaweera R M S H (IT20251000)
**Research Question:**
- How can sign language translation be achieved by identifying audio and text components in a video and translating them into sign language using 3D components?
- How can sign language translation be achieved by identifying the audio components in a video and the text, and translating them into sign language using 3D components?
**Objectives:**
- Pre-process video data to extract audio and text components.
- Develop techniques to translate audio and text into sign language.
- Integrate sign language translation with 3D components to create a visually appealing sign language video.
- Evaluate the accuracy of sign language translation and the overall effectiveness of the demonstration.
- Pre-process video data to extract audio components.
- Develop techniques to translate audio into sign language.
- Develop techniques to translate text into sign language.
- Integrate sign language translation with 3D components to create visually appealing sign language videos.
- Evaluate the accuracy of sign language translation.
- Evaluate the overall effectiveness of the sign language translation demonstration.
- Apply computer graphics techniques to enhance the realism and visual quality of the sign language animations.
## Novelty and Contributions
- **Novel Approach:**
> The research question proposes a novel approach to sign language translation by utilizing audio components from a video and text input. This approach combines audio analysis and text translation with 3D components to create visually appealing sign language videos.
- **Audio Component Extraction:**
The research aims to develop techniques to pre-process video data and extract audio components. This extraction process is crucial for identifying and analyzing the audio elements necessary for sign language translation.
- **Audio-to-Sign Language Translation:**
The research contributes to the development of techniques that translate audio components into sign language. By analyzing the audio data, the system can generate accurate sign language gestures and expressions corresponding to the spoken words or sounds.
- **Text-to-Sign Language Translation:**
Another significant contribution is the development of techniques to translate text input into sign language. This enables users to input text directly, which the system converts into sign language gestures and expressions, providing a means of communication for individuals who are deaf or hard of hearing.
- **Integration of 3D Components:**
The research focuses on integrating sign language translation with 3D components to enhance the visual quality and realism of the sign language animations. This integration adds depth and realism to the sign language gestures and creates visually appealing videos that effectively convey the intended message.
- **Evaluation of Translation Accuracy:**
The research evaluates the accuracy of the sign language translation by comparing the generated sign language gestures with established sign language conventions. This evaluation ensures that the translations are linguistically and culturally appropriate, enhancing the overall effectiveness of the system.
- **Computer Graphics Enhancements:**
The research contributes to the application of computer graphics techniques to enhance the realism and visual quality of the sign language animations. By leveraging graphics capabilities, the system can create visually engaging and expressive sign language videos, improving the user experience.
### Member: A V R Dilshan (IT20005276)
**Research Question:**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment