Commit 2053bb22 authored by janithGamage

fix: update

Desc: update project
parent 26f2107c
models/*
!models/
DataSet/Sn_sign_language_dataset/
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f47e929b",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import asyncio\n",
"import torch.nn.functional as F\n",
"import torch.nn as nn\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"import io\n",
"import uvicorn\n",
"from fastapi import FastAPI, UploadFile\n",
"from PIL import Image\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "13b1d58b",
"metadata": {},
"outputs": [],
"source": [
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "35f4adc0",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a5aba4be",
"metadata": {},
"outputs": [],
"source": [
"# Preprocessing applied to every uploaded image before inference:\n",
"# resize to 300x300, reduce to one grayscale channel, convert to a tensor\n",
"# and normalise with mean 0.5 / std 0.5.\n",
"transform = transforms.Compose([\n",
"    transforms.Resize((300, 300)),\n",
"    transforms.Grayscale(num_output_channels=1),\n",
"    transforms.ToTensor(),\n",
"    # (0.5) is just the float 0.5; spell out the one-element tuple so the\n",
"    # per-channel intent is explicit for the single grayscale channel.\n",
"    transforms.Normalize(mean=(0.5,), std=(0.5,)),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "11ec2fae",
"metadata": {},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
"    \"\"\"Two-layer CNN that classifies single-channel images into 16 classes.\n",
"\n",
"    Each 5x5 convolution (stride 1, padding 1) is followed by a 2x2 max-pool\n",
"    and a ReLU; the result is flattened into a 50-unit hidden layer and a\n",
"    16-unit output layer.\n",
"\n",
"    Args:\n",
"        input_size: Side length in pixels of the square input image. The\n",
"            default of 300 yields 20 * 73 * 73 = 106580 flattened features.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, input_size=300):\n",
"        super().__init__()\n",
"\n",
"        self.conv01 = nn.Conv2d(\n",
"            in_channels=1,\n",
"            out_channels=10,\n",
"            kernel_size=5,\n",
"            stride=1,\n",
"            padding=1,\n",
"        )\n",
"\n",
"        self.conv02 = nn.Conv2d(\n",
"            in_channels=10,\n",
"            out_channels=20,\n",
"            kernel_size=5,\n",
"            stride=1,\n",
"            padding=1,\n",
"        )\n",
"\n",
"        # Follow the spatial size through both conv + pool stages so that\n",
"        # fc01 always matches what forward() produces.\n",
"        side = input_size\n",
"        for conv in (self.conv01, self.conv02):\n",
"            kernel, stride, padding = conv.kernel_size[0], conv.stride[0], conv.padding[0]\n",
"            side = (side + 2 * padding - kernel) // stride + 1  # convolution\n",
"            side //= 2  # F.max_pool2d(..., 2) in forward()\n",
"        expectedSize = self.conv02.out_channels * side ** 2\n",
"\n",
"        self.fc01 = nn.Linear(expectedSize, 50)\n",
"        self.output = nn.Linear(50, 16)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return class probabilities of shape (batch, 16).\n",
"\n",
"        Softmax is already applied here, so callers must not apply it again.\n",
"        \"\"\"\n",
"        x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
"        x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
"        # Flatten everything except the batch dimension.\n",
"        x = torch.flatten(x, 1)\n",
"        x = F.relu(self.fc01(x))\n",
"        return torch.softmax(self.output(x), dim=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "353a4725",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"theCNN(\n",
" (conv01): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))\n",
" (conv02): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))\n",
" (fc01): Linear(in_features=106580, out_features=50, bias=True)\n",
" (output): Linear(in_features=50, out_features=16, bias=True)\n",
")"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = theCNN()\n",
"model.load_state_dict(torch.load(\"model.pth\"))\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "bb87b7f0",
"metadata": {},
"outputs": [],
"source": [
"@app.post(\"/score\")\n",
"async def calculate_score(image_file: UploadFile):\n",
"    \"\"\"Classify an uploaded hand-sign image and score the prediction.\n",
"\n",
"    Args:\n",
"        image_file: Uploaded image; it is decoded with PIL and converted to\n",
"            8-bit grayscale before preprocessing.\n",
"\n",
"    Returns:\n",
"        dict with \"predicted_class\" (index of the most probable class) and\n",
"        \"correctness_percentage\" (100.0 when predicted_class + 1 equals the\n",
"        number returned by get_actual_number_from_image, otherwise 0.0).\n",
"    \"\"\"\n",
"    pil_image = Image.open(io.BytesIO(await image_file.read())).convert(\"L\")\n",
"    batch = transform(pil_image).unsqueeze(0)  # add the batch dimension\n",
"\n",
"    with torch.no_grad():\n",
"        # theCNN.forward already ends in softmax, so these are probabilities;\n",
"        # a second softmax here would flatten them towards uniform.\n",
"        probabilities = model(batch)[0]\n",
"\n",
"    predicted_class = torch.argmax(probabilities).item()\n",
"\n",
"    # Get the actual number corresponding to the hand sign. The digit reader\n",
"    # uses Otsu thresholding, which needs 8-bit pixels, so hand it the decoded\n",
"    # image rather than the normalised float tensor that is fed to the model.\n",
"    actual_number = get_actual_number_from_image(pil_image)\n",
"\n",
"    print(actual_number)\n",
"\n",
"    # NOTE(review): this assumes class index i stands for the number i + 1 -\n",
"    # confirm against the label mapping used when the model was trained.\n",
"    correct = predicted_class + 1 == actual_number\n",
"    print(int(correct))\n",
"    correctness_percentage = 100.0 if correct else 0.0\n",
"\n",
"    return {\"predicted_class\": predicted_class, \"correctness_percentage\": correctness_percentage}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "05646e93",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"def get_actual_number_from_image(image):\n",
"    \"\"\"Read the number shown in an image by recognising its digits.\n",
"\n",
"    The image is binarised with an inverted Otsu threshold, the external\n",
"    contours are ordered left to right, and each contour's bounding box is\n",
"    cropped, resized to 28x28, scaled to [0, 1], flattened and passed to\n",
"    predict_digit. The per-contour digits are concatenated into one integer.\n",
"\n",
"    Args:\n",
"        image: A single-channel image (PIL image, array or tensor). Singleton\n",
"            dimensions are dropped and non-uint8 data is rescaled to 0-255,\n",
"            because Otsu thresholding needs 8-bit input.\n",
"\n",
"    Returns:\n",
"        int: The recognised digits, read left to right.\n",
"\n",
"    Raises:\n",
"        ValueError: If the image is not 2-D after squeezing, or if no\n",
"            contour is found.\n",
"    \"\"\"\n",
"    # Convert the image to a 2-D numpy array\n",
"    image_array = np.squeeze(np.asarray(image))\n",
"    if image_array.ndim != 2:\n",
"        raise ValueError(f\"expected a single-channel image, got shape {image_array.shape}\")\n",
"\n",
"    if image_array.dtype != np.uint8:\n",
"        # Min-max rescale other dtypes (e.g. a normalised float tensor) to 0-255.\n",
"        as_float = image_array.astype(np.float32)\n",
"        low, high = float(as_float.min()), float(as_float.max())\n",
"        if high > low:\n",
"            as_float = (as_float - low) / (high - low) * 255.0\n",
"        else:\n",
"            as_float = np.zeros_like(as_float)\n",
"        image_array = as_float.astype(np.uint8)\n",
"\n",
"    # Apply thresholding\n",
"    _, binary_image = cv2.threshold(image_array, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)\n",
"\n",
"    # Find contours\n",
"    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
"    if not contours:\n",
"        # Without this guard int('') below fails with an unhelpful message.\n",
"        raise ValueError(\"no digit contours found in image\")\n",
"\n",
"    # Bounding boxes (x, y, w, h), ordered on their x-coordinate\n",
"    boxes = sorted((cv2.boundingRect(contour) for contour in contours), key=lambda box: box[0])\n",
"\n",
"    digits = []\n",
"    for x, y, w, h in boxes:\n",
"        # Crop the digit region and bring it to a fixed 28x28 size\n",
"        digit_image = binary_image[y:y + h, x:x + w]\n",
"        resized_digit_image = cv2.resize(digit_image, (28, 28))\n",
"\n",
"        # Scale pixel values to [0, 1] and flatten\n",
"        flattened_digit_image = (resized_digit_image / 255.0).flatten()\n",
"\n",
"        # NOTE(review): predict_digit is not defined anywhere in this notebook.\n",
"        # It has to be supplied (flattened 28x28 image in, integer digit out)\n",
"        # before this function can run.\n",
"        digits.append(str(predict_digit(flattened_digit_image)))\n",
"\n",
"    # Convert the digit sequence to an integer\n",
"    return int(\"\".join(digits))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee993fc1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: Started server process [36312]\n",
"INFO: Waiting for application startup.\n",
"INFO: Application startup complete.\n",
"INFO: Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"5\n",
"0\n",
"INFO: 127.0.0.1:60625 - \"POST /score HTTP/1.1\" 200 OK\n"
]
}
],
"source": [
"if __name__ == \"__main__\":\n",
" loop = asyncio.get_event_loop()\n",
" loop.create_task(uvicorn.run(app, host=\"127.0.0.1\", port=8001))\n",
" loop.run_forever()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34c5efea",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "c898b57c",
"metadata": {},
"outputs": [],
"source": [
"pip install python-multipart"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a99b4156",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import asyncio\n",
"import torch.nn.functional as F\n",
"\n",
"# Apply the patch\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "01b6e57c",
"metadata": {},
"outputs": [],
"source": [
"from fastapi import FastAPI, UploadFile\n",
"from PIL import Image\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"import io\n",
"import torch.nn as nn\n",
"import asyncio\n",
"import uvicorn "
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2e561f13",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1e9e4208",
"metadata": {},
"outputs": [],
"source": [
"# Preprocessing applied to every uploaded image before inference:\n",
"# resize to 300x300, reduce to one grayscale channel, convert to a tensor\n",
"# and normalise with mean 0.5 / std 0.5.\n",
"transform = transforms.Compose([\n",
"    transforms.Resize((300, 300)),\n",
"    transforms.Grayscale(num_output_channels=1),\n",
"    transforms.ToTensor(),\n",
"    # (0.5) is just the float 0.5; spell out the one-element tuple so the\n",
"    # per-channel intent is explicit for the single grayscale channel.\n",
"    transforms.Normalize(mean=(0.5,), std=(0.5,)),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "4d86d2b8",
"metadata": {},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
"    \"\"\"Two-layer CNN that classifies single-channel images into 16 classes.\n",
"\n",
"    Each 5x5 convolution (stride 1, padding 1) is followed by a 2x2 max-pool\n",
"    and a ReLU; the result is flattened into a 50-unit hidden layer and a\n",
"    16-unit output layer.\n",
"\n",
"    Args:\n",
"        input_size: Side length in pixels of the square input image. The\n",
"            default of 300 yields 20 * 73 * 73 = 106580 flattened features.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, input_size=300):\n",
"        super().__init__()\n",
"\n",
"        self.conv01 = nn.Conv2d(\n",
"            in_channels=1,\n",
"            out_channels=10,\n",
"            kernel_size=5,\n",
"            stride=1,\n",
"            padding=1,\n",
"        )\n",
"\n",
"        self.conv02 = nn.Conv2d(\n",
"            in_channels=10,\n",
"            out_channels=20,\n",
"            kernel_size=5,\n",
"            stride=1,\n",
"            padding=1,\n",
"        )\n",
"\n",
"        # Follow the spatial size through both conv + pool stages so that\n",
"        # fc01 always matches what forward() produces.\n",
"        side = input_size\n",
"        for conv in (self.conv01, self.conv02):\n",
"            kernel, stride, padding = conv.kernel_size[0], conv.stride[0], conv.padding[0]\n",
"            side = (side + 2 * padding - kernel) // stride + 1  # convolution\n",
"            side //= 2  # F.max_pool2d(..., 2) in forward()\n",
"        expectedSize = self.conv02.out_channels * side ** 2\n",
"\n",
"        self.fc01 = nn.Linear(expectedSize, 50)\n",
"        self.output = nn.Linear(50, 16)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return class probabilities of shape (batch, 16).\n",
"\n",
"        Softmax is already applied here, so callers must not apply it again.\n",
"        \"\"\"\n",
"        x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
"        x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
"        # Flatten everything except the batch dimension.\n",
"        x = torch.flatten(x, 1)\n",
"        x = F.relu(self.fc01(x))\n",
"        return torch.softmax(self.output(x), dim=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9379bf73",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "Expected state_dict to be dict-like, got <class '__main__.theCNN'>.",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[6], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m model \u001b[38;5;241m=\u001b[39m theCNN()\n\u001b[1;32m----> 2\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_state_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel1.pth\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n",
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\torch\\nn\\modules\\module.py:1994\u001b[0m, in \u001b[0;36mModule.load_state_dict\u001b[1;34m(self, state_dict, strict)\u001b[0m\n\u001b[0;32m 1971\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Copies parameters and buffers from :attr:`state_dict` into\u001b[39;00m\n\u001b[0;32m 1972\u001b[0m \u001b[38;5;124;03mthis module and its descendants. If :attr:`strict` is ``True``, then\u001b[39;00m\n\u001b[0;32m 1973\u001b[0m \u001b[38;5;124;03mthe keys of :attr:`state_dict` must exactly match the keys returned\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 1991\u001b[0m \u001b[38;5;124;03m ``RuntimeError``.\u001b[39;00m\n\u001b[0;32m 1992\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 1993\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(state_dict, Mapping):\n\u001b[1;32m-> 1994\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected state_dict to be dict-like, got \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mtype\u001b[39m(state_dict)))\n\u001b[0;32m 1996\u001b[0m missing_keys: List[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m 1997\u001b[0m unexpected_keys: List[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m []\n",
"\u001b[1;31mTypeError\u001b[0m: Expected state_dict to be dict-like, got <class '__main__.theCNN'>."
]
}
],
"source": [
"model = theCNN()\n",
"model.load_state_dict(torch.load(\"model.pth\"))\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18b869d3",
"metadata": {},
"outputs": [],
"source": [
"@app.post(\"/score\")\n",
"async def calculate_score(image_file: UploadFile):\n",
"    \"\"\"Return the model's per-class probabilities for an uploaded image.\n",
"\n",
"    Args:\n",
"        image_file: Uploaded image; it is decoded with PIL and converted to\n",
"            grayscale before preprocessing.\n",
"\n",
"    Returns:\n",
"        dict with \"similarity_scores\": a list holding one probability per\n",
"        model output class.\n",
"    \"\"\"\n",
"    image = Image.open(io.BytesIO(await image_file.read())).convert(\"L\")\n",
"    batch = transform(image).unsqueeze(0)  # add the batch dimension\n",
"\n",
"    with torch.no_grad():\n",
"        # theCNN.forward already ends in softmax; a second softmax here would\n",
"        # squash the scores towards a uniform distribution.\n",
"        probabilities = model(batch)[0]\n",
"\n",
"    return {\"similarity_scores\": probabilities.tolist()}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a99a8b6",
"metadata": {},
"outputs": [],
"source": [
"@app.get(\"/\")\n",
"async def hello_world():\n",
"    \"\"\"Root endpoint returning a fixed greeting.\"\"\"\n",
"    # {\"Hello World\"} is a set literal, which would be serialised as a JSON\n",
"    # array rather than an object; return a key/value payload instead.\n",
"    return {\"message\": \"Hello World\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b30f5c6",
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
"    # uvicorn.run() blocks until the server stops and returns None, so it must\n",
"    # not be wrapped in loop.create_task(). It relies on nest_asyncio.apply()\n",
"    # from an earlier cell to start inside the notebook's running event loop.\n",
"    uvicorn.run(app, host=\"127.0.0.1\", port=8001)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e2e07d8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "60c58fd2",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import asyncio\n",
"import torch.nn.functional as F\n",
"\n",
"# Apply the patch\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c1b7af91",
"metadata": {},
"outputs": [],
"source": [
"from fastapi import FastAPI, UploadFile\n",
"from PIL import Image\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"import io\n",
"import torch.nn as nn\n",
"import asyncio\n",
"import uvicorn \n",
"from io import BytesIO"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7f1cd8a4",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2035acf5",
"metadata": {},
"outputs": [],
"source": [
"# Preprocessing applied to every uploaded image before inference:\n",
"# resize to 300x300, reduce to one grayscale channel, convert to a tensor\n",
"# and normalise with mean 0.5 / std 0.5.\n",
"transform = transforms.Compose([\n",
"    transforms.Resize((300, 300)),\n",
"    transforms.Grayscale(num_output_channels=1),\n",
"    transforms.ToTensor(),\n",
"    # (0.5) is just the float 0.5; spell out the one-element tuple so the\n",
"    # per-channel intent is explicit for the single grayscale channel.\n",
"    transforms.Normalize(mean=(0.5,), std=(0.5,)),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2cfca47d",
"metadata": {},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
"    \"\"\"Two-layer CNN that classifies single-channel images into 16 classes.\n",
"\n",
"    Each 5x5 convolution (stride 1, padding 1) is followed by a 2x2 max-pool\n",
"    and a ReLU; the result is flattened into a 50-unit hidden layer and a\n",
"    16-unit output layer.\n",
"\n",
"    Args:\n",
"        input_size: Side length in pixels of the square input image. The\n",
"            default of 300 yields 20 * 73 * 73 = 106580 flattened features.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, input_size=300):\n",
"        super().__init__()\n",
"\n",
"        self.conv01 = nn.Conv2d(\n",
"            in_channels=1,\n",
"            out_channels=10,\n",
"            kernel_size=5,\n",
"            stride=1,\n",
"            padding=1,\n",
"        )\n",
"\n",
"        self.conv02 = nn.Conv2d(\n",
"            in_channels=10,\n",
"            out_channels=20,\n",
"            kernel_size=5,\n",
"            stride=1,\n",
"            padding=1,\n",
"        )\n",
"\n",
"        # Follow the spatial size through both conv + pool stages so that\n",
"        # fc01 always matches what forward() produces.\n",
"        side = input_size\n",
"        for conv in (self.conv01, self.conv02):\n",
"            kernel, stride, padding = conv.kernel_size[0], conv.stride[0], conv.padding[0]\n",
"            side = (side + 2 * padding - kernel) // stride + 1  # convolution\n",
"            side //= 2  # F.max_pool2d(..., 2) in forward()\n",
"        expectedSize = self.conv02.out_channels * side ** 2\n",
"\n",
"        self.fc01 = nn.Linear(expectedSize, 50)\n",
"        self.output = nn.Linear(50, 16)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return class probabilities of shape (batch, 16).\n",
"\n",
"        Softmax is already applied here, so callers must not apply it again.\n",
"        \"\"\"\n",
"        x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
"        x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
"        # Flatten everything except the batch dimension.\n",
"        x = torch.flatten(x, 1)\n",
"        x = F.relu(self.fc01(x))\n",
"        return torch.softmax(self.output(x), dim=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2786c4df",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"theCNN(\n",
" (conv01): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))\n",
" (conv02): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))\n",
" (fc01): Linear(in_features=106580, out_features=50, bias=True)\n",
" (output): Linear(in_features=50, out_features=16, bias=True)\n",
")"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = theCNN()\n",
"model.load_state_dict(torch.load(\"model.pth\"))\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "30569105",
"metadata": {},
"outputs": [],
"source": [
"@app.post(\"/predict-similarity\")\n",
"async def predict_similarity(sign: str, image: UploadFile):\n",
"    \"\"\"Score an uploaded image by the model's highest class probability.\n",
"\n",
"    Args:\n",
"        sign: Label supplied by the caller; it is only echoed back.\n",
"        image: Uploaded image file.\n",
"\n",
"    Returns:\n",
"        dict with \"sign\" and \"similarity_score\" (the largest model output\n",
"        multiplied by 100).\n",
"    \"\"\"\n",
"    # NOTE(review): the score does not depend on `sign` - it is the top\n",
"    # probability over all classes. Scoring against the class that matches\n",
"    # `sign` needs the training label mapping, which this notebook lacks.\n",
"    image_bytes = await image.read()\n",
"    img = Image.open(BytesIO(image_bytes))\n",
"    img = transform(img).unsqueeze(0)\n",
"    # Inference only: skip autograd bookkeeping.\n",
"    with torch.no_grad():\n",
"        output = model(img)\n",
"    similarity_score = torch.max(output).item() * 100  # Get the maximum predicted probability as the similarity score\n",
"    return {\"sign\": sign, \"similarity_score\": similarity_score}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5c19e31c",
"metadata": {},
"outputs": [],
"source": [
"@app.get(\"/\")\n",
"async def hello_world():\n",
"    \"\"\"Root endpoint returning a fixed greeting.\"\"\"\n",
"    # {\"Hello World\"} is a set literal, which would be serialised as a JSON\n",
"    # array rather than an object; return a key/value payload instead.\n",
"    return {\"message\": \"Hello World\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bea39dc9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: Started server process [36440]\n",
"INFO: Waiting for application startup.\n",
"INFO: Application startup complete.\n",
"INFO: Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO: 127.0.0.1:63408 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63440 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63461 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63484 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n"
]
}
],
"source": [
"if __name__ == \"__main__\":\n",
" loop = asyncio.get_event_loop()\n",
" loop.create_task(uvicorn.run(app, host=\"127.0.0.1\", port=8001))\n",
" loop.run_forever()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52ede8b7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d7bb1db",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: torchvision in c:\\users\\janithgamage\\appdata\\roaming\\python\\python310\\site-packages (0.15.2)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from torchvision) (9.4.0)\n",
"Requirement already satisfied: torch==2.0.1 in c:\\users\\janithgamage\\appdata\\roaming\\python\\python310\\site-packages (from torchvision) (2.0.1)\n",
"Requirement already satisfied: requests in c:\\programdata\\anaconda3\\lib\\site-packages (from torchvision) (2.28.1)\n",
"Requirement already satisfied: numpy in c:\\programdata\\anaconda3\\lib\\site-packages (from torchvision) (1.23.5)\n",
"Requirement already satisfied: jinja2 in c:\\programdata\\anaconda3\\lib\\site-packages (from torch==2.0.1->torchvision) (3.1.2)\n",
"Requirement already satisfied: networkx in c:\\programdata\\anaconda3\\lib\\site-packages (from torch==2.0.1->torchvision) (2.8.4)\n",
"Requirement already satisfied: sympy in c:\\programdata\\anaconda3\\lib\\site-packages (from torch==2.0.1->torchvision) (1.11.1)\n",
"Requirement already satisfied: filelock in c:\\programdata\\anaconda3\\lib\\site-packages (from torch==2.0.1->torchvision) (3.9.0)\n",
"Requirement already satisfied: typing-extensions in c:\\programdata\\anaconda3\\lib\\site-packages (from torch==2.0.1->torchvision) (4.4.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->torchvision) (1.26.14)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->torchvision) (2022.12.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->torchvision) (3.4)\n",
"Requirement already satisfied: charset-normalizer<3,>=2 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->torchvision) (2.0.4)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from jinja2->torch==2.0.1->torchvision) (2.1.1)\n",
"Requirement already satisfied: mpmath>=0.19 in c:\\programdata\\anaconda3\\lib\\site-packages (from sympy->torch==2.0.1->torchvision) (1.2.1)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install torchvision"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.578710Z",
"iopub.status.idle": "2023-05-21T06:28:52.579058Z",
"shell.execute_reply": "2023-05-21T06:28:52.578896Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.578879Z"
}
},
"outputs": [],
"source": [
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.580301Z",
"iopub.status.idle": "2023-05-21T06:28:52.580679Z",
"shell.execute_reply": "2023-05-21T06:28:52.580498Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.580481Z"
}
},
"outputs": [],
"source": [
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.581548Z",
"iopub.status.idle": "2023-05-21T06:28:52.581894Z",
"shell.execute_reply": "2023-05-21T06:28:52.581734Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.581717Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['eight', 'eleven_2', 'eleven_3', 'fifty_1', 'fifty_2', 'fifty_3', 'five', 'four & fourteen_2', 'fourteen_1', 'fourteen_3', 'nine', 'one & ten_2 & eleven_1', 'seven', 'six', 'ten_1', 'ten_3', 'thirteen_1', 'thirteen_3', 'thirty_1', 'thirty_2', 'thirty_3', 'three & thirteen_2', 'twenty_1', 'twenty_2', 'twenty_3', 'two', 'what', 'when_1', 'when_2', 'when_3', 'who', 'why']\n"
]
}
],
"source": [
"# data_dir = \"\"\"../input/sinhala-sign-language-dataset-tdj/Sn_sign_language_dataset\"\"\"\n",
"data_dir = \"\"\"C:/Users/JanithGamage/Desktop/Research/pyhon/Mdoel-1/DataSet/Sn_sign_language_dataset\"\"\"\n",
"classes = []\n",
"\n",
"for directory in os.listdir(data_dir):\n",
" if \".\" not in directory: # Removes .txt and segmentation script\n",
" classes.append(directory) \n",
"print(classes)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.583017Z",
"iopub.status.idle": "2023-05-21T06:28:52.583353Z",
"shell.execute_reply": "2023-05-21T06:28:52.583186Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.583169Z"
}
},
"outputs": [],
"source": [
"# Build two parallel lists: one path per image file, and the name of the\n",
"# class folder that file was found in.\n",
"fp = []\n",
"class_name = []\n",
"for cls in classes:\n",
"    folder = f'{data_dir}/{cls}'\n",
"    names = os.listdir(folder)\n",
"    fp.extend(f'{folder}/{name}' for name in names)\n",
"    class_name.extend([cls] * len(names))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.584414Z",
"iopub.status.idle": "2023-05-21T06:28:52.584759Z",
"shell.execute_reply": "2023-05-21T06:28:52.584598Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.584580Z"
}
},
"outputs": [],
"source": [
"data = pd.DataFrame({\"File Path\":fp,\"Class\":class_name})"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.585665Z",
"iopub.status.idle": "2023-05-21T06:28:52.585993Z",
"shell.execute_reply": "2023-05-21T06:28:52.585834Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.585817Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"thirteen_1 668\n",
"six 598\n",
"thirteen_3 576\n",
"ten_1 467\n",
"twenty_1 463\n",
"why 438\n",
"eleven_3 435\n",
"seven 432\n",
"four & fourteen_2 423\n",
"three & thirteen_2 417\n",
"eight 416\n",
"two 386\n",
"one & ten_2 & eleven_1 383\n",
"five 381\n",
"what 375\n",
"ten_3 358\n",
"twenty_2 355\n",
"who 348\n",
"when_1 318\n",
"eleven_2 301\n",
"when_2 290\n",
"when_3 268\n",
"thirty_3 262\n",
"twenty_3 259\n",
"fifty_2 258\n",
"thirty_2 257\n",
"fifty_1 255\n",
"fifty_3 249\n",
"fourteen_3 246\n",
"thirty_1 230\n",
"fourteen_1 208\n",
"nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.587067Z",
"iopub.status.idle": "2023-05-21T06:28:52.587408Z",
"shell.execute_reply": "2023-05-21T06:28:52.587241Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.587224Z"
}
},
"outputs": [],
"source": [
"def _clean_class_name(name):\n",
"    \"\"\"Title-case a folder name, then strip its underscores and digits.\"\"\"\n",
"    return re.sub(r'[0-9]+', '', name.title().replace(\"_\", \"\"))\n",
"\n",
"\n",
"# Numbered variants such as 'ten_1' and 'ten_3' collapse into one 'Ten' class.\n",
"data['Class'] = data['Class'].apply(_clean_class_name)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.588313Z",
"iopub.status.idle": "2023-05-21T06:28:52.588671Z",
"shell.execute_reply": "2023-05-21T06:28:52.588490Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.588473Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Thirteen 1244\n",
"Twenty 1077\n",
"When 876\n",
"Ten 825\n",
"Fifty 762\n",
"Thirty 749\n",
"Eleven 736\n",
"Six 598\n",
"Fourteen 454\n",
"Why 438\n",
"Seven 432\n",
"Four & Fourteen 423\n",
"Three & Thirteen 417\n",
"Eight 416\n",
"Two 386\n",
"One & Ten & Eleven 383\n",
"Five 381\n",
"What 375\n",
"Who 348\n",
"Nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.589898Z",
"iopub.status.idle": "2023-05-21T06:28:52.590257Z",
"shell.execute_reply": "2023-05-21T06:28:52.590086Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.590061Z"
}
},
"outputs": [],
"source": [
"clases_to_remove = ['When','Why','What','Who']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.591203Z",
"iopub.status.idle": "2023-05-21T06:28:52.592014Z",
"shell.execute_reply": "2023-05-21T06:28:52.591818Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.591791Z"
}
},
"outputs": [],
"source": [
"# Keep only the rows whose class is not listed in clases_to_remove.\n",
"data = data[~data['Class'].isin(clases_to_remove)]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.594622Z",
"iopub.status.idle": "2023-05-21T06:28:52.595000Z",
"shell.execute_reply": "2023-05-21T06:28:52.594827Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.594802Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Thirteen 1244\n",
"Twenty 1077\n",
"Ten 825\n",
"Fifty 762\n",
"Thirty 749\n",
"Eleven 736\n",
"Six 598\n",
"Fourteen 454\n",
"Seven 432\n",
"Four & Fourteen 423\n",
"Three & Thirteen 417\n",
"Eight 416\n",
"Two 386\n",
"One & Ten & Eleven 383\n",
"Five 381\n",
"Nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.596472Z",
"iopub.status.idle": "2023-05-21T06:28:52.596896Z",
"shell.execute_reply": "2023-05-21T06:28:52.596708Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.596686Z"
}
},
"outputs": [],
"source": [
"# Preprocessing applied to every training image: resize to 300x300, reduce\n",
"# to one grayscale channel, convert to a tensor and normalise with\n",
"# mean 0.5 / std 0.5.\n",
"transform = transforms.Compose([\n",
"    transforms.Resize((300, 300)),\n",
"    transforms.Grayscale(num_output_channels=1),\n",
"    transforms.ToTensor(),\n",
"    # (0.5) is just the float 0.5; spell out the one-element tuple so the\n",
"    # per-channel intent is explicit for the single grayscale channel.\n",
"    transforms.Normalize(mean=(0.5,), std=(0.5,)),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.598034Z",
"iopub.status.idle": "2023-05-21T06:28:52.598390Z",
"shell.execute_reply": "2023-05-21T06:28:52.598222Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.598200Z"
}
},
"outputs": [],
"source": [
"str_to_int = {key:val for val,key in enumerate(data['Class'].unique())}"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.600180Z",
"iopub.status.idle": "2023-05-21T06:28:52.600589Z",
"shell.execute_reply": "2023-05-21T06:28:52.600392Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.600367Z"
}
},
"outputs": [],
"source": [
"# str_to_int['One & Ten & Eleven'] = 1\n",
"# str_to_int['Two'] = 2\n",
"# str_to_int['Three & Thirteen'] = 3\n",
"# str_to_int['Four & Fourteen'] = 4\n",
"# str_to_int['Five'] = 5\n",
"# str_to_int['Six'] = 6\n",
"# str_to_int['Seven'] = 7\n",
"# str_to_int['Eight'] = 8\n",
"# str_to_int['Nine'] = 9\n",
"# str_to_int['Ten'] = 10\n",
"# str_to_int['Eleven'] = 11\n",
"# str_to_int['Thirteen'] = 13\n",
"# str_to_int['Fourteen'] = 14\n",
"# str_to_int['Twenty'] = 20\n",
"# str_to_int['Thirty'] = 30\n",
"# str_to_int['Fifty'] = 50"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:31:15.957270Z",
"iopub.status.busy": "2023-05-21T06:31:15.956916Z",
"iopub.status.idle": "2023-05-21T06:31:15.966787Z",
"shell.execute_reply": "2023-05-21T06:31:15.966023Z",
"shell.execute_reply.started": "2023-05-21T06:31:15.957230Z"
}
},
"outputs": [],
"source": [
"class MarkeDataset(Dataset):\n",
"    \"\"\"Dataset of (image, label) pairs described by a two-column DataFrame.\n",
"\n",
"    Column 0 of `data` holds the image file path and column 1 the class\n",
"    name, which is turned into an integer label through the module-level\n",
"    str_to_int mapping.\n",
"\n",
"    Args:\n",
"        data: DataFrame with one row per image (file path, class name).\n",
"        root_dir: Dataset root directory; stored but not used for loading,\n",
"            because column 0 already holds the path that is opened.\n",
"        transform: Callable applied to the PIL image. Defaults to\n",
"            transforms.ToTensor().\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, data, root_dir, transform=transforms.ToTensor()):\n",
"        self.data = data\n",
"        self.root_dir = root_dir\n",
"        self.transform = transform\n",
"        # Module-level device; tensors are moved to it as samples are loaded.\n",
"        self.device = device\n",
"\n",
"    def __len__(self):\n",
"        return len(self.data)\n",
"\n",
"    def __getitem__(self, idx):\n",
"        \"\"\"Return (image, label) for row `idx`; tensors are moved to self.device.\"\"\"\n",
"        if torch.is_tensor(idx):\n",
"            idx = idx.tolist()\n",
"\n",
"        img_name = self.data.iloc[idx, 0]\n",
"        y_label = torch.tensor(str_to_int[self.data.iloc[idx, 1]]).to(self.device)\n",
"\n",
"        # Image.open is lazy and keeps the file open; the context manager\n",
"        # closes it as soon as the pixels have been read.\n",
"        with Image.open(img_name) as image:\n",
"            if self.transform:\n",
"                image = self.transform(image).to(self.device)\n",
"            else:\n",
"                image = image.copy()  # detach from the file before it closes\n",
"\n",
"        return (image, y_label)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:31:25.622005Z",
"iopub.status.busy": "2023-05-21T06:31:25.621491Z",
"iopub.status.idle": "2023-05-21T06:31:25.626122Z",
"shell.execute_reply": "2023-05-21T06:31:25.625148Z",
"shell.execute_reply.started": "2023-05-21T06:31:25.621968Z"
}
},
"outputs": [],
"source": [
"dataset = MarkeDataset(\n",
" data=data,\n",
" root_dir=data_dir,\n",
" transform=transform\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.606586Z",
"iopub.status.idle": "2023-05-21T06:28:52.606914Z",
"shell.execute_reply": "2023-05-21T06:28:52.606754Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.606737Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img, lab = dataset[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Label :{str(lab.item())}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:07.139527Z",
"iopub.status.busy": "2023-05-21T06:32:07.139161Z",
"iopub.status.idle": "2023-05-21T06:32:07.146617Z",
"shell.execute_reply": "2023-05-21T06:32:07.145658Z",
"shell.execute_reply.started": "2023-05-21T06:32:07.139487Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"9419"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:13.912232Z",
"iopub.status.busy": "2023-05-21T06:32:13.911889Z",
"iopub.status.idle": "2023-05-21T06:32:13.920718Z",
"shell.execute_reply": "2023-05-21T06:32:13.919857Z",
"shell.execute_reply.started": "2023-05-21T06:32:13.912196Z"
}
},
"outputs": [],
"source": [
"# batch_size = 16\n",
"# train_set, test_set = torch.utils.data.random_split(dataset, [8000,1419])\n",
"# trainLoader = DataLoader(dataset=train_set,batch_size=batch_size,shuffle=True,drop_last=True)\n",
"# testLoader = DataLoader(dataset=test_set,batch_size=1419)\n",
"\n",
"batch_size = 16\n",
"\n",
"train_size = int(0.85 * len(dataset))\n",
"test_size = len(dataset) - train_size\n",
"\n",
"train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])\n",
"trainLoader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, drop_last=True)\n",
"testLoader = DataLoader(dataset=test_set, batch_size=len(test_set))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:15.912440Z",
"iopub.status.busy": "2023-05-21T06:32:15.912118Z",
"iopub.status.idle": "2023-05-21T06:32:15.920625Z",
"shell.execute_reply": "2023-05-21T06:32:15.919790Z",
"shell.execute_reply.started": "2023-05-21T06:32:15.912404Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Metrix size after 1st conv. and maxpool layer: 149 X 149\n",
"Metrix size after 2nd conv. and maxpool layer: 73 X 73\n"
]
}
],
"source": [
"kernel_size = 5\n",
"stride = 1\n",
"padding = 1\n",
"\n",
"metrixSize1 = int(np.floor(300+2*padding-kernel_size/stride)+1)\n",
"metrixSize1 = int(np.floor(metrixSize1/2)) #applying 2x2 Max pooling operation\n",
"\n",
"metrixSize2 = int(np.floor(metrixSize1+2*padding-kernel_size/stride)+1)\n",
"metrixSize2 = int(np.floor(metrixSize2/2)) #applying 2x2 Max pooling operation\n",
"\n",
"print(f'Metrix size after 1st conv. and maxpool layer: {metrixSize1} X {metrixSize1}')\n",
"print(f'Metrix size after 2nd conv. and maxpool layer: {metrixSize2} X {metrixSize2}')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:18.919815Z",
"iopub.status.busy": "2023-05-21T06:32:18.919472Z",
"iopub.status.idle": "2023-05-21T06:32:18.931885Z",
"shell.execute_reply": "2023-05-21T06:32:18.930832Z",
"shell.execute_reply.started": "2023-05-21T06:32:18.919776Z"
}
},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" \n",
" self.conv01 = nn.Conv2d(\n",
" in_channels = 1,\n",
" out_channels = 10,\n",
" kernel_size = 5,\n",
" stride = 1,\n",
" padding = 1\n",
" )\n",
" \n",
" self.conv02 = nn.Conv2d(\n",
" in_channels = 10,\n",
" out_channels = 20,\n",
" kernel_size = 5,\n",
" stride = 1,\n",
" padding = 1\n",
" )\n",
" \n",
" expectedSize = np.floor((73+2*0-1)/1) +1 \n",
" expectedSize = 20*int(expectedSize**2)\n",
" \n",
" self.fc01 = nn.Linear(expectedSize,50)\n",
" self.output = nn.Linear(50,16)\n",
" \n",
" def forward(self,x):\n",
" \n",
" #convo -> maxpool -> relu\n",
" x = F.relu(F.max_pool2d(self.conv01(x),2))\n",
" \n",
" #convo -> maxpool -> relu\n",
" x = F.relu(F.max_pool2d(self.conv02(x),2))\n",
" \n",
" nUnits = x.shape.numel()/x.shape[0]\n",
" x = x.view(-1,int(nUnits))\n",
" \n",
" x = F.relu(self.fc01(x))\n",
" \n",
" return torch.softmax(self.output(x),axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:24.959976Z",
"iopub.status.busy": "2023-05-21T06:32:24.959650Z",
"iopub.status.idle": "2023-05-21T06:32:24.966633Z",
"shell.execute_reply": "2023-05-21T06:32:24.965555Z",
"shell.execute_reply.started": "2023-05-21T06:32:24.959940Z"
}
},
"outputs": [],
"source": [
"def createModel(lr):\n",
" net = theCNN()\n",
" lossFun = nn.CrossEntropyLoss()\n",
" optimizer = torch.optim.Adam(params=net.parameters(),lr=lr)\n",
" \n",
" return net,lossFun,optimizer"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:26.930919Z",
"iopub.status.busy": "2023-05-21T06:32:26.930365Z",
"iopub.status.idle": "2023-05-21T06:32:26.991834Z",
"shell.execute_reply": "2023-05-21T06:32:26.990918Z",
"shell.execute_reply.started": "2023-05-21T06:32:26.930881Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.0721, 0.0794, 0.0530, 0.0484, 0.0692, 0.0618, 0.0585, 0.0723, 0.0629,\n",
" 0.0478, 0.0588, 0.0584, 0.0704, 0.0604, 0.0588, 0.0678]],\n",
" grad_fn=<SoftmaxBackward0>)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"createModel(0.01)[0](torch.randn(1,1,300,300))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:29.123842Z",
"iopub.status.busy": "2023-05-21T06:32:29.123530Z",
"iopub.status.idle": "2023-05-21T06:32:29.137330Z",
"shell.execute_reply": "2023-05-21T06:32:29.136105Z",
"shell.execute_reply.started": "2023-05-21T06:32:29.123809Z"
}
},
"outputs": [],
"source": [
"def trainModel(epochs, lr):\n",
" net, LossFun, optimizer = createModel(lr)\n",
" net = net.to(device)\n",
" losses = torch.zeros(epochs)\n",
" testAccuracy = torch.zeros(epochs)\n",
" trainAccuracy = torch.zeros(epochs)\n",
"\n",
" for i in range(epochs):\n",
" net.train()\n",
" batchLoss = []\n",
" batchAccuracy = []\n",
"\n",
" for X, y in trainLoader:\n",
" yHat = net(X)\n",
" loss = LossFun(yHat, y.squeeze())\n",
"\n",
" accuracy = 100 * torch.mean((torch.argmax(yHat, axis=1) == y.squeeze()).float())\n",
" batchAccuracy.append(accuracy.item())\n",
" batchLoss.append(loss.item())\n",
"\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" losses[i] = np.mean(batchLoss)\n",
" trainAccuracy[i] = np.mean(batchAccuracy)\n",
"\n",
" net.eval()\n",
" X, y = next(iter(testLoader))\n",
"\n",
" with torch.no_grad():\n",
" yHat = net(X)\n",
"\n",
" loss = LossFun(yHat, y.squeeze())\n",
" accuracy = 100 * torch.mean((torch.argmax(yHat, axis=1) == y.squeeze()).float())\n",
" testAccuracy[i] = accuracy\n",
" print(f'Test Accuracy: {accuracy:.3f}')\n",
" \n",
" # Save the trained model\n",
" torch.save(net.state_dict(), 'model.pth')\n",
"\n",
" return net, losses, testAccuracy, trainAccuracy\n",
"\n",
"net, losses, testAccuracy, trainAccuracy = trainModel(1, 1e-4)\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:33.702648Z",
"iopub.status.busy": "2023-05-21T06:32:33.702320Z"
},
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test Accuracy: 78.132\n"
]
},
{
"ename": "ValueError",
"evalue": "not enough values to unpack (expected 6, got 4)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m<timed exec>:1\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 6, got 4)"
]
}
],
"source": [
"%%time\n",
"net,losses,testAccuracy,trainAccuracy,yHat,X = trainModel(1,1e-4)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.623133Z",
"iopub.status.idle": "2023-05-21T06:28:52.624249Z",
"shell.execute_reply": "2023-05-21T06:28:52.623682Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.623638Z"
}
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'testAccuracy' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[25], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFinal test accuracy: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtestAccuracy[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.3f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mplot(testAccuracy,label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTest\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m plt\u001b[38;5;241m.\u001b[39mplot(trainAccuracy,label\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTrain\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'testAccuracy' is not defined"
]
}
],
"source": [
"plt.title(f'Final test accuracy: {testAccuracy[-1]:.3f}')\n",
"plt.plot(testAccuracy,label='Test')\n",
"plt.plot(trainAccuracy,label='Train')\n",
"plt.legend()\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.630115Z",
"iopub.status.idle": "2023-05-21T06:28:52.630937Z",
"shell.execute_reply": "2023-05-21T06:28:52.630583Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.630547Z"
}
},
"outputs": [],
"source": [
"plt.title(f'Final train loss: {losses[-1]:.3f}')\n",
"plt.plot(losses)\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.635934Z",
"iopub.status.idle": "2023-05-21T06:28:52.637341Z",
"shell.execute_reply": "2023-05-21T06:28:52.636488Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.636414Z"
}
},
"outputs": [],
"source": [
"inv_map = dict(zip(str_to_int.values(), str_to_int.keys()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.642433Z",
"iopub.status.idle": "2023-05-21T06:28:52.643393Z",
"shell.execute_reply": "2023-05-21T06:28:52.642984Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.642964Z"
}
},
"outputs": [],
"source": [
"predictions = torch.argmax(yHat,axis=1).cpu().detach().numpy().tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.646236Z",
"iopub.status.idle": "2023-05-21T06:28:52.647698Z",
"shell.execute_reply": "2023-05-21T06:28:52.647036Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.646959Z"
}
},
"outputs": [],
"source": [
"predictions = list(map(lambda x: inv_map[x],predictions))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.653068Z",
"iopub.status.idle": "2023-05-21T06:28:52.654218Z",
"shell.execute_reply": "2023-05-21T06:28:52.653379Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.653291Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img = X[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Prediction :{predictions[i]}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## TODO\n",
" - Add dropout layers to the network\n",
" - Try image augmentation techniques"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2023-05-21T15:21:22.473289Z",
"iopub.status.busy": "2023-05-21T15:21:22.472946Z",
"iopub.status.idle": "2023-05-21T15:21:22.479771Z",
"shell.execute_reply": "2023-05-21T15:21:22.478829Z",
"shell.execute_reply.started": "2023-05-21T15:21:22.473248Z"
}
},
"outputs": [],
"source": [
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:25.461226Z",
"iopub.status.busy": "2023-05-21T15:21:25.460874Z",
"iopub.status.idle": "2023-05-21T15:21:25.466436Z",
"shell.execute_reply": "2023-05-21T15:21:25.465440Z",
"shell.execute_reply.started": "2023-05-21T15:21:25.461185Z"
}
},
"outputs": [],
"source": [
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:27.486270Z",
"iopub.status.busy": "2023-05-21T15:21:27.485805Z",
"iopub.status.idle": "2023-05-21T15:21:27.513366Z",
"shell.execute_reply": "2023-05-21T15:21:27.512417Z",
"shell.execute_reply.started": "2023-05-21T15:21:27.486235Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['eight', 'eleven_2', 'eleven_3', 'fifty_1', 'fifty_2', 'fifty_3', 'five', 'four & fourteen_2', 'fourteen_1', 'fourteen_3', 'nine', 'one & ten_2 & eleven_1', 'seven', 'six', 'ten_1', 'ten_3', 'thirteen_1', 'thirteen_3', 'thirty_1', 'thirty_2', 'thirty_3', 'three & thirteen_2', 'twenty_1', 'twenty_2', 'twenty_3', 'two', 'what', 'when_1', 'when_2', 'when_3', 'who', 'why']\n"
]
}
],
"source": [
"# data_dir = \"\"\"../input/sinhala-sign-language-dataset-tdj/Sn_sign_language_dataset\"\"\"\n",
"data_dir = \"\"\"C:/Users/JanithGamage/Desktop/Research/pyhon/Mdoel-1/DataSet/Sn_sign_language_dataset\"\"\"\n",
"classes = []\n",
"\n",
"for directory in os.listdir(data_dir):\n",
" if \".\" not in directory: # Removes .txt and segmentation script\n",
" classes.append(directory) \n",
"print(classes)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:29.464717Z",
"iopub.status.busy": "2023-05-21T15:21:29.463975Z",
"iopub.status.idle": "2023-05-21T15:21:32.769963Z",
"shell.execute_reply": "2023-05-21T15:21:32.769064Z",
"shell.execute_reply.started": "2023-05-21T15:21:29.464662Z"
}
},
"outputs": [],
"source": [
"fp = []\n",
"class_name = []\n",
"for cls in classes:\n",
" files = os.listdir(f'{data_dir}/{cls}')\n",
" for file in files:\n",
" fp.append(f'{data_dir}/{cls}/{file}')\n",
" class_name.append(cls)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:37.426901Z",
"iopub.status.busy": "2023-05-21T15:21:37.426595Z",
"iopub.status.idle": "2023-05-21T15:21:37.439605Z",
"shell.execute_reply": "2023-05-21T15:21:37.438542Z",
"shell.execute_reply.started": "2023-05-21T15:21:37.426864Z"
}
},
"outputs": [],
"source": [
"data = pd.DataFrame({\"File Path\":fp,\"Class\":class_name})"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:38.852702Z",
"iopub.status.busy": "2023-05-21T15:21:38.852366Z",
"iopub.status.idle": "2023-05-21T15:21:38.876210Z",
"shell.execute_reply": "2023-05-21T15:21:38.875333Z",
"shell.execute_reply.started": "2023-05-21T15:21:38.852663Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"thirteen_1 668\n",
"six 598\n",
"thirteen_3 576\n",
"ten_1 467\n",
"twenty_1 463\n",
"why 438\n",
"eleven_3 435\n",
"seven 432\n",
"four & fourteen_2 423\n",
"three & thirteen_2 417\n",
"eight 416\n",
"two 386\n",
"one & ten_2 & eleven_1 383\n",
"five 381\n",
"what 375\n",
"ten_3 358\n",
"twenty_2 355\n",
"who 348\n",
"when_1 318\n",
"eleven_2 301\n",
"when_2 290\n",
"when_3 268\n",
"thirty_3 262\n",
"twenty_3 259\n",
"fifty_2 258\n",
"thirty_2 257\n",
"fifty_1 255\n",
"fifty_3 249\n",
"fourteen_3 246\n",
"thirty_1 230\n",
"fourteen_1 208\n",
"nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:40.858223Z",
"iopub.status.busy": "2023-05-21T15:21:40.857670Z",
"iopub.status.idle": "2023-05-21T15:21:40.901757Z",
"shell.execute_reply": "2023-05-21T15:21:40.900784Z",
"shell.execute_reply.started": "2023-05-21T15:21:40.858177Z"
}
},
"outputs": [],
"source": [
"data['Class'] = data['Class'].apply(lambda x:x.title().replace(\"_\",\"\"))\n",
"data['Class'] = data['Class'].apply(lambda x:re.sub(r'[0-9]+', '', x))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:43.784267Z",
"iopub.status.busy": "2023-05-21T15:21:43.783939Z",
"iopub.status.idle": "2023-05-21T15:21:43.796665Z",
"shell.execute_reply": "2023-05-21T15:21:43.795716Z",
"shell.execute_reply.started": "2023-05-21T15:21:43.784229Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Thirteen 1244\n",
"Twenty 1077\n",
"When 876\n",
"Ten 825\n",
"Fifty 762\n",
"Thirty 749\n",
"Eleven 736\n",
"Six 598\n",
"Fourteen 454\n",
"Why 438\n",
"Seven 432\n",
"Four & Fourteen 423\n",
"Three & Thirteen 417\n",
"Eight 416\n",
"Two 386\n",
"One & Ten & Eleven 383\n",
"Five 381\n",
"What 375\n",
"Who 348\n",
"Nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:58.633864Z",
"iopub.status.busy": "2023-05-21T15:21:58.633538Z",
"iopub.status.idle": "2023-05-21T15:21:58.639024Z",
"shell.execute_reply": "2023-05-21T15:21:58.638410Z",
"shell.execute_reply.started": "2023-05-21T15:21:58.633832Z"
}
},
"outputs": [],
"source": [
"clases_to_remove = ['When','Why','What','Who']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:00.893674Z",
"iopub.status.busy": "2023-05-21T15:22:00.893340Z",
"iopub.status.idle": "2023-05-21T15:22:00.908223Z",
"shell.execute_reply": "2023-05-21T15:22:00.907346Z",
"shell.execute_reply.started": "2023-05-21T15:22:00.893635Z"
}
},
"outputs": [],
"source": [
"data = data[data['Class'].apply(lambda x: True if x not in clases_to_remove else False)]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:03.880606Z",
"iopub.status.busy": "2023-05-21T15:22:03.880248Z",
"iopub.status.idle": "2023-05-21T15:22:03.891999Z",
"shell.execute_reply": "2023-05-21T15:22:03.890999Z",
"shell.execute_reply.started": "2023-05-21T15:22:03.880571Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Thirteen 1244\n",
"Twenty 1077\n",
"Ten 825\n",
"Fifty 762\n",
"Thirty 749\n",
"Eleven 736\n",
"Six 598\n",
"Fourteen 454\n",
"Seven 432\n",
"Four & Fourteen 423\n",
"Three & Thirteen 417\n",
"Eight 416\n",
"Two 386\n",
"One & Ten & Eleven 383\n",
"Five 381\n",
"Nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:06.096597Z",
"iopub.status.busy": "2023-05-21T15:22:06.096301Z",
"iopub.status.idle": "2023-05-21T15:22:06.102135Z",
"shell.execute_reply": "2023-05-21T15:22:06.101082Z",
"shell.execute_reply.started": "2023-05-21T15:22:06.096565Z"
}
},
"outputs": [],
"source": [
"transform = transforms.Compose([\n",
" transforms.Resize((300,300)),\n",
" transforms.Grayscale(num_output_channels=1),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize(mean=(0.5),std=(0.5))\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:08.359982Z",
"iopub.status.busy": "2023-05-21T15:22:08.359687Z",
"iopub.status.idle": "2023-05-21T15:22:08.367491Z",
"shell.execute_reply": "2023-05-21T15:22:08.366322Z",
"shell.execute_reply.started": "2023-05-21T15:22:08.359950Z"
}
},
"outputs": [],
"source": [
"str_to_int = {key:val for val,key in enumerate(data['Class'].unique())}"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:10.520408Z",
"iopub.status.busy": "2023-05-21T15:22:10.520069Z",
"iopub.status.idle": "2023-05-21T15:22:10.526070Z",
"shell.execute_reply": "2023-05-21T15:22:10.525115Z",
"shell.execute_reply.started": "2023-05-21T15:22:10.520373Z"
}
},
"outputs": [],
"source": [
"# str_to_int['One & Ten & Eleven'] = 1\n",
"# str_to_int['Two'] = 2\n",
"# str_to_int['Three & Thirteen'] = 3\n",
"# str_to_int['Four & Fourteen'] = 4\n",
"# str_to_int['Five'] = 5\n",
"# str_to_int['Six'] = 6\n",
"# str_to_int['Seven'] = 7\n",
"# str_to_int['Eight'] = 8\n",
"# str_to_int['Nine'] = 9\n",
"# str_to_int['Ten'] = 10\n",
"# str_to_int['Eleven'] = 11\n",
"# str_to_int['Thirteen'] = 13\n",
"# str_to_int['Fourteen'] = 14\n",
"# str_to_int['Twenty'] = 20\n",
"# str_to_int['Thirty'] = 30\n",
"# str_to_int['Fifty'] = 50"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:13.056196Z",
"iopub.status.busy": "2023-05-21T15:22:13.055853Z",
"iopub.status.idle": "2023-05-21T15:22:13.066406Z",
"shell.execute_reply": "2023-05-21T15:22:13.065062Z",
"shell.execute_reply.started": "2023-05-21T15:22:13.056132Z"
}
},
"outputs": [],
"source": [
"class MarkeDataset(Dataset):\n",
" def __init__(self, data, root_dir, transform=transforms.ToTensor()):\n",
" self.data = data\n",
" self.root_dir = root_dir\n",
" self.transform = transform\n",
" self.device = device\n",
" \n",
" def __len__(self):\n",
" return len(self.data)\n",
" \n",
" def __getitem__(self, idx):\n",
" if torch.is_tensor(idx):\n",
" idx = idx.tolist()\n",
" \n",
" img_name = self.data.iloc[idx, 0]\n",
" image = Image.open(img_name)\n",
" y_label = torch.tensor(str_to_int[self.data.iloc[idx, 1]]).to(self.device)\n",
" \n",
" if self.transform:\n",
" image = self.transform(image).to(self.device) \n",
" \n",
" return (image, y_label)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:26.257162Z",
"iopub.status.busy": "2023-05-21T15:22:26.256652Z",
"iopub.status.idle": "2023-05-21T15:22:26.261594Z",
"shell.execute_reply": "2023-05-21T15:22:26.260674Z",
"shell.execute_reply.started": "2023-05-21T15:22:26.257103Z"
}
},
"outputs": [],
"source": [
"dataset = MarkeDataset(\n",
" data=data,\n",
" root_dir=data_dir,\n",
" transform=transform\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:29.282820Z",
"iopub.status.busy": "2023-05-21T15:22:29.282505Z",
"iopub.status.idle": "2023-05-21T15:22:30.450098Z",
"shell.execute_reply": "2023-05-21T15:22:30.449111Z",
"shell.execute_reply.started": "2023-05-21T15:22:29.282785Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img, lab = dataset[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Label :{str(lab.item())}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:34.686493Z",
"iopub.status.busy": "2023-05-21T15:22:34.686198Z",
"iopub.status.idle": "2023-05-21T15:22:34.692454Z",
"shell.execute_reply": "2023-05-21T15:22:34.691684Z",
"shell.execute_reply.started": "2023-05-21T15:22:34.686462Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"9419"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:38.619412Z",
"iopub.status.busy": "2023-05-21T15:22:38.618543Z",
"iopub.status.idle": "2023-05-21T15:22:38.635127Z",
"shell.execute_reply": "2023-05-21T15:22:38.634375Z",
"shell.execute_reply.started": "2023-05-21T15:22:38.619366Z"
}
},
"outputs": [],
"source": [
"batch_size = 16\n",
"train_set, test_set = torch.utils.data.random_split(dataset, [8000,1419])\n",
"trainLoader = DataLoader(dataset=train_set,batch_size=batch_size,shuffle=True,drop_last=True)\n",
"testLoader = DataLoader(dataset=test_set,batch_size=1419)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:41.806762Z",
"iopub.status.busy": "2023-05-21T15:22:41.806452Z",
"iopub.status.idle": "2023-05-21T15:22:41.815670Z",
"shell.execute_reply": "2023-05-21T15:22:41.814797Z",
"shell.execute_reply.started": "2023-05-21T15:22:41.806729Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Metrix size after 1st conv. and maxpool layer: 149 X 149\n",
"Metrix size after 2nd conv. and maxpool layer: 73 X 73\n"
]
}
],
"source": [
"kernel_size = 5\n",
"stride = 1\n",
"padding = 1\n",
"\n",
"metrixSize1 = int(np.floor(300+2*padding-kernel_size/stride)+1)\n",
"metrixSize1 = int(np.floor(metrixSize1/2)) #applying 2x2 Max pooling operation\n",
"\n",
"metrixSize2 = int(np.floor(metrixSize1+2*padding-kernel_size/stride)+1)\n",
"metrixSize2 = int(np.floor(metrixSize2/2)) #applying 2x2 Max pooling operation\n",
"\n",
"print(f'Metrix size after 1st conv. and maxpool layer: {metrixSize1} X {metrixSize1}')\n",
"print(f'Metrix size after 2nd conv. and maxpool layer: {metrixSize2} X {metrixSize2}')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:44.572322Z",
"iopub.status.busy": "2023-05-21T15:22:44.571998Z",
"iopub.status.idle": "2023-05-21T15:22:44.585933Z",
"shell.execute_reply": "2023-05-21T15:22:44.584795Z",
"shell.execute_reply.started": "2023-05-21T15:22:44.572285Z"
}
},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" \n",
" self.conv01 = nn.Conv2d(\n",
" in_channels = 1,\n",
" out_channels = 10,\n",
" kernel_size = 5,\n",
" stride = 1,\n",
" padding = 1\n",
" )\n",
" \n",
" self.conv02 = nn.Conv2d(\n",
" in_channels = 10,\n",
" out_channels = 20,\n",
" kernel_size = 5,\n",
" stride = 1,\n",
" padding = 1\n",
" )\n",
" \n",
" expectedSize = np.floor((73+2*0-1)/1) +1 \n",
" expectedSize = 20*int(expectedSize**2)\n",
" \n",
" self.fc01 = nn.Linear(expectedSize,50)\n",
" self.output = nn.Linear(50,16)\n",
" \n",
" def forward(self,x):\n",
" \n",
" #convo -> maxpool -> relu\n",
" x = F.relu(F.max_pool2d(self.conv01(x),2))\n",
" \n",
" #convo -> maxpool -> relu\n",
" x = F.relu(F.max_pool2d(self.conv02(x),2))\n",
" \n",
" nUnits = x.shape.numel()/x.shape[0]\n",
" x = x.view(-1,int(nUnits))\n",
" \n",
" x = F.relu(self.fc01(x))\n",
" \n",
" return torch.softmax(self.output(x),axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:49.860090Z",
"iopub.status.busy": "2023-05-21T15:22:49.859764Z",
"iopub.status.idle": "2023-05-21T15:22:49.865939Z",
"shell.execute_reply": "2023-05-21T15:22:49.864657Z",
"shell.execute_reply.started": "2023-05-21T15:22:49.860052Z"
}
},
"outputs": [],
"source": [
"def createModel(lr):\n",
" net = theCNN()\n",
" lossFun = nn.CrossEntropyLoss()\n",
" optimizer = torch.optim.Adam(params=net.parameters(),lr=lr)\n",
" \n",
" return net,lossFun,optimizer"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:51.830628Z",
"iopub.status.busy": "2023-05-21T15:22:51.830107Z",
"iopub.status.idle": "2023-05-21T15:22:52.032717Z",
"shell.execute_reply": "2023-05-21T15:22:52.031798Z",
"shell.execute_reply.started": "2023-05-21T15:22:51.830587Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.0636, 0.0599, 0.0594, 0.0579, 0.0574, 0.0701, 0.0585, 0.0668, 0.0578,\n",
" 0.0632, 0.0598, 0.0619, 0.0726, 0.0633, 0.0666, 0.0611]],\n",
" grad_fn=<SoftmaxBackward0>)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"createModel(0.01)[0](torch.randn(1,1,300,300))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:55.120195Z",
"iopub.status.busy": "2023-05-21T15:22:55.119083Z",
"iopub.status.idle": "2023-05-21T15:22:55.134205Z",
"shell.execute_reply": "2023-05-21T15:22:55.133312Z",
"shell.execute_reply.started": "2023-05-21T15:22:55.120083Z"
}
},
"outputs": [],
"source": [
"def trainModel(epochs,lr):\n",
" net,LossFun,optimizer = createModel(lr)\n",
" net = net.to(device)\n",
" losses = torch.zeros(epochs)\n",
" testAccuracy = torch.zeros(epochs)\n",
" trainAccurscy = torch.zeros(epochs)\n",
"\n",
" for i in range(epochs):\n",
" print(\"start\")\n",
" net.train()\n",
" batchLoss = []\n",
" batchAccuracy = []\n",
" for X,y in trainLoader:\n",
" yHat = net(X)\n",
" loss = LossFun(yHat,y)\n",
" # print(yHat.item())\n",
"\n",
" accuracy = 100*torch.mean((torch.argmax(yHat,axis=1)==y).float())\n",
" batchAccuracy.append(accuracy.item())\n",
" batchLoss.append(loss.item())\n",
"\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" losses[i] = np.mean(batchLoss)\n",
" trainAccurscy[i] = np.mean(batchAccuracy)\n",
"\n",
" net.eval()\n",
" X,y = next(iter(testLoader))\n",
" with torch.no_grad():\n",
" yHat = net(X)\n",
" loss = LossFun(yHat,y)\n",
" accuracy = 100*torch.mean((torch.argmax(yHat,axis=1)==y).float())\n",
" testAccuracy[i] = accuracy\n",
" print(f'Test Accuracy: {accuracy:.3f}')\n",
" \n",
" # Save the trained model\n",
" print(\"save the model\")\n",
" #torch.save(net.state_dict(), 'model.pth') \n",
" torch.save(net, \"model1.pth\")\n",
"\n",
" \n",
" return net,losses,testAccuracy,trainAccurscy,yHat,X"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:58.112687Z",
"iopub.status.busy": "2023-05-21T15:22:58.112351Z"
}
},
"outputs": [],
"source": [
"%%time\n",
"net,losses,testAccuracy,trainAccuracy,yHat,X = trainModel(10,1e-4)\n",
"# net,losses,testAccuracy,trainAccuracy,yHat,X = trainModel(1,1e-4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.623133Z",
"iopub.status.idle": "2023-05-21T06:28:52.624249Z",
"shell.execute_reply": "2023-05-21T06:28:52.623682Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.623638Z"
}
},
"outputs": [],
"source": [
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import re\n",
"\n",
"plt.title(f'Final test accuracy: {testAccuracy[-1]:.3f}')\n",
"plt.plot(testAccuracy,label='Test')\n",
"plt.plot(trainAccuracy,label='Train')\n",
"plt.legend()\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.630115Z",
"iopub.status.idle": "2023-05-21T06:28:52.630937Z",
"shell.execute_reply": "2023-05-21T06:28:52.630583Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.630547Z"
}
},
"outputs": [],
"source": [
"plt.title(f'Final train loss: {losses[-1]:.3f}')\n",
"plt.plot(losses)\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.635934Z",
"iopub.status.idle": "2023-05-21T06:28:52.637341Z",
"shell.execute_reply": "2023-05-21T06:28:52.636488Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.636414Z"
}
},
"outputs": [],
"source": [
"inv_map = dict(zip(str_to_int.values(), str_to_int.keys()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.642433Z",
"iopub.status.idle": "2023-05-21T06:28:52.643393Z",
"shell.execute_reply": "2023-05-21T06:28:52.642984Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.642964Z"
}
},
"outputs": [],
"source": [
"predictions = torch.argmax(yHat,axis=1).cpu().detach().numpy().tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.646236Z",
"iopub.status.idle": "2023-05-21T06:28:52.647698Z",
"shell.execute_reply": "2023-05-21T06:28:52.647036Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.646959Z"
}
},
"outputs": [],
"source": [
"predictions = list(map(lambda x: inv_map[x],predictions))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.653068Z",
"iopub.status.idle": "2023-05-21T06:28:52.654218Z",
"shell.execute_reply": "2023-05-21T06:28:52.653379Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.653291Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img = X[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Prediction :{predictions[i]}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## TODO\n",
" - Add dropout to the CNN\n",
" - Try image augmentation techniques"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "49b49c05",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import asyncio\n",
"import torch.nn.functional as F\n",
"import torch.nn as nn\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"import io\n",
"import uvicorn\n",
"from fastapi import FastAPI, UploadFile\n",
"from PIL import Image\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f0148a4",
"metadata": {},
"outputs": [],
"source": [
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26e5f198",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4359663",
"metadata": {},
"outputs": [],
"source": [
"transform = transforms.Compose([\n",
" transforms.Resize((300, 300)),\n",
" transforms.Grayscale(num_output_channels=1),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize(mean=(0.5), std=(0.5))\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "308a25d3",
"metadata": {},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
" def __init__(self):\n",
" super(theCNN, self).__init__()\n",
" \n",
" self.conv01 = nn.Conv2d(\n",
" in_channels=1,\n",
" out_channels=10,\n",
" kernel_size=5,\n",
" stride=1,\n",
" padding=1\n",
" )\n",
" \n",
" self.conv02 = nn.Conv2d(\n",
" in_channels=10,\n",
" out_channels=20,\n",
" kernel_size=5,\n",
" stride=1,\n",
" padding=1\n",
" )\n",
" \n",
" expectedSize = int(np.floor((73 + 2 * 0 - 1) / 1) + 1)\n",
" expectedSize = 20 * int(expectedSize ** 2)\n",
" \n",
" self.fc01 = nn.Linear(expectedSize, 50)\n",
" self.output = nn.Linear(50, 16)\n",
"\n",
" def forward(self, x):\n",
" x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
" x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
" nUnits = x.shape.numel() / x.shape[0]\n",
" x = x.view(-1, int(nUnits))\n",
" x = F.relu(self.fc01(x))\n",
" return torch.softmax(self.output(x), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e3544d2",
"metadata": {},
"outputs": [],
"source": [
"model = theCNN()\n",
"model.load_state_dict(torch.load(\"model.pth\"))\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d521a8ca",
"metadata": {},
"outputs": [],
"source": [
"@app.post(\"/score\")\n",
"async def calculate_score(image_file: UploadFile):\n",
" image = Image.open(io.BytesIO(await image_file.read())).convert(\"L\")\n",
" image = transform(image).unsqueeze(0)\n",
"\n",
" with torch.no_grad():\n",
" output = model(image)\n",
"\n",
" probabilities = torch.softmax(output, dim=1)[0]\n",
" predicted_class = torch.argmax(probabilities).item()\n",
"\n",
" # Get the actual number corresponding to the hand sign\n",
" actual_number = get_actual_number_from_image(image)\n",
" \n",
" print(actual_number)\n",
"\n",
" # Compare predicted class with actual number and calculate correctness percentage\n",
" correct = int(predicted_class + 1 == actual_number)\n",
" print(correct)\n",
" correctness_percentage = correct / 1.0 * 100.0\n",
"\n",
" return {\"predicted_class\": predicted_class, \"correctness_percentage\": correctness_percentage}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "030c5fbc",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"def get_actual_number_from_image(image):\n",
" # Convert the image to numpy array\n",
" image_array = np.array(image)\n",
"\n",
" # Apply image processing techniques to detect and recognize digits\n",
" # Example steps: thresholding, contour detection, character segmentation, digit recognition\n",
"\n",
" # Apply thresholding\n",
" _, binary_image = cv2.threshold(image_array, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)\n",
"\n",
" # Find contours\n",
" contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n",
"\n",
" # Sort contours based on their x-coordinate\n",
" contours = sorted(contours, key=lambda cnt: cv2.boundingRect(cnt)[0])\n",
"\n",
" # Initialize the recognized digit sequence\n",
" digit_sequence = \"\"\n",
"\n",
" # Iterate over the contours and recognize digits\n",
" for contour in contours:\n",
" # Get the bounding box of the contour\n",
" x, y, w, h = cv2.boundingRect(contour)\n",
"\n",
" # Crop the digit region from the image\n",
" digit_image = binary_image[y:y + h, x:x + w]\n",
"\n",
" # Resize the digit image to a fixed size (e.g., 28x28)\n",
" resized_digit_image = cv2.resize(digit_image, (28, 28))\n",
"\n",
" # Preprocess the resized digit image (e.g., normalize pixel values)\n",
" preprocessed_digit_image = resized_digit_image / 255.0\n",
"\n",
" # Flatten the preprocessed digit image\n",
" flattened_digit_image = preprocessed_digit_image.flatten()\n",
"\n",
" # Pass the flattened digit image to your digit recognition model\n",
" # to get the predicted digit (e.g., using a separate model or the same model you used for training)\n",
"\n",
" # NOTE: `predict_digit` is assumed to exist but is not defined anywhere in this notebook.\n",
" # It must take the flattened digit image and return the predicted digit as an integer.\n",
" predicted_digit = predict_digit(flattened_digit_image)\n",
"\n",
" # Add the predicted digit to the digit sequence\n",
" digit_sequence += str(predicted_digit)\n",
"\n",
" # Convert the digit sequence to an integer\n",
" actual_number = int(digit_sequence)\n",
"\n",
" return actual_number\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5035c79a",
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
" loop = asyncio.get_event_loop()\n",
" loop.create_task(uvicorn.run(app, host=\"127.0.0.1\", port=8001))\n",
" loop.run_forever()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0449757",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "c898b57c",
"metadata": {},
"outputs": [],
"source": [
"pip install python-multipart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a99b4156",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import asyncio\n",
"import torch.nn.functional as F\n",
"\n",
"# Patch asyncio to allow nested use of the event loop (e.g. inside Jupyter, where a loop is already running)\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01b6e57c",
"metadata": {},
"outputs": [],
"source": [
"from fastapi import FastAPI, UploadFile\n",
"from PIL import Image\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"import io\n",
"import torch.nn as nn\n",
"import asyncio\n",
"import uvicorn "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e561f13",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e9e4208",
"metadata": {},
"outputs": [],
"source": [
"transform = transforms.Compose([\n",
" transforms.Resize((300, 300)),\n",
" transforms.Grayscale(num_output_channels=1),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize(mean=(0.5), std=(0.5))\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d86d2b8",
"metadata": {},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
" def __init__(self):\n",
" super(theCNN, self).__init__()\n",
" \n",
" self.conv01 = nn.Conv2d(\n",
" in_channels=1,\n",
" out_channels=10,\n",
" kernel_size=5,\n",
" stride=1,\n",
" padding=1\n",
" )\n",
" \n",
" self.conv02 = nn.Conv2d(\n",
" in_channels=10,\n",
" out_channels=20,\n",
" kernel_size=5,\n",
" stride=1,\n",
" padding=1\n",
" )\n",
" \n",
" expectedSize = int(np.floor((73 + 2 * 0 - 1) / 1) + 1)\n",
" expectedSize = 20 * int(expectedSize ** 2)\n",
" \n",
" self.fc01 = nn.Linear(expectedSize, 50)\n",
" self.output = nn.Linear(50, 16)\n",
"\n",
" def forward(self, x):\n",
" x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
" x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
" nUnits = x.shape.numel() / x.shape[0]\n",
" x = x.view(-1, int(nUnits))\n",
" x = F.relu(self.fc01(x))\n",
" return torch.softmax(self.output(x), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9379bf73",
"metadata": {},
"outputs": [],
"source": [
"model = theCNN()\n",
"model.load_state_dict(torch.load(\"model.pth\"))\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18b869d3",
"metadata": {},
"outputs": [],
"source": [
"@app.post(\"/score\")\n",
"async def calculate_score(image_file: UploadFile):\n",
" image = Image.open(io.BytesIO(await image_file.read())).convert(\"L\")\n",
" image = transform(image).unsqueeze(0)\n",
"\n",
" with torch.no_grad():\n",
" output = model(image)\n",
"\n",
" probabilities = torch.softmax(output, dim=1)[0]\n",
" similarity_scores = probabilities.numpy()\n",
"\n",
" return {\"similarity_scores\": similarity_scores.tolist()}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a99a8b6",
"metadata": {},
"outputs": [],
"source": [
"@app.get(\"/\")\n",
"async def hello_world(): \n",
"\n",
" return {\"Hello World\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b30f5c6",
"metadata": {},
"outputs": [],
"source": [
"if __name__ == \"__main__\":\n",
" loop = asyncio.get_event_loop()\n",
" loop.create_task(uvicorn.run(app, host=\"127.0.0.1\", port=8001))\n",
" loop.run_forever()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e2e07d8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3d735e52",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"import asyncio\n",
"import torch.nn.functional as F\n",
"\n",
"# Patch asyncio to allow nested use of the event loop (e.g. inside Jupyter, where a loop is already running)\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "db756418",
"metadata": {},
"outputs": [],
"source": [
"from fastapi import FastAPI, UploadFile\n",
"from PIL import Image\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"import numpy as np\n",
"import io\n",
"import torch.nn as nn\n",
"import asyncio\n",
"import uvicorn \n",
"from io import BytesIO"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fc83d1b8",
"metadata": {},
"outputs": [],
"source": [
"app = FastAPI()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "63dbfc01",
"metadata": {},
"outputs": [],
"source": [
"transform = transforms.Compose([\n",
" transforms.Resize((300, 300)),\n",
" transforms.Grayscale(num_output_channels=1),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize(mean=(0.5), std=(0.5))\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f7e5cdba",
"metadata": {},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
" def __init__(self):\n",
" super(theCNN, self).__init__()\n",
" \n",
" self.conv01 = nn.Conv2d(\n",
" in_channels=1,\n",
" out_channels=10,\n",
" kernel_size=5,\n",
" stride=1,\n",
" padding=1\n",
" )\n",
" \n",
" self.conv02 = nn.Conv2d(\n",
" in_channels=10,\n",
" out_channels=20,\n",
" kernel_size=5,\n",
" stride=1,\n",
" padding=1\n",
" )\n",
" \n",
" expectedSize = int(np.floor((73 + 2 * 0 - 1) / 1) + 1)\n",
" expectedSize = 20 * int(expectedSize ** 2)\n",
" \n",
" self.fc01 = nn.Linear(expectedSize, 50)\n",
" self.output = nn.Linear(50, 16)\n",
"\n",
" def forward(self, x):\n",
" x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
" x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
" nUnits = x.shape.numel() / x.shape[0]\n",
" x = x.view(-1, int(nUnits))\n",
" x = F.relu(self.fc01(x))\n",
" return torch.softmax(self.output(x), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d86a9515",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"theCNN(\n",
" (conv01): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))\n",
" (conv02): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))\n",
" (fc01): Linear(in_features=106580, out_features=50, bias=True)\n",
" (output): Linear(in_features=50, out_features=16, bias=True)\n",
")"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = theCNN()\n",
"model.load_state_dict(torch.load(\"model.pth\"))\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4b527135",
"metadata": {},
"outputs": [],
"source": [
"# @app.post(\"/predict-similarity\")\n",
"# async def predict_similarity(image: UploadFile):\n",
"# image_bytes = await image.read()\n",
"# img = Image.open(BytesIO(image_bytes))\n",
"# img = transform(img).unsqueeze(0)\n",
"# output = model(img)\n",
"# similarity_score = torch.max(output).item() * 100 # Get the maximum predicted probability as the similarity score\n",
"# return {\"similarity_score\": similarity_score}\n",
"\n",
"\n",
"@app.post(\"/predict-similarity\")\n",
"async def predict_similarity(sign: str, image: UploadFile):\n",
" image_bytes = await image.read()\n",
" img = Image.open(BytesIO(image_bytes))\n",
" img = transform(img).unsqueeze(0)\n",
" output = model(img)\n",
" similarity_score = torch.max(output).item() * 100 # Get the maximum predicted probability as the similarity score\n",
" return {\"sign\": sign, \"similarity_score\": similarity_score}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "df08611e",
"metadata": {},
"outputs": [],
"source": [
"@app.get(\"/\")\n",
"async def hello_world(): \n",
"\n",
" return {\"Hello World\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84b9601b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: Started server process [36440]\n",
"INFO: Waiting for application startup.\n",
"INFO: Application startup complete.\n",
"INFO: Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO: 127.0.0.1:63408 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63440 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63461 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63484 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63502 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63517 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63523 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63536 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63536 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63536 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n",
"INFO: 127.0.0.1:63546 - \"POST /predict-similarity HTTP/1.1\" 422 Unprocessable Entity\n"
]
}
],
"source": [
"if __name__ == \"__main__\":\n",
" loop = asyncio.get_event_loop()\n",
" loop.create_task(uvicorn.run(app, host=\"127.0.0.1\", port=8001))\n",
" loop.run_forever()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab358400",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "504cf81d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pip install torchvision"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.578710Z",
"iopub.status.idle": "2023-05-21T06:28:52.579058Z",
"shell.execute_reply": "2023-05-21T06:28:52.578896Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.578879Z"
}
},
"outputs": [],
"source": [
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.580301Z",
"iopub.status.idle": "2023-05-21T06:28:52.580679Z",
"shell.execute_reply": "2023-05-21T06:28:52.580498Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.580481Z"
}
},
"outputs": [],
"source": [
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.581548Z",
"iopub.status.idle": "2023-05-21T06:28:52.581894Z",
"shell.execute_reply": "2023-05-21T06:28:52.581734Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.581717Z"
}
},
"outputs": [],
"source": [
"# data_dir = \"\"\"../input/sinhala-sign-language-dataset-tdj/Sn_sign_language_dataset\"\"\"\n",
"data_dir = \"\"\"C:/Users/JanithGamage/Desktop/Research/pyhon/Mdoel-1/DataSet/Sn_sign_language_dataset\"\"\"\n",
"classes = []\n",
"\n",
"for directory in os.listdir(data_dir):\n",
" if \".\" not in directory: # Removes .txt and segmentation script\n",
" classes.append(directory) \n",
"print(classes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.583017Z",
"iopub.status.idle": "2023-05-21T06:28:52.583353Z",
"shell.execute_reply": "2023-05-21T06:28:52.583186Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.583169Z"
}
},
"outputs": [],
"source": [
"fp = []\n",
"class_name = []\n",
"for cls in classes:\n",
" files = os.listdir(f'{data_dir}/{cls}')\n",
" for file in files:\n",
" fp.append(f'{data_dir}/{cls}/{file}')\n",
" class_name.append(cls)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.584414Z",
"iopub.status.idle": "2023-05-21T06:28:52.584759Z",
"shell.execute_reply": "2023-05-21T06:28:52.584598Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.584580Z"
}
},
"outputs": [],
"source": [
"data = pd.DataFrame({\"File Path\":fp,\"Class\":class_name})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.585665Z",
"iopub.status.idle": "2023-05-21T06:28:52.585993Z",
"shell.execute_reply": "2023-05-21T06:28:52.585834Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.585817Z"
}
},
"outputs": [],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.587067Z",
"iopub.status.idle": "2023-05-21T06:28:52.587408Z",
"shell.execute_reply": "2023-05-21T06:28:52.587241Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.587224Z"
}
},
"outputs": [],
"source": [
"data['Class'] = data['Class'].apply(lambda x:x.title().replace(\"_\",\"\"))\n",
"data['Class'] = data['Class'].apply(lambda x:re.sub(r'[0-9]+', '', x))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.588313Z",
"iopub.status.idle": "2023-05-21T06:28:52.588671Z",
"shell.execute_reply": "2023-05-21T06:28:52.588490Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.588473Z"
}
},
"outputs": [],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.589898Z",
"iopub.status.idle": "2023-05-21T06:28:52.590257Z",
"shell.execute_reply": "2023-05-21T06:28:52.590086Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.590061Z"
}
},
"outputs": [],
"source": [
"clases_to_remove = ['When','Why','What','Who']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.591203Z",
"iopub.status.idle": "2023-05-21T06:28:52.592014Z",
"shell.execute_reply": "2023-05-21T06:28:52.591818Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.591791Z"
}
},
"outputs": [],
"source": [
"data = data[data['Class'].apply(lambda x: True if x not in clases_to_remove else False)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.594622Z",
"iopub.status.idle": "2023-05-21T06:28:52.595000Z",
"shell.execute_reply": "2023-05-21T06:28:52.594827Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.594802Z"
}
},
"outputs": [],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.596472Z",
"iopub.status.idle": "2023-05-21T06:28:52.596896Z",
"shell.execute_reply": "2023-05-21T06:28:52.596708Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.596686Z"
}
},
"outputs": [],
"source": [
"transform = transforms.Compose([\n",
" transforms.Resize((300,300)),\n",
" transforms.Grayscale(num_output_channels=1),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize(mean=(0.5),std=(0.5))\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.598034Z",
"iopub.status.idle": "2023-05-21T06:28:52.598390Z",
"shell.execute_reply": "2023-05-21T06:28:52.598222Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.598200Z"
}
},
"outputs": [],
"source": [
"str_to_int = {key:val for val,key in enumerate(data['Class'].unique())}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.600180Z",
"iopub.status.idle": "2023-05-21T06:28:52.600589Z",
"shell.execute_reply": "2023-05-21T06:28:52.600392Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.600367Z"
}
},
"outputs": [],
"source": [
"# str_to_int['One & Ten & Eleven'] = 1\n",
"# str_to_int['Two'] = 2\n",
"# str_to_int['Three & Thirteen'] = 3\n",
"# str_to_int['Four & Fourteen'] = 4\n",
"# str_to_int['Five'] = 5\n",
"# str_to_int['Six'] = 6\n",
"# str_to_int['Seven'] = 7\n",
"# str_to_int['Eight'] = 8\n",
"# str_to_int['Nine'] = 9\n",
"# str_to_int['Ten'] = 10\n",
"# str_to_int['Eleven'] = 11\n",
"# str_to_int['Thirteen'] = 13\n",
"# str_to_int['Fourteen'] = 14\n",
"# str_to_int['Twenty'] = 20\n",
"# str_to_int['Thirty'] = 30\n",
"# str_to_int['Fifty'] = 50"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:31:15.957270Z",
"iopub.status.busy": "2023-05-21T06:31:15.956916Z",
"iopub.status.idle": "2023-05-21T06:31:15.966787Z",
"shell.execute_reply": "2023-05-21T06:31:15.966023Z",
"shell.execute_reply.started": "2023-05-21T06:31:15.957230Z"
}
},
"outputs": [],
"source": [
"class MarkeDataset(Dataset):\n",
" def __init__(self, data, root_dir, transform=transforms.ToTensor()):\n",
" self.data = data\n",
" self.root_dir = root_dir\n",
" self.transform = transform\n",
" self.device = device\n",
" \n",
" def __len__(self):\n",
" return len(self.data)\n",
" \n",
" def __getitem__(self, idx):\n",
" if torch.is_tensor(idx):\n",
" idx = idx.tolist()\n",
" \n",
" img_name = self.data.iloc[idx, 0]\n",
" image = Image.open(img_name)\n",
" y_label = torch.tensor(str_to_int[self.data.iloc[idx, 1]]).to(self.device)\n",
" \n",
" if self.transform:\n",
" image = self.transform(image).to(self.device) \n",
" \n",
" return (image, y_label)\n",
"\n",
"# def __getitem__(self, idx):\n",
"# if torch.is_tensor(idx):\n",
"# idx = idx.tolist()\n",
"\n",
"# img_name = self.data.iloc[idx, 0]\n",
"# image = Image.open(img_name)\n",
"# y_label = torch.tensor(str_to_int[self.data.iloc[idx, 1]]).to(self.device)\n",
"# # y_label = torch.tensor(str_to_int[self.data.iloc[idx, 1]] - 1).to(self.device)\n",
"\n",
"# if self.transform:\n",
"# image = self.transform(image).to(self.device)\n",
"\n",
"# return (image, y_label)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:31:25.622005Z",
"iopub.status.busy": "2023-05-21T06:31:25.621491Z",
"iopub.status.idle": "2023-05-21T06:31:25.626122Z",
"shell.execute_reply": "2023-05-21T06:31:25.625148Z",
"shell.execute_reply.started": "2023-05-21T06:31:25.621968Z"
}
},
"outputs": [],
"source": [
"dataset = MarkeDataset(\n",
" data=data,\n",
" root_dir=data_dir,\n",
" transform=transform\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.606586Z",
"iopub.status.idle": "2023-05-21T06:28:52.606914Z",
"shell.execute_reply": "2023-05-21T06:28:52.606754Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.606737Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img, lab = dataset[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Label :{str(lab.item())}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:07.139527Z",
"iopub.status.busy": "2023-05-21T06:32:07.139161Z",
"iopub.status.idle": "2023-05-21T06:32:07.146617Z",
"shell.execute_reply": "2023-05-21T06:32:07.145658Z",
"shell.execute_reply.started": "2023-05-21T06:32:07.139487Z"
}
},
"outputs": [],
"source": [
"len(dataset)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:13.912232Z",
"iopub.status.busy": "2023-05-21T06:32:13.911889Z",
"iopub.status.idle": "2023-05-21T06:32:13.920718Z",
"shell.execute_reply": "2023-05-21T06:32:13.919857Z",
"shell.execute_reply.started": "2023-05-21T06:32:13.912196Z"
}
},
"outputs": [],
"source": [
"# batch_size = 16\n",
"# train_set, test_set = torch.utils.data.random_split(dataset, [8000,1419])\n",
"# trainLoader = DataLoader(dataset=train_set,batch_size=batch_size,shuffle=True,drop_last=True)\n",
"# testLoader = DataLoader(dataset=test_set,batch_size=1419)\n",
"\n",
"batch_size = 16\n",
"\n",
"train_size = int(0.85 * len(dataset))\n",
"test_size = len(dataset) - train_size\n",
"\n",
"train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])\n",
"trainLoader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, drop_last=True)\n",
"testLoader = DataLoader(dataset=test_set, batch_size=len(test_set))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:15.912440Z",
"iopub.status.busy": "2023-05-21T06:32:15.912118Z",
"iopub.status.idle": "2023-05-21T06:32:15.920625Z",
"shell.execute_reply": "2023-05-21T06:32:15.919790Z",
"shell.execute_reply.started": "2023-05-21T06:32:15.912404Z"
}
},
"outputs": [],
"source": [
"# Side length of the feature map after each conv -> 2x2 max-pool stage (300x300 input).\n",
"# Conv2d output side: floor((in + 2*padding - kernel_size) / stride) + 1\n",
"kernel_size = 5\n",
"stride = 1\n",
"padding = 1\n",
"\n",
"# The whole numerator is divided by the stride, not just kernel_size.\n",
"metrixSize1 = int(np.floor((300 + 2*padding - kernel_size) / stride) + 1)\n",
"metrixSize1 = int(np.floor(metrixSize1/2)) #applying 2x2 Max pooling operation\n",
"\n",
"metrixSize2 = int(np.floor((metrixSize1 + 2*padding - kernel_size) / stride) + 1)\n",
"metrixSize2 = int(np.floor(metrixSize2/2)) #applying 2x2 Max pooling operation\n",
"\n",
"print(f'Metrix size after 1st conv. and maxpool layer: {metrixSize1} X {metrixSize1}')\n",
"print(f'Metrix size after 2nd conv. and maxpool layer: {metrixSize2} X {metrixSize2}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:18.919815Z",
"iopub.status.busy": "2023-05-21T06:32:18.919472Z",
"iopub.status.idle": "2023-05-21T06:32:18.931885Z",
"shell.execute_reply": "2023-05-21T06:32:18.930832Z",
"shell.execute_reply.started": "2023-05-21T06:32:18.919776Z"
}
},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
"    \"\"\"Small CNN: two (5x5 conv -> 2x2 max-pool -> ReLU) stages, then 50- and 16-unit linear layers.\n",
"\n",
"    fc01 is sized for 20 x 73 x 73 features, which is what the two stages\n",
"    produce for a 1 x 300 x 300 input (300 -> 298 -> 149 -> 147 -> 73).\n",
"\n",
"    forward() returns raw class scores (logits): nn.CrossEntropyLoss applies\n",
"    log-softmax itself, so a softmax here would be applied twice during\n",
"    training. Use torch.softmax(logits, dim=1) when probabilities are needed;\n",
"    argmax over dim 1 is the same either way.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"\n",
"        self.conv01 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1, padding=1)\n",
"        self.conv02 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5, stride=1, padding=1)\n",
"\n",
"        # 20 output channels of conv02, each 73 x 73 after the second pooling.\n",
"        expectedSize = 20 * 73 * 73\n",
"\n",
"        self.fc01 = nn.Linear(expectedSize, 50)\n",
"        self.output = nn.Linear(50, 16)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Map a (batch, 1, 300, 300) tensor to (batch, 16) logits.\"\"\"\n",
"        # conv -> maxpool -> relu\n",
"        x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
"\n",
"        # conv -> maxpool -> relu\n",
"        x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
"\n",
"        # Flatten everything except the batch dimension.\n",
"        x = x.view(x.shape[0], -1)\n",
"\n",
"        x = F.relu(self.fc01(x))\n",
"\n",
"        return self.output(x)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:24.959976Z",
"iopub.status.busy": "2023-05-21T06:32:24.959650Z",
"iopub.status.idle": "2023-05-21T06:32:24.966633Z",
"shell.execute_reply": "2023-05-21T06:32:24.965555Z",
"shell.execute_reply.started": "2023-05-21T06:32:24.959940Z"
}
},
"outputs": [],
"source": [
"def createModel(lr):\n",
"    \"\"\"Return a fresh (theCNN, CrossEntropyLoss, Adam) triple; Adam uses learning rate `lr`.\"\"\"\n",
"    model = theCNN()\n",
"    criterion = nn.CrossEntropyLoss()\n",
"    adam = torch.optim.Adam(model.parameters(), lr=lr)\n",
"    return model, criterion, adam"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:26.930919Z",
"iopub.status.busy": "2023-05-21T06:32:26.930365Z",
"iopub.status.idle": "2023-05-21T06:32:26.991834Z",
"shell.execute_reply": "2023-05-21T06:32:26.990918Z",
"shell.execute_reply.started": "2023-05-21T06:32:26.930881Z"
}
},
"outputs": [],
"source": [
"createModel(0.01)[0](torch.randn(1,1,300,300))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:29.123842Z",
"iopub.status.busy": "2023-05-21T06:32:29.123530Z",
"iopub.status.idle": "2023-05-21T06:32:29.137330Z",
"shell.execute_reply": "2023-05-21T06:32:29.136105Z",
"shell.execute_reply.started": "2023-05-21T06:32:29.123809Z"
}
},
"outputs": [],
"source": [
"def trainModel(epochs, lr):\n",
"    \"\"\"Train a fresh theCNN for `epochs` epochs, scoring it on the test loader after each one.\n",
"\n",
"    Uses the notebook-level `device`, `trainLoader` and `testLoader`. Batches are\n",
"    fed to the network as the loaders yield them (no `.to(device)` here).\n",
"    The model's state_dict is written to 'model.pth' after every epoch.\n",
"\n",
"    Args:\n",
"        epochs: number of passes over `trainLoader`.\n",
"        lr: learning rate forwarded to `createModel`.\n",
"\n",
"    Returns:\n",
"        (net, losses, testAccuracy, trainAccuracy, yHat, X): the trained network,\n",
"        per-epoch mean training loss, per-epoch test and train accuracy in\n",
"        percent, and the outputs / inputs of the last evaluated test batch\n",
"        (both None when `epochs` is 0).\n",
"    \"\"\"\n",
"    net, LossFun, optimizer = createModel(lr)\n",
"    net = net.to(device)\n",
"    losses = torch.zeros(epochs)\n",
"    testAccuracy = torch.zeros(epochs)\n",
"    trainAccuracy = torch.zeros(epochs)\n",
"\n",
"    # Returned to the caller, so they must exist even if the loop never runs.\n",
"    yHat = X = None\n",
"\n",
"    for i in range(epochs):\n",
"        net.train()\n",
"        batchLoss = []\n",
"        batchAccuracy = []\n",
"\n",
"        for X, y in trainLoader:\n",
"            y = y.squeeze()\n",
"            yHat = net(X)\n",
"            loss = LossFun(yHat, y)\n",
"\n",
"            accuracy = 100 * torch.mean((torch.argmax(yHat, axis=1) == y).float())\n",
"            batchAccuracy.append(accuracy.item())\n",
"            batchLoss.append(loss.item())\n",
"\n",
"            optimizer.zero_grad()\n",
"            loss.backward()\n",
"            optimizer.step()\n",
"\n",
"        losses[i] = np.mean(batchLoss)\n",
"        trainAccuracy[i] = np.mean(batchAccuracy)\n",
"\n",
"        # Only the first batch of testLoader is scored.\n",
"        net.eval()\n",
"        X, y = next(iter(testLoader))\n",
"\n",
"        with torch.no_grad():\n",
"            yHat = net(X)\n",
"\n",
"        accuracy = 100 * torch.mean((torch.argmax(yHat, axis=1) == y.squeeze()).float())\n",
"        testAccuracy[i] = accuracy\n",
"        print(f'Test Accuracy: {accuracy:.3f}')\n",
"\n",
"        # Save the trained model\n",
"        torch.save(net.state_dict(), 'model.pth')\n",
"\n",
"    # yHat and X are unpacked by the training cell and read by the prediction cells.\n",
"    return net, losses, testAccuracy, trainAccuracy, yHat, X"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T06:32:33.702648Z",
"iopub.status.busy": "2023-05-21T06:32:33.702320Z"
},
"scrolled": false
},
"outputs": [],
"source": [
"%%time\n",
"net,losses,testAccuracy,trainAccuracy,yHat,X = trainModel(1,1e-4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.623133Z",
"iopub.status.idle": "2023-05-21T06:28:52.624249Z",
"shell.execute_reply": "2023-05-21T06:28:52.623682Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.623638Z"
}
},
"outputs": [],
"source": [
"plt.title(f'Final test accuracy: {testAccuracy[-1]:.3f}')\n",
"plt.plot(testAccuracy,label='Test')\n",
"plt.plot(trainAccuracy,label='Train')\n",
"plt.legend()\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.630115Z",
"iopub.status.idle": "2023-05-21T06:28:52.630937Z",
"shell.execute_reply": "2023-05-21T06:28:52.630583Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.630547Z"
}
},
"outputs": [],
"source": [
"plt.title(f'Final train loss: {losses[-1]:.3f}')\n",
"plt.plot(losses)\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.635934Z",
"iopub.status.idle": "2023-05-21T06:28:52.637341Z",
"shell.execute_reply": "2023-05-21T06:28:52.636488Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.636414Z"
}
},
"outputs": [],
"source": [
"inv_map = dict(zip(str_to_int.values(), str_to_int.keys()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.642433Z",
"iopub.status.idle": "2023-05-21T06:28:52.643393Z",
"shell.execute_reply": "2023-05-21T06:28:52.642984Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.642964Z"
}
},
"outputs": [],
"source": [
"predictions = torch.argmax(yHat,axis=1).cpu().detach().numpy().tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.646236Z",
"iopub.status.idle": "2023-05-21T06:28:52.647698Z",
"shell.execute_reply": "2023-05-21T06:28:52.647036Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.646959Z"
}
},
"outputs": [],
"source": [
"predictions = list(map(lambda x: inv_map[x],predictions))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.653068Z",
"iopub.status.idle": "2023-05-21T06:28:52.654218Z",
"shell.execute_reply": "2023-05-21T06:28:52.653379Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.653291Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img = X[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Prediction :{predictions[i]}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Todo\n",
" - Add Dropout\n",
" - Try Image augmentation techniques"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
"execution": {
"iopub.execute_input": "2023-05-21T15:21:22.473289Z",
"iopub.status.busy": "2023-05-21T15:21:22.472946Z",
"iopub.status.idle": "2023-05-21T15:21:22.479771Z",
"shell.execute_reply": "2023-05-21T15:21:22.478829Z",
"shell.execute_reply.started": "2023-05-21T15:21:22.473248Z"
}
},
"outputs": [],
"source": [
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:25.461226Z",
"iopub.status.busy": "2023-05-21T15:21:25.460874Z",
"iopub.status.idle": "2023-05-21T15:21:25.466436Z",
"shell.execute_reply": "2023-05-21T15:21:25.465440Z",
"shell.execute_reply.started": "2023-05-21T15:21:25.461185Z"
}
},
"outputs": [],
"source": [
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:27.486270Z",
"iopub.status.busy": "2023-05-21T15:21:27.485805Z",
"iopub.status.idle": "2023-05-21T15:21:27.513366Z",
"shell.execute_reply": "2023-05-21T15:21:27.512417Z",
"shell.execute_reply.started": "2023-05-21T15:21:27.486235Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['eight', 'eleven_2', 'eleven_3', 'fifty_1', 'fifty_2', 'fifty_3', 'five', 'four & fourteen_2', 'fourteen_1', 'fourteen_3', 'nine', 'one & ten_2 & eleven_1', 'seven', 'six', 'ten_1', 'ten_3', 'thirteen_1', 'thirteen_3', 'thirty_1', 'thirty_2', 'thirty_3', 'three & thirteen_2', 'twenty_1', 'twenty_2', 'twenty_3', 'two', 'what', 'when_1', 'when_2', 'when_3', 'who', 'why']\n"
]
}
],
"source": [
"# data_dir = \"\"\"../input/sinhala-sign-language-dataset-tdj/Sn_sign_language_dataset\"\"\"\n",
"data_dir = \"\"\"C:/Users/JanithGamage/Desktop/Research/pyhon/Mdoel-1/DataSet/Sn_sign_language_dataset\"\"\"\n",
"classes = []\n",
"\n",
"for directory in os.listdir(data_dir):\n",
" if \".\" not in directory: # Removes .txt and segmentation script\n",
" classes.append(directory) \n",
"print(classes)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:29.464717Z",
"iopub.status.busy": "2023-05-21T15:21:29.463975Z",
"iopub.status.idle": "2023-05-21T15:21:32.769963Z",
"shell.execute_reply": "2023-05-21T15:21:32.769064Z",
"shell.execute_reply.started": "2023-05-21T15:21:29.464662Z"
}
},
"outputs": [],
"source": [
"fp = []\n",
"class_name = []\n",
"for cls in classes:\n",
" files = os.listdir(f'{data_dir}/{cls}')\n",
" for file in files:\n",
" fp.append(f'{data_dir}/{cls}/{file}')\n",
" class_name.append(cls)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:37.426901Z",
"iopub.status.busy": "2023-05-21T15:21:37.426595Z",
"iopub.status.idle": "2023-05-21T15:21:37.439605Z",
"shell.execute_reply": "2023-05-21T15:21:37.438542Z",
"shell.execute_reply.started": "2023-05-21T15:21:37.426864Z"
}
},
"outputs": [],
"source": [
"data = pd.DataFrame({\"File Path\":fp,\"Class\":class_name})"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:38.852702Z",
"iopub.status.busy": "2023-05-21T15:21:38.852366Z",
"iopub.status.idle": "2023-05-21T15:21:38.876210Z",
"shell.execute_reply": "2023-05-21T15:21:38.875333Z",
"shell.execute_reply.started": "2023-05-21T15:21:38.852663Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"thirteen_1 668\n",
"six 598\n",
"thirteen_3 576\n",
"ten_1 467\n",
"twenty_1 463\n",
"why 438\n",
"eleven_3 435\n",
"seven 432\n",
"four & fourteen_2 423\n",
"three & thirteen_2 417\n",
"eight 416\n",
"two 386\n",
"one & ten_2 & eleven_1 383\n",
"five 381\n",
"what 375\n",
"ten_3 358\n",
"twenty_2 355\n",
"who 348\n",
"when_1 318\n",
"eleven_2 301\n",
"when_2 290\n",
"when_3 268\n",
"thirty_3 262\n",
"twenty_3 259\n",
"fifty_2 258\n",
"thirty_2 257\n",
"fifty_1 255\n",
"fifty_3 249\n",
"fourteen_3 246\n",
"thirty_1 230\n",
"fourteen_1 208\n",
"nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:40.858223Z",
"iopub.status.busy": "2023-05-21T15:21:40.857670Z",
"iopub.status.idle": "2023-05-21T15:21:40.901757Z",
"shell.execute_reply": "2023-05-21T15:21:40.900784Z",
"shell.execute_reply.started": "2023-05-21T15:21:40.858177Z"
}
},
"outputs": [],
"source": [
"data['Class'] = data['Class'].apply(lambda x:x.title().replace(\"_\",\"\"))\n",
"data['Class'] = data['Class'].apply(lambda x:re.sub(r'[0-9]+', '', x))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:43.784267Z",
"iopub.status.busy": "2023-05-21T15:21:43.783939Z",
"iopub.status.idle": "2023-05-21T15:21:43.796665Z",
"shell.execute_reply": "2023-05-21T15:21:43.795716Z",
"shell.execute_reply.started": "2023-05-21T15:21:43.784229Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Thirteen 1244\n",
"Twenty 1077\n",
"When 876\n",
"Ten 825\n",
"Fifty 762\n",
"Thirty 749\n",
"Eleven 736\n",
"Six 598\n",
"Fourteen 454\n",
"Why 438\n",
"Seven 432\n",
"Four & Fourteen 423\n",
"Three & Thirteen 417\n",
"Eight 416\n",
"Two 386\n",
"One & Ten & Eleven 383\n",
"Five 381\n",
"What 375\n",
"Who 348\n",
"Nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:21:58.633864Z",
"iopub.status.busy": "2023-05-21T15:21:58.633538Z",
"iopub.status.idle": "2023-05-21T15:21:58.639024Z",
"shell.execute_reply": "2023-05-21T15:21:58.638410Z",
"shell.execute_reply.started": "2023-05-21T15:21:58.633832Z"
}
},
"outputs": [],
"source": [
"clases_to_remove = ['When','Why','What','Who']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:00.893674Z",
"iopub.status.busy": "2023-05-21T15:22:00.893340Z",
"iopub.status.idle": "2023-05-21T15:22:00.908223Z",
"shell.execute_reply": "2023-05-21T15:22:00.907346Z",
"shell.execute_reply.started": "2023-05-21T15:22:00.893635Z"
}
},
"outputs": [],
"source": [
"data = data[data['Class'].apply(lambda x: True if x not in clases_to_remove else False)]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:03.880606Z",
"iopub.status.busy": "2023-05-21T15:22:03.880248Z",
"iopub.status.idle": "2023-05-21T15:22:03.891999Z",
"shell.execute_reply": "2023-05-21T15:22:03.890999Z",
"shell.execute_reply.started": "2023-05-21T15:22:03.880571Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Thirteen 1244\n",
"Twenty 1077\n",
"Ten 825\n",
"Fifty 762\n",
"Thirty 749\n",
"Eleven 736\n",
"Six 598\n",
"Fourteen 454\n",
"Seven 432\n",
"Four & Fourteen 423\n",
"Three & Thirteen 417\n",
"Eight 416\n",
"Two 386\n",
"One & Ten & Eleven 383\n",
"Five 381\n",
"Nine 136\n",
"Name: Class, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['Class'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:06.096597Z",
"iopub.status.busy": "2023-05-21T15:22:06.096301Z",
"iopub.status.idle": "2023-05-21T15:22:06.102135Z",
"shell.execute_reply": "2023-05-21T15:22:06.101082Z",
"shell.execute_reply.started": "2023-05-21T15:22:06.096565Z"
}
},
"outputs": [],
"source": [
"transform = transforms.Compose([\n",
" transforms.Resize((300,300)),\n",
" transforms.Grayscale(num_output_channels=1),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize(mean=(0.5),std=(0.5))\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:08.359982Z",
"iopub.status.busy": "2023-05-21T15:22:08.359687Z",
"iopub.status.idle": "2023-05-21T15:22:08.367491Z",
"shell.execute_reply": "2023-05-21T15:22:08.366322Z",
"shell.execute_reply.started": "2023-05-21T15:22:08.359950Z"
}
},
"outputs": [],
"source": [
"str_to_int = {key:val for val,key in enumerate(data['Class'].unique())}"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:10.520408Z",
"iopub.status.busy": "2023-05-21T15:22:10.520069Z",
"iopub.status.idle": "2023-05-21T15:22:10.526070Z",
"shell.execute_reply": "2023-05-21T15:22:10.525115Z",
"shell.execute_reply.started": "2023-05-21T15:22:10.520373Z"
}
},
"outputs": [],
"source": [
"# str_to_int['One & Ten & Eleven'] = 1\n",
"# str_to_int['Two'] = 2\n",
"# str_to_int['Three & Thirteen'] = 3\n",
"# str_to_int['Four & Fourteen'] = 4\n",
"# str_to_int['Five'] = 5\n",
"# str_to_int['Six'] = 6\n",
"# str_to_int['Seven'] = 7\n",
"# str_to_int['Eight'] = 8\n",
"# str_to_int['Nine'] = 9\n",
"# str_to_int['Ten'] = 10\n",
"# str_to_int['Eleven'] = 11\n",
"# str_to_int['Thirteen'] = 13\n",
"# str_to_int['Fourteen'] = 14\n",
"# str_to_int['Twenty'] = 20\n",
"# str_to_int['Thirty'] = 30\n",
"# str_to_int['Fifty'] = 50"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:13.056196Z",
"iopub.status.busy": "2023-05-21T15:22:13.055853Z",
"iopub.status.idle": "2023-05-21T15:22:13.066406Z",
"shell.execute_reply": "2023-05-21T15:22:13.065062Z",
"shell.execute_reply.started": "2023-05-21T15:22:13.056132Z"
}
},
"outputs": [],
"source": [
"class MarkeDataset(Dataset):\n",
"    \"\"\"Image dataset backed by a DataFrame of (file path, class name) rows.\n",
"\n",
"    Column 0 of `data` is read as the image path and column 1 as the class\n",
"    name, which is mapped to an integer through the notebook-level `str_to_int`.\n",
"    Labels and transformed images are moved to the notebook-level `device`.\n",
"    `root_dir` is stored but not read by this class.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, data, root_dir, transform=transforms.ToTensor()):\n",
"        self.data, self.root_dir = data, root_dir\n",
"        self.transform = transform\n",
"        self.device = device\n",
"\n",
"    def __len__(self):\n",
"        return len(self.data)\n",
"\n",
"    def __getitem__(self, idx):\n",
"        \"\"\"Return (image, label) for row `idx`; a tensor index is converted with tolist().\"\"\"\n",
"        row = idx.tolist() if torch.is_tensor(idx) else idx\n",
"\n",
"        image = Image.open(self.data.iloc[row, 0])\n",
"        class_name = self.data.iloc[row, 1]\n",
"        y_label = torch.tensor(str_to_int[class_name]).to(self.device)\n",
"\n",
"        # Without a transform the PIL image is returned as opened.\n",
"        if not self.transform:\n",
"            return (image, y_label)\n",
"\n",
"        return (self.transform(image).to(self.device), y_label)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:26.257162Z",
"iopub.status.busy": "2023-05-21T15:22:26.256652Z",
"iopub.status.idle": "2023-05-21T15:22:26.261594Z",
"shell.execute_reply": "2023-05-21T15:22:26.260674Z",
"shell.execute_reply.started": "2023-05-21T15:22:26.257103Z"
}
},
"outputs": [],
"source": [
"dataset = MarkeDataset(\n",
" data=data,\n",
" root_dir=data_dir,\n",
" transform=transform\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:29.282820Z",
"iopub.status.busy": "2023-05-21T15:22:29.282505Z",
"iopub.status.idle": "2023-05-21T15:22:30.450098Z",
"shell.execute_reply": "2023-05-21T15:22:30.449111Z",
"shell.execute_reply.started": "2023-05-21T15:22:29.282785Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img, lab = dataset[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Label :{str(lab.item())}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:34.686493Z",
"iopub.status.busy": "2023-05-21T15:22:34.686198Z",
"iopub.status.idle": "2023-05-21T15:22:34.692454Z",
"shell.execute_reply": "2023-05-21T15:22:34.691684Z",
"shell.execute_reply.started": "2023-05-21T15:22:34.686462Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"9419"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dataset)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:38.619412Z",
"iopub.status.busy": "2023-05-21T15:22:38.618543Z",
"iopub.status.idle": "2023-05-21T15:22:38.635127Z",
"shell.execute_reply": "2023-05-21T15:22:38.634375Z",
"shell.execute_reply.started": "2023-05-21T15:22:38.619366Z"
}
},
"outputs": [],
"source": [
"batch_size = 16\n",
"train_set, test_set = torch.utils.data.random_split(dataset, [8000,1419])\n",
"trainLoader = DataLoader(dataset=train_set,batch_size=batch_size,shuffle=True,drop_last=True)\n",
"testLoader = DataLoader(dataset=test_set,batch_size=1419)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:41.806762Z",
"iopub.status.busy": "2023-05-21T15:22:41.806452Z",
"iopub.status.idle": "2023-05-21T15:22:41.815670Z",
"shell.execute_reply": "2023-05-21T15:22:41.814797Z",
"shell.execute_reply.started": "2023-05-21T15:22:41.806729Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Metrix size after 1st conv. and maxpool layer: 149 X 149\n",
"Metrix size after 2nd conv. and maxpool layer: 73 X 73\n"
]
}
],
"source": [
"# Side length of the feature map after each conv -> 2x2 max-pool stage (300x300 input).\n",
"# Conv2d output side: floor((in + 2*padding - kernel_size) / stride) + 1\n",
"kernel_size = 5\n",
"stride = 1\n",
"padding = 1\n",
"\n",
"# The whole numerator is divided by the stride, not just kernel_size.\n",
"metrixSize1 = int(np.floor((300 + 2*padding - kernel_size) / stride) + 1)\n",
"metrixSize1 = int(np.floor(metrixSize1/2)) #applying 2x2 Max pooling operation\n",
"\n",
"metrixSize2 = int(np.floor((metrixSize1 + 2*padding - kernel_size) / stride) + 1)\n",
"metrixSize2 = int(np.floor(metrixSize2/2)) #applying 2x2 Max pooling operation\n",
"\n",
"print(f'Metrix size after 1st conv. and maxpool layer: {metrixSize1} X {metrixSize1}')\n",
"print(f'Metrix size after 2nd conv. and maxpool layer: {metrixSize2} X {metrixSize2}')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:44.572322Z",
"iopub.status.busy": "2023-05-21T15:22:44.571998Z",
"iopub.status.idle": "2023-05-21T15:22:44.585933Z",
"shell.execute_reply": "2023-05-21T15:22:44.584795Z",
"shell.execute_reply.started": "2023-05-21T15:22:44.572285Z"
}
},
"outputs": [],
"source": [
"class theCNN(nn.Module):\n",
"    \"\"\"Small CNN: two (5x5 conv -> 2x2 max-pool -> ReLU) stages, then 50- and 16-unit linear layers.\n",
"\n",
"    fc01 is sized for 20 x 73 x 73 features, which is what the two stages\n",
"    produce for a 1 x 300 x 300 input (300 -> 298 -> 149 -> 147 -> 73).\n",
"\n",
"    forward() returns raw class scores (logits): nn.CrossEntropyLoss applies\n",
"    log-softmax itself, so a softmax here would be applied twice during\n",
"    training. Use torch.softmax(logits, dim=1) when probabilities are needed;\n",
"    argmax over dim 1 is the same either way.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"\n",
"        self.conv01 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1, padding=1)\n",
"        self.conv02 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5, stride=1, padding=1)\n",
"\n",
"        # 20 output channels of conv02, each 73 x 73 after the second pooling.\n",
"        expectedSize = 20 * 73 * 73\n",
"\n",
"        self.fc01 = nn.Linear(expectedSize, 50)\n",
"        self.output = nn.Linear(50, 16)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Map a (batch, 1, 300, 300) tensor to (batch, 16) logits.\"\"\"\n",
"        # conv -> maxpool -> relu\n",
"        x = F.relu(F.max_pool2d(self.conv01(x), 2))\n",
"\n",
"        # conv -> maxpool -> relu\n",
"        x = F.relu(F.max_pool2d(self.conv02(x), 2))\n",
"\n",
"        # Flatten everything except the batch dimension.\n",
"        x = x.view(x.shape[0], -1)\n",
"\n",
"        x = F.relu(self.fc01(x))\n",
"\n",
"        return self.output(x)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:49.860090Z",
"iopub.status.busy": "2023-05-21T15:22:49.859764Z",
"iopub.status.idle": "2023-05-21T15:22:49.865939Z",
"shell.execute_reply": "2023-05-21T15:22:49.864657Z",
"shell.execute_reply.started": "2023-05-21T15:22:49.860052Z"
}
},
"outputs": [],
"source": [
"def createModel(lr):\n",
"    \"\"\"Return a fresh (theCNN, CrossEntropyLoss, Adam) triple; Adam uses learning rate `lr`.\"\"\"\n",
"    model = theCNN()\n",
"    criterion = nn.CrossEntropyLoss()\n",
"    adam = torch.optim.Adam(model.parameters(), lr=lr)\n",
"    return model, criterion, adam"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:51.830628Z",
"iopub.status.busy": "2023-05-21T15:22:51.830107Z",
"iopub.status.idle": "2023-05-21T15:22:52.032717Z",
"shell.execute_reply": "2023-05-21T15:22:52.031798Z",
"shell.execute_reply.started": "2023-05-21T15:22:51.830587Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.0636, 0.0599, 0.0594, 0.0579, 0.0574, 0.0701, 0.0585, 0.0668, 0.0578,\n",
" 0.0632, 0.0598, 0.0619, 0.0726, 0.0633, 0.0666, 0.0611]],\n",
" grad_fn=<SoftmaxBackward0>)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"createModel(0.01)[0](torch.randn(1,1,300,300))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:55.120195Z",
"iopub.status.busy": "2023-05-21T15:22:55.119083Z",
"iopub.status.idle": "2023-05-21T15:22:55.134205Z",
"shell.execute_reply": "2023-05-21T15:22:55.133312Z",
"shell.execute_reply.started": "2023-05-21T15:22:55.120083Z"
}
},
"outputs": [],
"source": [
"def trainModel(epochs,lr):\n",
"    \"\"\"Train a fresh theCNN for `epochs` epochs, scoring it on the test loader after each one.\n",
"\n",
"    Uses the notebook-level `device`, `trainLoader` and `testLoader`. Batches are\n",
"    fed to the network as the loaders yield them (no `.to(device)` here).\n",
"    The whole model object is saved to \"model1.pth\" after every epoch.\n",
"\n",
"    Args:\n",
"        epochs: number of passes over `trainLoader`.\n",
"        lr: learning rate forwarded to `createModel`.\n",
"\n",
"    Returns:\n",
"        (net, losses, testAccuracy, trainAccuracy, yHat, X): the trained network,\n",
"        per-epoch mean training loss, per-epoch test and train accuracy in\n",
"        percent, and the outputs / inputs of the last evaluated test batch\n",
"        (both None when `epochs` is 0).\n",
"    \"\"\"\n",
"    net,LossFun,optimizer = createModel(lr)\n",
"    net = net.to(device)\n",
"    losses = torch.zeros(epochs)\n",
"    testAccuracy = torch.zeros(epochs)\n",
"    trainAccuracy = torch.zeros(epochs)\n",
"\n",
"    # Returned to the caller, so they must exist even if the loop never runs.\n",
"    yHat = X = None\n",
"\n",
"    for i in range(epochs):\n",
"        print(\"start\")\n",
"        net.train()\n",
"        batchLoss = []\n",
"        batchAccuracy = []\n",
"        for X,y in trainLoader:\n",
"            yHat = net(X)\n",
"            loss = LossFun(yHat,y)\n",
"\n",
"            accuracy = 100*torch.mean((torch.argmax(yHat,axis=1)==y).float())\n",
"            batchAccuracy.append(accuracy.item())\n",
"            batchLoss.append(loss.item())\n",
"\n",
"            optimizer.zero_grad()\n",
"            loss.backward()\n",
"            optimizer.step()\n",
"\n",
"        losses[i] = np.mean(batchLoss)\n",
"        trainAccuracy[i] = np.mean(batchAccuracy)\n",
"\n",
"        # Only the first batch of testLoader is scored.\n",
"        net.eval()\n",
"        X,y = next(iter(testLoader))\n",
"        with torch.no_grad():\n",
"            yHat = net(X)\n",
"        accuracy = 100*torch.mean((torch.argmax(yHat,axis=1)==y).float())\n",
"        testAccuracy[i] = accuracy\n",
"        print(f'Test Accuracy: {accuracy:.3f}')\n",
"\n",
"        # Save the trained model (the full pickled module, not just its state_dict).\n",
"        print(\"save the model\")\n",
"        torch.save(net, \"model1.pth\")\n",
"\n",
"    return net,losses,testAccuracy,trainAccuracy,yHat,X"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"execution": {
"iopub.execute_input": "2023-05-21T15:22:58.112687Z",
"iopub.status.busy": "2023-05-21T15:22:58.112351Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"start\n",
"Test Accuracy: 77.590\n",
"save the model\n",
"start\n",
"Test Accuracy: 84.003\n",
"save the model\n",
"start\n",
"Test Accuracy: 84.496\n",
"save the model\n",
"start\n",
"Test Accuracy: 85.976\n",
"save the model\n",
"start\n",
"Test Accuracy: 86.469\n",
"save the model\n",
"start\n",
"Test Accuracy: 86.610\n",
"save the model\n",
"start\n",
"Test Accuracy: 86.328\n",
"save the model\n",
"start\n",
"Test Accuracy: 86.751\n",
"save the model\n",
"start\n",
"Test Accuracy: 86.399\n",
"save the model\n",
"start\n",
"Test Accuracy: 86.681\n",
"save the model\n",
"CPU times: total: 4h 42s\n",
"Wall time: 1h 3min 31s\n"
]
}
],
"source": [
"%%time\n",
"net,losses,testAccuracy,trainAccuracy,yHat,X = trainModel(10,1e-4)\n",
"# net,losses,testAccuracy,trainAccuracy,yHat,X = trainModel(1,1e-4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.623133Z",
"iopub.status.idle": "2023-05-21T06:28:52.624249Z",
"shell.execute_reply": "2023-05-21T06:28:52.623682Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.623638Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from PIL import Image\n",
"from torch.utils.data import Dataset\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import re\n",
"\n",
"plt.title(f'Final test accuracy: {testAccuracy[-1]:.3f}')\n",
"plt.plot(testAccuracy,label='Test')\n",
"plt.plot(trainAccuracy,label='Train')\n",
"plt.legend()\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.630115Z",
"iopub.status.idle": "2023-05-21T06:28:52.630937Z",
"shell.execute_reply": "2023-05-21T06:28:52.630583Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.630547Z"
}
},
"outputs": [],
"source": [
"plt.title(f'Final train loss: {losses[-1]:.3f}')\n",
"plt.plot(losses)\n",
"plt.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.635934Z",
"iopub.status.idle": "2023-05-21T06:28:52.637341Z",
"shell.execute_reply": "2023-05-21T06:28:52.636488Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.636414Z"
}
},
"outputs": [],
"source": [
"inv_map = dict(zip(str_to_int.values(), str_to_int.keys()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.642433Z",
"iopub.status.idle": "2023-05-21T06:28:52.643393Z",
"shell.execute_reply": "2023-05-21T06:28:52.642984Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.642964Z"
}
},
"outputs": [],
"source": [
"predictions = torch.argmax(yHat,axis=1).cpu().detach().numpy().tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.646236Z",
"iopub.status.idle": "2023-05-21T06:28:52.647698Z",
"shell.execute_reply": "2023-05-21T06:28:52.647036Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.646959Z"
}
},
"outputs": [],
"source": [
"predictions = list(map(lambda x: inv_map[x],predictions))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.status.busy": "2023-05-21T06:28:52.653068Z",
"iopub.status.idle": "2023-05-21T06:28:52.654218Z",
"shell.execute_reply": "2023-05-21T06:28:52.653379Z",
"shell.execute_reply.started": "2023-05-21T06:28:52.653291Z"
}
},
"outputs": [],
"source": [
"randIds = np.random.randint(0,1000,size=10)\n",
"fig,ax = plt.subplots(2,5,figsize=(15,5))\n",
"for i,axi in zip(randIds,ax.flatten()):\n",
" img = X[i]\n",
" axi.imshow(img.cpu().numpy().transpose((1, 2, 0)),cmap='gray')\n",
" axi.text(x = 150,y =2,s =f'Prediction :{predictions[i]}',ha='center',backgroundcolor='y')\n",
" axi.axis('off')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## To do\n",
" - Add dropout\n",
" - Try image augmentation techniques"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment