audio model

94db1936 · thirani · d0e05967 · 94db1936
Commit 94db1936 authored May 24, 2023 by thirani
Hide whitespace changes
Inline Side-by-side

Showing with 834 additions and 0 deletions

Audio_Based_Object_Detection.ipynb Audio_Based_Object_Detection.ipynb +834 -0

No files found.
--- a/Audio_Based_Object_Detection.ipynb
+++ b/Audio_Based_Object_Detection.ipynb
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "import tensorflow as tf\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.preprocessing import LabelEncoder\n",
+        "import librosa"
+      ],
+      "metadata": {
+        "id": "OlmoQenQwYA4"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/gdrive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "MhVIS9IDLaw4",
+        "outputId": "48dbb406-2a23-45c8-abfb-3bb5ac49978d"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/gdrive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!cp /content/gdrive/MyDrive/AudioDataset/myAudioDataSet.zip /content"
+      ],
+      "metadata": {
+        "id": "UGghL1U9Lh2o"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Extract the archive that was copied into /content.\n",
+        "# -o overwrites existing files without prompting, so the cell can be re-run;\n",
+        "# the explicit paths avoid depending on the current working directory.\n",
+        "!unzip -q -o /content/myAudioDataSet.zip -d /content"
+      ],
+      "metadata": {
+        "id": "DeVJXwLiMlDx"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Dataset root and the names of its train/test subfolders.\n",
+        "# The subfolder names are relative so that os.path.join(main_folder, ...) really\n",
+        "# builds on main_folder (an absolute second argument would replace it entirely).\n",
+        "main_folder = '/content/myAudioDataSet'\n",
+        "train_subfolder = 'train'\n",
+        "test_subfolder = 'test'"
+      ],
+      "metadata": {
+        "id": "-7n2NI2Zwabf"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define the class labels based on the subfolder names\n",
+        "my_classes = ['cat', 'computer', 'dog', 'door', 'person']"
+      ],
+      "metadata": {
+        "id": "VlNxTzFdwikk"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define the base data paths for train and test sets\n",
+        "base_train_data_path = os.path.join(main_folder, train_subfolder)\n",
+        "base_test_data_path = os.path.join(main_folder, test_subfolder)"
+      ],
+      "metadata": {
+        "id": "4BYCLuxAwkmZ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define audio feature extraction function using librosa\n",
+        "def extract_features(file_path, sr=16000, n_mfcc=20):\n",
+        "    \"\"\"Load an audio file and return its MFCC features.\n",
+        "\n",
+        "    Args:\n",
+        "        file_path: Path of the audio file passed to librosa.load.\n",
+        "        sr: Sampling rate requested from librosa.load and passed on to the\n",
+        "            MFCC computation (default 16000).\n",
+        "        n_mfcc: Number of MFCC coefficients to compute (default 20).\n",
+        "\n",
+        "    Returns:\n",
+        "        The array returned by librosa.feature.mfcc for the loaded signal.\n",
+        "    \"\"\"\n",
+        "    audio, _ = librosa.load(file_path, sr=sr)\n",
+        "    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)"
+      ],
+      "metadata": {
+        "id": "GLg2nDMxwmeT"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load audio file paths for training set, one folder per class.\n",
+        "# os.listdir returns entries in arbitrary order; sorting keeps the file order\n",
+        "# (and therefore any seeded split of this list) identical on every run.\n",
+        "train_file_paths = []\n",
+        "train_labels = []\n",
+        "for class_label in my_classes:\n",
+        "    class_folder = os.path.join(base_train_data_path, class_label)\n",
+        "    file_paths = [\n",
+        "        os.path.join(class_folder, file_name)\n",
+        "        for file_name in sorted(os.listdir(class_folder))\n",
+        "        if file_name.endswith('.wav')\n",
+        "    ]\n",
+        "    train_file_paths.extend(file_paths)\n",
+        "    # One label per collected file, so paths and labels stay aligned\n",
+        "    train_labels.extend([class_label] * len(file_paths))"
+      ],
+      "metadata": {
+        "id": "1Y9Wf3rGwoUf"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load audio file paths for test set, one folder per class.\n",
+        "# os.listdir returns entries in arbitrary order; sorting keeps the test examples\n",
+        "# in the same order on every run.\n",
+        "test_file_paths = []\n",
+        "test_labels = []\n",
+        "for class_label in my_classes:\n",
+        "    class_folder = os.path.join(base_test_data_path, class_label)\n",
+        "    file_paths = [\n",
+        "        os.path.join(class_folder, file_name)\n",
+        "        for file_name in sorted(os.listdir(class_folder))\n",
+        "        if file_name.endswith('.wav')\n",
+        "    ]\n",
+        "    test_file_paths.extend(file_paths)\n",
+        "    # One label per collected file, so paths and labels stay aligned\n",
+        "    test_labels.extend([class_label] * len(file_paths))"
+      ],
+      "metadata": {
+        "id": "mJ3btJJ7wpNg"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Encode labels\n",
+        "label_encoder = LabelEncoder()\n",
+        "train_labels_encoded = label_encoder.fit_transform(train_labels)\n",
+        "test_labels_encoded = label_encoder.transform(test_labels)"
+      ],
+      "metadata": {
+        "id": "94TldR2IwsXQ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Split data into train and validation sets\n",
+        "train_file_paths, val_file_paths, train_labels_encoded, val_labels_encoded = train_test_split(\n",
+        "    train_file_paths, train_labels_encoded, test_size=0.2, stratify=train_labels_encoded, random_state=42\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "CsSRKQN_wvAx"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Compute the features of every training file and combine them into one array\n",
+        "train_features = np.stack([extract_features(path) for path in train_file_paths])"
+      ],
+      "metadata": {
+        "id": "sKr1LgzAwwki"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Compute the features of every validation file and combine them into one array\n",
+        "val_features = np.stack([extract_features(path) for path in val_file_paths])"
+      ],
+      "metadata": {
+        "id": "0ubAv-Tjw0EQ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Compute the features of every test file and combine them into one array\n",
+        "test_features = np.stack([extract_features(path) for path in test_file_paths])"
+      ],
+      "metadata": {
+        "id": "5Ll27c6Kw2Go"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Normalize feature values.\n",
+        "# Mean and standard deviation are computed on the training features only and\n",
+        "# reused for the validation and test features, so all three sets go through the\n",
+        "# same transform and no statistics of held-out data enter the preprocessing.\n",
+        "feature_mean = np.mean(train_features)\n",
+        "feature_std = np.std(train_features)\n",
+        "train_features = (train_features - feature_mean) / feature_std\n",
+        "val_features = (val_features - feature_mean) / feature_std\n",
+        "test_features = (test_features - feature_mean) / feature_std"
+      ],
+      "metadata": {
+        "id": "qnhwpLvew4Pq"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Build the tf.data pipelines: the training data is shuffled with a buffer as\n",
+        "# large as the training set and batched by 32; validation data is only batched.\n",
+        "train_ds = (\n",
+        "    tf.data.Dataset.from_tensor_slices((train_features, train_labels_encoded))\n",
+        "    .shuffle(len(train_features))\n",
+        "    .batch(32)\n",
+        ")\n",
+        "val_ds = (\n",
+        "    tf.data.Dataset.from_tensor_slices((val_features, val_labels_encoded))\n",
+        "    .batch(32)\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "EqrZ1soTw51R"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from tensorflow.keras.models import Sequential\n",
+        "from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten\n",
+        "from tensorflow.keras.optimizers import Adam\n",
+        "from sklearn import metrics"
+      ],
+      "metadata": {
+        "id": "uTAW6KLZNCWg"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model = tf.keras.Sequential([\n",
+        "    tf.keras.layers.Input(shape=(train_features.shape[1], train_features.shape[2])),\n",
+        "    tf.keras.layers.Conv1D(64, 3, activation='relu'),\n",
+        "    tf.keras.layers.MaxPooling1D(2),\n",
+        "    tf.keras.layers.Conv1D(128, 3, activation='relu'),\n",
+        "    tf.keras.layers.MaxPooling1D(2),\n",
+        "    tf.keras.layers.Conv1D(256, 3, activation='relu'),\n",
+        "    tf.keras.layers.GlobalAveragePooling1D(),\n",
+        "    tf.keras.layers.Dense(128, activation='relu'),\n",
+        "    tf.keras.layers.Dense(len(my_classes))\n",
+        "])"
+      ],
+      "metadata": {
+        "id": "0Mup4RTGNYbi"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model.summary()"
+      ],
+      "metadata": {
+        "id": "7Yqfimrzw9H3"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Configure training: Adam optimizer, sparse categorical cross-entropy computed\n",
+        "# from logits, and accuracy as the reported metric\n",
+        "model.compile(\n",
+        "    optimizer=tf.keras.optimizers.Adam(),\n",
+        "    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+        "    metrics=['accuracy'],\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "_76Icnvow-bo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Train the model\n",
+        "history = model.fit(train_ds, epochs=100, validation_data=val_ds)"
+      ],
+      "metadata": {
+        "id": "heog45TNxAyP"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Evaluate the model on the test set\n",
+        "test_loss, test_acc = model.evaluate(test_features, test_labels_encoded)\n",
+        "print(f'Test Loss: {test_loss:.4f}')\n",
+        "print(f'Test Accuracy: {test_acc:.4f}')\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "mVwLmcnKwW_7",
+        "outputId": "edb64e6a-7349-419b-bb9a-8b15173ca666"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "2/2 [==============================] - 0s 8ms/step - loss: 0.5738 - accuracy: 0.9200\n",
+            "Test Loss: 0.5738\n",
+            "Test Accuracy: 0.9200\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "# Generate predictions for the test set; the highest score per row is the predicted class\n",
+        "predictions = model.predict(test_features)\n",
+        "predicted_labels = np.argmax(predictions, axis=1)\n",
+        "\n",
+        "# Convert the encoded labels back to original class labels (once, not per panel)\n",
+        "predicted_class_labels = label_encoder.inverse_transform(predicted_labels)\n",
+        "true_class_labels = label_encoder.inverse_transform(test_labels_encoded)\n",
+        "\n",
+        "# Plot some examples from the test set along with their predicted labels.\n",
+        "# Never ask for more panels than there are test examples.\n",
+        "num_examples = min(5, len(test_features))\n",
+        "\n",
+        "# squeeze=False keeps `axes` two-dimensional even when a single panel is drawn\n",
+        "fig, axes = plt.subplots(1, num_examples, figsize=(12, 8), squeeze=False)\n",
+        "for i, ax in enumerate(axes[0]):\n",
+        "    ax.imshow(test_features[i].T, cmap='inferno', origin='lower', aspect='auto')\n",
+        "    ax.set_title(f'True: {true_class_labels[i]}\\nPredicted: {predicted_class_labels[i]}')\n",
+        "    ax.axis('off')\n",
+        "\n",
+        "fig.tight_layout()\n",
+        "plt.show()\n"
+      ],
+      "metadata": {
+        "id": "TVUUvDoWxYbT"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import tensorflow as tf\n",
+        "import numpy as np\n",
+        "import joblib\n",
+        "\n",
+        "# Convert the model to TensorFlow Lite\n",
+        "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+        "tflite_model = converter.convert()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "kFYXhqTuyOHL",
+        "outputId": "a36c0015-cdd6-4ccd-88e5-b307d1badb3c"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _update_step_xla while saving (showing 4 of 4). These functions will not be directly callable after loading.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Write the converted TensorFlow Lite model to disk in binary mode\n",
+        "tflite_model_path = '/content/sound.tflite'\n",
+        "with open(tflite_model_path, mode='wb') as tflite_file:\n",
+        "    tflite_file.write(tflite_model)"
+      ],
+      "metadata": {
+        "id": "UKzcA82gyYYt"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Save the label encoder\n",
+        "label_encoder_path = '/content/labels_encoder.pkl'\n",
+        "joblib.dump(label_encoder, label_encoder_path)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cJ6nN9lJycyu",
+        "outputId": "49fad93b-033d-46af-dc25-672cff082865"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['/content/labels_encoder.pkl']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 27
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Test Sample WAV Files"
+      ],
+      "metadata": {
+        "id": "nJ0mACRAyFrU"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load the TensorFlow Lite model from the path it was saved to\n",
+        "interpreter = tf.lite.Interpreter(model_path=tflite_model_path)\n",
+        "interpreter.allocate_tensors()\n",
+        "\n",
+        "# Get input and output details\n",
+        "input_details = interpreter.get_input_details()\n",
+        "output_details = interpreter.get_output_details()\n",
+        "\n",
+        "# Print the input tensor details\n",
+        "print(\"Input Tensor Details:\")\n",
+        "print(f\"Name: {input_details[0]['name']}\")\n",
+        "print(f\"Shape: {input_details[0]['shape']}\")\n",
+        "print(f\"Data Type: {input_details[0]['dtype']}\")\n",
+        "print(f\"Quantization Parameters: {input_details[0]['quantization']}\")\n",
+        "\n",
+        "# Extract features for the sample audio (extract_features loads the file itself,\n",
+        "# so no separate librosa.load call is needed)\n",
+        "sample_file_path = '/content/footsteps-1.wav'\n",
+        "sample_features = extract_features(sample_file_path)\n",
+        "\n",
+        "# Take the expected shape from the model instead of hard-coding it.\n",
+        "# Index 0 is the batch dimension; index 2 is the axis that is padded/truncated below.\n",
+        "expected_shape = input_details[0]['shape']\n",
+        "expected_frames = int(expected_shape[2])\n",
+        "\n",
+        "if sample_features.shape[1] < expected_frames:\n",
+        "    # Pad the features with zeros\n",
+        "    pad_width = expected_frames - sample_features.shape[1]\n",
+        "    sample_features = np.pad(sample_features, ((0, 0), (0, pad_width)))\n",
+        "elif sample_features.shape[1] > expected_frames:\n",
+        "    # Truncate the features\n",
+        "    sample_features = sample_features[:, :expected_frames]\n",
+        "\n",
+        "# NOTE(review): the training features are mean/std normalized before fitting, but\n",
+        "# this sample is fed to the model un-normalized - confirm whether the same scaling\n",
+        "# should be applied here.\n",
+        "\n",
+        "# Add the batch dimension and use the dtype the interpreter expects\n",
+        "sample_features = np.expand_dims(sample_features, axis=0).astype(input_details[0]['dtype'])\n",
+        "\n",
+        "# Print the shapes for debugging\n",
+        "print(f\"sample_features shape: {sample_features.shape}\")\n",
+        "print(f\"expected shape: {input_details[0]['shape']}\")\n",
+        "\n",
+        "# Run inference on the sample audio\n",
+        "interpreter.set_tensor(input_details[0]['index'], sample_features)\n",
+        "interpreter.invoke()\n",
+        "output = interpreter.get_tensor(output_details[0]['index'])\n",
+        "predicted_class = np.argmax(output)\n",
+        "\n",
+        "# Map the predicted class index to the actual class label\n",
+        "predicted_label = label_encoder.inverse_transform([predicted_class])[0]\n",
+        "\n",
+        "print(f'Predicted Class: {predicted_label}')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "c0MICiyOAPsx",
+        "outputId": "8b012b18-a63f-4605-a7e6-bb68297b9789"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Input Tensor Details:\n",
+            "Name: serving_default_input_1:0\n",
+            "Shape: [  1  20 157]\n",
+            "Data Type: <class 'numpy.float32'>\n",
+            "Quantization Parameters: (0.0, 0)\n",
+            "sample_features shape: (1, 20, 157, 1)\n",
+            "expected shape: [  1  20 157]\n",
+            "Updated sample_features shape: (1, 20, 157)\n",
+            "Predicted Class: person\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Install pydub into the kernel's environment, pinned for reproducibility\n",
+        "%pip install pydub==0.25.1"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "8TNcrgKLSi_g",
+        "outputId": "0895fa89-fd8c-43e1-deea-cde51ee90d0b"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+            "Collecting pydub\n",
+            "  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
+            "Installing collected packages: pydub\n",
+            "Successfully installed pydub-0.25.1\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Test Sample MP3 Files"
+      ],
+      "metadata": {
+        "id": "5kM7JbhbyKnX"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "from pydub import AudioSegment\n",
+        "\n",
+        "# Load the sample audio file\n",
+        "sample_file_path = '/content/quick-mechanical-keyboard-14391.mp3'\n",
+        "\n",
+        "# Convert MP3 to WAV\n",
+        "output_file_path = '/content/quick-mechanical-keyboard-14391.wav'\n",
+        "AudioSegment.from_mp3(sample_file_path).export(output_file_path, format='wav')\n",
+        "\n",
+        "# Extract features from the converted WAV file (not from the original MP3);\n",
+        "# extract_features loads the file itself, so no separate librosa.load call is needed\n",
+        "sample_features = extract_features(output_file_path)\n",
+        "\n",
+        "# Take the expected shape from the model instead of hard-coding it.\n",
+        "# Index 0 is the batch dimension; index 2 is the axis that is padded/truncated below.\n",
+        "expected_shape = input_details[0]['shape']\n",
+        "expected_frames = int(expected_shape[2])\n",
+        "\n",
+        "if sample_features.shape[1] < expected_frames:\n",
+        "    # Pad the features with zeros\n",
+        "    pad_width = expected_frames - sample_features.shape[1]\n",
+        "    sample_features = np.pad(sample_features, ((0, 0), (0, pad_width)))\n",
+        "elif sample_features.shape[1] > expected_frames:\n",
+        "    # Truncate the features\n",
+        "    sample_features = sample_features[:, :expected_frames]\n",
+        "\n",
+        "# NOTE(review): the training features are mean/std normalized before fitting, but\n",
+        "# this sample is fed to the model un-normalized - confirm whether the same scaling\n",
+        "# should be applied here.\n",
+        "\n",
+        "# Add the batch dimension and use the dtype the interpreter expects\n",
+        "sample_features = np.expand_dims(sample_features, axis=0).astype(input_details[0]['dtype'])\n",
+        "\n",
+        "# Print the shapes for debugging\n",
+        "print(f\"sample_features shape: {sample_features.shape}\")\n",
+        "print(f\"expected shape: {input_details[0]['shape']}\")\n",
+        "\n",
+        "# Run inference on the sample audio\n",
+        "interpreter.set_tensor(input_details[0]['index'], sample_features)\n",
+        "interpreter.invoke()\n",
+        "output = interpreter.get_tensor(output_details[0]['index'])\n",
+        "predicted_class = np.argmax(output)\n",
+        "\n",
+        "# Map the predicted class index to the actual class label\n",
+        "predicted_label = label_encoder.inverse_transform([predicted_class])[0]\n",
+        "\n",
+        "print(f'Predicted Class: {predicted_label}')"
+      ],
+      "metadata": {
+        "id": "Q7bpi_3iAmgQ",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "8292a746-d1c6-4547-9531-1af8ce75085e"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "sample_features shape: (1, 20, 157, 1)\n",
+            "expected shape: [  1  20 157]\n",
+            "Updated sample_features shape: (1, 20, 157)\n",
+            "Predicted Class: computer\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Record and Test"
+      ],
+      "metadata": {
+        "id": "ibQJAr2ryPpc"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Full Code"
+      ],
+      "metadata": {
+        "id": "HmR4wsZd0XNZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "import tensorflow as tf\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.preprocessing import LabelEncoder\n",
+        "import librosa\n",
+        "\n",
+        "# Mount Google Drive, copy the dataset archive into /content and extract it\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/gdrive')\n",
+        "\n",
+        "!cp /content/gdrive/MyDrive/AudioDataset/myAudioDataSet.zip /content\n",
+        "\n",
+        "# -o overwrites existing files without prompting, so the cell can be re-run;\n",
+        "# the explicit paths avoid depending on the current working directory\n",
+        "!unzip -q -o /content/myAudioDataSet.zip -d /content\n",
+        "\n",
+        "# Dataset root and the names of its train/test subfolders.\n",
+        "# The subfolder names are relative so that os.path.join(main_folder, ...) really\n",
+        "# builds on main_folder (an absolute second argument would replace it entirely).\n",
+        "main_folder = '/content/myAudioDataSet'\n",
+        "train_subfolder = 'train'\n",
+        "test_subfolder = 'test'\n",
+        "\n",
+        "# Define the class labels based on the subfolder names\n",
+        "my_classes = ['cat', 'computer', 'dog', 'door', 'person']\n",
+        "\n",
+        "# Define the base data paths for train and test sets\n",
+        "base_train_data_path = os.path.join(main_folder, train_subfolder)\n",
+        "base_test_data_path = os.path.join(main_folder, test_subfolder)\n",
+        "\n",
+        "# Define audio feature extraction function using librosa\n",
+        "def extract_features(file_path, sr=16000, n_mfcc=20):\n",
+        "    \"\"\"Load an audio file and return its MFCC features.\n",
+        "\n",
+        "    Args:\n",
+        "        file_path: Path of the audio file passed to librosa.load.\n",
+        "        sr: Sampling rate requested from librosa.load and passed on to the\n",
+        "            MFCC computation (default 16000).\n",
+        "        n_mfcc: Number of MFCC coefficients to compute (default 20).\n",
+        "\n",
+        "    Returns:\n",
+        "        The array returned by librosa.feature.mfcc for the loaded signal.\n",
+        "    \"\"\"\n",
+        "    audio, _ = librosa.load(file_path, sr=sr)\n",
+        "    return librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)\n",
+        "\n",
+        "# Load audio file paths for training set, one folder per class.\n",
+        "# os.listdir returns entries in arbitrary order; sorting keeps the file order\n",
+        "# (and therefore any seeded split of this list) identical on every run.\n",
+        "train_file_paths = []\n",
+        "train_labels = []\n",
+        "for class_label in my_classes:\n",
+        "    class_folder = os.path.join(base_train_data_path, class_label)\n",
+        "    file_paths = [\n",
+        "        os.path.join(class_folder, file_name)\n",
+        "        for file_name in sorted(os.listdir(class_folder))\n",
+        "        if file_name.endswith('.wav')\n",
+        "    ]\n",
+        "    train_file_paths.extend(file_paths)\n",
+        "    # One label per collected file; without this train_labels stays empty and no\n",
+        "    # longer matches train_file_paths in length\n",
+        "    train_labels.extend([class_label] * len(file_paths))\n",
+        "\n",
+        "# Load audio file paths for test set, one folder per class.\n",
+        "# os.listdir returns entries in arbitrary order; sorting keeps the test examples\n",
+        "# in the same order on every run.\n",
+        "test_file_paths = []\n",
+        "test_labels = []\n",
+        "for class_label in my_classes:\n",
+        "    class_folder = os.path.join(base_test_data_path, class_label)\n",
+        "    file_paths = [\n",
+        "        os.path.join(class_folder, file_name)\n",
+        "        for file_name in sorted(os.listdir(class_folder))\n",
+        "        if file_name.endswith('.wav')\n",
+        "    ]\n",
+        "    test_file_paths.extend(file_paths)\n",
+        "    # One label per collected file, so paths and labels stay aligned\n",
+        "    test_labels.extend([class_label] * len(file_paths))\n",
+        "\n",
+        "# Encode labels\n",
+        "label_encoder = LabelEncoder()\n",
+        "train_labels_encoded = label_encoder.fit_transform(train_labels)\n",
+        "test_labels_encoded = label_encoder.transform(test_labels)\n",
+        "\n",
+        "# Split data into train and validation sets\n",
+        "train_file_paths, val_file_paths, train_labels_encoded, val_labels_encoded = train_test_split(\n",
+        "    train_file_paths, train_labels_encoded, test_size=0.2, stratify=train_labels_encoded, random_state=42\n",
+        ")\n",
+        "\n",
+        "# Compute the features of every training file and combine them into one array\n",
+        "train_features = np.stack([extract_features(path) for path in train_file_paths])\n",
+        "\n",
+        "# Same for the validation files\n",
+        "val_features = np.stack([extract_features(path) for path in val_file_paths])\n",
+        "\n",
+        "# Same for the test files\n",
+        "test_features = np.stack([extract_features(path) for path in test_file_paths])\n",
+        "\n",
+        "# Normalize feature values.\n",
+        "# Mean and standard deviation are computed on the training features only and\n",
+        "# reused for the validation and test features, so all three sets go through the\n",
+        "# same transform and no statistics of held-out data enter the preprocessing.\n",
+        "feature_mean = np.mean(train_features)\n",
+        "feature_std = np.std(train_features)\n",
+        "train_features = (train_features - feature_mean) / feature_std\n",
+        "val_features = (val_features - feature_mean) / feature_std\n",
+        "test_features = (test_features - feature_mean) / feature_std\n",
+        "\n",
+        "# Build the tf.data pipelines: the training data is shuffled with a buffer as\n",
+        "# large as the training set and batched by 32; validation data is only batched.\n",
+        "train_ds = (\n",
+        "    tf.data.Dataset.from_tensor_slices((train_features, train_labels_encoded))\n",
+        "    .shuffle(len(train_features))\n",
+        "    .batch(32)\n",
+        ")\n",
+        "val_ds = (\n",
+        "    tf.data.Dataset.from_tensor_slices((val_features, val_labels_encoded))\n",
+        "    .batch(32)\n",
+        ")\n",
+        "\n",
+        "from tensorflow.keras.models import Sequential\n",
+        "from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten\n",
+        "from tensorflow.keras.optimizers import Adam\n",
+        "from sklearn import metrics\n",
+        "\n",
+        "model = tf.keras.Sequential([\n",
+        "    tf.keras.layers.Input(shape=(train_features.shape[1], train_features.shape[2])),\n",
+        "    tf.keras.layers.Conv1D(64, 3, activation='relu'),\n",
+        "    tf.keras.layers.MaxPooling1D(2),\n",
+        "    tf.keras.layers.Conv1D(128, 3, activation='relu'),\n",
+        "    tf.keras.layers.MaxPooling1D(2),\n",
+        "    tf.keras.layers.Conv1D(256, 3, activation='relu'),\n",
+        "    tf.keras.layers.GlobalAveragePooling1D(),\n",
+        "    tf.keras.layers.Dense(128, activation='relu'),\n",
+        "    tf.keras.layers.Dense(len(my_classes))\n",
+        "])\n",
+        "\n",
+        "model.summary()\n",
+        "\n",
+        "# Compile the model\n",
+        "model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
+        "              optimizer=tf.keras.optimizers.Adam(),\n",
+        "              metrics=['accuracy'])\n",
+        "\n",
+        "# Train the model\n",
+        "history = model.fit(train_ds, epochs=100, validation_data=val_ds)\n",
+        "\n",
+        "# Evaluate the model on the test set\n",
+        "test_loss, test_acc = model.evaluate(test_features, test_labels_encoded)\n",
+        "print(f'Test Loss: {test_loss:.4f}')\n",
+        "print(f'Test Accuracy: {test_acc:.4f}')\n",
+        "\n",
+        "import tensorflow as tf\n",
+        "import numpy as np\n",
+        "import joblib\n",
+        "\n",
+        "# Convert the model to TensorFlow Lite\n",
+        "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
+        "tflite_model = converter.convert()\n",
+        "\n",
+        "# Save the TensorFlow Lite model\n",
+        "tflite_model_path = '/content/sound.tflite'\n",
+        "with open(tflite_model_path, 'wb') as f:\n",
+        "    f.write(tflite_model)\n",
+        "\n",
+        "# Save the label encoder\n",
+        "label_encoder_path = '/content/labels_encoder.pkl'\n",
+        "joblib.dump(label_encoder, label_encoder_path)"
+      ],
+      "metadata": {
+        "id": "JOURw20i0Vvz"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file