Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2023-232
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
T.H.C. Heshan
2023-232
Commits
94db1936
Commit
94db1936
authored
May 24, 2023
by
thirani
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
audio model
parent
d0e05967
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
834 additions
and
0 deletions
+834
-0
Audio_Based_Object_Detection.ipynb
Audio_Based_Object_Detection.ipynb
+834
-0
No files found.
Audio_Based_Object_Detection.ipynb
0 → 100644
View file @
94db1936
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"import librosa"
],
"metadata": {
"id": "OlmoQenQwYA4"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MhVIS9IDLaw4",
"outputId": "48dbb406-2a23-45c8-abfb-3bb5ac49978d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/gdrive\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!cp /content/gdrive/MyDrive/AudioDataset/myAudioDataSet.zip /content"
],
"metadata": {
"id": "UGghL1U9Lh2o"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!unzip -q myAudioDataSet.zip"
],
"metadata": {
"id": "DeVJXwLiMlDx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Define the main folder and the train/test subfolder names.\n",
"# The subfolders are relative names so that os.path.join(main_folder, ...)\n",
"# really composes the path: joining an absolute path discards main_folder.\n",
"main_folder = '/content/myAudioDataSet'\n",
"train_subfolder = 'train'\n",
"test_subfolder = 'test'"
],
"metadata": {
"id": "-7n2NI2Zwabf"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Define the class labels based on the subfolder names\n",
"my_classes = ['cat', 'computer', 'dog', 'door', 'person']"
],
"metadata": {
"id": "VlNxTzFdwikk"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Build the root directories of the train and test splits\n",
"base_train_data_path, base_test_data_path = (\n",
"    os.path.join(main_folder, split_folder)\n",
"    for split_folder in (train_subfolder, test_subfolder)\n",
")"
],
"metadata": {
"id": "4BYCLuxAwkmZ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Define audio feature extraction function using librosa\n",
"def extract_features(file_path, sample_rate=16000, n_mfcc=20):\n",
"    \"\"\"Return the MFCC matrix of an audio file.\n",
"\n",
"    Args:\n",
"        file_path: Path of the audio file to load.\n",
"        sample_rate: Rate (Hz) the audio is resampled to on load; the same\n",
"            value is passed to the MFCC computation so the two stay in sync.\n",
"        n_mfcc: Number of MFCC coefficients to compute.\n",
"\n",
"    Returns:\n",
"        Array of shape (n_mfcc, n_frames); n_frames depends on the clip length.\n",
"    \"\"\"\n",
"    audio, _ = librosa.load(file_path, sr=sample_rate)\n",
"    return librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)"
],
"metadata": {
"id": "GLg2nDMxwmeT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Load audio file paths for training set\n",
"train_file_paths = []\n",
"train_labels = []\n",
"for class_label in my_classes:\n",
"    class_folder = os.path.join(base_train_data_path, class_label)\n",
"    # os.listdir returns entries in arbitrary order; sort them so the file order\n",
"    # (and therefore the seeded train/validation split) is reproducible.\n",
"    file_paths = [\n",
"        os.path.join(class_folder, file_name)\n",
"        for file_name in sorted(os.listdir(class_folder))\n",
"        if file_name.endswith('.wav')\n",
"    ]\n",
"    train_file_paths.extend(file_paths)\n",
"    train_labels.extend([class_label] * len(file_paths))"
],
"metadata": {
"id": "1Y9Wf3rGwoUf"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Load audio file paths for test set\n",
"test_file_paths = []\n",
"test_labels = []\n",
"for class_label in my_classes:\n",
"    class_folder = os.path.join(base_test_data_path, class_label)\n",
"    # os.listdir returns entries in arbitrary order; sort them so the test\n",
"    # examples are always visited in the same order.\n",
"    file_paths = [\n",
"        os.path.join(class_folder, file_name)\n",
"        for file_name in sorted(os.listdir(class_folder))\n",
"        if file_name.endswith('.wav')\n",
"    ]\n",
"    test_file_paths.extend(file_paths)\n",
"    test_labels.extend([class_label] * len(file_paths))"
],
"metadata": {
"id": "mJ3btJJ7wpNg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Encode labels: fit the encoder on the training labels, then map both label lists\n",
"label_encoder = LabelEncoder()\n",
"label_encoder.fit(train_labels)\n",
"train_labels_encoded = label_encoder.transform(train_labels)\n",
"test_labels_encoded = label_encoder.transform(test_labels)"
],
"metadata": {
"id": "94TldR2IwsXQ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Split data into train and validation sets\n",
"train_file_paths, val_file_paths, train_labels_encoded, val_labels_encoded = train_test_split(\n",
" train_file_paths, train_labels_encoded, test_size=0.2, stratify=train_labels_encoded, random_state=42\n",
")"
],
"metadata": {
"id": "CsSRKQN_wvAx"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Extract features for training set and stack them into a single array\n",
"train_features = np.stack(\n",
"    [extract_features(audio_path) for audio_path in train_file_paths]\n",
")"
],
"metadata": {
"id": "sKr1LgzAwwki"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Extract features for validation set and stack them into a single array\n",
"val_features = np.stack(\n",
"    [extract_features(audio_path) for audio_path in val_file_paths]\n",
")"
],
"metadata": {
"id": "0ubAv-Tjw0EQ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Extract features for test set and stack them into a single array\n",
"test_features = np.stack(\n",
"    [extract_features(audio_path) for audio_path in test_file_paths]\n",
")"
],
"metadata": {
"id": "5Ll27c6Kw2Go"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Normalize feature values.\n",
"# The mean/std are computed on the training set only and reused for the\n",
"# validation and test sets, so every split gets the scaling the model is fitted on.\n",
"feature_mean = np.mean(train_features)\n",
"feature_std = np.std(train_features)\n",
"train_features = (train_features - feature_mean) / feature_std\n",
"val_features = (val_features - feature_mean) / feature_std\n",
"test_features = (test_features - feature_mean) / feature_std"
],
"metadata": {
"id": "qnhwpLvew4Pq"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Create TensorFlow Datasets: the training set is shuffled over its full length,\n",
"# the validation set keeps its order; both are served in batches of 32\n",
"train_ds = tf.data.Dataset.from_tensor_slices((train_features, train_labels_encoded))\n",
"train_ds = train_ds.shuffle(len(train_features)).batch(32)\n",
"\n",
"val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels_encoded))\n",
"val_ds = val_ds.batch(32)"
],
"metadata": {
"id": "EqrZ1soTw51R"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten\n",
"from tensorflow.keras.optimizers import Adam\n",
"from sklearn import metrics"
],
"metadata": {
"id": "uTAW6KLZNCWg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# 1D CNN classifier. The final Dense layer has no activation, so the model\n",
"# outputs one raw score per class.\n",
"# NOTE(review): with features stacked as (samples, n_mfcc, n_frames), Conv1D slides\n",
"# over the MFCC-coefficient axis and treats frames as channels - confirm this is intended.\n",
"layer_stack = [\n",
"    tf.keras.layers.Input(shape=(train_features.shape[1], train_features.shape[2])),\n",
"    tf.keras.layers.Conv1D(64, 3, activation='relu'),\n",
"    tf.keras.layers.MaxPooling1D(2),\n",
"    tf.keras.layers.Conv1D(128, 3, activation='relu'),\n",
"    tf.keras.layers.MaxPooling1D(2),\n",
"    tf.keras.layers.Conv1D(256, 3, activation='relu'),\n",
"    tf.keras.layers.GlobalAveragePooling1D(),\n",
"    tf.keras.layers.Dense(128, activation='relu'),\n",
"    tf.keras.layers.Dense(len(my_classes)),\n",
"]\n",
"model = tf.keras.Sequential(layer_stack)"
],
"metadata": {
"id": "0Mup4RTGNYbi"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model.summary()"
],
"metadata": {
"id": "7Yqfimrzw9H3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Compile the model\n",
"model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
" optimizer=tf.keras.optimizers.Adam(),\n",
" metrics=['accuracy'])"
],
"metadata": {
"id": "_76Icnvow-bo"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Train the model\n",
"history = model.fit(train_ds, epochs=100, validation_data=val_ds)"
],
"metadata": {
"id": "heog45TNxAyP"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Evaluate the model on the test set\n",
"test_loss, test_acc = model.evaluate(test_features, test_labels_encoded)\n",
"print(f'Test Loss: {test_loss:.4f}')\n",
"print(f'Test Accuracy: {test_acc:.4f}')\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mVwLmcnKwW_7",
"outputId": "edb64e6a-7349-419b-bb9a-8b15173ca666"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2/2 [==============================] - 0s 8ms/step - loss: 0.5738 - accuracy: 0.9200\n",
"Test Loss: 0.5738\n",
"Test Accuracy: 0.9200\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Generate predictions for the test set\n",
"predictions = model.predict(test_features)\n",
"predicted_labels = np.argmax(predictions, axis=1)\n",
"\n",
"# Convert the encoded labels back to original class labels (once, not per plot)\n",
"predicted_class_labels = label_encoder.inverse_transform(predicted_labels)\n",
"true_class_labels = label_encoder.inverse_transform(test_labels_encoded)\n",
"\n",
"# Plot some examples from the test set along with their predicted labels.\n",
"# The count is capped at the test-set size so a small test set cannot raise IndexError.\n",
"num_examples = min(5, len(test_features))\n",
"\n",
"plt.figure(figsize=(12, 8))\n",
"for i in range(num_examples):\n",
"    plt.subplot(1, num_examples, i + 1)\n",
"    plt.imshow(test_features[i].T, cmap='inferno', origin='lower', aspect='auto')\n",
"    plt.title(f'True: {true_class_labels[i]}\\nPredicted: {predicted_class_labels[i]}')\n",
"    plt.axis('off')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n"
],
"metadata": {
"id": "TVUUvDoWxYbT"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import joblib\n",
"\n",
"# Convert the model to TensorFlow Lite\n",
"converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
"tflite_model = converter.convert()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kFYXhqTuyOHL",
"outputId": "a36c0015-cdd6-4ccd-88e5-b307d1badb3c"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _update_step_xla while saving (showing 4 of 4). These functions will not be directly callable after loading.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Save the TensorFlow Lite model\n",
"tflite_model_path = '/content/sound.tflite'\n",
"with open(tflite_model_path, 'wb') as f:\n",
" f.write(tflite_model)"
],
"metadata": {
"id": "UKzcA82gyYYt"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Save the label encoder\n",
"label_encoder_path = '/content/labels_encoder.pkl'\n",
"joblib.dump(label_encoder, label_encoder_path)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cJ6nN9lJycyu",
"outputId": "49fad93b-033d-46af-dc25-672cff082865"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['/content/labels_encoder.pkl']"
]
},
"metadata": {},
"execution_count": 27
}
]
},
{
"cell_type": "markdown",
"source": [
"### Test Sample WAV Files"
],
"metadata": {
"id": "nJ0mACRAyFrU"
}
},
{
"cell_type": "code",
"source": [
"# Load the TensorFlow Lite model from the absolute path it was saved to\n",
"interpreter = tf.lite.Interpreter(model_path='/content/sound.tflite')\n",
"interpreter.allocate_tensors()\n",
"\n",
"# Get input and output details\n",
"input_details = interpreter.get_input_details()\n",
"output_details = interpreter.get_output_details()\n",
"\n",
"# Print the input tensor details\n",
"print(\"Input Tensor Details:\")\n",
"print(f\"Name: {input_details[0]['name']}\")\n",
"print(f\"Shape: {input_details[0]['shape']}\")\n",
"print(f\"Data Type: {input_details[0]['dtype']}\")\n",
"print(f\"Quantization Parameters: {input_details[0]['quantization']}\")\n",
"\n",
"# Extract features for the sample audio (extract_features loads the file itself,\n",
"# so no separate librosa.load call is needed)\n",
"sample_file_path = '/content/footsteps-1.wav'\n",
"sample_features = extract_features(sample_file_path)\n",
"\n",
"# NOTE(review): the training features were standardized before fitting, but this\n",
"# sample is fed to the model without that scaling - confirm and apply the\n",
"# training-set scaling here.\n",
"\n",
"# The model takes a fixed number of frames. Read it from the input tensor instead\n",
"# of hard-coding it, then zero-pad or truncate along the frame axis.\n",
"expected_shape = input_details[0]['shape']\n",
"expected_frames = int(expected_shape[-1])\n",
"n_frames = sample_features.shape[1]\n",
"if n_frames < expected_frames:\n",
"    # Pad the features with zeros\n",
"    sample_features = np.pad(sample_features, ((0, 0), (0, expected_frames - n_frames)))\n",
"elif n_frames > expected_frames:\n",
"    # Truncate the features\n",
"    sample_features = sample_features[:, :expected_frames]\n",
"\n",
"# Add the batch dimension and use the dtype the interpreter requires\n",
"sample_features = np.expand_dims(sample_features, axis=0).astype(input_details[0]['dtype'])\n",
"\n",
"# Print the shapes for debugging\n",
"print(f\"sample_features shape: {sample_features.shape}\")\n",
"print(f\"expected shape: {expected_shape}\")\n",
"\n",
"# Run inference on the sample audio\n",
"interpreter.set_tensor(input_details[0]['index'], sample_features)\n",
"interpreter.invoke()\n",
"output = interpreter.get_tensor(output_details[0]['index'])\n",
"predicted_class = np.argmax(output)\n",
"\n",
"# Map the predicted class index to the actual class label\n",
"predicted_label = label_encoder.inverse_transform([predicted_class])[0]\n",
"\n",
"print(f'Predicted Class: {predicted_label}')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "c0MICiyOAPsx",
"outputId": "8b012b18-a63f-4605-a7e6-bb68297b9789"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Input Tensor Details:\n",
"Name: serving_default_input_1:0\n",
"Shape: [ 1 20 157]\n",
"Data Type: <class 'numpy.float32'>\n",
"Quantization Parameters: (0.0, 0)\n",
"sample_features shape: (1, 20, 157, 1)\n",
"expected shape: [ 1 20 157]\n",
"Updated sample_features shape: (1, 20, 157)\n",
"Predicted Class: person\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!pip install pydub"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8TNcrgKLSi_g",
"outputId": "0895fa89-fd8c-43e1-deea-cde51ee90d0b"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting pydub\n",
" Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
"Installing collected packages: pydub\n",
"Successfully installed pydub-0.25.1\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### Test Sample MP3 Files"
],
"metadata": {
"id": "5kM7JbhbyKnX"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"from pydub import AudioSegment\n",
"\n",
"# Location of the sample MP3 and of the WAV file it is converted to\n",
"sample_file_path = '/content/quick-mechanical-keyboard-14391.mp3'\n",
"output_file_path = '/content/quick-mechanical-keyboard-14391.wav'\n",
"\n",
"# Convert MP3 to WAV\n",
"AudioSegment.from_mp3(sample_file_path).export(output_file_path, format='wav')\n",
"\n",
"# Extract features from the converted WAV file (not from the original MP3,\n",
"# otherwise the conversion above has no effect)\n",
"sample_features = extract_features(output_file_path)\n",
"\n",
"# NOTE(review): the training features were standardized before fitting, but this\n",
"# sample is fed to the model without that scaling - confirm and apply the\n",
"# training-set scaling here.\n",
"\n",
"# The model takes a fixed number of frames. Read it from the input tensor instead\n",
"# of hard-coding it, then zero-pad or truncate along the frame axis.\n",
"expected_shape = input_details[0]['shape']\n",
"expected_frames = int(expected_shape[-1])\n",
"n_frames = sample_features.shape[1]\n",
"if n_frames < expected_frames:\n",
"    # Pad the features with zeros\n",
"    sample_features = np.pad(sample_features, ((0, 0), (0, expected_frames - n_frames)))\n",
"elif n_frames > expected_frames:\n",
"    # Truncate the features\n",
"    sample_features = sample_features[:, :expected_frames]\n",
"\n",
"# Add the batch dimension and use the dtype the interpreter requires\n",
"sample_features = np.expand_dims(sample_features, axis=0).astype(input_details[0]['dtype'])\n",
"\n",
"# Print the shapes for debugging\n",
"print(f\"sample_features shape: {sample_features.shape}\")\n",
"print(f\"expected shape: {expected_shape}\")\n",
"\n",
"# Run inference on the sample audio\n",
"interpreter.set_tensor(input_details[0]['index'], sample_features)\n",
"interpreter.invoke()\n",
"output = interpreter.get_tensor(output_details[0]['index'])\n",
"predicted_class = np.argmax(output)\n",
"\n",
"# Map the predicted class index to the actual class label\n",
"predicted_label = label_encoder.inverse_transform([predicted_class])[0]\n",
"\n",
"print(f'Predicted Class: {predicted_label}')"
],
"metadata": {
"id": "Q7bpi_3iAmgQ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8292a746-d1c6-4547-9531-1af8ce75085e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"sample_features shape: (1, 20, 157, 1)\n",
"expected shape: [ 1 20 157]\n",
"Updated sample_features shape: (1, 20, 157)\n",
"Predicted Class: computer\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### Record and Test"
],
"metadata": {
"id": "ibQJAr2ryPpc"
}
},
{
"cell_type": "markdown",
"source": [
"# Full Code"
],
"metadata": {
"id": "HmR4wsZd0XNZ"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import LabelEncoder\n",
"import librosa\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/gdrive')\n",
"\n",
"!cp /content/gdrive/MyDrive/AudioDataset/myAudioDataSet.zip /content\n",
"\n",
"!unzip -q myAudioDataSet.zip\n",
"\n",
"# Define the main folder and the train/test subfolder names.\n",
"# The subfolders are relative names so that the os.path.join calls below really\n",
"# compose the path: joining an absolute path discards main_folder.\n",
"main_folder = '/content/myAudioDataSet'\n",
"train_subfolder = 'train'\n",
"test_subfolder = 'test'\n",
"\n",
"# Define the class labels based on the subfolder names\n",
"my_classes = ['cat', 'computer', 'dog', 'door', 'person']\n",
"\n",
"# Define the base data paths for train and test sets\n",
"base_train_data_path = os.path.join(main_folder, train_subfolder)\n",
"base_test_data_path = os.path.join(main_folder, test_subfolder)\n",
"\n",
"# Define audio feature extraction function using librosa\n",
"def extract_features(file_path, sample_rate=16000, n_mfcc=20):\n",
"    \"\"\"Return the MFCC matrix of an audio file.\n",
"\n",
"    Args:\n",
"        file_path: Path of the audio file to load.\n",
"        sample_rate: Rate (Hz) the audio is resampled to on load; the same\n",
"            value is passed to the MFCC computation so the two stay in sync.\n",
"        n_mfcc: Number of MFCC coefficients to compute.\n",
"\n",
"    Returns:\n",
"        Array of shape (n_mfcc, n_frames); n_frames depends on the clip length.\n",
"    \"\"\"\n",
"    audio, _ = librosa.load(file_path, sr=sample_rate)\n",
"    return librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)\n",
"\n",
"\n",
"def collect_wav_files(base_path, class_names):\n",
"    \"\"\"Return (file_paths, labels) for every .wav file in base_path/<class name>.\n",
"\n",
"    One shared helper is used for both splits so the paths and labels are always\n",
"    collected together and stay the same length.\n",
"    \"\"\"\n",
"    file_paths = []\n",
"    labels = []\n",
"    for class_label in class_names:\n",
"        class_folder = os.path.join(base_path, class_label)\n",
"        # os.listdir returns entries in arbitrary order; sort them so the file\n",
"        # order (and therefore the seeded split below) is reproducible.\n",
"        wav_names = sorted(\n",
"            name for name in os.listdir(class_folder) if name.endswith('.wav')\n",
"        )\n",
"        file_paths.extend(os.path.join(class_folder, name) for name in wav_names)\n",
"        labels.extend([class_label] * len(wav_names))\n",
"    return file_paths, labels\n",
"\n",
"\n",
"# Load audio file paths and labels for the training and test sets\n",
"train_file_paths, train_labels = collect_wav_files(base_train_data_path, my_classes)\n",
"test_file_paths, test_labels = collect_wav_files(base_test_data_path, my_classes)\n",
"\n",
"# Encode labels\n",
"label_encoder = LabelEncoder()\n",
"train_labels_encoded = label_encoder.fit_transform(train_labels)\n",
"test_labels_encoded = label_encoder.transform(test_labels)\n",
"\n",
"# Split data into train and validation sets\n",
"train_file_paths, val_file_paths, train_labels_encoded, val_labels_encoded = train_test_split(\n",
"    train_file_paths, train_labels_encoded, test_size=0.2, stratify=train_labels_encoded, random_state=42\n",
")\n",
"\n",
"# Extract features for training set\n",
"train_features = [extract_features(file_path) for file_path in train_file_paths]\n",
"train_features = np.stack(train_features)\n",
"\n",
"# Extract features for validation set\n",
"val_features = [extract_features(file_path) for file_path in val_file_paths]\n",
"val_features = np.stack(val_features)\n",
"\n",
"# Extract features for test set\n",
"test_features = [extract_features(file_path) for file_path in test_file_paths]\n",
"test_features = np.stack(test_features)\n",
"\n",
"# Normalize feature values.\n",
"# The mean/std are computed on the training set only and reused for the\n",
"# validation and test sets, so every split gets the scaling the model is fitted on.\n",
"feature_mean = np.mean(train_features)\n",
"feature_std = np.std(train_features)\n",
"train_features = (train_features - feature_mean) / feature_std\n",
"val_features = (val_features - feature_mean) / feature_std\n",
"test_features = (test_features - feature_mean) / feature_std\n",
"\n",
"# Create TensorFlow Datasets\n",
"train_ds = tf.data.Dataset.from_tensor_slices((train_features, train_labels_encoded)).shuffle(len(train_features)).batch(32)\n",
"val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels_encoded)).batch(32)\n",
"\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten\n",
"from tensorflow.keras.optimizers import Adam\n",
"from sklearn import metrics\n",
"\n",
"# 1D CNN classifier; the final Dense layer has no activation and outputs raw scores,\n",
"# which is why the loss below is created with from_logits=True\n",
"model = tf.keras.Sequential([\n",
"    tf.keras.layers.Input(shape=(train_features.shape[1], train_features.shape[2])),\n",
"    tf.keras.layers.Conv1D(64, 3, activation='relu'),\n",
"    tf.keras.layers.MaxPooling1D(2),\n",
"    tf.keras.layers.Conv1D(128, 3, activation='relu'),\n",
"    tf.keras.layers.MaxPooling1D(2),\n",
"    tf.keras.layers.Conv1D(256, 3, activation='relu'),\n",
"    tf.keras.layers.GlobalAveragePooling1D(),\n",
"    tf.keras.layers.Dense(128, activation='relu'),\n",
"    tf.keras.layers.Dense(len(my_classes))\n",
"])\n",
"\n",
"model.summary()\n",
"\n",
"# Compile the model\n",
"model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
"              optimizer=tf.keras.optimizers.Adam(),\n",
"              metrics=['accuracy'])\n",
"\n",
"# Train the model\n",
"history = model.fit(train_ds, epochs=100, validation_data=val_ds)\n",
"\n",
"# Evaluate the model on the test set\n",
"test_loss, test_acc = model.evaluate(test_features, test_labels_encoded)\n",
"print(f'Test Loss: {test_loss:.4f}')\n",
"print(f'Test Accuracy: {test_acc:.4f}')\n",
"\n",
"import tensorflow as tf\n",
"import numpy as np\n",
"import joblib\n",
"\n",
"# Convert the model to TensorFlow Lite\n",
"converter = tf.lite.TFLiteConverter.from_keras_model(model)\n",
"tflite_model = converter.convert()\n",
"\n",
"# Save the TensorFlow Lite model\n",
"tflite_model_path = '/content/sound.tflite'\n",
"with open(tflite_model_path, 'wb') as f:\n",
"    f.write(tflite_model)\n",
"\n",
"# Save the label encoder\n",
"label_encoder_path = '/content/labels_encoder.pkl'\n",
"joblib.dump(label_encoder, label_encoder_path)"
],
"metadata": {
"id": "JOURw20i0Vvz"
},
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment