T.H.C. Heshan / 2023-232 · Commits

Commit c5934ed9 authored May 24, 2023 by thirani

f:script

parent 94db1936

Showing 2 changed files with 102 additions and 0 deletions
audio_classification/audio_classify.py  +102 -0
audio_classification/sound.tflite  +0 -0
audio_classification/audio_classify.py  0 → 100644
import os
import tensorflow as tf
import numpy as np
import joblib
import librosa
from pydub import AudioSegment
import simpleaudio as sa

# Define audio feature extraction function using librosa
def extract_features(file_path):
    audio, _ = librosa.load(file_path, sr=16000)
    mfccs = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=20)
    return mfccs

# Function to convert MP3 to WAV format
def convert_to_wav(file_path):
    audio = AudioSegment.from_mp3(file_path)
    wav_file_path = file_path.replace('.mp3', '.wav')
    audio.export(wav_file_path, format='wav')
    return wav_file_path

# Load the TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path='sound.tflite')
interpreter.allocate_tensors()

# Load the label encoder
label_encoder_path = 'labels_encoder.pkl'
label_encoder = joblib.load(label_encoder_path)

# Load the label names from the label.txt file
label_file_path = 'label.txt'
with open(label_file_path, 'r') as f:
    labels = f.read().splitlines()

# Define the output messages
output_messages = {
    'cat': 'There is a cat somewhere',
    'dog': 'There is a dog somewhere',
    'door': 'There is a door near you',
    'person': 'People are around you',
    'computer': "That's a computer"
}

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Define the path to the audio folder
audio_folder = 'audio'

# Get a list of all files in the audio folder
audio_files = os.listdir(audio_folder)

for file_name in audio_files:
    file_path = os.path.join(audio_folder, file_name)
    sample_audio_format = os.path.splitext(file_path)[1][1:].lower()  # Get the file format

    if sample_audio_format == 'mp3':
        file_path = convert_to_wav(file_path)  # Convert MP3 to WAV if it's an MP3 file

    # Extract features for the audio file
    sample_features = extract_features(file_path)

    expected_shape = (20, 157)
    if sample_features.shape[1] < expected_shape[1]:
        # Pad the features with zeros
        pad_width = expected_shape[1] - sample_features.shape[1]
        sample_features = np.pad(sample_features, ((0, 0), (0, pad_width)))
    elif sample_features.shape[1] > expected_shape[1]:
        # Truncate the features
        sample_features = sample_features[:, :expected_shape[1]]

    # Reshape and expand dimensions to match the input tensor shape
    sample_features = np.expand_dims(sample_features, axis=0)
    sample_features = np.expand_dims(sample_features, axis=-1)

    expected_shape = input_details[0]['shape']
    # Reshape the sample_features array
    sample_features = np.reshape(sample_features, expected_shape)

    # Run inference on the audio file
    interpreter.set_tensor(input_details[0]['index'], sample_features)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    predicted_class_index = np.argmax(output)
    predicted_class = labels[predicted_class_index]

    # Map the predicted class index to the actual class label
    predicted_label = label_encoder.inverse_transform([predicted_class_index])[0]

    # Play the audio file
    wave_obj = sa.WaveObject.from_wave_file(file_path)
    play_obj = wave_obj.play()
    play_obj.wait_done()

    # Display the predicted class
    output_message = output_messages.get(predicted_class, 'Unknown sound')
    print(f'Audio File: {file_name}\nOutput Message: {output_message}\n')
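The script expects sound.tflite, labels_encoder.pkl, label.txt, and an audio/ folder to sit alongside it; only sound.tflite is added in this commit. A minimal sketch of how the encoder and label file could be produced, assuming scikit-learn's LabelEncoder was used during training (an assumption; the training code is not part of this commit):

# build_labels.py - hypothetical helper, not part of this commit
import joblib
from sklearn.preprocessing import LabelEncoder

# Class names the model is assumed to have been trained on,
# taken from the output_messages dictionary in audio_classify.py.
class_names = ['cat', 'computer', 'dog', 'door', 'person']

# Fit the encoder and save it so audio_classify.py can call inverse_transform().
encoder = LabelEncoder()
encoder.fit(class_names)
joblib.dump(encoder, 'labels_encoder.pkl')

# Write label.txt with one class name per line, in the encoder's (sorted) order.
with open('label.txt', 'w') as f:
    f.write('\n'.join(encoder.classes_))

With those files in place, the script can be run from the audio_classification directory as a plain Python program (python audio_classify.py); it classifies and plays back every file found in audio/.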
audio_classification/sound.tflite  0 → 100644
File added