Commit c5934ed9 authored by thirani

f:script

parent 94db1936
import os
import tensorflow as tf
import numpy as np
import joblib
import librosa
from pydub import AudioSegment
import simpleaudio as sa
# Define audio feature extraction function using librosa
def extract_features(file_path):
    audio, _ = librosa.load(file_path, sr=16000)
    mfccs = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=20)
    return mfccs
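# Example (hypothetical file): extract_features('audio/dog.wav') returns an
# array of shape (20, n_frames), where n_frames depends on the clip duration.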
# Function to convert MP3 to WAV format
def convert_to_wav(file_path):
    audio = AudioSegment.from_mp3(file_path)
    wav_file_path = file_path.replace('.mp3', '.wav')
    audio.export(wav_file_path, format='wav')
    return wav_file_path
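# Note: pydub relies on ffmpeg (or libav) being installed to decode MP3 files.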
# Load the TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path='sound.tflite')
interpreter.allocate_tensors()
# Load the label encoder
label_encoder_path = 'labels_encoder.pkl'
label_encoder = joblib.load(label_encoder_path)
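# Assumes labels_encoder.pkl is a scikit-learn LabelEncoder fitted during
# training, so inverse_transform maps class indices back to label strings.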
# Load the label names from the label.txt file
label_file_path = 'label.txt'
with open(label_file_path, 'r') as f:
    labels = f.read().splitlines()
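# The order of lines in label.txt is assumed to match the model's output indices.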
# Define the output messages
output_messages = {
    'cat': 'There is a cat somewhere',
    'dog': 'There is a dog somewhere',
    'door': 'There is a door near you',
    'person': 'People are around you',
    'computer': "That's a computer"
}
# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
# Define the path to the audio folder
audio_folder = 'audio'
# Get a list of all files in the audio folder
audio_files = os.listdir(audio_folder)
for file_name in audio_files:
    file_path = os.path.join(audio_folder, file_name)
    sample_audio_format = os.path.splitext(file_path)[1][1:].lower()  # Get the file format
    if sample_audio_format == 'mp3':
        file_path = convert_to_wav(file_path)  # Convert MP3 to WAV if it's an MP3 file

    # Extract MFCC features for the audio file
    sample_features = extract_features(file_path)

    # Pad or truncate along the time axis so every clip matches the shape the model expects
    expected_shape = (20, 157)
    if sample_features.shape[1] < expected_shape[1]:
        # Pad the features with zeros
        pad_width = expected_shape[1] - sample_features.shape[1]
        sample_features = np.pad(sample_features, ((0, 0), (0, pad_width)))
    elif sample_features.shape[1] > expected_shape[1]:
        # Truncate the features
        sample_features = sample_features[:, :expected_shape[1]]

    # Add batch and channel dimensions to match the input tensor shape
    sample_features = np.expand_dims(sample_features, axis=0)
    sample_features = np.expand_dims(sample_features, axis=-1)
    input_shape = input_details[0]['shape']
    # Reshape and cast to float32, the dtype a float TFLite model expects
    sample_features = np.reshape(sample_features, input_shape).astype(np.float32)

    # Run inference on the audio file
    interpreter.set_tensor(input_details[0]['index'], sample_features)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])
    predicted_class_index = np.argmax(output)
    predicted_class = labels[predicted_class_index]
    # Map the index back through the label encoder (should agree with predicted_class)
    predicted_label = label_encoder.inverse_transform([predicted_class_index])[0]

    # Play the audio file
    wave_obj = sa.WaveObject.from_wave_file(file_path)
    play_obj = wave_obj.play()
    play_obj.wait_done()

    # Display the message for the predicted class
    output_message = output_messages.get(predicted_class, 'Unknown sound')
    print(f'Audio File: {file_name}\nOutput Message: {output_message}\n')
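# Usage sketch (assuming the paths above): place sound.tflite,
# labels_encoder.pkl, and label.txt next to this script, put .wav/.mp3 clips
# in an 'audio' folder, then run the script with Python 3.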