dataset prepare

4cab3664 · Piumi Navoda · ff03cfbb · 4cab3664
Commit 4cab3664 authored Nov 07, 2023 by Piumi Navoda
Hide whitespace changes
Inline Side-by-side

Showing with 70 additions and 0 deletions

voicerecognizion/prepare_dataset.py voicerecognizion/prepare_dataset.py +70 -0

No files found.
--- a/voicerecognizion/prepare_dataset.py
+++ b/voicerecognizion/prepare_dataset.py
+import librosa
+import os
+import json
+
+# Root folder that preprocess_dataset() walks when this file is run as a script.
+DATASET_PATH = "dataset"
+# Output file that receives the extracted data as JSON.
+JSON_PATH = "data.json"
+# Fixed clip length in samples: shorter clips are dropped, longer ones truncated.
+# NOTE(review): "1 sec." holds only at a 22050 Hz sample rate -- presumably
+# the default rate librosa.load() resamples to; confirm.
+SAMPLES_TO_CONSIDER = 22050 # 1 sec. of audio
+
+
+def preprocess_dataset(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512):
+    """Extracts MFCCs from an audio dataset and saves them into a json file.
+
+    Every directory found below ``dataset_path`` is treated as one label: its
+    name is appended to "mapping" and each clip inside it is tagged with the
+    index of that entry. Clips with fewer than SAMPLES_TO_CONSIDER samples are
+    skipped; longer clips are truncated to exactly that many samples.
+
+    :param dataset_path (str): Path to dataset
+    :param json_path (str): Path to json file used to save MFCCs
+    :param num_mfcc (int): Number of coefficients to extract
+    :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
+    :param hop_length (int): Sliding window for FFT. Measured in # of samples
+    :return: None. The collected data is written to ``json_path``.
+    """
+
+    # dictionary where we'll store mapping, labels, MFCCs and filenames
+    data = {
+        "mapping": [],
+        "labels": [],
+        "MFCCs": [],
+        "files": []
+    }
+
+    # os.walk() yields plain strings, so normalise the root once to be able
+    # to compare against it by value (also works for pathlib.Path inputs)
+    root = os.fspath(dataset_path)
+
+    # loop through all sub-dirs
+    for dirpath, _dirnames, filenames in os.walk(root):
+
+        # ensure we're at sub-folder level; compare by equality, because
+        # identity (`is`) between two path strings is not guaranteed
+        if dirpath == root:
+            continue
+
+        # save label (i.e., sub-folder name) in the mapping; basename honours
+        # the platform's path separator, unlike splitting on "/"
+        label = os.path.basename(dirpath)
+        data["mapping"].append(label)
+
+        # the numeric label is the position of this folder in the mapping
+        label_index = len(data["mapping"]) - 1
+        print("\nProcessing: '{}'".format(label))
+
+        # process all audio files in sub-dir and store MFCCs
+        for f in filenames:
+            file_path = os.path.join(dirpath, f)
+
+            # load audio file
+            signal, sample_rate = librosa.load(file_path)
+
+            # drop audio files with less than pre-decided number of samples
+            if len(signal) < SAMPLES_TO_CONSIDER:
+                continue
+
+            # ensure consistency of the length of the signal
+            signal = signal[:SAMPLES_TO_CONSIDER]
+
+            # extract MFCCs
+            MFCCs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
+
+            # store data for analysed track; transpose before converting to
+            # nested lists so the result is JSON-serialisable
+            data["MFCCs"].append(MFCCs.T.tolist())
+            data["labels"].append(label_index)
+            data["files"].append(file_path)
+            print("{}: {}".format(file_path, label_index))
+
+    # save data in json file
+    with open(json_path, "w") as fp:
+        json.dump(data, fp, indent=4)
+
+
+if __name__ == "__main__":
+    # When run as a script, process DATASET_PATH and write the result to JSON_PATH.
+    preprocess_dataset(DATASET_PATH, JSON_PATH)
\ No newline at end of file