Commit 108a9b53 authored by W.D.R.P. Sandeepa

Merge branch 'it18257632' into 'master'

prepared dataset

See merge request !48
parents 3e67b743 58874145
import json
import librosa
import os
# Root directory that prepare_dataset walks; each sub-directory is treated as
# one category of audio files.
DATASET_PATH = "dataset"
# Output file that receives the extracted features as JSON.
JSON_PATH = "data.json"
# Required signal length in samples: shorter clips are skipped and longer ones
# are truncated to exactly this many samples. Presumably one second of audio
# at the sample rate librosa.load uses by default -- confirm if that changes.
SAMPLES_TO_CONSIDER = 22050
def prepare_dataset(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    """Extract MFCCs from the audio files under ``dataset_path`` and save them as JSON.

    Every directory below ``dataset_path`` is treated as one category. Its
    directory name is appended to ``"mappings"`` and its position in that
    list is the integer label given to each of its audio files. Files shorter
    than ``SAMPLES_TO_CONSIDER`` samples are skipped; longer ones are
    truncated to that length before feature extraction.

    Args:
        dataset_path: Root directory containing one sub-directory per category.
        json_path: Path of the JSON file to write.
        n_mfcc: Number of MFCC coefficients, forwarded to ``librosa.feature.mfcc``.
        hop_length: Hop length, forwarded to ``librosa.feature.mfcc``.
        n_fft: FFT window size, forwarded to ``librosa.feature.mfcc``.

    The written JSON object has four keys: ``"mappings"`` (category names),
    ``"labels"`` (one integer per kept file), ``"MFCCs"`` (one nested list per
    kept file, the transposed MFCC matrix) and ``"files"`` (the file paths).
    """
    # data dictionary
    data = {
        "mappings": [],
        "labels": [],
        "MFCCs": [],
        "files": [],
    }

    # Normalise path-like objects to a string so the root check below is a
    # plain value comparison.
    root = os.fspath(dataset_path)

    # loop through all sub-dirs
    for dirpath, _dirnames, filenames in os.walk(root):
        # The root itself is not a category. Compare by value: identity
        # (`is not`) on strings only works by accident.
        if dirpath == root:
            continue

        # update mapping: store the directory name itself, not the list of
        # path components, and do not depend on "/" being the separator
        category = os.path.basename(dirpath)
        data["mappings"].append(category)
        # The label is this category's index in "mappings", so the two can
        # never drift apart.
        label = len(data["mappings"]) - 1
        print(f"Processing{category}")

        # loop through all the file names and extract MFCCs
        for f in filenames:
            # get file path
            file_path = os.path.join(dirpath, f)

            # load audio file
            signal, sr = librosa.load(file_path)

            # skip clips that are too short to yield a full-length signal
            if len(signal) < SAMPLES_TO_CONSIDER:
                continue

            # enforce a fixed-length signal
            signal = signal[:SAMPLES_TO_CONSIDER]

            # extract the MFCCs; audio and sample rate are passed by keyword
            # because recent librosa releases no longer accept them
            # positionally
            MFCCs = librosa.feature.mfcc(
                y=signal,
                sr=sr,
                n_mfcc=n_mfcc,
                hop_length=hop_length,
                n_fft=n_fft,
            )

            # store data
            data["labels"].append(label)
            data["MFCCs"].append(MFCCs.T.tolist())
            data["files"].append(file_path)

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
# Script entry point: build the JSON feature file from the default dataset
# directory when this module is executed directly.
if __name__ == "__main__":
    prepare_dataset(dataset_path=DATASET_PATH, json_path=JSON_PATH)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment