Commit 4227166f authored by W.D.R.P. Sandeepa's avatar W.D.R.P. Sandeepa

Merge branch 'it18218640' into 'master'

create prepare_dataset method

See merge request !21
parents 38a4b382 bebad94f
......@@ -4,4 +4,52 @@ import json
# Default dataset root directory (presumably passed to prepare_dataset as
# dataset_path; the call site is not visible here -- confirm).
DATASET_PATH = "dataset"
# Default output file name (presumably passed to prepare_dataset as json_path
# -- confirm against the call site).
JSON_PATH = "data.json"
# Minimum clip length in samples; prepare_dataset skips shorter clips and
# truncates longer ones to exactly this many samples.
SAMPLES_TO_CONSIDER = 22050
\ No newline at end of file
SAMPLES_TO_CONSIDER = 22050
def prepare_dataset(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    """Extract MFCCs from every audio file in a dataset and dump them to JSON.

    Every directory below ``dataset_path`` is treated as one category. Each
    file inside it is loaded with librosa; clips shorter than
    ``SAMPLES_TO_CONSIDER`` samples are skipped, longer ones are truncated to
    that length before the MFCCs are computed.

    The JSON document written to ``json_path`` has four parallel entries:
    ``"mappings"`` (category names, index == label), ``"labels"`` (one label
    per stored clip), ``"MFCCs"`` (one transposed MFCC matrix per clip, as
    nested lists) and ``"files"`` (the path of each stored clip).

    Args:
        dataset_path: Root directory of the dataset (``str`` or path-like).
        json_path: Destination of the JSON file; overwritten if it exists.
        n_mfcc: Number of MFCC coefficients, forwarded to librosa.
        hop_length: Hop length in samples, forwarded to librosa.
        n_fft: FFT window size in samples, forwarded to librosa.

    Returns:
        None. The result is written to ``json_path``.
    """
    # os.walk yields plain strings, so normalise the root the same way;
    # otherwise a pathlib.Path argument would never compare equal to it.
    root = os.fspath(dataset_path)

    data = {
        "mappings": [],
        "labels": [],
        "MFCCs": [],
        "files": [],
    }

    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place makes os.walk descend in a stable order, so the
        # category -> label assignment is reproducible across machines.
        dirnames.sort()

        # The root itself is not a category. Compare by value: identity
        # (`is`) on strings only works by accident.
        if dirpath == root:
            continue

        # dataset/down -> down, using the platform's own path separator.
        category = os.path.basename(dirpath)
        data["mappings"].append(category)
        # The label is the category's index in "mappings".
        label = len(data["mappings"]) - 1
        print(f"Processing {category}")

        for f in sorted(filenames):
            file_path = os.path.join(dirpath, f)

            signal, sr = librosa.load(file_path)

            # Drop clips that are too short to yield a full-length window.
            if len(signal) < SAMPLES_TO_CONSIDER:
                continue

            # Enforce a fixed clip length so every MFCC matrix has the same shape.
            signal = signal[:SAMPLES_TO_CONSIDER]

            # Audio must be passed by keyword (positional `y` is rejected by
            # current librosa); also pass the actual sample rate.
            MFCCs = librosa.feature.mfcc(
                y=signal,
                sr=sr,
                n_mfcc=n_mfcc,
                hop_length=hop_length,
                n_fft=n_fft,
            )

            data["labels"].append(label)
            data["MFCCs"].append(MFCCs.T.tolist())
            data["files"].append(file_path)
            print(f"{file_path}:{label}")

    # Store everything in a single JSON file.
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment