Commit dd96ab05 authored by Lihinikaduwa D.N.R.

Merge branch 'it18257632' into 'master'

Generate accuracy & loss graph

See merge request !85
parents 51b6109d 2fce640d
import json
import librosa
import os

DATASET_PATH = "dataset"
JSON_PATH = "data.json"
SAMPLES_TO_CONSIDER = 22050  # 1 second of audio at librosa's default 22050 Hz sample rate


def prepare_dataset(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    # data dictionary
    data = {
        "mappings": [],
        "labels": [],
        "MFCCs": [],
        "files": []
    }

    # loop through all sub-dirs
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        if dirpath != dataset_path:
            # update mapping with the sub-dir name (the keyword category)
            category = dirpath.split("/")[-1]
            data["mappings"].append(category)
            print(f"Processing: {category}")

            # loop through all the filenames and extract MFCCs
            for f in filenames:
                # get file path
                file_path = os.path.join(dirpath, f)

                # load audio file
                signal, sr = librosa.load(file_path)

                # ensure the audio file is at least 1 sec
                if len(signal) >= SAMPLES_TO_CONSIDER:
                    # enforce 1 sec signal
                    signal = signal[:SAMPLES_TO_CONSIDER]

                    # extract the MFCCs
                    MFCCs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                                                 hop_length=hop_length, n_fft=n_fft)

                    # store data
                    data["labels"].append(i - 1)
                    data["MFCCs"].append(MFCCs.T.tolist())
                    data["files"].append(file_path)
                    print(f"{file_path}: {i-1}")

    # store in json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


if __name__ == "__main__":
    prepare_dataset(DATASET_PATH, JSON_PATH)
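For reference, the training script below reads this data.json back in through load_dataset, whose body is not shown in this diff. The snippet that follows is only a minimal sketch of such a loader, assuming the "MFCCs"/"labels" layout written by prepare_dataset; the name load_mfcc_dataset is illustrative and not taken from the repository.

import json
import numpy as np

def load_mfcc_dataset(data_path="data.json"):
    # read the JSON written by prepare_dataset and convert the lists to numpy arrays
    with open(data_path, "r") as fp:
        data = json.load(fp)

    X = np.array(data["MFCCs"])   # (num_clips, num_frames, n_mfcc); all clips were cut to 1 sec
    y = np.array(data["labels"])  # one integer label per clip
    return X, y

if __name__ == "__main__":
    X, y = load_mfcc_dataset()
    print(X.shape, y.shape)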
import json
import numpy as np
import matplotlib.pyplot as pyplot
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split

@@ -7,10 +8,10 @@ DATA_PATH = "data.json"
SAVE_MODEL_PATH = "model.h5"
LEARNING_RATE = 0.0001
EPOCHS = 100
BATCH_SIZE = 32
NUM_KEYWORDS = 13

def load_dataset(data_path):
@@ -78,6 +79,23 @@ def build_model(input_shape, learning_rate, error="sparse_categorical_crossentropy"):
    return model
def generate_graph(history):
    # training / validation loss per epoch
    pyplot.plot(history.history['loss'], 'r', label='train loss')
    pyplot.plot(history.history['val_loss'], 'g', label='validation loss')
    pyplot.legend()
    pyplot.xlabel('Epochs')
    pyplot.ylabel('Loss')
    pyplot.show()

    # training / validation accuracy per epoch, shown in a second figure
    pyplot.ylabel('Accuracy')
    pyplot.xlabel('Epochs')
    pyplot.plot(history.history['accuracy'], label='Training Accuracy')
    pyplot.plot(history.history['val_accuracy'], label='Validation Accuracy')
    pyplot.legend()
    pyplot.show()
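If a single saved image is preferred over two interactive windows, a combined figure could look roughly like the sketch below. This is not part of the merge request: generate_graph_combined, the out_path argument, and the 1x2 subplot layout are illustrative, and the pyplot alias comes from the import at the top of this file.

def generate_graph_combined(history, out_path="training_curves.png"):
    # one figure with two panels: loss on the left, accuracy on the right
    fig, (ax_loss, ax_acc) = pyplot.subplots(1, 2, figsize=(12, 4))

    ax_loss.plot(history.history['loss'], label='train loss')
    ax_loss.plot(history.history['val_loss'], label='validation loss')
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    ax_acc.plot(history.history['accuracy'], label='Training Accuracy')
    ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax_acc.set_xlabel('Epochs')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()

    fig.savefig(out_path)  # write the curves to disk before (or instead of) showing them
    pyplot.show()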
def main():
    # load train/validation/test data splits
    X_train, X_validation, X_test, y_train, y_validation, y_test = get_data_splits(DATA_PATH)
@@ -87,7 +105,8 @@ def main():
    model = build_model(input_shape, LEARNING_RATE)

    # train the model and keep the History object for plotting
    history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
                        validation_data=(X_validation, y_validation))

    # evaluate the model
    test_error, test_accuracy = model.evaluate(X_test, y_test)
@@ -96,6 +115,8 @@ def main():
    # save the model
    model.save(SAVE_MODEL_PATH)

    # plot the accuracy & loss curves for this training run
    generate_graph(history)


if __name__ == "__main__":
    main()
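The bodies of load_dataset and get_data_splits fall outside the hunks shown above. Purely as an illustration of the six arrays main expects, a split built on the train_test_split import could look like the sketch below; the 0.1 test and validation fractions are assumptions, not values taken from the repository.

def get_data_splits(data_path, test_size=0.1, validation_size=0.1):
    # load MFCC features and labels, then carve out test and validation sets
    X, y = load_dataset(data_path)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size)

    return X_train, X_validation, X_test, y_train, y_validation, y_test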