Commit 33073db6 authored by Dhananjaya Jayashanka

flow analyzing changes done

parent db5902be
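# --- Flask service exposing the filler word counter over HTTP ---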
import flask
from flask_cors import CORS, cross_origin
import getFillterWordCount
app = flask.Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'
app.config["DEBUG"] = True
@app.route('/countFillerWords', methods=['GET'])
@cross_origin()
def countFillerWords():
    fillterWordCount = getFillterWordCount.countFillerWords("../temp.wav")
    return fillterWordCount
app.run(port=5001)
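# Usage sketch (illustrative only, using the third-party requests library, which
# is not part of this service): with the server above running locally,
#   import requests
#   response = requests.get("http://localhost:5001/countFillerWords")
#   print(response.json())  # e.g. {"message": "3 : filler word/s found", "score": 0.7}

# --- Training script: extracts MFCC features, trains the CNN, and reports test metrics ---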
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import get_features
import neural_network
import sys
def get_numpy_array(features_df):
    X = np.array(features_df.feature.tolist())
    y = np.array(features_df.class_label.tolist())
    # encode classification labels
    le = LabelEncoder()
    # one hot encoded labels
    yy = to_categorical(le.fit_transform(y))
    return X, yy, le

def get_train_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test
if __name__ == "__main__":
    # extract features
    print("Extracting features..")
    features_df = get_features.extract_features()
    # convert into numpy array
    X, y, le = get_numpy_array(features_df)
    # split into training and testing data
    X_train, X_test, y_train, y_test = get_train_test(X, y)
    num_labels = y.shape[1]
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)
    # create model architecture
    model = neural_network.create_cnn(num_labels)
    # train model
    print("Training..")
    neural_network.train(model, X_train, X_test, y_train, y_test, "trained_cnn.h5")
    # compute test loss and accuracy
    test_loss, test_accuracy = neural_network.compute(X_test, y_test, "trained_cnn.h5")
    print("Test loss", test_loss)
    print("Test accuracy", test_accuracy)
    # predicting using trained model with any test file in dataset
    # neural_network.predict("data/ab/ab.ogg", le, "trained_cnn.h5")
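# --- Filler word counting module (imported above as getFillterWordCount) ---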
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import numpy as np
import get_features
import neural_network
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def get_numpy_array(features_df):
    X = np.array(features_df.feature.tolist())
    y = np.array(features_df.class_label.tolist())
    # encode classification labels
    le = LabelEncoder()
    # one hot encoded labels
    yy = to_categorical(le.fit_transform(y))
    return X, yy, le
features_df = get_features.extract_features()
X, y, le = get_numpy_array(features_df)
def countFillerWords(filePath):
    fillerWordCount = 0
    sound = AudioSegment.from_wav(filePath)
    chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
    # Chunk Folder file Path
    chunk_folder_name = "chunks"
    # create folder to store chunks
    if not os.path.isdir(chunk_folder_name):
        os.mkdir(chunk_folder_name)
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_file = os.path.join(chunk_folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_file, format="wav", bitrate='192k')
        prediction = neural_network.predict(chunk_file, le, "trained_cnn.h5")
        print(prediction)
        if float(prediction["probability"]) > 0.99:
            fillerWordCount += 1
    print("***** Filler word count in the speech *****")
    # print count of filler words
    print("Filler words: ", fillerWordCount)
    return {
        "message": str(fillerWordCount) + " : filler word/s found",
        "score": ScoreforUserSilence
    }
# countFillerWords("../audio.wav")
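# --- get_features module: MFCC feature extraction from the audio dataset ---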
import os
import librosa
import soundfile as sf
import numpy as np
import glob
import pandas as pd
def get_features(file_name):
    print(sf.available_formats())
    if file_name:
        X, sample_rate = sf.read(file_name, dtype='float32')
        # mfcc (mel-frequency cepstrum)
        print("sample rate: ", sample_rate)
        # collapse stereo audio to a single (left) channel
        monoX = []
        for leftX in X:
            if len(X.shape) == 2:
                monoX.append(leftX[0])
            else:
                monoX.append(leftX)
        shape = np.shape(monoX)
        # zero-pad (or truncate) to a fixed length of 150000 samples so every
        # file yields an equally sized MFCC input
        padded_array = np.zeros((150000))
        length = min(shape[0], 150000)
        padded_array[:length] = monoX[:length]
        mfccs = librosa.feature.mfcc(y=padded_array, sr=sample_rate, n_mfcc=40)
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled
def extract_features():
    # path to dataset containing 10 subdirectories of .ogg files
    sub_dirs = os.listdir('data')
    sub_dirs.sort()
    print(sub_dirs)
    features_list = []
    for label, sub_dir in enumerate(sub_dirs):
        for file_name in glob.glob(os.path.join('data', sub_dir, "*.ogg")):
            print("Extracting file ", file_name)
            try:
                mfccs = get_features(file_name)
            except Exception as e:
                print(e)
                print("Extraction error")
                continue
            features_list.append([mfccs, label])
    features_df = pd.DataFrame(features_list, columns=['feature', 'class_label'])
    print(features_df.head())
    return features_df
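# --- neural_network module: model definitions, training, evaluation, and prediction ---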
# Imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.models import load_model
import get_features
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
from sklearn.metrics import classification_report
def create_mlp(num_labels):
    model = Sequential()
    model.add(Dense(256, input_shape=(40,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    return model
def create_cnn(num_labels):
    model = Sequential()
    model.add(Conv1D(64, 3, activation='relu', input_shape=(40, 1)))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    return model
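# Shape-check sketch (illustrative only): the CNN expects 40 MFCC coefficients
# with a trailing channel axis, matching np.expand_dims(..., axis=2) in the
# training script. The label count below is an assumed example value.
#   model = create_cnn(num_labels=10)
#   dummy = np.zeros((1, 40, 1), dtype="float32")
#   print(model.predict(dummy).shape)  # -> (1, 10)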
def train(model, X_train, X_test, y_train, y_test, model_file):
    # compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    print(model.summary())
    print("training for 100 epochs with batch size 10")
    model.fit(X_train, y_train, batch_size=10, epochs=100, validation_data=(X_test, y_test))
    # save model to disk
    print("Saving model to disk")
    model.save(model_file)
    # predict class indices on the test set
    # (argmax over predict() replaces the Sequential.predict_classes helper, which was removed in newer Keras)
    y_pred = np.argmax(model.predict(X_test, batch_size=8, verbose=1), axis=1)
    y_test = np.argmax(y_test, axis=1)
    print(y_pred)
    print(y_test)
    print(classification_report(y_test, y_pred))
def compute(X_test, y_test, model_file):
    # load model from disk
    loaded_model = load_model(model_file)
    score = loaded_model.evaluate(X_test, y_test)
    return score[0], score[1] * 100
def predict(filename, le, model_file):
    model = load_model(model_file)
    prediction_feature = get_features.get_features(filename)
    if len(prediction_feature) == 0:
        return {"pred": "", "probability": str(0)}
    if model_file == "trained_mlp.h5":
        prediction_feature = np.array([prediction_feature])
    elif model_file == "trained_cnn.h5":
        prediction_feature = np.expand_dims(np.array([prediction_feature]), axis=2)
    # class probabilities and predicted class index
    # (predict() replaces the predict_classes/predict_proba helpers removed in newer Keras)
    predicted_proba_vector = model.predict(prediction_feature)
    predicted_vector = np.argmax(predicted_proba_vector, axis=1)
    predicted_class = le.inverse_transform(predicted_vector)
    sub_dirs = os.listdir('data')
    sub_dirs.sort()
    print("Predicted class", sub_dirs[predicted_class[0]])
    word = ""
    probability = 0
    predicted_proba = predicted_proba_vector[0]
    for i in range(len(predicted_proba)):
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f'))
        if predicted_proba[i] > probability:
            probability = predicted_proba[i]
            word = sub_dirs[predicted_class[0]]
    print("Selected word: ", word)
    return {"pred": word, "probability": str(probability)}