Commit dc69140e authored by Dhananjaya Jayashanka

Modified IT18126884 function

parent 2318ce5e
import spacy
# Import spaCy's predefined list of English stop words.
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest
ScoreforRepetedwords = 70/100
stopwords = list(STOP_WORDS)
# print(stopwords)
nlp = spacy.load('en_core_web_sm')
def processDoubleWords(speech):
    retVal = []
    doc = nlp(speech)
    # Tokenization
    tokens = [token.text for token in doc]
    print("***** Analyzing Repeated Words in Your Speech *****")
    # Flag any word that is immediately repeated (e.g. "the the").
    for i in range(len(tokens) - 1):
        if tokens[i] == tokens[i + 1]:
            print(f"You got stuck on this word: {tokens[i]}")
            retVal.append(f"You got stuck on this word: {tokens[i]}")
    return {
        "message": retVal,
        "score": ScoreforRepetedwords
    }
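A quick usage sketch in the style of the commented-out examples further down; the sample sentence and printed output are illustrative, not part of the commit:
# result = processDoubleWords("I went to the the market")
# print(result["message"])  # e.g. ["You got stuck on this word: the"]
# print(result["score"])    # 0.7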
ScoreforFillerwords = 60/100
def wordcount(filename, listwords):
    # NOTE: despite its name, `filename` receives the speech text itself, not a file path.
    read = filename.split("\n")
    results = []
    for word in listwords:
        lower = word.lower()
        count = 0
        for sentence in read:
            for each in sentence.split():
                token = each.lower().strip("!@#$%^&*()_+=")
                if lower == token:
                    count += 1
        print(lower, ":", count)
        results.append(lower + ":" + str(count))
    # Report every filler word's count, not just the first one.
    return {
        "message": ", ".join(results),
        "score": ScoreforFillerwords
    }
# print("********Analyze Filler Word in your Speech********")
# wordcount("momo.txt", ["Like","okay" ,"so", "actually" ,"basically","right"])
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def countPauses(filePath):
    sound = AudioSegment.from_wav(filePath)
    # Split on stretches of silence at least 200 ms long and 16 dB below the clip's average loudness.
    chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
    # Chunk folder file path
    chunk_folder_name = "chunks"
    # Create a folder to store the chunks
    if not os.path.isdir(chunk_folder_name):
        os.mkdir(chunk_folder_name)
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_file = os.path.join(chunk_folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_file, format="wav", bitrate='192k')
    print("***** How many times the user fell silent in their speech *****")
    # Print the count of silences (one fewer than the number of chunks)
    print(str(len(chunks) - 1) + " silence/s found")
    return {
        "message": str(len(chunks) - 1) + " silence/s found",
        "score": ScoreforUserSilence
    }
I am Rahul, the host for this evening. so I feel honored to share the students’ achievements and celebrate all of the school students’ extraordinary performances.
Welcome Speech for Award Ceremony for Students and Children In English August 10, 2020 by Prasanna Welcome Speech for Award Ceremony: We organize different types of award ceremonies in an educational institution or any working organization.
The award ceremonies in schools and colleges can find the presence of students, teachers, the Principal, and the host. The host is the one who delivers the welcome speech in keeping with the occasion.
Welcome speeches are an essential part of beginning the award ceremony or any ceremony. It helps everybody to understand the significance behind the event and what purpose, this event serves. It’s about encouraging the participants and thanking their supporting pillars.
Students can also find more English Speech Writing about Welcome Speeches, Farewell Speeches, etc.Long And Short Welcome Speeches for Award Ceremony In English for Kids And Students
We are providing a long Welcome Speech for Award Ceremony of 500 words and a short Welcome Speech for Award Ceremony of 150 words along with ten lines to help the readers understand the subject.
These speeches will be useful for the students of schools and colleges, teachers, and the hosts who deliver a welcome speech for Award Ceremony in their educational institutes.
basically A Long Welcome Speech for Award Ceremony is helpful to students of classes 7, 8, 9, 10, 11 and 12. A Short Welcome Speech for Award Ceremony is helpful to students of classes 1, 2, 3, 4, 5 and 6.
Long Welcome Speech for Award Ceremony 500 Words In English.Good Afternoon to all the honorable guests present in this award ceremony! I am Rahul, the host for this evening. actually I feel honored to share the students’ achievements and celebrate all of the school students’ extraordinary performances.
With excellent performances, all of the participants proved that everybody is a winner.We consider children as our future. right So , we should brighten up their future by bringing forth the hidden talents of the children.
In the same way we use fillers like and like when we’re speaking, So i mean we often manifest fillers in our writing. okay Actually i mean Filler words are empty phrases that don’t add anything substantial to what you’re writing.okay They can muddle your points and weigh down your right paragraphs.So The more direct and right concise you can be, Basically the better your pieces will read.
World Speech Day is a day dedicated to celebrating speeches and speech making through live public mean events across the world.Over 100 nations now hold World Speech Day events.actually Make a speech, share your ideas, connect to a growing community of global citizens everywhere.
Anyone can host a WSD event.Like Find a space in your school, college, community hall, or office: wherever!Even in a cafe! so Release the power of speech making around you. All you need to do is join us!
Everyone has ideas. right And everyone can have a voice on World Speech Day. Just like one voice can change everything.We reach out to unexpectedvoices across the globe.
\ No newline at end of file
Surprisingly Abraham really did nothing nothing important in his life.
He was not not a great writer, king, inventor or military leader.
Josh was always that boy with the same haircut and a small little smile-the the one that says he is pleased with himself.
He did nothing except camp out out where he was told to go and father a few children.
Canada is one of the best countries in the world to live in. First, Canada has an
excellent health care system. All Canadians have access to to medical services at a reasonable
price. Second, Canada has a high standard of education. Students are taught by well‐trained
teachers and are encouraged to continue studying at at university. Finally, Canada's cities are clean
and efficiently managed. Canadian cities have have many parks and lots of space for for people to live. As
a result, Canada is a a desirable place to live.
\ No newline at end of file
import flask
from flask_cors import CORS, cross_origin
import getFillterWordCount
app = flask.Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'
app.config["DEBUG"] = True
@app.route('/countFillerWords', methods=['GET'])
@cross_origin()
def countFillerWords():
    fillterWordCount = getFillterWordCount.countFillerWords("../temp.wav")
    return fillterWordCount

if __name__ == "__main__":
    app.run(port=5001)
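A minimal client sketch for this endpoint, assuming the app above is running locally on port 5001; the `requests` call and the example response are illustrative, not part of the commit:

import requests

# Hypothetical client call against the endpoint defined above.
response = requests.get("http://localhost:5001/countFillerWords")
print(response.json())  # e.g. {"message": "3 : filler word/s found", "score": 0.7}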
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import get_features
import neural_network
import sys
def get_numpy_array(features_df):
    X = np.array(features_df.feature.tolist())
    y = np.array(features_df.class_label.tolist())
    # Encode classification labels
    le = LabelEncoder()
    # One-hot encoded labels
    yy = to_categorical(le.fit_transform(y))
    return X, yy, le

def get_train_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test

if __name__ == "__main__":
    # Extract features
    print("Extracting features..")
    features_df = get_features.extract_features()
    # Convert into numpy arrays
    X, y, le = get_numpy_array(features_df)
    # Split into training and testing data
    X_train, X_test, y_train, y_test = get_train_test(X, y)
    num_labels = y.shape[1]
    # Conv1D expects a trailing channel dimension
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)
    # Create the model architecture
    model = neural_network.create_cnn(num_labels)
    # Train the model
    print("Training..")
    neural_network.train(model, X_train, X_test, y_train, y_test, "trained_cnn.h5")
    # Compute test loss and accuracy
    test_loss, test_accuracy = neural_network.compute(X_test, y_test, "trained_cnn.h5")
    print("Test loss", test_loss)
    print("Test accuracy", test_accuracy)
    # Predict using the trained model on any test file in the dataset
    neural_network.predict("data/ab/ab.ogg", le, "trained_cnn.h5")
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import numpy as np
import get_features
import neural_network
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def get_numpy_array(features_df):
X = np.array(features_df.feature.tolist())
y = np.array(features_df.class_label.tolist())
# encode classification labels
le = LabelEncoder()
# one hot encoded labels
yy = to_categorical(le.fit_transform(y))
return X, yy, le
features_df = get_features.extract_features()
X, y, le = get_numpy_array(features_df)
def countFillerWords(filePath):
fillerWordCount = 0
sound = AudioSegment.from_wav(filePath)
chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
# Chunk Folder file Path
chunk_folder_name = "chunks"
# create folder to store chunks
if not os.path.isdir(chunk_folder_name):
os.mkdir(chunk_folder_name)
for i, audio_chunk in enumerate(chunks, start=1):
chunk_file = os.path.join(chunk_folder_name, f"chunk{i}.wav")
audio_chunk.export(chunk_file, format="wav", bitrate='192k')
prediction = neural_network.predict(chunk_file, le, "trained_cnn.h5")
print(prediction)
if float(prediction["probability"]) > 0.99:
fillerWordCount += 1
print("****** How many times Filler words in their Speech *****")
# print count of silence
print("Filler words: ", fillerWordCount)
return {
"message": str(fillerWordCount) + " : filler word/s found",
"score": ScoreforUserSilence
}
# countFillerWords("../audio.wav")
import os
import librosa
import soundfile as sf
import numpy as np
import glob
import pandas as pd
def get_features(file_name):
    if file_name:
        X, sample_rate = sf.read(file_name, dtype='float32')
        print("sample rate: ", sample_rate)
        # Down-mix stereo input to mono by keeping the left channel.
        monoX = []
        for leftX in X:
            if len(X.shape) == 2:
                monoX.append(leftX[0])
            else:
                monoX.append(leftX)
        shape = np.shape(monoX)
        print(len(monoX))
        # Zero-pad every clip to a fixed length; skip clips that are too long.
        if len(monoX) > 300000:
            return []
        padded_array = np.zeros((300000))
        padded_array[:shape[0]] = monoX
        # MFCC (mel-frequency cepstrum) features, averaged over time into one 40-value vector.
        mfccs = librosa.feature.mfcc(y=padded_array, sr=sample_rate, n_mfcc=40)
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled

def extract_features():
    # Path to the dataset containing 10 subdirectories of .ogg files, one per class.
    # Must be 'data' (relative to the working directory) to match the glob below.
    sub_dirs = os.listdir('data')
    sub_dirs.sort()
    print(sub_dirs)
    features_list = []
    for label, sub_dir in enumerate(sub_dirs):
        for file_name in glob.glob(os.path.join('data', sub_dir, "*.ogg")):
            print("Extracting file ", file_name)
            try:
                mfccs = get_features(file_name)
            except Exception as e:
                print(e)
                print("Extraction error")
                continue
            # Skip clips that were too long to featurize (get_features returned []).
            if len(mfccs) == 0:
                continue
            features_list.append([mfccs, label])
    features_df = pd.DataFrame(features_list, columns=['feature', 'class_label'])
    print(features_df.head())
    return features_df
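A small sanity-check sketch for the extractor, reusing the data/ab/ab.ogg clip that the training script references; running this module standalone is an assumption:

if __name__ == "__main__":
    # Illustrative check, assuming the dataset clip used by the training script exists.
    feats = get_features("data/ab/ab.ogg")
    print(np.shape(feats))  # expected: (40,) for clips under 300000 samples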
# Imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.models import load_model
import get_features
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
def create_mlp(num_labels):
    model = Sequential()
    # Two fully connected hidden layers over the 40 MFCC features.
    model.add(Dense(256, input_shape=(40,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    return model

def create_cnn(num_labels):
    model = Sequential()
    # 1D convolutions over the 40 MFCC features (one input channel).
    model.add(Conv1D(64, 3, activation='relu', input_shape=(40, 1)))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    return model
def train(model, X_train, X_test, y_train, y_test, model_file):
    # Compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    print(model.summary())
    print("training for 100 epochs with batch size 10")
    model.fit(X_train, y_train, batch_size=10, epochs=100, validation_data=(X_test, y_test))
    # Save the model to disk
    print("Saving model to disk")
    model.save(model_file)

def compute(X_test, y_test, model_file):
    # Load the model from disk
    loaded_model = load_model(model_file)
    score = loaded_model.evaluate(X_test, y_test)
    # Return the loss and the accuracy as a percentage
    return score[0], score[1] * 100
def predict(filename, le, model_file):
    model = load_model(model_file)
    prediction_feature = get_features.get_features(filename)
    if len(prediction_feature) == 0:
        return {"pred": "", "probability": str(0)}
    if model_file == "trained_mlp.h5":
        prediction_feature = np.array([prediction_feature])
    elif model_file == "trained_cnn.h5":
        prediction_feature = np.expand_dims(np.array([prediction_feature]), axis=2)
    # predict_classes/predict_proba were removed in recent Keras releases;
    # take the argmax of predict() instead.
    predicted_proba = model.predict(prediction_feature)[0]
    predicted_vector = np.array([np.argmax(predicted_proba)])
    predicted_class = le.inverse_transform(predicted_vector)
    sub_dirs = os.listdir('data')
    sub_dirs.sort()
    print("Predicted class", sub_dirs[predicted_class[0]])
    word = ""
    probability = 0
    # Print the probability for every class and keep the most likely word.
    for i in range(len(predicted_proba)):
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f'))
        if predicted_proba[i] > probability:
            probability = predicted_proba[i]
            word = sub_dirs[predicted_class[0]]
    print("Selected word: ", word)
    return {"pred": word, "probability": str(probability)}