Commit dc69140e authored by Dhananjaya Jayashanka

Modified IT18126884 function

parent 2318ce5e
import spacy
# Import spaCy's predefined list of English stop words.
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest
ScoreforRepetedwords = 70/100
stopwords = list(STOP_WORDS)
# print(stopwords)
nlp = spacy.load('en_core_web_sm')
def processDoubleWords(speech):
    retVal = []
    doc = nlp(speech)
    # Tokenization
    tokens = [token.text for token in doc]
    print("***** Analyzing Repeated Words in Your Speech *****")
    # Flag any word that is immediately repeated (e.g. "the the").
    for i in range(len(tokens) - 1):
        if tokens[i] == tokens[i + 1]:
            print(f"You got stuck on this word: {tokens[i]}")
            retVal.append(f"You got stuck on this word: {tokens[i]}")
    return {
        "message": retVal,
        "score": ScoreforRepetedwords
    }
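A quick usage sketch in the style of the commented-out examples further down; the sample sentence and printed output are illustrative, not part of the commit:
# result = processDoubleWords("I went to the the market")
# print(result["message"])  # e.g. ["You got stuck on this word: the"]
# print(result["score"])    # 0.7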
ScoreforFillerwords = 60/100
def wordcount(filename, listwords):
    # NOTE: despite its name, `filename` receives the speech text itself, not a file path.
    read = filename.split("\n")
    results = []
    for word in listwords:
        lower = word.lower()
        count = 0
        for sentence in read:
            for each in sentence.split():
                token = each.lower().strip("!@#$%^&*()_+=")
                if lower == token:
                    count += 1
        print(lower, ":", count)
        results.append(lower + ":" + str(count))
    # Report every filler word's count, not just the first one.
    return {
        "message": ", ".join(results),
        "score": ScoreforFillerwords
    }
# print("********Analyze Filler Word in your Speech********")
# wordcount("momo.txt", ["Like","okay" ,"so", "actually" ,"basically","right"])
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def countPauses(filePath):
    sound = AudioSegment.from_wav(filePath)
    # Split on stretches of silence at least 200 ms long and 16 dB below the clip's average loudness.
    chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
    # Chunk folder file path
    chunk_folder_name = "chunks"
    # Create a folder to store the chunks
    if not os.path.isdir(chunk_folder_name):
        os.mkdir(chunk_folder_name)
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_file = os.path.join(chunk_folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_file, format="wav", bitrate='192k')
    print("***** How many times the user fell silent in their speech *****")
    # Print the count of silences (one fewer than the number of chunks)
    print(str(len(chunks) - 1) + " silence/s found")
    return {
        "message": str(len(chunks) - 1) + " silence/s found",
        "score": ScoreforUserSilence
    }
I am Rahul, the host for this evening. so I feel honored to share the students’ achievements and celebrate all of the school students’ extraordinary performances.
Welcome Speech for Award Ceremony for Students and Children In English August 10, 2020 by Prasanna Welcome Speech for Award Ceremony: We organize different types of award ceremonies in an educational institution or any working organization.
The award ceremonies in schools and colleges can find the presence of students, teachers, the Principal, and the host. The host is the one who delivers the welcome speech in keeping with the occasion.
Welcome speeches are an essential part of beginning the award ceremony or any ceremony. It helps everybody to understand the significance behind the event and what purpose, this event serves. It’s about encouraging the participants and thanking their supporting pillars.
Students can also find more English Speech Writing about Welcome Speeches, Farewell Speeches, etc.Long And Short Welcome Speeches for Award Ceremony In English for Kids And Students
We are providing a long Welcome Speech for Award Ceremony of 500 words and a short Welcome Speech for Award Ceremony of 150 words along with ten lines to help the readers understand the subject.
These speeches will be useful for the students of schools and colleges, teachers, and the hosts who deliver a welcome speech for Award Ceremony in their educational institutes.
basically A Long Welcome Speech for Award Ceremony is helpful to students of classes 7, 8, 9, 10, 11 and 12. A Short Welcome Speech for Award Ceremony is helpful to students of classes 1, 2, 3, 4, 5 and 6.
Long Welcome Speech for Award Ceremony 500 Words In English.Good Afternoon to all the honorable guests present in this award ceremony! I am Rahul, the host for this evening. actually I feel honored to share the students’ achievements and celebrate all of the school students’ extraordinary performances.
With excellent performances, all of the participants proved that everybody is a winner.We consider children as our future. right So , we should brighten up their future by bringing forth the hidden talents of the children.
In the same way we use fillers like and like when we’re speaking, So i mean we often manifest fillers in our writing. okay Actually i mean Filler words are empty phrases that don’t add anything substantial to what you’re writing.okay They can muddle your points and weigh down your right paragraphs.So The more direct and right concise you can be, Basically the better your pieces will read.
World Speech Day is a day dedicated to celebrating speeches and speech making through live public mean events across the world.Over 100 nations now hold World Speech Day events.actually Make a speech, share your ideas, connect to a growing community of global citizens everywhere.
Anyone can host a WSD event.Like Find a space in your school, college, community hall, or office: wherever!Even in a cafe! so Release the power of speech making around you. All you need to do is join us!
Everyone has ideas. right And everyone can have a voice on World Speech Day. Just like one voice can change everything.We reach out to unexpectedvoices across the globe.
\ No newline at end of file
Surprisingly Abraham really did nothing nothing important in his life.
He was not not a great writer, king, inventor or military leader.
Josh was always that boy with the same haircut and a small little smile-the the one that says he is pleased with himself.
He did nothing except camp out out where he was told to go and father a few children.
Canada is one of the best countries in the world to live in. First, Canada has an
excellent health care system. All Canadians have access to to medical services at a reasonable
price. Second, Canada has a high standard of education. Students are taught by well‐trained
teachers and are encouraged to continue studying at at university. Finally, Canada's cities are clean
and efficiently managed. Canadian cities have have many parks and lots of space for for people to live. As
a result, Canada is a a desirable place to live.
\ No newline at end of file
import flask
from flask_cors import CORS, cross_origin
import getFillterWordCount
app = flask.Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'
app.config["DEBUG"] = True
@app.route('/countFillerWords', methods=['GET'])
@cross_origin()
def countFillerWords():
    fillterWordCount = getFillterWordCount.countFillerWords("../temp.wav")
    return fillterWordCount

if __name__ == "__main__":
    app.run(port=5001)
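A minimal client sketch for this endpoint, assuming the app above is running locally on port 5001; the `requests` call and the example response are illustrative, not part of the commit:

import requests

# Hypothetical client call against the endpoint defined above.
response = requests.get("http://localhost:5001/countFillerWords")
print(response.json())  # e.g. {"message": "3 : filler word/s found", "score": 0.7}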
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import get_features
import neural_network
import sys
def get_numpy_array(features_df):
    X = np.array(features_df.feature.tolist())
    y = np.array(features_df.class_label.tolist())
    # Encode classification labels
    le = LabelEncoder()
    # One-hot encoded labels
    yy = to_categorical(le.fit_transform(y))
    return X, yy, le

def get_train_test(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test

if __name__ == "__main__":
    # Extract features
    print("Extracting features..")
    features_df = get_features.extract_features()
    # Convert into numpy arrays
    X, y, le = get_numpy_array(features_df)
    # Split into training and testing data
    X_train, X_test, y_train, y_test = get_train_test(X, y)
    num_labels = y.shape[1]
    # Conv1D expects a trailing channel dimension
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)
    # Create the model architecture
    model = neural_network.create_cnn(num_labels)
    # Train the model
    print("Training..")
    neural_network.train(model, X_train, X_test, y_train, y_test, "trained_cnn.h5")
    # Compute test loss and accuracy
    test_loss, test_accuracy = neural_network.compute(X_test, y_test, "trained_cnn.h5")
    print("Test loss", test_loss)
    print("Test accuracy", test_accuracy)
    # Predict using the trained model on any test file in the dataset
    neural_network.predict("data/ab/ab.ogg", le, "trained_cnn.h5")
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import numpy as np
import get_features
import neural_network
# countPauses("../content analyzing/temp.wav")
ScoreforUserSilence = 70/100
def get_numpy_array(features_df):
X = np.array(features_df.feature.tolist())
y = np.array(features_df.class_label.tolist())
# encode classification labels
le = LabelEncoder()
# one hot encoded labels
yy = to_categorical(le.fit_transform(y))
return X, yy, le
features_df = get_features.extract_features()
X, y, le = get_numpy_array(features_df)
def countFillerWords(filePath):
fillerWordCount = 0
sound = AudioSegment.from_wav(filePath)
chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=sound.dBFS - 16, keep_silence=150)
# Chunk Folder file Path
chunk_folder_name = "chunks"
# create folder to store chunks
if not os.path.isdir(chunk_folder_name):
os.mkdir(chunk_folder_name)
for i, audio_chunk in enumerate(chunks, start=1):
chunk_file = os.path.join(chunk_folder_name, f"chunk{i}.wav")
audio_chunk.export(chunk_file, format="wav", bitrate='192k')
prediction = neural_network.predict(chunk_file, le, "trained_cnn.h5")
print(prediction)
if float(prediction["probability"]) > 0.99:
fillerWordCount += 1
print("****** How many times Filler words in their Speech *****")
# print count of silence
print("Filler words: ", fillerWordCount)
return {
"message": str(fillerWordCount) + " : filler word/s found",
"score": ScoreforUserSilence
}
# countFillerWords("../audio.wav")
import os
import librosa
import soundfile as sf
import numpy as np
import glob
import pandas as pd
def get_features(file_name):
    if file_name:
        X, sample_rate = sf.read(file_name, dtype='float32')
        print("sample rate: ", sample_rate)
        # Down-mix stereo input to mono by keeping the left channel.
        monoX = []
        for leftX in X:
            if len(X.shape) == 2:
                monoX.append(leftX[0])
            else:
                monoX.append(leftX)
        shape = np.shape(monoX)
        print(len(monoX))
        # Zero-pad every clip to a fixed length; skip clips that are too long.
        if len(monoX) > 300000:
            return []
        padded_array = np.zeros((300000))
        padded_array[:shape[0]] = monoX
        # MFCC (mel-frequency cepstrum) features, averaged over time into one 40-value vector.
        mfccs = librosa.feature.mfcc(y=padded_array, sr=sample_rate, n_mfcc=40)
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled

def extract_features():
    # Path to the dataset containing 10 subdirectories of .ogg files, one per class.
    # Must be 'data' (relative to the working directory) to match the glob below.
    sub_dirs = os.listdir('data')
    sub_dirs.sort()
    print(sub_dirs)
    features_list = []
    for label, sub_dir in enumerate(sub_dirs):
        for file_name in glob.glob(os.path.join('data', sub_dir, "*.ogg")):
            print("Extracting file ", file_name)
            try:
                mfccs = get_features(file_name)
            except Exception as e:
                print(e)
                print("Extraction error")
                continue
            # Skip clips that were too long to featurize (get_features returned []).
            if len(mfccs) == 0:
                continue
            features_list.append([mfccs, label])
    features_df = pd.DataFrame(features_list, columns=['feature', 'class_label'])
    print(features_df.head())
    return features_df
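A small sanity-check sketch for the extractor, reusing the data/ab/ab.ogg clip that the training script references; running this module standalone is an assumption:

if __name__ == "__main__":
    # Illustrative check, assuming the dataset clip used by the training script exists.
    feats = get_features("data/ab/ab.ogg")
    print(np.shape(feats))  # expected: (40,) for clips under 300000 samples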
# Imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.models import load_model
import get_features
from sklearn.preprocessing import LabelEncoder
import numpy as np
import os
def create_mlp(num_labels):
    model = Sequential()
    # Two fully connected hidden layers over the 40 MFCC features.
    model.add(Dense(256, input_shape=(40,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    return model

def create_cnn(num_labels):
    model = Sequential()
    # 1D convolutions over the 40 MFCC features (one input channel).
    model.add(Conv1D(64, 3, activation='relu', input_shape=(40, 1)))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    return model
def train(model, X_train, X_test, y_train, y_test, model_file):
    # Compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    print(model.summary())
    print("training for 100 epochs with batch size 10")
    model.fit(X_train, y_train, batch_size=10, epochs=100, validation_data=(X_test, y_test))
    # Save the model to disk
    print("Saving model to disk")
    model.save(model_file)

def compute(X_test, y_test, model_file):
    # Load the model from disk
    loaded_model = load_model(model_file)
    score = loaded_model.evaluate(X_test, y_test)
    # Return the loss and the accuracy as a percentage
    return score[0], score[1] * 100
def predict(filename, le, model_file):
    model = load_model(model_file)
    prediction_feature = get_features.get_features(filename)
    if len(prediction_feature) == 0:
        return {"pred": "", "probability": str(0)}
    if model_file == "trained_mlp.h5":
        prediction_feature = np.array([prediction_feature])
    elif model_file == "trained_cnn.h5":
        prediction_feature = np.expand_dims(np.array([prediction_feature]), axis=2)
    # predict_classes/predict_proba were removed in recent Keras releases;
    # take the argmax of predict() instead.
    predicted_proba = model.predict(prediction_feature)[0]
    predicted_vector = np.array([np.argmax(predicted_proba)])
    predicted_class = le.inverse_transform(predicted_vector)
    sub_dirs = os.listdir('data')
    sub_dirs.sort()
    print("Predicted class", sub_dirs[predicted_class[0]])
    word = ""
    probability = 0
    # Print the probability for every class and keep the most likely word.
    for i in range(len(predicted_proba)):
        category = le.inverse_transform(np.array([i]))
        print(category[0], "\t\t : ", format(predicted_proba[i], '.32f'))
        if predicted_proba[i] > probability:
            probability = predicted_proba[i]
            word = sub_dirs[predicted_class[0]]
    print("Selected word: ", word)
    return {"pred": word, "probability": str(probability)}