Commit 6aacd29a authored by Koralage H.C's avatar Koralage H.C

Merge branch 'Shaveen' into 'master'

Shaveen

See merge request !4
parents 12c628bf 783fdbb2
Pipeline #7087 failed with stages
File added
my_classes = {0: {'siren': 0},
1: {'cow': 0, 'hand_saw': 1, 'sheep': 2, 'crackling_fire': 3, 'rooster': 4, 'frog': 5, 'dog': 6, 'crickets': 7, 'door_wood_creaks': 8, 'chirping_birds': 9, 'crow': 10, 'pig': 11, 'brushing_teeth': 12, 'insects': 13, 'clock_tick': 14, 'hen': 15, 'cat': 16}, 2: {'brushing_teeth': 0}, 3: {'footsteps': 0}, 4: {'laughing': 0, 'coughing': 1}, 5: {'drinking_sipping': 0, 'clapping': 1}, 6: {'door_wood_creaks': 0}, 7: {'crickets': 0}, 8: {'footsteps': 0, 'fireworks': 1}, 9: {'breathing': 0}, 10: {'thunderstorm': 0, 'wind': 1}, 11: {'siren': 0, 'cow': 1, 'wind': 2, 'crickets': 3, 'clock_alarm': 4}, 12: {'clock_tick': 0, 'clock_alarm': 1, 'water_drops': 2}, 13: {'crickets': 0}, 14: {'crackling_fire': 0}, 15: {'clock_tick': 0, 'vacuum_cleaner': 1, 'door_wood_creaks': 2, 'washing_machine': 3}, 16: {'water_drops': 0}, 17: {'crackling_fire': 0}, 18: {'mouse_click': 0}, 19: {'siren': 0, 'hand_saw': 1, 'cow': 2, 'water_drops': 3, 'airplane': 4, 'door_wood_creaks': 5, 'washing_machine': 6, 'insects': 7, 'clock_tick': 8, 'brushing_teeth': 9, 'car_horn': 10, 'can_opening': 11, 'engine': 12, 'door_wood_knock': 13, 'church_bells': 14, 'cat': 15}, 20: {'toilet_flush': 0, 'wind': 1, 'sea_waves': 2, 'pouring_water': 3, 'rain': 4, 'door_wood_creaks': 5}, 21: {'cow': 0}, 22: {'can_opening': 0}, 23: {'crackling_fire': 0}, 24: {'brushing_teeth': 0}, 25: {'wind': 0}, 26: {'chirping_birds': 0}, 27: {'brushing_teeth': 0}, 28: {'door_wood_creaks': 0}, 29: {'frog': 0}, 30: {'crackling_fire': 0}, 31: {'footsteps': 0, 'mouse_click': 1}, 32: {'mouse_click': 0, 'water_drops': 1}, 33: {'crickets': 0}, 34: {'breathing': 0}, 35: {'crow': 0}, 36: {'pouring_water': 0}, 37: {'airplane': 0, 'thunderstorm': 1, 'fireworks': 2}, 38: {'cow': 0}, 39: {'water_drops': 0}, 40: {'laughing': 0, 'crying_baby': 1}, 41: {'engine': 0, 'chainsaw': 1}, 42: {'airplane': 0, 'wind': 1}, 43: {'clapping': 0}, 44: {'siren': 0}, 45: {'crying_baby': 0, 'car_horn': 1, 'drinking_sipping': 2, 'train': 3, 'door_wood_creaks': 4, 'sheep': 
5, 'crackling_fire': 6, 'clapping': 7, 'pig': 8, 'sneezing': 9, 'cat': 10, 'cow': 11, 'laughing': 12, 'clock_tick': 13, 'door_wood_knock': 14, 'mouse_click': 15, 'frog': 16, 'insects': 17, 'keyboard_typing': 18}, 46: {'cat': 0, 'crying_baby': 1, 'door_wood_creaks': 2}, 47: {'crickets': 0, 'chirping_birds': 1}, 48: {'washing_machine': 0}, 49: {'vacuum_cleaner': 0}, 50: {'cat': 0}, 51: {'door_wood_creaks': 0}, 52: {'crackling_fire': 0}, 53: {'hand_saw': 0, 'vacuum_cleaner': 1, 'clock_alarm': 2}, 54: {'car_horn': 0}, 55: {'pig': 0, 'hand_saw': 1}, 56: {'crackling_fire': 0}, 57: {'brushing_teeth': 0},
58: {'brushing_teeth': 0, 'hand_saw': 1}, 59: {'breathing': 0}, 60: {'insects': 0, 'crickets': 1}, 61: {'siren': 0, 'airplane': 1, 'washing_machine': 2, 'car_horn': 3, 'rain': 4, 'train': 5, 'drinking_sipping': 6, 'door_wood_creaks': 7, 'hand_saw': 8, 'toilet_flush': 9, 'crackling_fire': 10, 'crickets': 11, 'vacuum_cleaner': 12, 'helicopter': 13, 'engine': 14, 'chainsaw': 15, 'cow': 16, 'wind': 17, 'water_drops': 18, 'sea_waves': 19, 'mouse_click': 20, 'breathing': 21}, 62: {'siren': 0, 'crying_baby': 1, 'car_horn': 2, 'drinking_sipping': 3, 'door_wood_creaks': 4, 'hand_saw': 5, 'sheep': 6, 'crackling_fire': 7, 'crickets': 8, 'chirping_birds': 9, 'crow': 10, 'clapping': 11, 'pig': 12, 'brushing_teeth': 13, 'can_opening': 14, 'fireworks': 15, 'sneezing': 16, 'engine': 17, 'helicopter': 18, 'dog': 19, 'cat': 20, 'cow': 21, 'rooster': 22, 'water_drops': 23, 'laughing': 24, 'footsteps': 25, 'sea_waves': 26, 'clock_tick': 27, 'pouring_water': 28, 'door_wood_knock': 29, 'mouse_click': 30, 'hen': 31, 'frog': 32, 'clock_alarm': 33, 'snoring': 34, 'insects': 35, 'glass_breaking': 36, 'breathing': 37, 'coughing': 38, 'thunderstorm': 39, 'keyboard_typing': 40}, 63: {'clock_tick': 0}, 64: {'crackling_fire': 0, 'wind': 1, 'water_drops': 2, 'footsteps': 3, 'crickets': 4}, 65: {'door_wood_creaks': 0}, 66: {'crying_baby': 0}, 67: {'pouring_water': 0, 'water_drops': 1}, 68: {'insects': 0}, 69: {'clock_tick': 0, 'can_opening': 1, 'glass_breaking': 2}, 70: {'water_drops': 0}, 71: {'clapping': 0}, 72: {'glass_breaking': 0, 'crackling_fire': 1}, 73: {'clock_alarm': 0}, 74: {'clapping': 0}, 75: {'siren': 0}, 76: {'footsteps': 0}, 77: {'snoring': 0}, 78: {'washing_machine': 0, 'train': 1, 'chirping_birds': 2}, 79: {'washing_machine': 0}, 80: {'sneezing': 0},
81: {'crickets': 0}, 82: {'clock_alarm': 0}, 83: {'wind': 0}, 84: {'washing_machine': 0}, 85: {'pouring_water': 0}, 86: {'chainsaw': 0}, 87: {'drinking_sipping': 0}, 88: {'vacuum_cleaner': 0, 'washing_machine': 1}, 89: {'insects': 0, 'crickets': 1, 'rain': 2, 'chirping_birds': 3}, 90: {'footsteps': 0, 'door_wood_creaks': 1, 'door_wood_knock': 2}, 91: {'clock_tick': 0, 'clock_alarm': 1}, 92: {'footsteps': 0, 'mouse_click': 1, 'keyboard_typing': 2, 'door_wood_knock': 3}, 93: {'brushing_teeth': 0, 'dog': 1, 'cat': 2, 'door_wood_creaks': 3}, 94: {'sea_waves': 0}, 95: {'airplane': 0}, 96: {'vacuum_cleaner': 0, 'washing_machine': 1}, 97: {'crackling_fire': 0, 'rain': 1}, 98: {'footsteps': 0, 'fireworks': 1}, 99: {'hand_saw': 0}, 100: {'drinking_sipping': 0}, 101: {'clock_alarm': 0}, 102: {'sheep': 0}, 103: {'crackling_fire': 0}, 104: {'crackling_fire': 0}, 105: {'door_wood_creaks': 0, 'washing_machine': 1, 'chainsaw': 2, 'engine': 3},
106: {'can_opening': 0, 'drinking_sipping': 1}, 107: {'siren': 0}, 108: {'toilet_flush': 0}, 109: {'crackling_fire': 0}, 110: {'drinking_sipping': 0}, 111: {'crying_baby': 0}, 112: {'crickets': 0, 'helicopter': 1, 'wind': 2}, 113: {'insects': 0, 'door_wood_creaks': 1}, 114: {'pig': 0, 'hen': 1, 'cow': 2, 'sheep': 3}, 115: {'footsteps': 0, 'crackling_fire': 1}, 116: {'footsteps': 0, 'airplane': 1, 'sea_waves': 2, 'wind': 3}, 117: {'clock_tick': 0}, 118: {'crickets': 0}, 119: {'footsteps': 0, 'crackling_fire': 1},
120: {'sea_waves': 0}, 121: {'hand_saw': 0, 'snoring': 1, 'drinking_sipping': 2, 'breathing': 3, 'coughing': 4}, 122: {'church_bells': 0}, 123: {'dog': 0}, 124: {'footsteps': 0}, 125: {'chainsaw': 0}, 126: {'clapping': 0}, 127: {'brushing_teeth': 0}, 128: {'laughing': 0}, 129: {'clapping': 0}, 130: {'clock_alarm': 0}, 131: {'vacuum_cleaner': 0}, 132: {'engine': 0}, 133: {'crackling_fire': 0, 'water_drops': 1, 'footsteps': 2, 'laughing': 3, 'washing_machine': 4, 'clock_tick': 5, 'brushing_teeth': 6, 'can_opening': 7, 'drinking_sipping': 8, 'mouse_click': 9, 'door_wood_creaks': 10},
134: {'thunderstorm': 0}, 135: {'hand_saw': 0}, 136: {'helicopter': 0, 'washing_machine': 1}}
# Ordered list of YAMNet display names. The index of a name in this list is
# the model_id used to select the matching fine-grained classifier file
# ('models/sound_classifier<model_id>.h5') and the key into my_classes.
type_list = ['Police car (siren)', 'Animal', 'Toothbrush', 'Sliding door', 'Cough', 'Crunch', 'Buzzer',
             'Cricket', 'Crackle', 'Sigh', 'Thunderstorm', 'Alarm', 'Telephone', 'Rattle (instrument)',
             'Rodents, rats, mice', 'Mechanisms', 'Wood block', 'Keys jangling', 'Chink, clink', 'Music',
             'Water', 'Cattle, bovinae', 'Cap gun', 'Clip-clop', 'Scratch', 'White noise', 'Bird', 'Writing',
             'Electric shaver, electric razor', 'Frog', 'Patter', 'Door', 'Plop', 'Chirp tone', 'Snort', 'Crow',
             'Liquid', 'Explosion', 'Roaring cats (lions, tigers)', 'Tick', 'Laughter', 'Chainsaw', 'Eruption',
             'Hands', 'Shofar', 'Speech', 'Crying, sobbing', 'Environmental noise', 'Fill (with liquid)',
             'Vacuum cleaner', 'Cat', 'Synthesizer', 'Tearing', 'Tools', 'Vehicle horn, car horn, honking', 'Pig',
             'Scissors', 'Squeal', 'Rub', 'Gasp', 'Insect', 'Vehicle', 'Silence', 'Clock',
             'Outside, rural or natural', 'Rattle', 'Babbling', 'Drip', 'Bee, wasp, etc.', 'Glass',
             'Bathtub (filling or washing)', 'Tap', 'Crack', 'Jingle bell', 'Cheering', 'Emergency vehicle',
             'Walk, footsteps', 'Snoring', 'Rail transport', 'Blender', 'Sneeze', 'Raindrop', 'Ringtone',
             'Whale vocalization', 'Pump (liquid)', 'Gurgling', 'Chant', 'Biting', 'Jet engine', 'Wild animals',
             'Knock', 'Alarm clock', 'Typing', 'Domestic animals, pets', 'Stream', 'Artillery fire', 'Aircraft',
             'Rain', 'Fireworks', 'Wood', 'Chewing, mastication', 'Beep, bleep', 'Sheep', 'Coin (dropping)',
             'Boiling', 'Engine', 'Burping, eructation', 'Siren', 'Toilet flush', 'Bicycle', 'Stomach rumble',
             'Child speech, kid speaking', 'Wind', 'Fart', 'Livestock, farm animals, working animals', 'Fire',
             'Ocean', 'Sound effect', 'Snake', 'Crumpling, crinkling', 'Boat, Water vehicle', 'Breathing', 'Bell',
             'Dog', 'Thunk', 'Light engine (high frequency)', 'Clapping', 'Pant', 'Radio', 'Applause',
             'Telephone bell ringing', 'Steam',
             'Motorcycle', 'Inside, small room', 'Heart sounds, heartbeat', 'Sawing', 'Helicopter']
import tensorflow as tf
import numpy as np
import statistics
from cash import type_list, my_classes
from sound_feature_extract import feature_extraction
def classify_class(filename):
    """Classify the sound in *filename* with a per-category Keras model.

    feature_extraction() returns the YAMNet audio embeddings, the id of the
    fine-grained classifier to use, and YAMNet's own top display name. The
    matching classifier's per-frame predictions are aggregated by majority
    vote; if fewer than 4 frames agree the prediction is rejected.

    Returns a dict {"prediction": <class name or failure message>}.
    """
    embeddings, model_id, inferred_class = feature_extraction(filename=filename)
    model = tf.keras.models.load_model('models/sound_classifier' + str(model_id) + '.h5')
    predictions = model.predict(embeddings)
    predicted_class = np.argmax(predictions, axis=-1)
    print(predicted_class)
    # Majority vote across the per-frame predictions.
    res = statistics.mode(predicted_class)
    count = int(np.sum(predicted_class == res))
    print(count)
    # Hoisted: the original computed type_list.index(inferred_class) 3 times.
    category_id = type_list.index(inferred_class)
    class_obj = my_classes[category_id]
    print(class_obj)
    if count > 3:
        # Reverse-lookup: name whose index in class_obj equals the mode.
        class_ = list(class_obj.keys())[list(class_obj.values()).index(res)]
    else:
        # Too few agreeing frames to commit to a label.
        class_ = "Can not predict this sound"
    print(class_)
    print(category_id)
    print(class_obj)
    return {
        "prediction": class_
    }
# print(classify_class('1-115920-B-22.wav'))
\ No newline at end of file
import os
import subprocess
def convert_aud_ffmpeg():
    """Convert ./uploaded.amr to ./uploaded.wav via the ffmpeg binary.

    Removes any stale output file first (ffmpeg fails on an existing output
    without -y). Prints a status line; errors are reported, not raised.
    """
    ffmpeg_path = '/usr/bin/ffmpeg'  # The path to the ffmpeg binary
    input_file = 'uploaded.amr'
    output_file = 'uploaded.wav'
    if os.path.exists(output_file):
        os.remove(output_file)
    # Argument list + shell=False removes the shell-injection / quoting
    # exposure of the previous f-string + shell=True invocation.
    ffmpeg_command = [ffmpeg_path, '-i', input_file, output_file]
    try:
        subprocess.run(ffmpeg_command, shell=False, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error: {e.returncode}")
    else:
        print("FFmpeg command executed successfully")
\ No newline at end of file
This diff is collapsed.
import json
from flask import Flask, request, jsonify
from flask_cors import CORS, cross_origin
from convert_using_ffmpeg import convert_aud_ffmpeg
from sound_convert_wav import convert_audio_type , convert_wav_bit_type
import pronouncation_accuracy
from classify_sound import classify_class
# from sign_detect import inference
from sign_detect import inference
from summerization import get_summerized_paragraph
app = Flask(__name__)
# NOTE(review): this CORS rule only covers the "/" resource; the API routes
# rely on their individual @cross_origin decorators instead -- confirm that
# is intentional.
CORS(app, resources={r"/": {"origins": "*"}})
@app.route("/")
def main():
    """Landing route used as a simple liveness check."""
    return "home"
@app.route("/v1/pronouncationaccuracy",methods=["POST"])
@cross_origin()
def pronouncation():
    # Expects multipart form-data: 'audio' (a WAV recording) and 'element'
    # (the target word the speaker attempted to pronounce).
    uploaded_file = request.files['audio']
    if uploaded_file:
        uploaded_file.save('uploaded.wav')
    # NOTE(review): if 'audio' is falsy, whatever uploaded.wav is already on
    # disk gets scored instead -- presumably unintended; confirm.
    return pronouncation_accuracy.get_pronouncation_accuracy(audio_file='uploaded.wav' , pronounce_word=request.form['element'])
@app.route("/v1/sounddetect",methods=["POST"])
@cross_origin()
def soundclasification():
    # Accepts an AMR recording in form field 'audio'; it is converted to a
    # 16-bit WAV (ffmpeg + soundfile) before classification.
    print(request)
    uploaded_file = request.files['audio']
    if uploaded_file:
        uploaded_file.save('uploaded.amr')
    convert_aud_ffmpeg()
    convert_wav_bit_type()
    return classify_class(filename='uploaded.wav')
@app.route('/v1/sign', methods=['POST'])
def sign():
    """Predict the sign in the uploaded image and compare it against the
    expected sign supplied in the 'sign' form field."""
    image_obj = request.files['image_path']
    sign_obj = request.form['sign']
    filename = image_obj.filename
    # Save under the client-supplied name (the previous code had a literal
    # placeholder here, leaving `filename` unused and overwriting one fixed
    # path). NOTE(review): filename is client-controlled -- consider
    # werkzeug.utils.secure_filename to prevent path traversal.
    image_path = f'uploads/{filename}'
    image_obj.save(image_path)
    prediction, proba = inference(image_path)
    matched = prediction == sign_obj
    return jsonify({
        "PredSignType": f"{prediction}",
        "TrueSignType": f"{sign_obj}",
        "probability" : f"{proba}",
        "matched": f"{matched}"
    })
@app.route('/v1/summerize', methods=['POST'])
def summerize():
    """Summarize the text POSTed in the 'paragraph' form field."""
    return get_summerized_paragraph(request.form['paragraph'])
@app.route("/form_data", methods=["POST"])
@cross_origin()
def formdata():
    # Debug endpoint: saves the uploaded image and echoes the JSON payload
    # sent in the 'name' form field to the console.
    uploaded_file = request.files['image']
    body = request.form['name']
    print(body)
    print(json.loads(body))
    if uploaded_file:
        uploaded_file.save('uploaded.png')
    return 'File uploaded successfully!', 200
if __name__ == '__main__':
    # Development server only; debug mode must not be enabled in production.
    app.debug = True
    app.run(host='localhost',port=5000)
File added
This diff is collapsed.
import speech_recognition as sr
import nltk
from translate_google_api import get_text_google_api
nltk.download('wordnet')
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.metrics import jaccard_distance
# Initialize the recognizer
recognizer = sr.Recognizer()
# Load an audio file (replace 'your_audio_file.wav' with the path to your audio file)
def get_pronouncation_text(audio_file):
    """Transcribe *audio_file* with the Google Web Speech API.

    Returns (True, transcript) on success, otherwise (False, error message).
    """
    # Open the audio file using the recognizer
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)  # Record the entire audio file
    # Use the Google Speech Recognition API to convert audio to text
    try:
        # (an unreachable print referencing an undefined `text` variable
        # after this return was removed)
        return True, recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return False, "Could not understand the audio"
    except sr.RequestError as e:
        # Was '"...{0}" + format(e)', which left the literal "{0}" in the
        # message and appended str(e) after it; fill the placeholder instead.
        return False, "Could not request results; {0}".format(e)
# Calculate Wu-Palmer Similarity using WordNet
def get_wordnet_similarity(word1 , word2):
    # Uses the first synset of each word; raises IndexError when a word has
    # no synsets -- the caller relies on that exception to fall back to the
    # Jaccard metric.
    synset1 = wordnet.synsets(word1)[0]
    synset2 = wordnet.synsets(word2)[0]
    # NOTE(review): despite the comment above, this computes path similarity,
    # not Wu-Palmer (wordnet.wup_similarity) -- confirm which is intended.
    return wordnet.path_similarity(synset1, synset2, simulate_root=True)
def generate_words_in_sequence(letters):
    """Return every prefix of *letters*, shortest first.

    e.g. "abc" -> ["a", "ab", "abc"]; an empty input yields [].
    """
    # Comprehension replaces the manual append loop (same output).
    return [letters[:i] for i in range(1, len(letters) + 1)]
def get_tokenize_word(word1 , word2):
    """Return the prefix sequences of both words as a pair of lists."""
    return generate_words_in_sequence(word1), generate_words_in_sequence(word2)
# from nltk.wup_similarity import wup_similarity
lemmatizer = WordNetLemmatizer()
def get_jaccard_similarity(word1 , word2):
    # Lemmatize as verbs so inflected forms compare equal (e.g. 'running'
    # and 'runs' both lemmatize to 'run').
    lemma1 = lemmatizer.lemmatize(word1, pos='v')  # e.g. 'run'
    lemma2 = lemmatizer.lemmatize(word2, pos='v')  # e.g. 'run'
    # set(str) is a set of CHARACTERS, so this is Jaccard similarity over
    # the character sets of the two lemmas, in [0, 1].
    jaccard_similarity = 1 - jaccard_distance(set(lemma1), set(lemma2))
    return jaccard_similarity
# similarity_score = wordnet.path_similarity(synset1, synset2, simulate_root=True)
# print(similarity_score)
def get_tokenize_similarity(words1 , words2):
    """Average pairwise Jaccard similarity over aligned prefix lists.

    Unmatched tail prefixes (when the lists differ in length) count as
    zero-similarity comparisons, penalising length mismatches. Returns 0
    when both lists are empty (previously a ZeroDivisionError).
    """
    length = min(len(words1), len(words2))          # aligned pairs
    difference = abs(len(words1) - len(words2))     # unmatched tail
    if length + difference == 0:
        return 0
    total_similarity = 0
    for i in range(length):
        total_similarity += get_jaccard_similarity(words1[i], words2[i])
    # `comparisons` in the original always equalled `length`.
    return total_similarity / (length + difference)
def get_pronouncation_accuracy(audio_file , pronounce_word):
    """Score how closely the audio's pronunciation matches *pronounce_word*.

    Tries, in order: prefix-token similarity, WordNet path similarity and
    character-set Jaccard similarity; the first metric clearing the 0.45
    threshold wins. Returns {"similarity": percentage, "hit": metric name}.
    """
    state , word1 = get_pronouncation_text(audio_file)
    similarity = 0
    hit = ""
    if not state:
        # Local recognizer failed -- fall back to the Google Cloud API.
        word1 = get_text_google_api(audio_file)
    word2 = pronounce_word
    print(word1 , word2)
    words1 , words2 = get_tokenize_word(word1 , word2)
    print(words1 , words2)
    tokenize_similarity = get_tokenize_similarity(words1 , words2)
    if tokenize_similarity >= 0.45:
        print('Hit Token Similarity')
        print('Pronouncation Accuracy = ',tokenize_similarity)
        similarity = tokenize_similarity
        hit = "Hit Token Similarity"
    else:
        try:
            print('Hit Wordnet Similarity')
            wordnet_similarity = get_wordnet_similarity(word1 , word2)
            if wordnet_similarity >= 0.45:
                print('Pronouncation Accuracy = ',wordnet_similarity)
                similarity = wordnet_similarity
                hit = "Hit Wordnet Similarity"
            else:
                print('Pronouncation Accuracy = ',0)
                similarity = 0
        # get_wordnet_similarity raises IndexError for out-of-vocabulary
        # words; was a bare `except:`, which also swallowed SystemExit /
        # KeyboardInterrupt.
        except Exception:
            print('Hit jaccard Similarity')
            jaccard_similarity = get_jaccard_similarity(word1 , word2)
            print('Pronouncation Accuracy = ',jaccard_similarity)
            similarity = jaccard_similarity
            hit = "Hit jaccard Similarity"
    return {
        "similarity":similarity*100,
        "hit":hit
    }
# audio_file = 'teacher2.wav'
# similarity = get_pronouncation_accuracy(audio_file , 'teachers')
\ No newline at end of file
import glob
import cv2 as cv
import numpy as np
import pandas as pd
import tensorflow as tf
# Load the pre-trained sign-language classifier once at import time.
# compile() re-attaches metrics; prediction alone would work without it,
# but evaluation/fine-tuning needs the loss and metrics configured.
model = tf.keras.models.load_model('sign_detector.h5')
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=[
        tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall')
    ])
# Mapping from sign label to the model's output index. Composite labels like
# 'O_OR_0' cover visually identical signs (letter O / digit 0).
class_dict = {
    '1': 0,
    '3': 1,
    '4': 2,
    '5': 3,
    '7': 4,
    '8': 5,
    '9': 6,
    'A': 7,
    'B': 8,
    'Baby': 9,
    'Brother': 10,
    'C': 11,
    'D': 12,
    'Dont_like': 13,
    'E': 14,
    'F': 15,
    'Friend': 16,
    'G': 17,
    'H': 18,
    'Help': 19,
    'House': 20,
    'I': 21,
    'J': 22,
    'K': 23,
    'L': 24,
    'Like': 25,
    'Love': 26,
    'M': 27,
    'Make': 28,
    'More': 29,
    'N': 30,
    'Name': 31,
    'No': 32,
    'O_OR_0': 33,
    'P': 34,
    'Pay': 35,
    'Play': 36,
    'Q': 37,
    'R': 38,
    'S': 39,
    'Stop': 40,
    'T': 41,
    'U': 42,
    'V_OR_2': 43,
    'W_OR_6': 44,
    'With': 45,
    'X': 46,
    'Y': 47,
    'Yes': 48,
    'Z': 49,
    'nothing': 50
}
# Inverse mapping: output index -> label, used to decode predictions.
class_dict_rev = {v: k for k, v in class_dict.items()}
def inference(
        image_path,
        target_size=(224, 224)
):
    """Run the sign classifier on a single image file.

    Args:
        image_path: path to the image (backslashes tolerated).
        target_size: (width, height) expected by the model.

    Returns:
        (label, probability): the predicted class name and the model's
        confidence rounded to 3 decimals.

    Raises:
        FileNotFoundError: when the image cannot be read.
    """
    image_path = image_path.replace('\\', '/')
    img = cv.imread(image_path)
    if img is None:
        # cv.imread returns None instead of raising on a bad/unreadable
        # path; fail clearly rather than with an opaque cvtColor error.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    img = cv.resize(img, target_size)
    img = np.expand_dims(img, axis=0)
    # Xception preprocessing scales pixel values into the model's range.
    img = tf.keras.applications.xception.preprocess_input(img)
    prediction = model.predict(img).squeeze()
    p = prediction.argmax()
    proba = round(prediction[p], 3)
    return class_dict_rev[int(p)], proba
\ No newline at end of file
from pydub import AudioSegment
import soundfile as sf
import re
# Load the AMR audio file
def is_wav_file(file_path):
    """Return True when *file_path* ends in '.amr' (case-insensitive).

    NOTE(review): the name says WAV but the check is for AMR; behaviour is
    kept because the (commented-out) caller depends on it.
    """
    print(file_path)
    lowered = file_path.lower()
    return lowered.endswith('.amr')
# def convert_audio_type(audio_file):
# pattern = r"'(.*?)'"
# match = re.search(pattern, audio_file)
# filename = match.group(1)
# print(filename)
# if is_wav_file(filename):
# amr_audio = AudioSegment.from_file(audio_file, format='amr')
# amr_audio.export('uploaded.wav', format='wav')
def convert_audio_type():
    # Decode ./uploaded.amr (container auto-detected by pydub/ffmpeg) and
    # re-export it as ./uploaded2.wav.
    amr_audio = AudioSegment.from_file('uploaded.amr')
    amr_audio.export('uploaded2.wav', format='wav')
# Specify the input 32-bit WAV file paths
def convert_wav_bit_type():
    """Rewrite ./uploaded.wav in place as 16-bit PCM.

    soundfile reads float samples in [-1, 1]; scaling by 32767 converts
    them to int16. Errors are reported, not raised (best-effort).
    """
    try:
        input_file_path = "uploaded.wav"
        output_file_path = "uploaded.wav"  # in-place rewrite
        audio_data, sample_rate = sf.read(input_file_path)
        print(sample_rate)
        # NOTE(review): samples at exactly +/-1.0 can clip/wrap in this
        # cast -- consider clamping before astype('int16').
        audio_data_16bit = (audio_data * 32767).astype('int16')
        sf.write(output_file_path, audio_data_16bit, sample_rate)
    except Exception as e:
        # Was a bare `except:`; keep the original message but also surface
        # the actual error for debugging.
        print("error converting")
        print(e)
\ No newline at end of file
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
from cash import type_list
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    # NOTE(review): the resample step below is commented out, so despite the
    # docstring the audio is returned at its ORIGINAL sample rate; YAMNet
    # expects 16 kHz mono -- confirm inputs are already 16 kHz.
    file_contents = tf.io.read_file(filename)
    print(file_contents)
    wav, sample_rate = tf.audio.decode_wav(
        file_contents,
        desired_channels=1)
    # Drop the channel axis: (samples, 1) -> (samples,).
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    print(sample_rate)
    # wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    print(wav)
    return wav
def feature_extraction(filename):
    """Run YAMNet on *filename*.

    Returns (embeddings, model_id, inferred_class), where inferred_class is
    YAMNet's top display name and model_id is the index of the LAST entry
    of type_list that contains that name as a substring (the scan does not
    stop at the first hit, matching the original behaviour). model_id
    defaults to 0 when nothing matches.
    """
    class_map_path = yamnet_model.class_map_path().numpy().decode('utf-8')
    class_names = list(pd.read_csv(class_map_path)['display_name'])
    testing_wav_data = load_wav_16k_mono(filename)
    print(testing_wav_data)
    scores, embeddings, spectrogram = yamnet_model(testing_wav_data)
    class_scores = tf.reduce_mean(scores, axis=0)
    top_class = tf.math.argmax(class_scores)
    inferred_class = class_names[top_class]
    model_id = 0
    # enumerate replaces the manual counter; `category` avoids shadowing
    # the builtin `type`.
    for idx, category in enumerate(type_list):
        if inferred_class in category:
            print(category)
            model_id = idx
    print(model_id)
    print(inferred_class)
    return embeddings , model_id , inferred_class
\ No newline at end of file
{
"type": "service_account",
"project_id": "soundclassification-403103",
"private_key_id": "e747d85c7c857f64662e6f7968efd25f46fcb3de",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCU9SSOsO9qpcU6\nbyu0DDg5GTXEWtlXCdtsd8jVsmNOUWGt9vh14e6LAavfRTKZBExnh0m4tlOLjdZT\nB6/ES/jT3JXoojutBJbN5jtFhvE74Z0ij1yyP3aVp0llRA84vsEgpVc+KUNm6qrl\nP5oGXGbXA8aHjrabUHSbxw7MYTnxH7/5azlFR3FD+bgBRn2TyvtzKCGC8bNgQ7NB\nxzSjz7eoO5UUznpuZ6uVtGm9JGSHSXfu1ylBt+HcKowrNFrjEY+yDg3Fpe9+aln0\n01UJugkQ0jcEBBQct7g62jNvI084DoQc7aup5zdvfmkoiZieEvz+y6cLHD3GT+70\nvaPu6tDVAgMBAAECggEACZvMmWXBc+QwlkXgkFMvd0Nwi/N+TJaGsHcDngFNVZp7\nThD90wDZdd8MOrFIg8VatlzsPP2J4xkpp5550JRD/0qd4u1AIzeGEM8WqLP9MKsr\nizSrOpZMbHfnjKA4ySTI6XWw7IIV5nghwPr60Qk52wnTzx++GMO2WewbwMROzAZy\nj3e/woKkgFJBNY+3pdlPnq5dEsa6UWJY6SJ9UoGz968QI4ehAViqYWloka3SKXOM\nem9TnPR98Jw94KvSUi7aFYD4BGkXn9NmRwq/o37s4Zh6pY9C6nhVDel1hmJ95HTm\nP+vqRpMLfL5BHUBqMJEOBPyIFh7xOT7C8kb6njWBEQKBgQDMyveZDjp/wbKwf4LW\nTiDFnhRL52UQH1Pj48+7eurdihwXgdjuTjjZ8Db7dubdEpgHBJ+LeTirXI26s77r\nXvuK45OeEzpJaxX+TqybA4tGSnBb2S7qyagoWI2eBpEPRyzqhv//Nzz+6HPFMBWi\noSLcJzPKXxlpzEpuI0yesx9taQKBgQC6NBhLNYrmIGdVg5XcSOg4rc7YiT5FzyRq\nTOVeovFbvpBgn2rfTPvgsJ1gCdevCvrmTbEt617ppZmzSOjTSBCNdat8PUS9Tbv2\nfnSPcEXPTwj5xqYFKJiZOZ6zg6OFjZULeMqCTTjTgLqnoj/6KuKWvfHX+DgJKeRO\nla1mqBdejQKBgQCmO3aBkZAglc0DAW3loONvJaLZ/gkF7jf9OrUrPFoa2/lzh13c\nCjiRhzBKLDi3aFlpEPKklxBN4MHWtAKcMaxDD/mKDZd5cugWViw7Y8QQmWnc1E0G\nHgxWO5Mwm6nx2dd2w4BW3LKxQ2R9wks9iEiwpU2fzlPlkgM0pYiTJVi0SQKBgHtR\nvTUyRMecdlOATmQSNRo0omVun7uEMM3mF5LRv5/PWbdkXVd+mOJ6lf/hBON9FiYp\nFksfM0Dy2BSRGQG1HKFTaTtjKH9nWDEsFu5YoSNnaL5j/dg0xrvUGXMyn733wtpS\nKWya0CPjgbs8DbsVY/QJ4H30esg24mnZX3o5oSCRAoGBALjh30SSpnQjvPW4OaxU\nX9L0soM/yQLCJVknoQ9JqETzrDo1h7JCLA5AfS0jpuWeUsyRCgv97MvL3h3zNPjl\nI+uP1R5dbaY1oiNWv4F+VZJbx8ISMjXL4+U2c6b0W7Iu2YfVBBMOPmlt5ayS0YwA\ngQ9Hn4SaCVa5hYVtzDxov91+\n-----END PRIVATE KEY-----\n",
"client_email": "soundclassification@soundclassification-403103.iam.gserviceaccount.com",
"client_id": "106583804081864367311",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/soundclassification%40soundclassification-403103.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import spacy
nltk.download('stopwords')
nltk.download('punkt')
from nltk import pos_tag
nltk.download('averaged_perceptron_tagger')
def get_sentences_para(paragraph):
    """Split *paragraph* into sentences with nltk.sent_tokenize.

    Also prints a stop-word-stripped version of the text (the original's
    debug output, kept). Returns the list of sentences.
    """
    # Tokenize once; the original called sent_tokenize a second time with
    # the identical argument just before returning.
    sentences = sent_tokenize(paragraph)
    words = word_tokenize(paragraph)
    # Get a list of English stopwords
    stop_words = set(stopwords.words('english'))
    # Remove stopwords and punctuation
    filtered_words = [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words]
    summarized_paragraph = ' '.join(filtered_words)
    print(summarized_paragraph)
    print(sentences)
    return sentences
def generate_words_in_sequence(letters):
    """Return every prefix of *letters*, shortest first.

    e.g. "abc" -> ["a", "ab", "abc"]; an empty input yields [].
    """
    # Comprehension replaces the manual append loop (same output).
    return [letters[:i] for i in range(1, len(letters) + 1)]
nlp = spacy.load("en_core_web_sm")
plural_word = "umbrella"
def check_plural_word(word):
    """Return {"state": bool, "word": str}.

    When spaCy tags any token of *word* as plural, returns its singular
    lemma with state True; otherwise the word unchanged with state False.
    """
    doc = nlp(word)
    for token in doc:
        if "Number=Plur" in token.morph:
            # Plural detected -- report the lemma as the singular form.
            # (An unreachable `break` after this return was removed; the
            # original's for/else is flattened to a plain post-loop return.)
            return {
                "state": True,
                "word": token.lemma_
            }
    return {
        "state": False,
        "word": word
    }
def get_pos_tags(sentence_):
    """Return the part-of-speech tag sequence for *sentence_*'s words."""
    tagged_tokens = pos_tag(word_tokenize(sentence_))
    # Keep only the tags, dropping the words themselves.
    return [tag for _word, tag in tagged_tokens]
# Build prefix sequences of POS tags and words for every reference sentence;
# check_grammer matches against these. `with` closes the file handle that
# the original `open()` leaked.
with open('english_sentences.txt', 'r') as file1:
    Lines = file1.readlines()
all_patterns = []
all_words_tokens = []
count = 0
# Strips the newline character
for line in Lines:
    pos_tags = get_pos_tags(line)
    words = word_tokenize(line)
    count += 1
    print("Line{}: {}".format(count, line.strip()))
    print(generate_words_in_sequence(pos_tags))
    print(generate_words_in_sequence(words))
    all_patterns.append(generate_words_in_sequence(pos_tags))
    all_words_tokens.append(generate_words_in_sequence(words))
import pandas as pd
# Reference noun vocabulary; `nouns_list` is consulted by check_word_noun.
df = pd.read_csv('nounlist.csv')
print(list(df['nouns']))
nouns_list = list(df['nouns'])
def check_word_noun(word):
    """Return True when *word* appears in the noun vocabulary."""
    # Membership test directly replaces the if/return-True/return-False.
    return word in nouns_list
def check_grammer(sentence):
    """Match *sentence*'s POS-tag sequence against the reference patterns.

    Scans at most 100 reference sentences. When some prefix of a reference
    sentence has exactly the same POS tags as *sentence*, returns
    (True, <that word prefix as a list of words>); otherwise (False, "") --
    except that hitting the 100-sentence cap sets matching_sentence to the
    input sentence while matching stays False.
    """
    pos_tags = get_pos_tags(sentence)
    print(pos_tags)
    matching = False
    matching_sentence = ""
    # i indexes all_patterns / all_words_tokens in lockstep; j indexes the
    # prefix inside one reference sentence.
    i = 0
    j = 0
    for tags in all_patterns:
        if i >= 100:
            # Cap the scan for performance; fall back to the input text.
            matching_sentence = sentence
            break
        word_token = all_words_tokens[i]
        j = 0
        for pattern in tags:
            # Exact match: element-wise equal AND same length (zip alone
            # would accept a shorter pattern as a prefix match).
            if all(element1 == element2 for element1, element2 in zip(pattern, pos_tags)) and len(pattern) == len(
                    pos_tags):
                matching = True
                print(word_token)
                print(j)
                # The word prefix at the same position as the tag prefix.
                matching_sentence = word_token[j]
                break
            j += 1
        if matching:
            break
        i += 1
    return matching, matching_sentence
import re
from nltk.corpus import words
import itertools
nltk.download('words')
def has_number(word):
    """Return True when *word* contains at least one decimal digit."""
    # bool() over the match-or-None result replaces the original if/else.
    return bool(re.search(r'\d', word))
def get_sentence(sentence):
    """Best-effort cleanup of a recognized sentence.

    Keeps only 'meaningful' words (dictionary words, numbers, known nouns).
    If every word is meaningful the sentence is returned unchanged; else a
    grammar-pattern match is tried, then up to 100 permutations of the
    meaningful words; finally the input is returned as-is.
    """
    english_words = set(words.words())
    words_ = word_tokenize(sentence)
    meaning_full_words = []
    # NOTE(review): this prints the nltk corpus object, not the tokens --
    # probably intended to be words_.
    print(words)
    for word in words_:
        number = has_number(word)
        word_singular = check_plural_word(word)['word'].lower()
        print(word_singular)
        if word.lower() in english_words or number or check_word_noun(word_singular):
            meaning_full_words.append(word)
    print(meaning_full_words)
    if len(words_) == len(meaning_full_words):
        # Every word is meaningful -- nothing to repair.
        print('perfect')
        return sentence
    matching, matching_sentence = check_grammer(sentence)
    if matching:
        return ' '.join(matching_sentence)
    permutations = list(itertools.islice(itertools.permutations(meaning_full_words), 100))
    # Print the generated permutations
    i = 0
    for perm in permutations:
        sentence_ = ' '.join(perm)
        i += 1
        if i >= 100:
            return sentence
        # FIX: check_grammer returns a (bool, str) tuple, which is ALWAYS
        # truthy -- the original `if check_grammer(...)` accepted the first
        # permutation unconditionally. Unpack and test the flag instead.
        perm_matching, _ = check_grammer(sentence_)
        if perm_matching:
            print(sentence_)
            return sentence_
    return sentence
def get_grammatical_sentence(sentences,sentence):
    """Return the first member of *sentences* whose grammar matches a
    reference pattern; fall back to *sentence* when none do.

    Fixes two defects: check_grammer returns a (bool, str) tuple that is
    always truthy, so the original accepted the very first candidate; and
    the loop variable shadowed the *sentence* fallback parameter, so the
    fallback returned the last candidate instead of the original input.
    """
    for candidate in sentences:
        matched, _ = check_grammer(candidate)
        if matched:
            print(candidate)
            return candidate
    return sentence
def create_para(sentences):
    """Join *sentences* with '.' separators into a single paragraph."""
    print(sentences)
    joined = '.'.join(sentences)
    return joined
def get_summerized_paragraph(paragraph):
    """Clean *paragraph* sentence by sentence.

    Each sentence is stripped of periods and repaired with get_sentence();
    the results are re-joined. Returns
    {"original": input, "summerized": cleaned paragraph}.
    """
    cleaned = [
        get_sentence(re.sub(r'\.', '', sent))
        for sent in get_sentences_para(paragraph)
    ]
    summerized_para = create_para(cleaned)
    print(summerized_para)
    return {
        "original" : paragraph,
        "summerized" : summerized_para
    }
# paragraph = "This is the john's book and it is very bla"
# get_summerized_paragraph(paragraph)
\ No newline at end of file
File added
import os
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'soundclassification-403103-e747d85c7c85.json'
def transcribe_audio(audio_file , sample_rate):
    """Transcribe a LINEAR16 mono WAV file with Google Cloud Speech.

    Args:
        audio_file: path to the WAV file.
        sample_rate: sample rate (Hz) to declare to the API.

    Returns:
        The transcript of the last result (matching the original loop's
        return value), or "" when the API returns no results -- previously
        that case raised NameError on the undefined loop variable.
    """
    client = speech.SpeechClient()
    # `with` closes the handle; `fh` avoids shadowing the audio_file path.
    with open(audio_file, 'rb') as fh:
        content = fh.read()
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        language_code="en-US",
        audio_channel_count=1
    )
    response = client.recognize(config=config, audio=audio)
    transcript = ""
    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        transcript = result.alternatives[0].transcript
    return transcript
def get_text_google_api(audio_path):
    # The Cloud Speech request is configured for one channel, so downmix to
    # mono first; the declared sample rate must match the source file's.
    stereo_audio = AudioSegment.from_wav(audio_path)
    sample_rate = stereo_audio.frame_rate
    print(f"Sample rate of the audio file: {sample_rate} Hz")
    mono_audio = stereo_audio.set_channels(1)
    mono_audio.export('mono-audio.wav', format='wav')
    return transcribe_audio('mono-audio.wav',sample_rate)
\ No newline at end of file
File added
File added
File added
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment