Commit 8062eaca authored by NaweenTharuka

test: integrate prosody features

parent 52547cfe
import numpy as np
import librosa
from pydub import AudioSegment, effects
import noisereduce as nr
import tensorflow as tf
import matplotlib.pyplot as plt
import pyaudio
import wave
from array import array
import time
# Convert video files to .wav format (kept commented out in this commit):
# src = input("In: ")
# dst = input("Out: ")
# sound = AudioSegment.from_file(src)
# sound.export(dst, format="wav")
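# A minimal non-interactive sketch of the same conversion (the paths below are
# placeholders, not project files; pydub needs ffmpeg on PATH to decode video):
# sound = AudioSegment.from_file("F:\\path\\to\\video.mp4")
# sound.export("F:\\path\\to\\audio.wav", format="wav")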
saved_model_path = "F:\\CDAP-PRESENTLY\\21_22-j-02\\Presently\\presently\\users\\models\\model8723.json"
saved_weights_path = "F:\\CDAP-PRESENTLY\\21_22-j-02\\Presently\\presently\\users\\models\\model8723_weights.h5"
audio_file = "F:\\CDAP-PRESENTLY\\21_22-j-02\\Presently\\presently\\media\\audio\\fearful.wav"
# Load the serialized model architecture and weights.
with open(saved_model_path, 'r') as json_file:
    json_savedModel = json_file.read()
model = tf.keras.models.model_from_json(json_savedModel)
model.load_weights(saved_weights_path)
model.compile(loss='categorical_crossentropy',
              optimizer='RMSProp',
              metrics=['categorical_accuracy'])
# print(model.summary())
def preprocess(file_path, frame_length=2048, hop_length=512):
    # Only the sample rate is needed from librosa; the samples come from pydub.
    _, sr = librosa.load(path=file_path, sr=None)
    rawsound = AudioSegment.from_file(file_path, duration=None)
    # Normalize loudness, then denoise the raw samples.
    normalizedsound = effects.normalize(rawsound, headroom=5.0)
    normal_x = np.array(normalizedsound.get_array_of_samples(), dtype='float32')
    # final_x = nr.reduce_noise(normal_x, sr=sr, use_tensorflow=True)
    final_x = nr.reduce_noise(y=normal_x, sr=sr)
    # Frame-level features: RMS energy, zero-crossing rate, and 13 MFCCs.
    f1 = librosa.feature.rms(y=final_x, frame_length=frame_length, hop_length=hop_length, center=True, pad_mode='reflect').T
    f2 = librosa.feature.zero_crossing_rate(y=final_x, frame_length=frame_length, hop_length=hop_length, center=True).T
    f3 = librosa.feature.mfcc(y=final_x, sr=sr, n_mfcc=13, hop_length=hop_length).T
    # Stack into (timesteps, 15) and add a batch dimension for the model.
    X = np.concatenate((f1, f2, f3), axis=1)
    X_3D = np.expand_dims(X, axis=0)
    return X_3D
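# Example usage (hypothetical file): the returned array has shape
# (1, timesteps, 15) -- one RMS column, one ZCR column, 13 MFCC columns:
# features = preprocess("sample.wav")
# print(features.shape)  # e.g. (1, 339, 15) for ~7.1 s of audio at 24414 Hz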
emotions = {
    0: 'neutral',
    1: 'calm',
    2: 'happy',
    3: 'sad',
    4: 'angry',
    5: 'fearful',
    6: 'disgust',
    7: 'surprised'
}
emo_list = list(emotions.values())
def is_silent(data):
    # Treat the chunk as silence if its peak amplitude is below the threshold.
    return max(data) < 100
# Recording parameters.
RATE = 24414
CHUNK = 512
RECORD_SECONDS = 7.1
FORMAT = pyaudio.paInt32
CHANNELS = 1
WAVE_OUTPUT_FILE = "F:\\CDAP-PRESENTLY\\21_22-j-02\\Presently\\presently\\users\\models\\output.wav"
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)
print("** session started")
total_predictions = []
tic = time.perf_counter()
print("* recording...")
frames = []
timesteps = int(RATE / CHUNK * RECORD_SECONDS)
for i in range(0, timesteps):
    # 'i' is a 4-byte signed int, matching the 32-bit paInt32 sample format.
    data = array('i', stream.read(CHUNK))
    frames.append(data)
# Persist the captured frames so they can be preprocessed from disk.
wf = wave.open(WAVE_OUTPUT_FILE, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
print("* done recording")
x = preprocess(WAVE_OUTPUT_FILE)
# x = WAVE_OUTPUT_FILE
predictions = model.predict(x, use_multiprocessing=True)
pred_np = np.squeeze(np.array(predictions), axis=0)
total_predictions.append(pred_np)
# Plot the per-class probabilities for this recording.
fig = plt.figure(figsize=(10, 2))
plt.bar(emo_list, pred_np, color='darkturquoise')
plt.ylabel("Probability (%)")
plt.show()
max_emo = np.argmax(predictions)
print('max emotion:', emotions.get(max_emo, -1))
print(100*'-')
toc = time.perf_counter()
stream.stop_stream()
stream.close()
p.terminate()
wf.close()
print('** session ended')
# Average the predictions collected over the session.
total_predictions_np = np.mean(np.array(total_predictions), axis=0)
fig = plt.figure(figsize=(10, 5))
plt.bar(emo_list, total_predictions_np, color='indigo')
plt.ylabel("Mean probability (%)")
plt.title("Session Summary")
plt.show()
print(f"Emotions analyzed for: {(toc - tic):0.4f} seconds")
@@ -190,4 +190,4 @@ def detect_emotions(location_videofile, NumberofFrames):
     return max(arry, key=arry.get), arry
 emo, arr1 = detect_emotions(location_videofile, 300)
-print(emo, arr1)
\ No newline at end of file
+# print(emo, arr1)
\ No newline at end of file
import myprosody as mysp

# Audio file name (without extension) and the myprosody working directory.
p = "happy"
c = "F:\\CDAP-PRESENTLY\\21_22-j-02\\Presently\\presently\\users\\myprosody"

mysp.myspsyl(p, c)     # number of syllables
mysp.mysppaus(p, c)    # number of pauses / fillers
mysp.myspsr(p, c)      # rate of speech
mysp.myspatc(p, c)     # articulation rate
mysp.myspst(p, c)      # speaking duration (excluding pauses)
mysp.myspod(p, c)      # original duration (including pauses)
mysp.myspbala(p, c)    # ratio of speaking to total duration
mysp.myspf0mean(p, c)  # fundamental frequency (F0) mean
mysp.myspf0sd(p, c)    # F0 standard deviation
mysp.myspf0med(p, c)   # F0 median
mysp.myspf0min(p, c)   # F0 minimum
mysp.myspf0max(p, c)   # F0 maximum
mysp.myspf0q25(p, c)   # F0 25th quantile
mysp.myspf0q75(p, c)   # F0 75th quantile
mysp.mysptotal(p, c)   # overview of all measurements
mysp.myspgend(p, c)    # gender / mood-of-speech estimate
mysp.mysppron(p, c)    # pronunciation posteriori probability score
mysp.myprosody(p, c)
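# Note: myprosody expects the recording at "<c>\\dataset\\audioFiles\\<p>.wav"
# (here, "happy.wav" under the users\\myprosody working directory).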
@@ -3,11 +3,13 @@
{% load static %}
{% block content %}
<h2> {{ var1 }} {{ var2 }}</h2>
<!-- {% for key, value in predictions.items %}
<h2>{{ forloop.counter }}. {{ key }} : {{ value }}</h2>
{% endfor %} -->
-<div class="container">
+<!-- <div class="container">
<div class="heading-text heading-line text-center mt-5">
<h4>Summary of emotions</h4>
</div>
@@ -35,5 +37,5 @@
<h5 class="py-5">Waiting...</h5>
</div>
</div> -->
{% endblock %}
\ No newline at end of file
@@ -10,13 +10,14 @@ from .models import Video
from django.template import Template, Context
import datetime
from . import emotiondetectionvideo
from . import emotiondetectionaudio
def test(request):
    emotion = emotiondetectionvideo.emo
    arr = emotiondetectionvideo.arr1
    var1 = emotiondetectionaudio.emo_list
    var2 = emotiondetectionaudio.total_predictions_np
    context = {
        'maxemotion': emotion,
        'predictions': arr,
        'var1': var1,
        'var2': var2
    }
    return render(request, 'users/test.html', context)
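# Note: emotiondetectionaudio records and predicts at module import time, so
# the values rendered here are whatever that module computed when first imported.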