Commit 229a6683 authored by LiniEisha's avatar LiniEisha Committed by I.K Seneviratne

Combining with old files

parent f8449fa0
import nltk
read_lines = [line.rstrip('\n') for line in open("audioToText01.txt", "r")]
sentences_list = []
sentence_list = nltk.sent_tokenize(read_lines)
word_search = "important"
sentences_with_word = []
for sentence in sentences_list:
if sentence.count(word_search)>0:
sentences_with_word.append(sentence)
words_search = ["exam", "assignment"]
word_sentence_dictionary = {"exam":[],"assignment":[]}
for word in words_search:
sentences_with_word = []
for sentence in sentences_list:
if sentence.count(word)>0:
sentences_with_word.append(sentence)
word_sentence_dictionary[word] = sentences_with_word
\ No newline at end of file
import spacy
from spacy.lang.pt.stop_words import STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer
import pt_core_news_sm
nlp = pt_core_news_sm.load()
with open("audioToText01.txt", "r", encoding="utf-8") as f:
text = " ".join(f.readlines())
doc = nlp(text)
corpus = [sent.text.lower() for sent in doc.sents ]
cv = CountVectorizer(stop_words=list(STOP_WORDS))
cv_fit=cv.fit_transform(corpus)
word_list = cv.get_feature_names();
count_list = cv_fit.toarray().sum(axis=0)
word_frequency = dict(zip(word_list,count_list))
val=sorted(word_frequency.values())
higher_word_frequencies = [word for word,freq in word_frequency.items() if freq in val[-3:]]
print("\nWords with higher frequencies: ", higher_word_frequencies)
# gets relative frequency of words
higher_frequency = val[-1]
for word in word_frequency.keys():
word_frequency[word] = (word_frequency[word]/higher_frequency)
sentence_rank={}
for sent in doc.sents:
for word in sent :
if word.text.lower() in word_frequency.keys():
if sent in sentence_rank.keys():
sentence_rank[sent]+=word_frequency[word.text.lower()]
else:
sentence_rank[sent]=word_frequency[word.text.lower()]
top_sentences=(sorted(sentence_rank.values())[::-1])
top_sent=top_sentences[:3]
summary=[]
for sent,strength in sentence_rank.items():
if strength in top_sent:
summary.append(sent)
else:
continue
for i in summary:
print(i,end=" ")
\ No newline at end of file
from rest_framework.views import APIView
from rest_framework.response import Response
from LectureSummarizingApp.models import LectureAudio
from LectureSummarizingApp.serializer import LectureAudioSerializer
from LectureSummarizingApp.models import LectureAudio, LectureAudioNoiseRemoved, LectureSpeechToText, \
LectureAudioSummary, LectureNotices
from LectureSummarizingApp.serializer import LectureAudioSerializer, LectureAudioNoiseRemovedSerializer, \
LectureSpeechToTextSerializer, LectureAudioSummarySerializer
# this API will retrieve lecture audio details
class LectureAudioAPI(APIView):
def get(self, request):
lecture_audio = LectureAudio.objects.all()
lecture_audio = LectureAudio.objects.all().order_by('lecturer_date')
lecture_audio_serializer = LectureAudioSerializer(lecture_audio, many=True)
return Response(lecture_audio_serializer.data)
class audioNoiseRemovedList(APIView):
def get(self, request):
lecture_audio_noise_removed = LectureAudioNoiseRemoved.objects.all()
serializer = LectureAudioNoiseRemovedSerializer(lecture_audio_noise_removed, many=True)
return Response(serializer.data)
def post(self, request):
LectureAudioNoiseRemoved(
lecture_audio_noise_removed_id=request.data["lecture_audio_noise_removed_id"],
lecture_audio_id=request.data["lecture_audio_id"],
lecturer_date=request.data["lecturer_date"],
lecture_audio_name=request.data["lecture_audio_name"],
lecture_audio_length=request.data["lecture_audio_length"]
).save()
return Response({"response": request.data})
class audioToTextList(APIView):
def get(self, request):
lecture_speech_to_text_id = LectureSpeechToText.objects.all()
serializer = LectureSpeechToTextSerializer(lecture_speech_to_text_id, many=True)
return Response(serializer.data)
def post(self, request):
LectureSpeechToText(
lecture_speech_to_text_id=request.data["lecture_speech_to_text_id"],
lecture_audio_id=request.data["lecture_audio_id"],
audio_original_text=request.data["audio_original_text"]
).save()
return Response({"response": request.data})
data = lecture_audio_serializer.data
class lectureSummaryList(APIView):
def get(self, request):
lecture_audio_summary_id = LectureAudioSummary.objects.all()
serializer = LectureAudioSummarySerializer(lecture_audio_summary_id, many=True)
return Response(serializer.data)
def post(self, request):
LectureAudioSummary(
lecture_speech_to_text_id=request.data["lecture_speech_to_text_id"],
lecture_audio_id=request.data["lecture_audio_id"],
audio_original_text=request.data["audio_original_text"],
audio_summary=request.data["audio_summary"]
).save()
return Response({"response": request.data})
class lectureNoticeList(APIView):
def get(self, request):
lecture_notice_id = LectureNotices.objects.all()
serializer = LectureSpeechToTextSerializer(lecture_notice_id, many=True)
return Response(serializer.data)
return Response({
"response": data
})
\ No newline at end of file
def post(self, request):
LectureSpeechToText(
lecture_notice_id=request.data["lecture_notice_id"],
lecture_audio_id=request.data["lecture_audio_id"],
notice_text=request.data["notice_text"]
).save()
return Response({"response": request.data})
\ No newline at end of file
perhaps this is what is PR agency is are their dignity schedule III was much is 50 feet then the choreographer missed arbitrated never go back into acquiescence with things as they work finds it in misery and isolation around us in this instance such personal purchase for a luxury leather cases of severe and advisement say he is a horse days Ranjan or he may have a point that contains between fuel prices straight line which symbolises uniqueness the circuit universality of small hole in wall with client has more subtle implications in passport expiry marketing program manufacturers taking initiative of the costs involved cricket overlapping twisted widely spaced to you always navigate like this
\ No newline at end of file
......@@ -23,8 +23,6 @@ class LectureAudioNoiseRemoved (models.Model):
lecturer_date = models.DateField()
lecture_audio_name = models.CharField(max_length=50)
lecture_audio_length = models.DurationField()
lecturer = models.ForeignKey(Lecturer, on_delete=models.CASCADE, default=0)
subject = models.ForeignKey(Subject, on_delete=models.CASCADE, default=0)
def __str__(self):
return self.lecture_audio_noise_removed_id
......@@ -48,3 +46,11 @@ class LectureAudioSummary (models.Model):
def __str__(self):
return self.lecture_audio_summary_id
class LectureNotices (models.Model):
lecture_notice_id = models.CharField(max_length=10)
lecture_audio_id = models.ForeignKey(LectureAudio, on_delete=models.CASCADE)
notice_text = models.TextField()
def __str__(self):
return self.lecture_notice_id
import librosa
from pysndfx import AudioEffectsChain
import numpy as np
import math
import python_speech_features
import scipy as sp
from scipy import signal
import soundfile
def read_file(file_name):
sample_file = file_name
sample_directory = 'lectures/'
sample_path = sample_directory + sample_file
# generating audio time series and a sampling rate (int)
y, sr = librosa.load(sample_path)
return y, sr
'''CENTROID'''
def reduce_noise_centroid_s(y, sr):
cent = librosa.feature.spectral_centroid(y=y, sr=sr)
threshold_h = np.max(cent)
threshold_l = np.min(cent)
less_noise = AudioEffectsChain().lowshelf(gain=-12.0, frequency=threshold_l, slope=0.5).highshelf(gain=-12.0, frequency=threshold_h, slope=0.5).limiter(gain=6.0)
y_cleaned = less_noise(y)
return y_cleaned
'''MFCC'''
def mffc_highshelf(y, sr):
mfcc = python_speech_features.base.mfcc(y)
mfcc = python_speech_features.base.logfbank(y)
mfcc = python_speech_features.base.lifter(mfcc)
sum_of_squares = []
index = -1
for r in mfcc:
sum_of_squares.append(0)
index = index + 1
for n in r:
sum_of_squares[index] = sum_of_squares[index] + n**2
strongest_frame = sum_of_squares.index(max(sum_of_squares))
hz = python_speech_features.base.mel2hz(mfcc[strongest_frame])
max_hz = max(hz)
min_hz = min(hz)
speech_booster = AudioEffectsChain().highshelf(frequency=min_hz*(-1)*1.2, gain=-12.0, slope=0.6).limiter(gain=8.0)
y_speach_boosted = speech_booster(y)
return (y_speach_boosted)
def mfcc_lowshelf(y, sr):
mfcc = python_speech_features.base.mfcc(y)
mfcc = python_speech_features.base.logfbank(y)
mfcc = python_speech_features.base.lifter(mfcc)
sum_of_squares = []
index = -1
for r in mfcc:
sum_of_squares.append(0)
index = index + 1
for n in r:
sum_of_squares[index] = sum_of_squares[index] + n**2
strongest_frame = sum_of_squares.index(max(sum_of_squares))
hz = python_speech_features.base.mel2hz(mfcc[strongest_frame])
max_hz = max(hz)
min_hz = min(hz)
speech_booster = AudioEffectsChain().lowshelf(frequency=min_hz*(-1), gain=12.0, slope=0.5)
y_speach_boosted = speech_booster(y)
return (y_speach_boosted)
def trim_silence(y):
y_trimmed, index = librosa.effects.trim(y, top_db=20, frame_length=2, hop_length=500)
trimmed_length = librosa.get_duration(y) - librosa.get_duration(y_trimmed)
return y_trimmed, trimmed_length
def enhance(y):
apply_audio_effects = AudioEffectsChain().lowshelf(gain=10.0, frequency=260, slope=0.1).reverb(reverberance=25, hf_damping=5, room_scale=5, stereo_depth=50, pre_delay=20, wet_gain=0, wet_only=False)#.normalize()
y_enhanced = apply_audio_effects(y)
return y_enhanced
def output_file(destination ,filename, y, sr, ext=""):
destination = destination + filename[:-4] + ext + '.wav'
librosa.output.write_wav(destination, y, sr)
lectures = ['Lecture01.wav']
for s in lectures:
filename = s
y, sr = read_file(filename)
y_reduced_centroid_s = reduce_noise_centroid_s(y, sr)
y_reduced_mfcc_lowshelf = mfcc_lowshelf(y, sr)
y_reduced_mfcc_highshelf = mffc_highshelf(y, sr)
# trimming silences
y_reduced_centroid_s, time_trimmed = trim_silence(y_reduced_centroid_s)
y_reduced_mfcc_up, time_trimmed = trim_silence(mfcc_lowshelf)
y_reduced_mfcc_down, time_trimmed = trim_silence(mffc_highshelf)
output_file('lectures_trimmed_noise_reduced/' ,filename, y_reduced_centroid_s, sr, '_ctr_s')
output_file('lectures_trimmed_noise_reduced/' ,filename, y_reduced_mfcc_up, sr, '_mfcc_up')
output_file('lectures_trimmed_noise_reduced/' ,filename, y_reduced_mfcc_down, sr, '_mfcc_down')
output_file('lectures_trimmed_noise_reduced/' ,filename, y, sr, '_org')
......@@ -36,3 +36,11 @@ class LectureAudioSummarySerializer(serializers.ModelSerializer):
class Meta:
model = LectureAudioSummary
fields = '__all__'
class LectureNoticesSerializer(serializers.ModelSerializer):
lecture_audio_noise_removed_id = LectureSpeechToTextSerializer()
class Meta:
model = LectureAudioSummary
fields = '__all__'
\ No newline at end of file
import speech_recognition as sr
r = sr.Recognizer()
with sr.AudioFile('female.wav') as source:
audio = r.listen(source)
file = open('audioToText01.txt', 'w')
try:
text = r.recognize_google(audio)
file.write(text)
except:
file.write('error')
file.close()
\ No newline at end of file
......@@ -26,7 +26,15 @@ urlpatterns = [
# # path('Video', views.hello)
# API to retrieve activity recognition
url(r'^get-lecture-audio/$', api.LectureAudioAPI.as_view()),
url(r'^lecture-audio/$', api.LectureAudioAPI.as_view()),
url(r'^lecture-audio-noise-removed/$', api.audioNoiseRemovedList.as_view()),
url(r'^lecture-audio-to-text/$', api.audioToTextList.as_view()),
url(r'^lecture-summary/$', api.lectureSummaryList.as_view()),
url(r'^lecture-notices/$', api.lectureNoticeList.as_view()),
# # API to retrieve audio analysis
# url(r'^get-audio-analysis', api.GetLectureAudioAnalysis.as_view()),
......
from django.shortcuts import render
from django.contrib.auth.decorators import login_required
from django.http import HttpResponse
from django.shortcuts import get_object_or_404, render
from rest_framework.views import APIView
from rest_framework.response import Response
from rest_framework import viewsets
from .models import LectureAudio, LectureAudioNoiseRemoved, LectureSpeechToText, LectureAudioSummary, LectureNotices
from .serializer import LectureAudioSerializer, LectureAudioNoiseRemovedSerializer, LectureAudioSummarySerializer, \
LectureSpeechToTextSerializer
# Create your views here.
def summarization(request):
return render(request, "LectureSummarizingApp/summarization.html")
class audioList(APIView):
def get(self, request):
lecture_audio = LectureAudio.objects.all()
serializer = LectureAudioSerializer(lecture_audio, many=True)
return Response(serializer.data)
def post(self):
pass
class audioNoiseRemovedList(APIView):
def get(self, request):
lecture_audio_noise_removed = LectureAudioNoiseRemoved.objects.all()
serializer = LectureAudioNoiseRemovedSerializer(lecture_audio_noise_removed, many=True)
return Response(serializer.data)
def post(self, request):
LectureAudioNoiseRemoved(
lecture_audio_noise_removed_id=request.data["lecture_audio_noise_removed_id"],
lecture_audio_id=request.data["lecture_audio_id"],
lecturer_date=request.data["lecturer_date"],
lecture_audio_name=request.data["lecture_audio_name"],
lecture_audio_length=request.data["lecture_audio_length"]
).save()
return Response({"response": request.data})
class audioToTextList(APIView):
def get(self, request):
lecture_speech_to_text_id = LectureSpeechToText.objects.all()
serializer = LectureSpeechToTextSerializer(lecture_speech_to_text_id, many=True)
return Response(serializer.data)
def post(self, request):
LectureSpeechToText(
lecture_speech_to_text_id=request.data["lecture_speech_to_text_id"],
lecture_audio_id=request.data["lecture_audio_id"],
audio_original_text=request.data["audio_original_text"]
).save()
return Response({"response": request.data})
class lectureSummaryList(APIView):
def get(self, request):
lecture_audio_summary_id = LectureAudioSummary.objects.all()
serializer = LectureAudioSummarySerializer(lecture_audio_summary_id, many=True)
return Response(serializer.data)
def post(self, request):
LectureAudioSummary(
lecture_speech_to_text_id=request.data["lecture_speech_to_text_id"],
lecture_audio_id=request.data["lecture_audio_id"],
audio_original_text=request.data["audio_original_text"],
audio_summary=request.data["audio_summary"]
).save()
return Response({"response": request.data})
class lectureNoticeList(APIView):
def get(self, request):
lecture_notice_id = LectureNotices.objects.all()
serializer = LectureSpeechToTextSerializer(lecture_notice_id, many=True)
return Response(serializer.data)
def post(self, request):
LectureSpeechToText(
lecture_notice_id=request.data["lecture_notice_id"],
lecture_audio_id=request.data["lecture_audio_id"],
notice_text=request.data["notice_text"]
).save()
return Response({"response": request.data})
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment