Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2020-101
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sachith Fernando
2020-101
Commits
b098d5ae
Commit
b098d5ae
authored
Jan 08, 2021
by
LiniEisha
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Indentation
parent
b9ea95ec
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
192 additions
and
100 deletions
+192
-100
FirstApp/templates/FirstApp/template.html
FirstApp/templates/FirstApp/template.html
+1
-1
LectureSummarizingApp/ExtractKeySentences.py
LectureSummarizingApp/ExtractKeySentences.py
+23
-14
LectureSummarizingApp/Summary.py
LectureSummarizingApp/Summary.py
+47
-42
LectureSummarizingApp/api.py
LectureSummarizingApp/api.py
+60
-23
LectureSummarizingApp/speech_to_text.py
LectureSummarizingApp/speech_to_text.py
+5
-3
LectureSummarizingApp/templates/LectureSummarizingApp/summarization.html
...ingApp/templates/LectureSummarizingApp/summarization.html
+56
-17
No files found.
FirstApp/templates/FirstApp/template.html
View file @
b098d5ae
...
...
@@ -118,7 +118,7 @@
<div
id=
"collapseThree"
class=
"collapse"
aria-labelledby=
"headingThree"
data-parent=
"#accordionSidebar"
>
<div
class=
"bg-white py-2 collapse-inner rounded"
>
<h6
class=
"collapse-header"
>
Components:
</h6>
<a
class=
"collapse-item"
href=
"/summary/record"
>
Record Lecture
</a
>
<!-- <a class="collapse-item" href="/summary/record">Record Lecture</a>--
>
<a
class=
"collapse-item"
href=
"/summary/lecture"
>
Summarization
</a>
</div>
</div>
...
...
LectureSummarizingApp/ExtractKeySentences.py
View file @
b098d5ae
import
nltk
read_lines
=
[
line
.
rstrip
(
'
\n
'
)
for
line
in
open
(
"audioToText01.txt"
,
"r"
)]
sentences_list
=
[]
sentence_list
=
nltk
.
sent_tokenize
(
read_lines
)
word_search
=
"important"
sentences_with_word
=
[]
for
sentence
in
sentences_list
:
if
sentence
.
count
(
word_search
)
>
0
:
sentences_with_word
.
append
(
sentence
)
import
os
words_search
=
[
"exam"
,
"assignment"
]
word_sentence_dictionary
=
{
"exam"
:[],
"assignment"
:[]}
def
LectureNotice
(
notice_name
):
for
word
in
words_search
:
BASE_DIR
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
FILE_PATH
=
os
.
path
.
join
(
BASE_DIR
,
"speechToText
\\
{}"
.
format
(
notice_name
))
DESTINATION_DIR
=
os
.
path
.
dirname
(
os
.
path
.
join
(
BASE_DIR
,
"LectureSummarizingApp
\\
Notices
\\
sample.txt"
))
print
(
'destination directory: '
,
DESTINATION_DIR
)
read_lines
=
[
line
.
rstrip
(
'
\n
'
)
for
line
in
open
(
"audioToText01.txt"
,
"r"
)]
sentences_list
=
[]
sentence_list
=
nltk
.
sent_tokenize
(
read_lines
)
word_search
=
"important"
sentences_with_word
=
[]
for
sentence
in
sentence
s
_list
:
if
sentence
.
count
(
word
)
>
0
:
for
sentence
in
sentence_list
:
if
sentence
.
count
(
word
_search
)
>
0
:
sentences_with_word
.
append
(
sentence
)
word_sentence_dictionary
[
word
]
=
sentences_with_word
\ No newline at end of file
words_search
=
[
"exam"
,
"assignment"
]
word_sentence_dictionary
=
{
"exam"
:[],
"assignment"
:[]}
for
word
in
words_search
:
sentences_with_word
=
[]
for
sentence
in
sentences_list
:
if
sentence
.
count
(
word
)
>
0
:
sentences_with_word
.
append
(
sentence
)
word_sentence_dictionary
[
word
]
=
sentences_with_word
\ No newline at end of file
LectureSummarizingApp/Summary.py
View file @
b098d5ae
from
spacy.lang.pt.stop_words
import
STOP_WORDS
from
sklearn.feature_extraction.text
import
CountVectorizer
import
pt_core_news_sm
import
os
def
LectureSummary
(
summary_name
):
BASE_DIR
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
FILE_PATH
=
os
.
path
.
join
(
BASE_DIR
,
"speechToText
\\
{}"
.
format
(
summary_name
))
DESTINATION_DIR
=
os
.
path
.
dirname
(
os
.
path
.
join
(
BASE_DIR
,
"LectureSummarizingApp
\\
summary
\\
sample.txt"
))
print
(
'destination directory: '
,
DESTINATION_DIR
)
# Reading the file
nlp
=
pt_core_news_sm
.
load
()
with
open
(
"audioToText01.txt"
,
"r"
,
encoding
=
"utf-8"
)
as
f
:
text
=
" "
.
join
(
f
.
readlines
())
nlp
=
pt_core_news_sm
.
load
()
with
open
(
"audioToText01.txt"
,
"r"
,
encoding
=
"utf-8"
)
as
f
:
text
=
" "
.
join
(
f
.
readlines
())
doc
=
nlp
(
text
)
doc
=
nlp
(
text
)
#calculating the word frequency
corpus
=
[
sent
.
text
.
lower
()
for
sent
in
doc
.
sents
]
cv
=
CountVectorizer
(
stop_words
=
list
(
STOP_WORDS
))
cv_fit
=
cv
.
fit_transform
(
corpus
)
word_list
=
cv
.
get_feature_names
()
count_list
=
cv_fit
.
toarray
()
.
sum
(
axis
=
0
)
word_frequency
=
dict
(
zip
(
word_list
,
count_list
))
val
=
sorted
(
word_frequency
.
values
())
higher_word_frequencies
=
[
word
for
word
,
freq
in
word_frequency
.
items
()
if
freq
in
val
[
-
3
:]]
print
(
"
\n
Words with higher frequencies: "
,
higher_word_frequencies
)
# gets relative frequency of words
higher_frequency
=
val
[
-
1
]
for
word
in
word_frequency
.
keys
():
word_frequency
[
word
]
=
(
word_frequency
[
word
]
/
higher_frequency
)
corpus
=
[
sent
.
text
.
lower
()
for
sent
in
doc
.
sents
]
cv
=
CountVectorizer
(
stop_words
=
list
(
STOP_WORDS
))
cv_fit
=
cv
.
fit_transform
(
corpus
)
word_list
=
cv
.
get_feature_names
()
count_list
=
cv_fit
.
toarray
()
.
sum
(
axis
=
0
)
word_frequency
=
dict
(
zip
(
word_list
,
count_list
))
val
=
sorted
(
word_frequency
.
values
())
higher_word_frequencies
=
[
word
for
word
,
freq
in
word_frequency
.
items
()
if
freq
in
val
[
-
3
:]]
print
(
"
\n
Words with higher frequencies: "
,
higher_word_frequencies
)
# gets relative frequency of words
higher_frequency
=
val
[
-
1
]
for
word
in
word_frequency
.
keys
():
word_frequency
[
word
]
=
(
word_frequency
[
word
]
/
higher_frequency
)
#calculating sentence rank and taking top ranked sentences for the summary
sentence_rank
=
{}
for
sent
in
doc
.
sents
:
for
word
in
sent
:
if
word
.
text
.
lower
()
in
word_frequency
.
keys
():
if
sent
in
sentence_rank
.
keys
():
sentence_rank
[
sent
]
+=
word_frequency
[
word
.
text
.
lower
()]
else
:
sentence_rank
[
sent
]
=
word_frequency
[
word
.
text
.
lower
()]
top_sentences
=
(
sorted
(
sentence_rank
.
values
())[::
-
1
])
top_sent
=
top_sentences
[:
3
]
summary
=
[]
for
sent
,
strength
in
sentence_rank
.
items
():
if
strength
in
top_sent
:
summary
.
append
(
sent
)
else
:
continue
for
i
in
summary
:
file
=
open
(
'Summary01.txt'
,
'w'
)
file
.
write
(
str
(
i
))
file
.
close
()
\ No newline at end of file
sentence_rank
=
{}
for
sent
in
doc
.
sents
:
for
word
in
sent
:
if
word
.
text
.
lower
()
in
word_frequency
.
keys
():
if
sent
in
sentence_rank
.
keys
():
sentence_rank
[
sent
]
+=
word_frequency
[
word
.
text
.
lower
()]
else
:
sentence_rank
[
sent
]
=
word_frequency
[
word
.
text
.
lower
()]
top_sentences
=
(
sorted
(
sentence_rank
.
values
())[::
-
1
])
top_sent
=
top_sentences
[:
3
]
summary
=
[]
for
sent
,
strength
in
sentence_rank
.
items
():
if
strength
in
top_sent
:
summary
.
append
(
sent
)
else
:
continue
for
i
in
summary
:
file
=
open
(
'Summary01.txt'
,
'w'
)
file
.
write
(
str
(
i
))
file
.
close
()
\ No newline at end of file
LectureSummarizingApp/api.py
View file @
b098d5ae
...
...
@@ -15,6 +15,7 @@ import datetime
# APIs used in Lecture Summarizing Component
from
.noise
import
noise_removal
from
.speech_to_text
import
speech_to_text
class
LectureAudioAPI
(
APIView
):
...
...
@@ -73,35 +74,36 @@ class audioNoiseRemovedList(APIView):
class
audioToTextList
(
APIView
):
def
get
(
self
,
request
):
lecture_speech_to_text_id
=
LectureSpeechToText
.
objects
.
all
()
serializer
=
LectureSpeechToTextSerializer
(
lecture_speech_to_text_id
,
many
=
True
)
#lecture_speech_to_text_id = LectureSpeechToText.objects.all()
#serializer = LectureSpeechToTextSerializer(lecture_speech_to_text_id, many=True)
audio_to_text_list
=
LectureSpeechToText
.
objects
.
order_by
(
'lecture_speech_to_text_id'
)
.
last
()
# return Response(serializer.data)
# video_name = request.query_params.get("video_name")
#
# print('video name: ', video_name)
#
# # nr.noise_removalll(video_name)
# noise_removal(video_name)
speech_to_text_name
=
request
.
query_params
.
get
(
"speech_to_text_name"
)
# stt.speech_to_text(video_name)
print
(
'file name: '
,
speech_to_text_name
)
id
=
int
(
request
.
query_params
.
get
(
"id"
))
# generate new id for speech to text file
new_speech_to_text_id
=
generate_new_id
(
audio_to_text_list
.
new_speech_to_text_id
)
speech_to_text
(
speech_to_text_name
)
LectureSpeechToText
(
lecture_speech_to_text_id
=
new_speech_to_text_id
,
lecture_audio_id
=
id
,
audio_original_text
=
speech_to_text_name
)
.
save
()
return
Response
({
"response"
:
"successful"
"response"
:
Response
.
status_code
})
def
post
(
self
,
request
):
# video_name = request.data["video_name"]
#
# print('video name: ', video_name)
#
# stt.speech_to_text(video_name)
LectureSpeechToText
(
lecture_speech_to_text_id
=
request
.
data
[
"lecture_speech_to_text_id"
],
lecture_audio_id
=
request
.
data
[
"lecture_audio_id"
],
audio_original_text
=
request
.
data
[
"audio_original_text"
]
audio_original_text
=
request
.
data
[
"audio_original_text"
]
,
)
.
save
()
return
Response
({
"response"
:
request
.
data
})
...
...
@@ -110,13 +112,29 @@ class lectureSummaryList(APIView):
def
get
(
self
,
request
):
lecture_audio_summary_id
=
LectureAudioSummary
.
objects
.
all
()
serializer
=
LectureAudioSummarySerializer
(
lecture_audio_summary_id
,
many
=
True
)
return
Response
(
serializer
.
data
)
#
serializer = LectureAudioSummarySerializer(lecture_audio_summary_id, many=True)
#
return Response(serializer.data)
def
post
(
self
,
request
):
lecture_summary_list
=
LectureAudioSummary
.
objects
.
order_by
(
'lecture_summary_list'
)
.
last
()
lecture_summary_name
=
request
.
query_params
.
get
(
"lecture_summary_name"
)
id
=
int
(
request
.
query_params
.
get
(
"id"
))
current_date
=
datetime
.
datetime
.
now
()
.
date
()
# generate new id for summary
lecture_summary_id
=
generate_new_id
(
lecture_summary_list
.
lecture_summary_id
)
LectureAudioSummary
(
lecture_summary_name
)
LectureAudioSummary
(
lecture_speech_to_text_id
=
id
,
lecture_audio_id
=
lecture_summary_id
,
audio_original_text
=
current_date
,
audio_summary
=
lecture_summary_name
)
.
save
()
return
Response
({
"response"
:
request
.
data
})
def
post
(
self
,
request
):
LectureAudioSummary
(
lecture_speech_to_text_id
=
request
.
data
[
"lecture_speech_to_text_id"
],
lecture_audio_id
=
request
.
data
[
"lecture_audio_id"
],
...
...
@@ -131,8 +149,27 @@ class lectureNoticeList(APIView):
def
get
(
self
,
request
):
lecture_notice_id
=
LectureNotices
.
objects
.
all
()
serializer
=
LectureNoticesSerializer
(
lecture_notice_id
,
many
=
True
)
return
Response
(
serializer
.
data
)
# serializer = LectureNoticesSerializer(lecture_notice_id, many=True)
# return Response(serializer.data)
lecture_notice_list
=
LectureNotices
.
objects
.
order_by
(
'lecture_notice_list'
)
.
last
()
lecture_notice_name
=
request
.
query_params
.
get
(
"lecture_notice_name"
)
id
=
int
(
request
.
query_params
.
get
(
"id"
))
# generate new id for notices
lecture_notice_id
=
generate_new_id
(
lecture_notice_list
.
lecture_notice_id
)
LectureNotices
(
lecture_notice_name
)
LectureNotices
(
lecture_notice_id
=
id
,
lecture_audio_id
=
lecture_notice_id
,
notice_text
=
lecture_notice_name
)
.
save
()
return
Response
({
"response"
:
request
.
data
})
def
post
(
self
,
request
):
LectureNotices
(
...
...
LectureSummarizingApp/speech_to_text.py
View file @
b098d5ae
...
...
@@ -2,15 +2,17 @@ import speech_recognition as sr
import
os
def
speech_to_text
(
video
_name
):
def
speech_to_text
(
speech_to_text
_name
):
#calling the Recognizer()
r
=
sr
.
Recognizer
()
BASE_DIR
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
VIDEO_PATH
=
os
.
path
.
join
(
BASE_DIR
,
"lectures
\\
{}"
.
format
(
video_name
))
FILE_PATH
=
os
.
path
.
join
(
BASE_DIR
,
"noise_removed_lectures
\\
{}"
.
format
(
speech_to_text_name
))
DESTINATION_DIR
=
os
.
path
.
dirname
(
os
.
path
.
join
(
BASE_DIR
,
"LectureSummarizingApp
\\
speechToText
\\
sample.txt"
))
print
(
'destination directory: '
,
DESTINATION_DIR
)
with
sr
.
AudioFile
(
VIDEO
_PATH
)
as
source
:
with
sr
.
AudioFile
(
FILE
_PATH
)
as
source
:
audio
=
r
.
listen
(
source
)
file
=
open
(
'audioToText01.txt'
,
'w'
)
#open file
try
:
...
...
LectureSummarizingApp/templates/LectureSummarizingApp/summarization.html
View file @
b098d5ae
...
...
@@ -34,33 +34,38 @@
$
(
document
).
ready
(
function
()
{
<!--
speech
to
text
-->
$
(
'
.audio_to_text_process
'
).
click
(
function
()
{
alert
(
'
Processing
'
);
let
id
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
id
'
);
let
speech_to_text_name
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
data-noiseless-audio-name
'
);
speech_to_text_name
=
speech_to_text_name
+
"
.txt
"
;
alert
(
'
id:
'
+
id
);
alert
(
'
speech to text file name:
'
+
speech_to_text_name
);
//call the fetch API
fetch
(
'
http://127.0.0.1:8000/summary/lecture-audio-to-text/?
video_name=Lecture01.wav
'
)
fetch
(
'
http://127.0.0.1:8000/summary/lecture-audio-to-text/?
speech_to_text_name=
'
+
speech_to_text_name
+
'
&id=
'
+
id
)
.
then
((
res
)
=>
res
.
json
())
.
then
((
out
)
=>
aler
t
(
out
.
response
))
.
then
((
out
)
=>
handleSpeechToTex
t
(
out
.
response
))
.
catch
((
err
)
=>
alert
(
'
error:
'
+
err
))
});
//this function will handle the success response for speech-to-text
function
handleSpeechToText
(
response
)
{
if
(
response
===
200
)
{
document
.
location
.
reload
();
}
}
<!--
background
noise
-->
$
(
'
.audio_process
'
).
click
(
function
(
e
)
{
alert
(
'
Processing
'
);
let
id
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
id
'
);
let
audio_name
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
data-audio-name
'
);
audio_name
=
audio_name
+
"
.wav
"
;
alert
(
'
id:
'
+
id
);
alert
(
'
audio name:
'
+
audio_name
);
//call the fetch API
<!--
fetch
(
'
http://127.0.0.1:8000/summary/lecture-audio-to-text/?audio_name=
'
)
-->
<!--
.
then
((
res
)
=>
res
.
json
())
-->
<!--
.
then
((
out
)
=>
alert
(
out
.
response
))
-->
<!--
.
catch
((
err
)
=>
alert
(
'
error:
'
+
err
))
-->
fetch
(
'
http://127.0.0.1:8000/summary/lecture-audio-noise-removed/?audio_name=
'
+
audio_name
+
'
&id=
'
+
id
)
.
then
((
res
)
=>
res
.
json
())
...
...
@@ -68,8 +73,6 @@
.
catch
((
err
)
=>
alert
(
'
error:
'
+
err
))
});
//this function will handle the success respopnse for noise removed
function
handleNoiseRemoved
(
response
)
{
...
...
@@ -78,18 +81,52 @@
}
}
<!--
To
summary
-->
$
(
'
.to_summary
'
).
click
(
function
()
{
alert
(
'
Processing
'
);
let
id
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
id
'
);
let
lecture_summary_name
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
data-summary-name
'
);
lecture_summary_name
=
lecture_summary_name
+
"
.wav
"
;
alert
(
'
id:
'
+
id
);
alert
(
'
lecture_summary_name:
'
+
lecture_summary_name
);
//call the fetch API
fetch
(
'
http://127.0.0.1:8000/summary/lecture-audio-to-text/?video_name=Lecture01.wav
'
)
//call the fetch API
etch
(
'
http://127.0.0.1:8000/summary/lecture-summary/?lecture_summary_name=
'
+
lecture_summary_name
+
'
&id=
'
+
id
)
.
then
((
res
)
=>
res
.
json
())
.
then
((
out
)
=>
alert
(
out
.
response
))
.
then
((
out
)
=>
handleLectureRemoved
(
out
.
response
))
.
catch
((
err
)
=>
alert
(
'
error:
'
+
err
))
});
//this function will handle the success response for summary
function
handleLectureRemoved
(
response
)
{
if
(
response
===
200
)
{
document
.
location
.
reload
();
}
}
<!--
To
Notice
-->
$
(
'
.get_notices
'
).
click
(
function
()
{
alert
(
'
Processing
'
);
let
id
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
id
'
);
let
lecture_notice_name
=
e
.
target
.
parentNode
.
parentNode
.
getAttribute
(
'
data-summary-name
'
);
lecture_notice_name
=
lecture_notice_name
+
"
.wav
"
;
alert
(
'
id:
'
+
id
);
alert
(
'
lecture_notice_name:
'
+
lecture_notice_name
);
//call the fetch API
etch
(
'
http://127.0.0.1:8000/summary/lecture-notices/?lecture_notice_name=
'
+
lecture_notice_name
+
'
&id=
'
+
id
)
.
then
((
res
)
=>
res
.
json
())
.
then
((
out
)
=>
handleNoticeRemoved
(
out
.
response
))
.
catch
((
err
)
=>
alert
(
'
error:
'
+
err
))
});
//this function will handle the success response for notices
function
handleNoticeRemoved
(
response
)
{
if
(
response
===
200
)
{
document
.
location
.
reload
();
}
}
});
...
...
@@ -127,6 +164,7 @@
<span
class=
"font-italic"
>
No Recordings
</span>
</div>
{% else %}
#lecture list
<div
class=
"table-responsive"
>
<table
class=
"table table-bordered"
id=
"datatable"
>
<thead>
...
...
@@ -187,6 +225,7 @@
<span
class=
"font-italic"
>
No Recordings
</span>
</div>
{% else %}
#noise removes list
<div
class=
"table-responsive"
>
<table
class=
"table table-bordered"
id=
"datatable"
>
<thead>
...
...
@@ -201,7 +240,7 @@
<tbody>
{% for noiseless_audio in noiseless_data %}
<tr
class=
"recordings not_clicked"
id=
"{{ noiseless_audio.lecture_audio_id }}"
>
<tr
class=
"recordings not_clicked"
id=
"{{ noiseless_audio.lecture_audio_id }}"
data-noiseless-audio-name=
"{{ noiseless_audio.lecture_audio_name }}"
>
<!-- <td>-->
<!-- <div class="radio">-->
<!-- <label><input type="radio"-->
...
...
@@ -269,7 +308,7 @@
<tbody>
{% for lec_text in lecture_text_data %}
<tr
class=
"recordings not_clicked"
id=
"{{ lec_text.lecture_audio_id }}"
>
<tr
class=
"recordings not_clicked"
id=
"{{ lec_text.lecture_audio_id }}"
data-summary-name=
"{{lec_text.lecture_audio_id.subject.name}}"
>
<!-- <td>-->
<!-- <div class="radio">-->
<!-- <label><input type="radio"-->
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment