Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
R
rp_server_one
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Ranasinghe R.A.P.T
rp_server_one
Commits
e31c9649
Commit
e31c9649
authored
Apr 15, 2022
by
Pamal-Ranasinghe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
some nltk pre-processing steps are added
parent
8212e2c2
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
64 additions
and
4 deletions
+64
-4
.env.example
.env.example
+3
-0
app.py
app.py
+0
-1
assets/converted_wav/converted.wav
assets/converted_wav/converted.wav
+0
-0
resources/__pycache__/routes.cpython-38.pyc
resources/__pycache__/routes.cpython-38.pyc
+0
-0
resources/__pycache__/speechExtraction.cpython-38.pyc
resources/__pycache__/speechExtraction.cpython-38.pyc
+0
-0
resources/speechExtraction.py
resources/speechExtraction.py
+17
-3
resources/wordsProcessModel.py
resources/wordsProcessModel.py
+44
-0
No files found.
.env.example
View file @
e31c9649
CONVERTED_AUDIO_PATH = '<file path for the extracted audio>'
CONVERTED_AUDIO_FILE_NAME = '<extracted audio file name>'
NORMAL_LANGUAGE = '<english>'
\ No newline at end of file
app.py
View file @
e31c9649
#import flask module
from
flask
import
Flask
from
flask_restful
import
Api
from
flask
import
Flask
from
resources.routes
import
initialize_routes
...
...
assets/converted_wav/converted.wav
deleted
100644 → 0
View file @
8212e2c2
File deleted
resources/__pycache__/routes.cpython-38.pyc
View file @
e31c9649
No preview for this file type
resources/__pycache__/speechExtraction.cpython-38.pyc
View file @
e31c9649
No preview for this file type
resources/speechExtraction.py
View file @
e31c9649
from
flask_restful
import
Resource
from
loguru
import
logger
from
.wordsProcessModel
import
WordModel
import
speech_recognition
as
sr
import
moviepy.editor
as
mp
import
json
import
os
class
SpeechExtraction
(
Resource
):
...
...
@@ -11,6 +13,7 @@ class SpeechExtraction(Resource):
# params: self
# return: json
# author: Pamal Ranasinghe
def
get
(
self
):
try
:
# Check the endpoint execution
...
...
@@ -26,12 +29,23 @@ class SpeechExtraction(Resource):
result
=
r
.
recognize_google
(
audio_file
)
# Create a dict object which includes the result
value
=
{
"text"
:
result
}
# Calling word pre processor model
wm
=
WordModel
(
result
)
processed_words
=
json
.
dumps
(
wm
.
word_pre_processor
())
value
=
{
"text"
:
result
,
"tokens"
:
json
.
loads
(
processed_words
)[
"tokens"
],
"functional_words"
:
json
.
loads
(
processed_words
)[
"filtered_words"
],
}
#remove the coverted.wav for get more space in the server
os
.
remove
(
os
.
path
.
join
(
os
.
getenv
(
'CONVERTED_AUDIO_PATH'
),
os
.
getenv
(
'CONVERTED_AUDIO_FILE_NAME'
)))
#return the json object which is having converted speech
return
json
.
loads
(
json
.
dumps
(
value
)),
200
except
Exception
as
e
:
logger
.
error
(
str
(
e
))
return
json
.
loads
(
json
.
dumps
({
"message"
:
"Something went wrong"
}))
,
500
\ No newline at end of file
return
json
.
loads
(
json
.
dumps
({
"message"
:
str
(
e
)}))
,
500
\ No newline at end of file
resources/wordsProcessModel.py
0 → 100644
View file @
e31c9649
from
nltk.tokenize
import
word_tokenize
from
loguru
import
logger
import
os
import
json
class WordModel:
    """Run NLTK-based pre-processing over a piece of text.

    The text is stored on the instance as ``para`` and is tokenized and
    stop-word-filtered by :meth:`word_pre_processor`.
    """

    def __init__(self, para):
        # Raw text that word_pre_processor() will tokenize.
        self.para = para

    # This function is used for pre-processing on words
    # params: self
    # return: json
    # author: Pamal Ranasinghe
    def word_pre_processor(self):
        """Tokenize ``self.para`` and strip stop words from the tokens.

        The stop-word language is read from the ``NORMAL_LANGUAGE``
        environment variable and passed to ``nltk.corpus.stopwords.words``.

        Returns:
            On success, a dict with two keys:
              * ``"filtered_words"`` - tokens whose lower-cased form is not
                in the stop-word set (original casing is preserved).
              * ``"tokens"`` - every token produced by ``word_tokenize``.
            On failure, a ``({"message": <error text>}, 500)`` tuple. Note
            that this is a different shape from the success value, so
            callers must check for it before indexing by key.
        """
        try:
            logger.info('word_pre_process - hits')

            # Identify all the tokens
            para_tokenize = word_tokenize(self.para)
            # loguru formats with str.format(), so a "{}" placeholder is
            # required for the token list to actually appear in the log.
            logger.info('Tokenized Words : {}', para_tokenize)

            # Remove the stop words from the text.
            # Imported here (as in the original) so the corpus is only
            # touched when pre-processing actually runs.
            from nltk.corpus import stopwords
            stop_words = set(stopwords.words(os.getenv('NORMAL_LANGUAGE')))

            # Compare case-insensitively so capitalised stop words
            # ("The", "I", ...) are removed too; build the list once
            # instead of computing it and then rebuilding it in a loop.
            filtered_sentence = [
                w for w in para_tokenize if w.lower() not in stop_words
            ]

            # Both values are plain lists of strings, so the dict can be
            # returned directly without a JSON encode/decode round-trip.
            return {
                "filtered_words": filtered_sentence,
                "tokens": para_tokenize,
            }
        except Exception as e:
            logger.error(str(e))
            return {"message": str(e)}, 500
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment