Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2022-158
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2022-158
2022-158
Commits
67391e5f
Commit
67391e5f
authored
Sep 26, 2022
by
Thiwanka K.A.T
🎯
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'IT19076362' into 'master'
UPDATE: code refactoring See merge request
!66
parents
1555a0b9
67269416
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
17 deletions
+21
-17
backend/database.db
backend/database.db
+0
-0
backend/services/question_preprocess_service.py
backend/services/question_preprocess_service.py
+4
-3
backend/services/tokenization_service.py
backend/services/tokenization_service.py
+17
-14
No files found.
backend/database.db
View file @
67391e5f
No preview for this file type
backend/services/question_preprocess_service.py
View file @
67391e5f
...
...
@@ -3,6 +3,7 @@ from services.class_diagram_generation_service import *
from
services.use_case_diagram_generation_service
import
*
# removing unwanted spaces
def
remove_unwanted_values
(
data
):
remove_element
=
'None'
if
remove_element
in
data
:
...
...
@@ -22,7 +23,7 @@ def remove_punctuation(sentence):
return
cleaned_sentence
# load the text
file
# load the text
def
main
(
scenario
):
requirement_text
=
scenario
.
replace
(
"
\n\n
"
,
" "
)
.
replace
(
"
\n
"
,
" "
)
nlp
=
spacy
.
load
(
"en_core_web_lg"
)
...
...
@@ -32,12 +33,13 @@ def main(scenario):
sentences
=
list
(
doc
.
sents
)
sentences
.
pop
(
0
)
del
sentences
[
-
1
]
nc
=
[]
cleaned_extracted_actions
=
[]
cleaned_sentences
=
[]
splitted_actions_array
=
[]
# looping through
sentences
# looping through
each sentence
for
sentence
in
sentences
:
res
=
get_nouns_pnouns
(
sentence
)
nc
.
append
(
str
(
res
))
...
...
@@ -49,7 +51,6 @@ def main(scenario):
extracted_actions
=
get_actions
(
splitted_actions
)
if
extracted_actions
is
not
None
:
cleaned_extracted_actions
.
append
(
extracted_actions
)
...
...
backend/services/tokenization_service.py
View file @
67391e5f
import
re
import
spacy
# import docx
def get_ner(sentence):
    """Print the sentence (debug trace) and return its named entities as a list.

    Expects a spaCy Span/Doc-like object exposing an ``ents`` iterable.
    """
    print(sentence)
    return list(sentence.ents)
def get_pos(sentence):
    """Print each token's text and part-of-speech tag, one token per line.

    Side-effect only; returns None. Expects an iterable of spaCy-like
    tokens with ``text`` and ``pos_`` attributes.
    """
    for tok in sentence:
        print(tok.text, tok.pos_)
def get_noun_chunks(sentence):
    """Return the sentence's noun chunks materialized into a list.

    Expects a spaCy Span/Doc-like object exposing a ``noun_chunks`` iterable.
    """
    chunks = sentence.noun_chunks
    return list(chunks)
def get_nouns_pnouns(sentence):
    """Return the first proper-noun token in *sentence*, or None if there is none.

    Expects an iterable of spaCy-like tokens with a ``pos_`` attribute.

    Fix: the original condition was
    ``token.pos_ == "PROPN" and token.pos_ != None`` — the second clause is
    always true whenever the first holds, so it is dropped; behavior is
    unchanged. The implicit ``None`` fall-through is now an explicit return.
    """
    for token in sentence:
        if token.pos_ == "PROPN":
            return token
    return None
def remove_punctuation(sentence):
    """Join the text of all non-punctuation tokens with single spaces.

    Expects an iterable of spaCy-like tokens with ``text`` and ``is_punct``
    attributes; returns the cleaned sentence as a string.
    """
    words = (tok.text for tok in sentence if not tok.is_punct)
    return ' '.join(words)
def split_actions(sentence):
    """Split a requirement sentence on the marker "should be able to ".

    Returns the list of parts when the marker is present; returns None
    (explicitly, matching the original's implicit fall-through) when it
    is absent.
    """
    marker = "should be able to "
    if marker not in sentence:
        return None
    return sentence.split(marker)
def get_actions(splitted_action):
    """Extract an [actor, action] pair from the output of split_actions.

    Parameters
    ----------
    splitted_action : list[str] | None
        Typically ``[actor_part, actions_part]`` from ``split_actions``,
        or None when no marker was found.

    Returns
    -------
    list[str] | None
        ``[actor_part, first_action]`` when the second element contains
        alternatives; otherwise the input is returned unchanged (including
        None pass-through).

    Fix: the original indexed ``splitted_action[1]`` before verifying the
    list has two elements, raising IndexError on a one-element input; a
    length guard is added (such inputs now fall through to the unchanged
    return, which is backward-compatible).

    NOTE(review): the membership test is on ``'|'`` but the split is on
    ``' | '`` — a bare ``'|'`` without surrounding spaces yields a
    single-element split, so ``res[0]`` is the whole string. Preserved
    as-is; confirm intent with the callers.
    """
    temp_array = []
    if (splitted_action is not None
            and len(splitted_action) > 1
            and '|' in splitted_action[1]):
        res = splitted_action[1].split(' | ')
        temp_array.append(splitted_action[0])
        temp_array.append(res[0])
        return temp_array
    else:
        return splitted_action
# def get_text_from_docx(filename):
# doc = docx.Document(filename)
# fullText = []
# for para in doc.paragraphs:
# fullText.append(para.text)
# return '\n'.join(fullText)
def
get_sentences
(
text
):
nlp
=
spacy
.
load
(
"en_core_web_lg"
)
...
...
@@ -58,9 +58,12 @@ def get_sentences(text):
flag
=
False
token_count
=
0
for
token
in
sentence
:
token_count
=
token_count
+
1
token_count
=
token_count
+
1
if
token
.
pos_
==
'INTJ'
or
token
.
text
==
'?'
or
(
token
.
text
==
'I'
and
token
.
pos_
==
'PRON'
)
or
(
token
.
text
==
'’m'
and
token
.
pos_
==
'VERB'
)
or
((
token
.
text
==
'what'
or
token
.
text
==
'What'
)
and
token
.
pos_
==
'PRON'
)
or
((
token
.
text
==
'We'
or
token
.
text
==
'we'
)
and
token
.
pos_
==
'PRON'
)
or
((
token
.
text
==
'You'
or
token
.
text
==
'you'
)
and
token
.
pos_
==
'PRON'
):
token
.
text
==
'’m'
and
token
.
pos_
==
'VERB'
)
or
(
(
token
.
text
==
'what'
or
token
.
text
==
'What'
)
and
token
.
pos_
==
'PRON'
)
or
(
(
token
.
text
==
'We'
or
token
.
text
==
'we'
)
and
token
.
pos_
==
'PRON'
)
or
(
(
token
.
text
==
'You'
or
token
.
text
==
'you'
)
and
token
.
pos_
==
'PRON'
):
flag
=
True
if
token_count
<
6
:
flag
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment