Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2022-226
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2022-226
2022-226
Commits
69a0ea3b
Commit
69a0ea3b
authored
Sep 15, 2022
by
Wickramasinghe R.J.P
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
data pre-processing module updated
parent
6f7c3ac3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
42 additions
and
20 deletions
+42
-20
Ontology_Based_Information_System/CSV Translator/csv_convertor.py
..._Based_Information_System/CSV Translator/csv_convertor.py
+42
-20
No files found.
Ontology_Based_Information_System/CSV Translator/csv_convertor.py
View file @
69a0ea3b
...
...
@@ -3,23 +3,19 @@ import re
import
pandas
as
pd
if
__name__
==
'__main__'
:
df
=
pd
.
read_csv
(
'
C:
\\
Users
\\
LENOVO
\\
Music
\\
New folder
\\
Ontology generation
\\
petmd.csv'
)
df
=
pd
.
read_csv
(
'petmd.csv'
)
df_col
=
df
.
columns
data_dict
=
{
'Disease'
:
[],
'DiseaseDescription'
:
[],
'DiseaseCause'
:
[],
'DiseaseCauseDescription'
:
[],
'DiseaseDiagnose'
:
[],
'DiseaseDiagnoseDescription'
:
[],
'DiseasePrevention'
:
[],
'DiseasePreventionDescription'
:
[],
'DiseaseSymptoms'
:
[],
'DiseaseSymptomsDescription'
:
[],
'DiseaseTreatment'
:
[],
'DiseaseSymptomDescription'
:
[],
'DiseaseTreatmentDescription'
:
[],
'DiseaseInfection'
:
[]
}
disease
=
''
...
...
@@ -63,13 +59,11 @@ if __name__ == '__main__':
topic
=
row
[
1
][
df_col
[
1
]]
description
=
row
[
1
][
df_col
[
2
]]
if
regexp
.
search
(
str
(
topic
)):
data_dict
[
'DiseaseCause'
]
.
append
(
str
(
disease
)
.
strip
()
.
replace
(
" "
,
""
)
+
'Cause'
)
data_dict
[
'DiseaseCauseDescription'
]
.
append
(
str
(
description
))
temp
=
1
break
if
temp
==
0
:
data_dict
[
'DiseaseCause'
]
.
append
(
''
)
data_dict
[
'DiseaseCauseDescription'
]
.
append
(
''
)
temp
=
0
...
...
@@ -78,13 +72,11 @@ if __name__ == '__main__':
topic
=
row
[
1
][
df_col
[
1
]]
description
=
row
[
1
][
df_col
[
2
]]
if
regexp
.
search
(
str
(
topic
)):
data_dict
[
'DiseaseDiagnose'
]
.
append
(
str
(
disease
)
.
strip
()
.
replace
(
" "
,
""
)
+
'Diagnose'
)
data_dict
[
'DiseaseDiagnoseDescription'
]
.
append
(
str
(
description
))
temp
=
1
break
if
temp
==
0
:
data_dict
[
'DiseaseDiagnose'
]
.
append
(
''
)
data_dict
[
'DiseaseDiagnoseDescription'
]
.
append
(
''
)
temp
=
0
...
...
@@ -93,13 +85,11 @@ if __name__ == '__main__':
topic
=
row
[
1
][
df_col
[
1
]]
description
=
row
[
1
][
df_col
[
2
]]
if
regexp
.
search
(
str
(
topic
)):
data_dict
[
'DiseasePrevention'
]
.
append
(
str
(
disease
)
.
strip
()
.
replace
(
" "
,
""
)
+
'Prevention'
)
data_dict
[
'DiseasePreventionDescription'
]
.
append
(
str
(
description
))
temp
=
1
break
if
temp
==
0
:
data_dict
[
'DiseasePrevention'
]
.
append
(
''
)
data_dict
[
'DiseasePreventionDescription'
]
.
append
(
''
)
temp
=
0
...
...
@@ -108,14 +98,12 @@ if __name__ == '__main__':
topic
=
row
[
1
][
df_col
[
1
]]
description
=
row
[
1
][
df_col
[
2
]]
if
regexp
.
search
(
str
(
topic
)):
data_dict
[
'DiseaseSymptoms'
]
.
append
(
str
(
disease
)
.
strip
()
.
replace
(
" "
,
""
)
+
'Symptoms'
)
data_dict
[
'DiseaseSymptomsDescription'
]
.
append
(
str
(
description
))
data_dict
[
'DiseaseSymptomDescription'
]
.
append
(
str
(
description
))
temp
=
1
break
if
temp
==
0
:
data_dict
[
'DiseaseSymptoms'
]
.
append
(
''
)
data_dict
[
'DiseaseSymptomsDescription'
]
.
append
(
''
)
data_dict
[
'DiseaseSymptomDescription'
]
.
append
(
''
)
temp
=
0
for
row
in
grouped_rows
.
iterrows
():
...
...
@@ -123,13 +111,47 @@ if __name__ == '__main__':
topic
=
row
[
1
][
df_col
[
1
]]
description
=
row
[
1
][
df_col
[
2
]]
if
regexp
.
search
(
str
(
topic
)):
data_dict
[
'DiseaseTreatment'
]
.
append
(
str
(
disease
)
.
strip
()
.
replace
(
" "
,
""
)
+
'Treatment'
)
data_dict
[
'DiseaseTreatmentDescription'
]
.
append
(
str
(
description
))
temp
=
1
break
if
temp
==
0
:
data_dict
[
'DiseaseTreatment'
]
.
append
(
''
)
data_dict
[
'DiseaseTreatmentDescription'
]
.
append
(
''
)
pd
.
DataFrame
(
data_dict
)
.
to_csv
(
'dogDisease.csv'
,
index
=
False
)
temp
=
0
for
row
in
grouped_rows
.
iterrows
():
description
=
row
[
1
][
df_col
[
2
]]
regexp
=
re
.
compile
(
r'Allerg|allerg'
)
if
regexp
.
search
(
str
(
description
)):
data_dict
[
'DiseaseInfection'
]
.
append
(
'Allergic'
)
temp
=
1
break
regexp
=
re
.
compile
(
r'Bacter|bacter'
)
if
regexp
.
search
(
str
(
description
)):
data_dict
[
'DiseaseInfection'
]
.
append
(
'Bacterial'
)
temp
=
1
break
regexp
=
re
.
compile
(
r'Flea|flea'
)
if
regexp
.
search
(
str
(
description
)):
data_dict
[
'DiseaseInfection'
]
.
append
(
'Fleas'
)
temp
=
1
break
regexp
=
re
.
compile
(
r'Fung|fung'
)
if
regexp
.
search
(
str
(
description
)):
data_dict
[
'DiseaseInfection'
]
.
append
(
'Fungal'
)
temp
=
1
break
regexp
=
re
.
compile
(
r'Virus|virus'
)
if
regexp
.
search
(
str
(
description
)):
data_dict
[
'DiseaseInfection'
]
.
append
(
'Viral'
)
temp
=
1
break
if
temp
==
0
:
data_dict
[
'DiseaseInfection'
]
.
append
(
''
)
pd
.
DataFrame
(
data_dict
)
.
drop_duplicates
()
.
to_csv
(
'dogDisease.csv'
,
index
=
False
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment