data pre-processing module updated

69a0ea3b · Wickramasinghe R.J.P · 6f7c3ac3 · 69a0ea3b
Commit 69a0ea3b authored Sep 15, 2022 by Wickramasinghe R.J.P
Show whitespace changes
Inline Side-by-side

Showing with 42 additions and 20 deletions

Ontology_Based_Information_System/CSV Translator/csv_convertor.py +42 -20

No files found.
--- a/Ontology_Based_Information_System/CSV Translator/csv_convertor.py
+++ b/Ontology_Based_Information_System/CSV Translator/csv_convertor.py
@@ -3,23 +3,19 @@ import re
 import pandas as pd

 if __name__ == '__main__':
-    df = pd.read_csv('C:\\Users\\LENOVO\\Music\\New folder\\Ontology generation\\petmd.csv')
+    df = pd.read_csv('petmd.csv')

    df_col = df.columns

    data_dict = {
        'Disease': [],
        'DiseaseDescription': [],
-        'DiseaseCause': [],
        'DiseaseCauseDescription': [],
-        'DiseaseDiagnose': [],
        'DiseaseDiagnoseDescription': [],
-        'DiseasePrevention': [],
        'DiseasePreventionDescription': [],
-        'DiseaseSymptoms': [],
-        'DiseaseSymptomsDescription': [],
-        'DiseaseTreatment': [],
+        'DiseaseSymptomDescription': [],
        'DiseaseTreatmentDescription': [],
+        'DiseaseInfection': []
    }

    disease = ''
@@ -63,13 +59,11 @@ if __name__ == '__main__':
            topic = row[1][df_col[1]]
            description = row[1][df_col[2]]
            if regexp.search(str(topic)):
-                data_dict['DiseaseCause'].append(str(disease).strip().replace(" ", "") + 'Cause')
                data_dict['DiseaseCauseDescription'].append(str(description))
                temp = 1
                break

        if temp == 0:
-            data_dict['DiseaseCause'].append('')
            data_dict['DiseaseCauseDescription'].append('')

        temp = 0
@@ -78,13 +72,11 @@ if __name__ == '__main__':
            topic = row[1][df_col[1]]
            description = row[1][df_col[2]]
            if regexp.search(str(topic)):
-                data_dict['DiseaseDiagnose'].append(str(disease).strip().replace(" ", "") + 'Diagnose')
                data_dict['DiseaseDiagnoseDescription'].append(str(description))
                temp = 1
                break

        if temp == 0:
-            data_dict['DiseaseDiagnose'].append('')
            data_dict['DiseaseDiagnoseDescription'].append('')

        temp = 0
@@ -93,13 +85,11 @@ if __name__ == '__main__':
            topic = row[1][df_col[1]]
            description = row[1][df_col[2]]
            if regexp.search(str(topic)):
-                data_dict['DiseasePrevention'].append(str(disease).strip().replace(" ", "") + 'Prevention')
                data_dict['DiseasePreventionDescription'].append(str(description))
                temp = 1
                break

        if temp == 0:
-            data_dict['DiseasePrevention'].append('')
            data_dict['DiseasePreventionDescription'].append('')

        temp = 0
@@ -108,14 +98,12 @@ if __name__ == '__main__':
            topic = row[1][df_col[1]]
            description = row[1][df_col[2]]
            if regexp.search(str(topic)):
-                data_dict['DiseaseSymptoms'].append(str(disease).strip().replace(" ", "") + 'Symptoms')
-                data_dict['DiseaseSymptomsDescription'].append(str(description))
+                data_dict['DiseaseSymptomDescription'].append(str(description))
                temp = 1
                break

        if temp == 0:
-            data_dict['DiseaseSymptoms'].append('')
-            data_dict['DiseaseSymptomsDescription'].append('')
+            data_dict['DiseaseSymptomDescription'].append('')

        temp = 0
        for row in grouped_rows.iterrows():
@@ -123,13 +111,47 @@ if __name__ == '__main__':
            topic = row[1][df_col[1]]
            description = row[1][df_col[2]]
            if regexp.search(str(topic)):
-                data_dict['DiseaseTreatment'].append(str(disease).strip().replace(" ", "") + 'Treatment')
                data_dict['DiseaseTreatmentDescription'].append(str(description))
                temp = 1
                break

        if temp == 0:
-            data_dict['DiseaseTreatment'].append('')
            data_dict['DiseaseTreatmentDescription'].append('')

-    pd.DataFrame(data_dict).to_csv('dogDisease.csv', index=False)
+        temp = 0
+        for row in grouped_rows.iterrows():
+            description = row[1][df_col[2]]
+            regexp = re.compile(r'Allerg|allerg')
+            if regexp.search(str(description)):
+                data_dict['DiseaseInfection'].append('Allergic')
+                temp = 1
+                break
+
+            regexp = re.compile(r'Bacter|bacter')
+            if regexp.search(str(description)):
+                data_dict['DiseaseInfection'].append('Bacterial')
+                temp = 1
+                break
+
+            regexp = re.compile(r'Flea|flea')
+            if regexp.search(str(description)):
+                data_dict['DiseaseInfection'].append('Fleas')
+                temp = 1
+                break
+
+            regexp = re.compile(r'Fung|fung')
+            if regexp.search(str(description)):
+                data_dict['DiseaseInfection'].append('Fungal')
+                temp = 1
+                break
+
+            regexp = re.compile(r'Virus|virus')
+            if regexp.search(str(description)):
+                data_dict['DiseaseInfection'].append('Viral')
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseaseInfection'].append('')
+
+    pd.DataFrame(data_dict).drop_duplicates().to_csv('dogDisease.csv', index=False)