Dataset Converter Added

3c8e482f · Wickramasinghe R.J.P · af75b08f · 3c8e482f
Commit 3c8e482f authored Sep 02, 2022 by Wickramasinghe R.J.P
Show whitespace changes
Inline Side-by-side

Showing with 135 additions and 0 deletions

Ontology_Based_Information_System/CSV Translator/csv_convertor.py ..._Based_Information_System/CSV Translator/csv_convertor.py +135 -0

No files found.
--- a/Ontology_Based_Information_System/CSV Translator/csv_convertor.py
+++ b/Ontology_Based_Information_System/CSV Translator/csv_convertor.py
+import re
+
+import pandas as pd
+
+if __name__ == '__main__':
+    df = pd.read_csv('C:\\Users\\LENOVO\\Music\\New folder\\Ontology generation\\petmd.csv')
+
+    df_col = df.columns
+
+    data_dict = {
+        'Disease': [],
+        'DiseaseDescription': [],
+        'DiseaseCause': [],
+        'DiseaseCauseDescription': [],
+        'DiseaseDiagnose': [],
+        'DiseaseDiagnoseDescription': [],
+        'DiseasePrevention': [],
+        'DiseasePreventionDescription': [],
+        'DiseaseSymptoms': [],
+        'DiseaseSymptomsDescription': [],
+        'DiseaseTreatment': [],
+        'DiseaseTreatmentDescription': [],
+    }
+
+    disease = ''
+    diseases = []
+
+    for index, row in df.iterrows():
+        new_disease = row[df_col[0]]
+        if disease != new_disease:
+            disease = new_disease
+            diseases.append(disease)
+
+    for disease in diseases:
+        grouped_rows = df[df[df_col[0]] == disease]
+
+        regex = re.compile('[^a-zA-Z]')
+        txt = str(disease).strip().replace(" ", "").replace("'", "").replace("inDogs", "")
+        head, sep, tail = txt.partition(":")
+        txt2 = head
+        head, sep, tail = txt2.partition("(")
+        txt3 = head
+        head, sep, tail = txt3.partition("/")
+        txt4 = regex.sub('', str(head))
+
+        data_dict['Disease'].append(txt4)
+        temp = 0
+        for row in grouped_rows.iterrows():
+            regexp = re.compile(r'What|what')
+            topic = row[1][df_col[1]]
+            description = row[1][df_col[2]]
+            if regexp.search(str(topic)):
+                data_dict['DiseaseDescription'].append(str(description))
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseaseDescription'].append('')
+
+        temp = 0
+        for row in grouped_rows.iterrows():
+            regexp = re.compile(r'Cause|cause')
+            topic = row[1][df_col[1]]
+            description = row[1][df_col[2]]
+            if regexp.search(str(topic)):
+                data_dict['DiseaseCause'].append(str(disease).strip().replace(" ", "") + 'Cause')
+                data_dict['DiseaseCauseDescription'].append(str(description))
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseaseCause'].append('')
+            data_dict['DiseaseCauseDescription'].append('')
+
+        temp = 0
+        for row in grouped_rows.iterrows():
+            regexp = re.compile(r'Diagnose|diagnose')
+            topic = row[1][df_col[1]]
+            description = row[1][df_col[2]]
+            if regexp.search(str(topic)):
+                data_dict['DiseaseDiagnose'].append(str(disease).strip().replace(" ", "") + 'Diagnose')
+                data_dict['DiseaseDiagnoseDescription'].append(str(description))
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseaseDiagnose'].append('')
+            data_dict['DiseaseDiagnoseDescription'].append('')
+
+        temp = 0
+        for row in grouped_rows.iterrows():
+            regexp = re.compile(r'Prevention|prevention')
+            topic = row[1][df_col[1]]
+            description = row[1][df_col[2]]
+            if regexp.search(str(topic)):
+                data_dict['DiseasePrevention'].append(str(disease).strip().replace(" ", "") + 'Prevention')
+                data_dict['DiseasePreventionDescription'].append(str(description))
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseasePrevention'].append('')
+            data_dict['DiseasePreventionDescription'].append('')
+
+        temp = 0
+        for row in grouped_rows.iterrows():
+            regexp = re.compile(r'Symptoms|symptoms')
+            topic = row[1][df_col[1]]
+            description = row[1][df_col[2]]
+            if regexp.search(str(topic)):
+                data_dict['DiseaseSymptoms'].append(str(disease).strip().replace(" ", "") + 'Symptoms')
+                data_dict['DiseaseSymptomsDescription'].append(str(description))
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseaseSymptoms'].append('')
+            data_dict['DiseaseSymptomsDescription'].append('')
+
+        temp = 0
+        for row in grouped_rows.iterrows():
+            regexp = re.compile(r'Treatment|treatment')
+            topic = row[1][df_col[1]]
+            description = row[1][df_col[2]]
+            if regexp.search(str(topic)):
+                data_dict['DiseaseTreatment'].append(str(disease).strip().replace(" ", "") + 'Treatment')
+                data_dict['DiseaseTreatmentDescription'].append(str(description))
+                temp = 1
+                break
+
+        if temp == 0:
+            data_dict['DiseaseTreatment'].append('')
+            data_dict['DiseaseTreatmentDescription'].append('')
+
+    pd.DataFrame(data_dict).to_csv('dogDisease.csv', index=False)