Commit 3c8e482f authored by Wickramasinghe R.J.P

Dataset Converter Added

parent af75b08f
import re
import pandas as pd
# Default locations kept from the original script. The input path is
# machine-specific; both can be overridden on the command line (see main()).
DEFAULT_INPUT_CSV = 'C:\\Users\\LENOVO\\Music\\New folder\\Ontology generation\\petmd.csv'
DEFAULT_OUTPUT_CSV = 'dogDisease.csv'

# Patterns are compiled once here instead of once per disease / per row.
_NON_LETTERS = re.compile('[^a-zA-Z]')
_DESCRIPTION_TOPIC = re.compile(r'What|what')

# (output column suffix, pattern searched for in the topic column).
# The order here fixes the order of the output columns.
_SECTION_TOPICS = (
    ('Cause', re.compile(r'Cause|cause')),
    ('Diagnose', re.compile(r'Diagnose|diagnose')),
    ('Prevention', re.compile(r'Prevention|prevention')),
    ('Symptoms', re.compile(r'Symptoms|symptoms')),
    ('Treatment', re.compile(r'Treatment|treatment')),
)


def _clean_disease_name(disease):
    """Return the letters-only identifier written to the 'Disease' column.

    Spaces, apostrophes and the text "inDogs" are removed, everything from
    the first ':', '(' or '/' onwards is cut off, and any remaining
    non-letter characters are dropped.
    """
    text = str(disease).strip().replace(" ", "").replace("'", "").replace("inDogs", "")
    for separator in (":", "(", "/"):
        text = text.partition(separator)[0]
    return _NON_LETTERS.sub('', text)


def _first_matching_description(entries, pattern):
    """Return the description of the first entry whose topic matches.

    ``entries`` is a sequence of ``(topic, description)`` string pairs.
    Returns ``None`` when no topic matches ``pattern``.
    """
    for topic, description in entries:
        if pattern.search(topic):
            return description
    return None


def convert_dataset(df):
    """Collapse a (disease, topic, text) table into one row per disease.

    The first three columns of ``df`` are read positionally as the disease
    name, the section topic and the section text. For every disease the
    first topic matching each keyword supplies that section's text; sections
    with no matching topic are left as empty strings.

    Diseases are emitted once each, in order of first appearance, even when
    their rows are not contiguous in ``df``. Rows whose disease name is
    missing are skipped.

    Returns a ``pandas.DataFrame`` with the columns 'Disease',
    'DiseaseDescription', and 'Disease<Section>' /
    'Disease<Section>Description' for each of Cause, Diagnose, Prevention,
    Symptoms and Treatment.
    """
    disease_col, topic_col, text_col = df.columns[0], df.columns[1], df.columns[2]

    data = {'Disease': [], 'DiseaseDescription': []}
    for suffix, _ in _SECTION_TOPICS:
        data['Disease' + suffix] = []
        data['Disease' + suffix + 'Description'] = []

    # sort=False keeps first-appearance order; groupby also merges
    # non-contiguous rows of one disease and drops missing disease names.
    for disease, rows in df.groupby(disease_col, sort=False):
        entries = [
            (str(topic), str(text))
            for topic, text in zip(rows[topic_col], rows[text_col])
        ]
        # Section names are built from the space-stripped raw name, not the
        # cleaned identifier, exactly as the original script did.
        raw_name = str(disease).strip().replace(" ", "")

        data['Disease'].append(_clean_disease_name(disease))
        overview = _first_matching_description(entries, _DESCRIPTION_TOPIC)
        data['DiseaseDescription'].append('' if overview is None else overview)

        for suffix, pattern in _SECTION_TOPICS:
            text = _first_matching_description(entries, pattern)
            if text is None:
                data['Disease' + suffix].append('')
                data['Disease' + suffix + 'Description'].append('')
            else:
                data['Disease' + suffix].append(raw_name + suffix)
                data['Disease' + suffix + 'Description'].append(text)

    return pd.DataFrame(data)


def main(argv=None):
    """Read the source CSV, convert it and write the result CSV.

    ``argv`` may hold up to two optional arguments: the input path and the
    output path. When it is ``None`` the process command line is used; any
    argument that is absent falls back to the original default location.
    """
    import sys

    args = sys.argv[1:] if argv is None else list(argv)
    input_path = args[0] if len(args) > 0 else DEFAULT_INPUT_CSV
    output_path = args[1] if len(args) > 1 else DEFAULT_OUTPUT_CSV

    convert_dataset(pd.read_csv(input_path)).to_csv(output_path, index=False)


if __name__ == '__main__':
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment