Commit 69a0ea3b authored by Wickramasinghe R.J.P

data pre-processing module updated

parent 6f7c3ac3
......@@ -3,23 +3,19 @@ import re
import pandas as pd
if __name__ == '__main__':
df = pd.read_csv('C:\\Users\\LENOVO\\Music\\New folder\\Ontology generation\\petmd.csv')
df = pd.read_csv('petmd.csv')
df_col = df.columns
data_dict = {
'Disease': [],
'DiseaseDescription': [],
'DiseaseCause': [],
'DiseaseCauseDescription': [],
'DiseaseDiagnose': [],
'DiseaseDiagnoseDescription': [],
'DiseasePrevention': [],
'DiseasePreventionDescription': [],
'DiseaseSymptoms': [],
'DiseaseSymptomsDescription': [],
'DiseaseTreatment': [],
'DiseaseSymptomDescription': [],
'DiseaseTreatmentDescription': [],
'DiseaseInfection': []
}
disease = ''
......@@ -63,13 +59,11 @@ if __name__ == '__main__':
topic = row[1][df_col[1]]
description = row[1][df_col[2]]
if regexp.search(str(topic)):
data_dict['DiseaseCause'].append(str(disease).strip().replace(" ", "") + 'Cause')
data_dict['DiseaseCauseDescription'].append(str(description))
temp = 1
break
if temp == 0:
data_dict['DiseaseCause'].append('')
data_dict['DiseaseCauseDescription'].append('')
temp = 0
......@@ -78,13 +72,11 @@ if __name__ == '__main__':
topic = row[1][df_col[1]]
description = row[1][df_col[2]]
if regexp.search(str(topic)):
data_dict['DiseaseDiagnose'].append(str(disease).strip().replace(" ", "") + 'Diagnose')
data_dict['DiseaseDiagnoseDescription'].append(str(description))
temp = 1
break
if temp == 0:
data_dict['DiseaseDiagnose'].append('')
data_dict['DiseaseDiagnoseDescription'].append('')
temp = 0
......@@ -93,13 +85,11 @@ if __name__ == '__main__':
topic = row[1][df_col[1]]
description = row[1][df_col[2]]
if regexp.search(str(topic)):
data_dict['DiseasePrevention'].append(str(disease).strip().replace(" ", "") + 'Prevention')
data_dict['DiseasePreventionDescription'].append(str(description))
temp = 1
break
if temp == 0:
data_dict['DiseasePrevention'].append('')
data_dict['DiseasePreventionDescription'].append('')
temp = 0
......@@ -108,14 +98,12 @@ if __name__ == '__main__':
topic = row[1][df_col[1]]
description = row[1][df_col[2]]
if regexp.search(str(topic)):
data_dict['DiseaseSymptoms'].append(str(disease).strip().replace(" ", "") + 'Symptoms')
data_dict['DiseaseSymptomsDescription'].append(str(description))
data_dict['DiseaseSymptomDescription'].append(str(description))
temp = 1
break
if temp == 0:
data_dict['DiseaseSymptoms'].append('')
data_dict['DiseaseSymptomsDescription'].append('')
data_dict['DiseaseSymptomDescription'].append('')
temp = 0
for row in grouped_rows.iterrows():
......@@ -123,13 +111,47 @@ if __name__ == '__main__':
topic = row[1][df_col[1]]
description = row[1][df_col[2]]
if regexp.search(str(topic)):
data_dict['DiseaseTreatment'].append(str(disease).strip().replace(" ", "") + 'Treatment')
data_dict['DiseaseTreatmentDescription'].append(str(description))
temp = 1
break
if temp == 0:
data_dict['DiseaseTreatment'].append('')
data_dict['DiseaseTreatmentDescription'].append('')
pd.DataFrame(data_dict).to_csv('dogDisease.csv', index=False)
# Classify the current disease by infection type.
#
# Each row of `grouped_rows` is inspected in order; the text in the third
# column (df_col[2]) is searched for the keyword stems below.  The first row
# that matches any stem decides the label, and within one row the stems are
# tried in the order listed (so 'Allergic' wins over 'Bacterial', and so on).
# Exactly one value is appended to data_dict['DiseaseInfection'] per pass:
# the matched label, or '' when no row matches.
#
# NOTE(review): `grouped_rows` is defined outside this fragment; it is
# presumably the rows belonging to a single disease -- confirm at the caller.
#
# The patterns are compiled once here instead of once per row.
infection_patterns = (
    (re.compile(r'Allerg|allerg'), 'Allergic'),
    (re.compile(r'Bacter|bacter'), 'Bacterial'),
    (re.compile(r'Flea|flea'), 'Fleas'),
    (re.compile(r'Fung|fung'), 'Fungal'),
    (re.compile(r'Virus|virus'), 'Viral'),
)

# `temp` is the "label already appended" flag: 0 = nothing found yet.
temp = 0
for row in grouped_rows.iterrows():
    # iterrows() yields (index, Series); row[1] is the row itself.
    description = row[1][df_col[2]]
    text = str(description)  # str() guards against NaN / non-string cells
    for regexp, infection in infection_patterns:
        if regexp.search(text):
            data_dict['DiseaseInfection'].append(infection)
            temp = 1
            break
    if temp == 1:
        # A label was recorded for this disease; skip the remaining rows.
        break
if temp == 0:
    # No row mentioned any known infection type: keep the column aligned
    # with the other data_dict lists by appending an empty value.
    data_dict['DiseaseInfection'].append('')
pd.DataFrame(data_dict).drop_duplicates().to_csv('dogDisease.csv', index=False)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment