Commit d6cade50 authored by Pamudi Naveesha's avatar Pamudi Naveesha

initial commit

parent 81d4dbf1
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **FUNCTION TO EXTRACT IMAGE LEADS(1-13) (FEATURE EXTRACTION)**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b67bd485-758c-4399-8bb2-b4a13485786f",
"metadata": {},
"outputs": [],
"source": [
"\"\"\"#### **FUNCTION FOR IMAGE LEADS(1-13) PRE-PROCESSING**\"\"\"\n",
"def Convert_Image_Lead(image_file,parent_folder):\n",
"    \"\"\"Crop the 13 lead regions out of one ECG image and save each crop as a PNG.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    image_file : str\n",
"        File name of the ECG image inside ``parent_folder``.\n",
"    parent_folder : str\n",
"        Directory containing ``image_file``. A sub-folder named after the\n",
"        image (file name without its extension) is created there.\n",
"\n",
"    Side effects\n",
"    ------------\n",
"    Writes ``Lead_<n>_Signal.png`` (n = 1..13) into that sub-folder and then\n",
"    passes the crops to ``extract_signal_leads``.\n",
"    \"\"\"\n",
"    #read the image\n",
"    image=imread('{parent}/{image_file}'.format(parent=str(parent_folder),image_file=str(image_file)),plugin='matplotlib')\n",
"\n",
"    #fixed pixel boxes: a 3 x 4 grid (leads 1-12, row by row) plus one long strip (lead 13)\n",
"    #NOTE(review): the coordinates presumably match the dataset's sheet layout - confirm\n",
"    row_bounds = [(300, 600), (600, 900), (900, 1200)]\n",
"    col_bounds = [(150, 643), (646, 1135), (1140, 1626), (1630, 2125)]\n",
"    Leads = [image[top:bottom, left:right]\n",
"             for top, bottom in row_bounds\n",
"             for left, right in col_bounds]\n",
"    Leads.append(image[1250:1480, 150:2125])\n",
"\n",
"    #folder_name to store lead_images: the file name without its extension\n",
"    #(splitext only strips a real trailing extension, unlike an unanchored regex)\n",
"    folder_name = os.path.splitext(str(image_file))[0]\n",
"    output_dir = os.path.join(str(parent_folder), folder_name)\n",
"    #create the output folder once, before the loop\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"    plt.ioff()\n",
"    #loop through leads and create separate images\n",
"    for lead_number, lead_image in enumerate(Leads, start=1):\n",
"        fig, ax = plt.subplots()\n",
"        ax.imshow(lead_image)\n",
"        ax.axis('off')\n",
"        ax.set_title('Leads {0}'.format(lead_number))\n",
"        #save first, then release only this figure\n",
"        fig.savefig(os.path.join(output_dir, 'Lead_{x}_Signal.png'.format(x=lead_number)))\n",
"        plt.close(fig)\n",
"\n",
"    extract_signal_leads(Leads,folder_name,parent_folder)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **FUNCTION FOR IMAGE LEADS(1-13) PRE-PROCESSING & EXTRACTING SIGNAL CONTOURS**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9cc376d9-c1d7-4c4e-87b8-1b569d451f0a",
"metadata": {},
"outputs": [],
"source": [
"#extract_only signal from images\n",
"def extract_signal_leads(Leads,folder_name,parent):\n",
"    \"\"\"Binarise each lead image, trace its longest contour and export it.\n",
"\n",
"    For every lead this saves ``Lead_<n>_preprocessed_Signal.png`` (the\n",
"    Otsu-thresholded image) and ``Lead_<n>_Contour_Signal.png`` (the longest\n",
"    contour) under ``<parent>/<folder_name>/`` and then passes the contour,\n",
"    resampled to 255 points, to ``scale_csv_1D``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    Leads : list\n",
"        Lead images; each one is passed to ``color.rgb2gray``. The entry at\n",
"        index 12 (lead 13) keeps its size, all others are resized to 300x450.\n",
"    folder_name : str\n",
"        Existing sub-folder of ``parent`` that receives the images.\n",
"    parent : str\n",
"        Directory that contains ``folder_name``.\n",
"\n",
"    A lead in which no contour is found is skipped with a warning, so it\n",
"    never reuses the contour of the previous lead.\n",
"    \"\"\"\n",
"    import warnings\n",
"\n",
"    plt.ioff()\n",
"    #looping through image list containing all leads from 1-13\n",
"    for lead_index, lead_image in enumerate(Leads):\n",
"        lead_number = lead_index + 1\n",
"\n",
"        #converting to gray scale\n",
"        grayscale = color.rgb2gray(lead_image)\n",
"        #smoothing image\n",
"        blurred_image = gaussian(grayscale,sigma=0.7)\n",
"        #otsu threshold separates foreground from background\n",
"        global_thresh = threshold_otsu(blurred_image)\n",
"        #pixels darker than the threshold become True\n",
"        binary_global = blurred_image < global_thresh\n",
"\n",
"        #resize every lead except the long strip (index 12)\n",
"        if lead_index != 12:\n",
"            binary_global = resize(binary_global, (300, 450))\n",
"\n",
"        fig1, ax1 = plt.subplots()\n",
"        ax1.imshow(binary_global,cmap='gray')\n",
"        ax1.axis('off')\n",
"        ax1.set_title('pre-processed Leads {} image'.format(lead_number))\n",
"        #save the image, then release only this figure\n",
"        fig1.savefig('{parent}/{folder_name}/Lead_{x}_preprocessed_Signal.png'.format(folder_name=folder_name,x=lead_number,parent=parent))\n",
"        plt.close(fig1)\n",
"\n",
"        #find contours and keep only the longest one as the signal\n",
"        contours = measure.find_contours(binary_global,0.8)\n",
"        if not contours:\n",
"            warnings.warn('No contour found for lead {} of {}; lead skipped'.format(lead_number, folder_name))\n",
"            continue\n",
"        #reversed() makes max() return the last contour among equally long ones\n",
"        signal_contour = max(reversed(contours), key=len)\n",
"        test = resize(signal_contour, (255, 2))\n",
"\n",
"        fig7, ax7 = plt.subplots()\n",
"        ax7.invert_yaxis()\n",
"        ax7.plot(test[:, 1], test[:, 0],linewidth=1,color='black')\n",
"        ax7.axis('image')\n",
"        ax7.set_title('Contour {} image'.format(lead_number))\n",
"        #save the image, then release only this figure\n",
"        fig7.savefig('{parent}/{folder_name}/Lead_{x}_Contour_Signal.png'.format(folder_name=folder_name,x=lead_number,parent=parent))\n",
"        plt.close(fig7)\n",
"\n",
"        #scale_csv_1D expects the zero-based lead index\n",
"        scale_csv_1D(test,lead_index,folder_name,parent)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **FUNCTIONS FOR CSV CONVERSION AND SCALING**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c28a9eb0-2c8a-4f30-b6d7-b5ae5f9a4fe7",
"metadata": {},
"outputs": [],
"source": [
"def convert_csv(test,lead_no,folder_name,parent):\n",
"    \"\"\"Write one lead's contour points to ``<parent>/<folder_name>/<lead_no+1>.csv``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    test : array-like with two columns\n",
"        Contour points; the columns are stored as ``X`` and ``Y``.\n",
"    lead_no : int\n",
"        Lead index; the file name uses ``lead_no + 1``.\n",
"    folder_name : str\n",
"        Output sub-folder; its first two characters fill the ``Target`` column.\n",
"    parent : str\n",
"        Directory that contains ``folder_name``.\n",
"    \"\"\"\n",
"    #convert contour to dataframe\n",
"    target = folder_name[0:2]\n",
"    df = pd.DataFrame(test, columns=['X', 'Y'])\n",
"    df['Target'] = target\n",
"    #convert to CSV (no figure is created here: nothing is plotted)\n",
"    df.to_csv('{parent}/{folder_name}/{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent,folder_name=folder_name),index=False)\n",
"\n",
"def scale_csv(test,lead_no,folder_name,parent):\n",
"    \"\"\"Min-max scale contour points and store them in ``<parent>/Scaled_<lead_no+1>.csv``.\n",
"\n",
"    The scaled ``X`` and ``Y`` columns are transposed, so each call adds two\n",
"    rows (one per coordinate) followed by a ``Target`` column holding the\n",
"    first two characters of ``folder_name``. The header row is written only\n",
"    when the file does not exist yet; later calls append.\n",
"    \"\"\"\n",
"    csv_path = '{parent}/Scaled_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
"\n",
"    #scale both coordinates, then lay them out as rows\n",
"    scaled_points = MinMaxScaler().fit_transform(test)\n",
"    scaled_rows = pd.DataFrame(scaled_points, columns=['X', 'Y']).T\n",
"    scaled_rows['Target'] = folder_name[0:2]\n",
"\n",
"    #append without header to an existing file, otherwise create it with header\n",
"    file_exists = os.path.isfile(csv_path)\n",
"    scaled_rows.to_csv(csv_path, mode='a' if file_exists else 'w', header=not file_exists, index=False)\n",
"\n",
"def scale_csv_1D(test,lead_no,folder_name,parent):\n",
"    \"\"\"Min-max scale a contour, plot its first column and append it as one CSV row.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    test : array-like with two columns\n",
"        Contour points. Only column 0 of the scaled result is kept (the\n",
"        column ``extract_signal_leads`` plots on the vertical axis).\n",
"    lead_no : int\n",
"        Lead index; file names use ``lead_no + 1``.\n",
"    folder_name : str\n",
"        Sub-folder of ``parent`` that receives the plot; its first two\n",
"        characters fill the ``Target`` column.\n",
"    parent : str\n",
"        Directory holding ``folder_name`` and ``scaled_data_1D_<n>.csv``.\n",
"\n",
"    Side effects\n",
"    ------------\n",
"    Saves ``ID_Lead_<n>_Signal.png`` and appends one row to the CSV, writing\n",
"    the header only when the CSV does not exist yet.\n",
"    \"\"\"\n",
"    target = folder_name[0:2]\n",
"    #scaling the data\n",
"    scaler = MinMaxScaler()\n",
"    fit_transform_data = scaler.fit_transform(test)\n",
"    Normalized_Scaled = pd.DataFrame(fit_transform_data[:,0], columns=['X'])\n",
"\n",
"    plt.ioff()\n",
"    fig6, ax6 = plt.subplots()\n",
"    ax6.invert_yaxis()\n",
"    ax6.plot(Normalized_Scaled,linewidth=1,color='black',linestyle='solid')\n",
"    #save first, then release only this figure\n",
"    fig6.savefig('{parent}/{folder_name}/ID_Lead_{lead_no}_Signal.png'.format(folder_name=folder_name,lead_no=lead_no+1,parent=parent))\n",
"    plt.close(fig6)\n",
"\n",
"    #one row per call: the scaled values followed by the target label\n",
"    Normalized_Scaled = Normalized_Scaled.T\n",
"    Normalized_Scaled['Target'] = target\n",
"\n",
"    #scaled_data to CSV: append without header if the file exists, else create it\n",
"    csv_path = '{parent}/scaled_data_1D_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
"    file_exists = os.path.isfile(csv_path)\n",
"    Normalized_Scaled.to_csv(csv_path, mode='a' if file_exists else 'w', header=not file_exists, index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### **RUN THE BELOW CELL TO GENERATE THE NECESSARY FILES**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2afba58-fbb2-4381-a8f6-29b8f58bd3aa",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import re\n",
"from skimage.filters import threshold_otsu,gaussian\n",
"from skimage import measure\n",
"import pandas as pd\n",
"import numpy as nm\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from skimage.io import imread\n",
"from skimage import color\n",
"from skimage.transform import resize\n",
"from numpy import asarray\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\"\"\"#### **NOW WE HAVE BOTH CSV FILES AND CROPPED LEAD IMAGES(1-13) TO WORK ON. WE CAN PERFORM CNN on 1D images & 2D images and perform different Supervised classification algorithms based on CSV DATA (KNN, LOGISTIC REGRESSION, SVM etc.) and also different clustering techniques without target to check predictions**\n",
"\n",
"### **NOW PERFORM DATA PREPROCESSING/FEATURE EXTRACTION ON ALL THE FILES IN THE ECG_IMAGES FOLDER**\n",
"\n",
"#### **FUNCTION TO EXTRACT IMAGE LEADS(1-13) (FEATURE EXTRACTION)**\n",
"\"\"\"\n",
"\n",
"\"\"\"####**FUNCTIONS FOR CSV CONVERSION AND SCALING**\"\"\"\n",
"\n",
"#extract_only signal from images\n",
"def extract_signal_leads(Leads,folder_name,parent):\n",
"    \"\"\"Binarise each lead image, trace its longest contour and export it.\n",
"\n",
"    For every lead this saves ``Lead_<n>_preprocessed_Signal.png`` (the\n",
"    Otsu-thresholded image) and ``Lead_<n>_Contour_Signal.png`` (the longest\n",
"    contour) under ``<parent>/<folder_name>/`` and then passes the contour,\n",
"    resampled to 255 points, to ``scale_csv_1D``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    Leads : list\n",
"        Lead images; each one is passed to ``color.rgb2gray``. The entry at\n",
"        index 12 (lead 13) keeps its size, all others are resized to 300x450.\n",
"    folder_name : str\n",
"        Existing sub-folder of ``parent`` that receives the images.\n",
"    parent : str\n",
"        Directory that contains ``folder_name``.\n",
"\n",
"    A lead in which no contour is found is skipped with a warning, so it\n",
"    never reuses the contour of the previous lead.\n",
"    \"\"\"\n",
"    import warnings\n",
"\n",
"    plt.ioff()\n",
"    #looping through image list containing all leads from 1-13\n",
"    for lead_index, lead_image in enumerate(Leads):\n",
"        lead_number = lead_index + 1\n",
"\n",
"        #converting to gray scale\n",
"        grayscale = color.rgb2gray(lead_image)\n",
"        #smoothing image\n",
"        blurred_image = gaussian(grayscale,sigma=0.7)\n",
"        #otsu threshold separates foreground from background\n",
"        global_thresh = threshold_otsu(blurred_image)\n",
"        #pixels darker than the threshold become True\n",
"        binary_global = blurred_image < global_thresh\n",
"\n",
"        #resize every lead except the long strip (index 12)\n",
"        if lead_index != 12:\n",
"            binary_global = resize(binary_global, (300, 450))\n",
"\n",
"        fig1, ax1 = plt.subplots()\n",
"        ax1.imshow(binary_global,cmap='gray')\n",
"        ax1.axis('off')\n",
"        ax1.set_title('pre-processed Leads {} image'.format(lead_number))\n",
"        #save the image, then release only this figure\n",
"        fig1.savefig('{parent}/{folder_name}/Lead_{x}_preprocessed_Signal.png'.format(folder_name=folder_name,x=lead_number,parent=parent))\n",
"        plt.close(fig1)\n",
"\n",
"        #find contours and keep only the longest one as the signal\n",
"        contours = measure.find_contours(binary_global,0.8)\n",
"        if not contours:\n",
"            warnings.warn('No contour found for lead {} of {}; lead skipped'.format(lead_number, folder_name))\n",
"            continue\n",
"        #reversed() makes max() return the last contour among equally long ones\n",
"        signal_contour = max(reversed(contours), key=len)\n",
"        test = resize(signal_contour, (255, 2))\n",
"\n",
"        fig7, ax7 = plt.subplots()\n",
"        ax7.invert_yaxis()\n",
"        ax7.plot(test[:, 1], test[:, 0],linewidth=1,color='black')\n",
"        ax7.axis('image')\n",
"        ax7.set_title('Contour {} image'.format(lead_number))\n",
"        #save the image, then release only this figure\n",
"        fig7.savefig('{parent}/{folder_name}/Lead_{x}_Contour_Signal.png'.format(folder_name=folder_name,x=lead_number,parent=parent))\n",
"        plt.close(fig7)\n",
"\n",
"        #scale_csv_1D expects the zero-based lead index\n",
"        scale_csv_1D(test,lead_index,folder_name,parent)\n",
"\n",
"\"\"\"#### **FUNCTION FOR IMAGE LEADS(1-13) PRE-PROCESSING**\"\"\"\n",
"def Convert_Image_Lead(image_file, parent_folder):\n",
"    \"\"\"Crop the 13 lead regions out of one ECG image and save each crop as a PNG.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    image_file : str\n",
"        File name of the ECG image inside ``parent_folder``.\n",
"    parent_folder : str\n",
"        Directory containing ``image_file``. A sub-folder named after the\n",
"        image (file name without its extension) is created there.\n",
"\n",
"    Side effects\n",
"    ------------\n",
"    Writes ``Lead_<n>_Signal.png`` (n = 1..13) into that sub-folder and then\n",
"    passes the crops to ``extract_signal_leads``.\n",
"    \"\"\"\n",
"    # Read the image using matplotlib's imread\n",
"    image = plt.imread('{parent}/{image_file}'.format(parent=str(parent_folder), image_file=str(image_file)))\n",
"\n",
"    # Drop the alpha channel (if any) so the crops handed to\n",
"    # extract_signal_leads, which calls color.rgb2gray, are plain RGB.\n",
"    # The ndim guard keeps 2-D images from raising IndexError here.\n",
"    if image.ndim == 3 and image.shape[2] == 4:\n",
"        image = image[:, :, :3]\n",
"\n",
"    # Fixed pixel boxes: a 3 x 4 grid (leads 1-12, row by row) plus one long strip (lead 13)\n",
"    # NOTE(review): the coordinates presumably match the dataset's sheet layout - confirm\n",
"    row_bounds = [(300, 600), (600, 900), (900, 1200)]\n",
"    col_bounds = [(150, 643), (646, 1135), (1140, 1625), (1630, 2125)]\n",
"    Leads = [image[top:bottom, left:right]\n",
"             for top, bottom in row_bounds\n",
"             for left, right in col_bounds]\n",
"    Leads.append(image[1250:1480, 150:2125])\n",
"\n",
"    # Folder name to store lead images: the file name without its extension\n",
"    # (splitext only strips a real trailing extension, unlike an unanchored regex)\n",
"    folder_name = os.path.splitext(str(image_file))[0]\n",
"    output_dir = os.path.join(str(parent_folder), folder_name)\n",
"    # Create the output folder once, before the loop\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"    plt.ioff()\n",
"    # Loop through leads and create separate images\n",
"    for lead_number, lead_image in enumerate(Leads, start=1):\n",
"        fig, ax = plt.subplots()\n",
"        ax.imshow(lead_image)\n",
"        ax.axis('off')\n",
"        ax.set_title('Leads {0}'.format(lead_number))\n",
"        # Save first, then release only this figure\n",
"        fig.savefig(os.path.join(output_dir, 'Lead_{x}_Signal.png'.format(x=lead_number)))\n",
"        plt.close(fig)\n",
"\n",
"    extract_signal_leads(Leads, folder_name, parent_folder)\n",
"\n",
"\n",
"\n",
"def convert_csv(test,lead_no,folder_name,parent):\n",
"    \"\"\"Write one lead's contour points to ``<parent>/<folder_name>/<lead_no+1>.csv``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    test : array-like with two columns\n",
"        Contour points; the columns are stored as ``X`` and ``Y``.\n",
"    lead_no : int\n",
"        Lead index; the file name uses ``lead_no + 1``.\n",
"    folder_name : str\n",
"        Output sub-folder; its first two characters fill the ``Target`` column.\n",
"    parent : str\n",
"        Directory that contains ``folder_name``.\n",
"    \"\"\"\n",
"    #convert contour to dataframe\n",
"    target = folder_name[0:2]\n",
"    df = pd.DataFrame(test, columns=['X', 'Y'])\n",
"    df['Target'] = target\n",
"    #convert to CSV (no figure is created here: nothing is plotted)\n",
"    df.to_csv('{parent}/{folder_name}/{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent,folder_name=folder_name),index=False)\n",
"\n",
"def scale_csv(test,lead_no,folder_name,parent):\n",
"    \"\"\"Min-max scale contour points and store them in ``<parent>/Scaled_<lead_no+1>.csv``.\n",
"\n",
"    The scaled ``X`` and ``Y`` columns are transposed, so each call adds two\n",
"    rows (one per coordinate) followed by a ``Target`` column holding the\n",
"    first two characters of ``folder_name``. The header row is written only\n",
"    when the file does not exist yet; later calls append.\n",
"    \"\"\"\n",
"    csv_path = '{parent}/Scaled_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
"\n",
"    #scale both coordinates, then lay them out as rows\n",
"    scaled_points = MinMaxScaler().fit_transform(test)\n",
"    scaled_rows = pd.DataFrame(scaled_points, columns=['X', 'Y']).T\n",
"    scaled_rows['Target'] = folder_name[0:2]\n",
"\n",
"    #append without header to an existing file, otherwise create it with header\n",
"    file_exists = os.path.isfile(csv_path)\n",
"    scaled_rows.to_csv(csv_path, mode='a' if file_exists else 'w', header=not file_exists, index=False)\n",
"\n",
"def scale_csv_1D(test,lead_no,folder_name,parent):\n",
"    \"\"\"Min-max scale a contour, plot its first column and append it as one CSV row.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    test : array-like with two columns\n",
"        Contour points. Only column 0 of the scaled result is kept (the\n",
"        column ``extract_signal_leads`` plots on the vertical axis).\n",
"    lead_no : int\n",
"        Lead index; file names use ``lead_no + 1``.\n",
"    folder_name : str\n",
"        Sub-folder of ``parent`` that receives the plot; its first two\n",
"        characters fill the ``Target`` column.\n",
"    parent : str\n",
"        Directory holding ``folder_name`` and ``scaled_data_1D_<n>.csv``.\n",
"\n",
"    Side effects\n",
"    ------------\n",
"    Saves ``ID_Lead_<n>_Signal.png`` and appends one row to the CSV, writing\n",
"    the header only when the CSV does not exist yet.\n",
"    \"\"\"\n",
"    target = folder_name[0:2]\n",
"    #scaling the data\n",
"    scaler = MinMaxScaler()\n",
"    fit_transform_data = scaler.fit_transform(test)\n",
"    Normalized_Scaled = pd.DataFrame(fit_transform_data[:,0], columns=['X'])\n",
"\n",
"    plt.ioff()\n",
"    fig6, ax6 = plt.subplots()\n",
"    ax6.invert_yaxis()\n",
"    ax6.plot(Normalized_Scaled,linewidth=1,color='black',linestyle='solid')\n",
"    #save first, then release only this figure\n",
"    fig6.savefig('{parent}/{folder_name}/ID_Lead_{lead_no}_Signal.png'.format(folder_name=folder_name,lead_no=lead_no+1,parent=parent))\n",
"    plt.close(fig6)\n",
"\n",
"    #one row per call: the scaled values followed by the target label\n",
"    Normalized_Scaled = Normalized_Scaled.T\n",
"    Normalized_Scaled['Target'] = target\n",
"\n",
"    #scaled_data to CSV: append without header if the file exists, else create it\n",
"    csv_path = '{parent}/scaled_data_1D_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
"    file_exists = os.path.isfile(csv_path)\n",
"    Normalized_Scaled.to_csv(csv_path, mode='a' if file_exists else 'w', header=not file_exists, index=False)\n",
" \n",
"#load the different types of folders\n",
"normal_parent_dir = '../artifacts/ECG_Images/Normal Person ECG Images (284x12=3408)/'\n",
"abnormal_parent_dir = '../artifacts/ECG_Images/ECG Images of Patient that have abnormal heartbeat (233x12=2796)'\n",
"MI_parent_dir = '../artifacts/ECG_Images/ECG Images of Myocardial Infarction Patients (240x12=2880)'\n",
"MI_history_parent_dir = '../artifacts/ECG_Images/ECG Images of Patient that have History of MI (172x12=2064)'\n",
"\n",
"#ECG categories to process, each mapped to its image folder. Will work on COVID-19 later\n",
"Types_ECG = {'Abnormal_hear_beat':abnormal_parent_dir,'MI':MI_parent_dir,'History_MI':MI_history_parent_dir,'Normal':normal_parent_dir}\n",
"\n",
"#Types_ECG_test = {'Abnormal':abnormal_parent_dir}\n",
"\n",
"#### parse all the files in the different folders and perform all the above steps as mentioned in the analysis sample.\n",
"\n",
"#only these extensions are treated as ECG images; the pipeline writes lead\n",
"#sub-folders and CSV files into the same directories, and those must be skipped\n",
"IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')\n",
"\n",
"#loop through folders/files and create separate images of the different leads\n",
"for types,folder in Types_ECG.items():\n",
"    #sorted() makes the processing order (and so the CSV row order) reproducible\n",
"    for files in sorted(os.listdir(folder)):\n",
"        if not files.lower().endswith(IMAGE_EXTENSIONS):\n",
"            continue\n",
"        if not os.path.isfile(os.path.join(folder, files)):\n",
"            continue\n",
"        Convert_Image_Lead(files,folder)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment