initial commit

d6cade50 · Pamudi Naveesha · 81d4dbf1 · d6cade50
Commit d6cade50 authored Feb 20, 2024 by Pamudi Naveesha
Hide whitespace changes
Inline Side-by-side

Showing with 422 additions and 0 deletions

ECG_Analysis_Sample.ipynb ECG_Analysis_Sample.ipynb +422 -0

No files found.
--- a/ECG_Analysis_Sample.ipynb
+++ b/ECG_Analysis_Sample.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### **FUNCTION TO EXTRACT IMAGE LEADS(1-13) (FEATURE EXTRACTION)**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b67bd485-758c-4399-8bb2-b4a13485786f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"#### **FUNCTION FOR  IMAGE LEADS(1-13) PRE-PROCESSING**\"\"\"\n",
+    "def Convert_Image_Lead(image_file,parent_folder):\n",
+    "    \"\"\"Crop one ECG image into its 13 lead regions, save each crop and process the leads.\n",
+    "\n",
+    "    Args:\n",
+    "        image_file: file name of the ECG image located inside parent_folder.\n",
+    "        parent_folder: directory that contains the image. The crops are written to a\n",
+    "            sub-folder of it named after the image file without its extension.\n",
+    "    \"\"\"\n",
+    "    #read the image\n",
+    "    image=imread('{parent}/{image_file}'.format(parent=str(parent_folder),image_file=str(image_file)),plugin='matplotlib')\n",
+    "\n",
+    "    #crop boxes in pixels: leads 1-12 sit on a 3x4 grid, lead 13 is the long strip below it\n",
+    "    row_bands=[(300,600),(600,900),(900,1200)]\n",
+    "    col_bands=[(150,643),(646,1135),(1140,1626),(1630,2125)]\n",
+    "    Leads=[image[top:bottom,left:right] for top,bottom in row_bands for left,right in col_bands]\n",
+    "    Leads.append(image[1250:1480,150:2125])\n",
+    "\n",
+    "    #folder_name to store lead_images: the file name without its extension.\n",
+    "    #splitext removes only the real extension, whatever it is; the previous\n",
+    "    #re.sub('.jpg', ...) pattern matched any character followed by 'jpg' anywhere in the name\n",
+    "    folder_name=os.path.splitext(str(image_file))[0]\n",
+    "    output_dir=os.path.join(str(parent_folder),folder_name)\n",
+    "    #create the output folder once, before the loop\n",
+    "    os.makedirs(output_dir,exist_ok=True)\n",
+    "\n",
+    "    #figures are only written to disk, so interactive display is switched off\n",
+    "    plt.ioff()\n",
+    "\n",
+    "    #loop through leads and create separate images\n",
+    "    for lead_index,lead_image in enumerate(Leads,start=1):\n",
+    "        fig,ax=plt.subplots()\n",
+    "        ax.imshow(lead_image)\n",
+    "        ax.axis('off')\n",
+    "        ax.set_title('Leads {0}'.format(lead_index))\n",
+    "        #save first, then close only this figure so other open figures are untouched\n",
+    "        fig.savefig(os.path.join(output_dir,'Lead_{x}_Signal.png'.format(x=lead_index)))\n",
+    "        plt.close(fig)\n",
+    "\n",
+    "    extract_signal_leads(Leads,folder_name,parent_folder)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### **FUNCTION FOR IMAGE LEADS(1-13) PRE-PROCESSING & EXTRACTING SIGNAL CONTOURS**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9cc376d9-c1d7-4c4e-87b8-1b569d451f0a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#extract_only signal from images\n",
+    "def extract_signal_leads(Leads,folder_name,parent):\n",
+    "  \"\"\"Binarise every lead image, trace its longest contour and export it as a 1D signal.\n",
+    "\n",
+    "  For each lead a thresholded image and a contour image are saved under\n",
+    "  '<parent>/<folder_name>/', then the contour is handed to scale_csv_1D.\n",
+    "\n",
+    "  Args:\n",
+    "      Leads: sequence of lead images, each passed to color.rgb2gray; index 12 is not resized.\n",
+    "      folder_name: name of the output sub-folder inside parent (must already exist).\n",
+    "      parent: directory that contains folder_name.\n",
+    "  \"\"\"\n",
+    "  #figures are only written to disk, so interactive display is switched off\n",
+    "  plt.ioff()\n",
+    "\n",
+    "  #looping through image list containing all leads from 1-13\n",
+    "  for lead_no,lead_image in enumerate(Leads):\n",
+    "    #converting to gray scale\n",
+    "    grayscale = color.rgb2gray(lead_image)\n",
+    "    #smoothing image\n",
+    "    blurred_image = gaussian(grayscale,sigma=0.7)\n",
+    "    #using otsu thresholding for getting threshold value\n",
+    "    global_thresh = threshold_otsu(blurred_image)\n",
+    "    #creating binary image based on threshold: pixels below it become True\n",
+    "    binary_global = blurred_image < global_thresh\n",
+    "\n",
+    "    #resize image (every lead except the one at index 12)\n",
+    "    if lead_no!=12:\n",
+    "      binary_global = resize(binary_global, (300, 450))\n",
+    "\n",
+    "    #save the pre-processed image, then release only its own figure\n",
+    "    fig1 , ax1 = plt.subplots()\n",
+    "    ax1.imshow(binary_global,cmap='gray')\n",
+    "    ax1.axis('off')\n",
+    "    ax1.set_title('pre-processed Leads {} image'.format(lead_no+1))\n",
+    "    fig1.savefig('{parent}/{folder_name}/Lead_{x}_preprocessed_Signal.png'.format(folder_name=folder_name,x=lead_no+1,parent=parent))\n",
+    "    plt.close(fig1)\n",
+    "\n",
+    "    #find contours; without any there is no signal to export for this lead.\n",
+    "    #skipping avoids a NameError on the first lead and avoids re-exporting the\n",
+    "    #previous lead's contour on later ones\n",
+    "    contours = measure.find_contours(binary_global,0.8)\n",
+    "    if len(contours)==0:\n",
+    "      print('No contour found for lead {} in {}; lead skipped'.format(lead_no+1,folder_name))\n",
+    "      continue\n",
+    "\n",
+    "    fig7 , ax7 = plt.subplots()\n",
+    "    ax7.invert_yaxis()\n",
+    "\n",
+    "    #keep only the longest contour(s); the last one of that length is the one exported\n",
+    "    longest = max(len(contour) for contour in contours)\n",
+    "    for contour in contours:\n",
+    "      if len(contour)==longest:\n",
+    "        test = resize(contour, (255, 2))\n",
+    "        ax7.plot(test[:, 1], test[:, 0],linewidth=1,color='black')\n",
+    "    ax7.axis('image')\n",
+    "    ax7.set_title('Contour {} image'.format(lead_no+1))\n",
+    "    #save the image, then release only this figure\n",
+    "    fig7.savefig('{parent}/{folder_name}/Lead_{x}_Contour_Signal.png'.format(folder_name=folder_name,x=lead_no+1,parent=parent))\n",
+    "    plt.close(fig7)\n",
+    "\n",
+    "    #convert_csv and scale_csv are alternative exporters that take the same arguments\n",
+    "    scale_csv_1D(test,lead_no,folder_name,parent)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### **FUNCTIONS FOR CSV CONVERSION AND SCALING**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c28a9eb0-2c8a-4f30-b6d7-b5ae5f9a4fe7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def convert_csv(test,lead_no,folder_name,parent):\n",
+    "  \"\"\"Write a lead's contour points to '<parent>/<folder_name>/<lead_no+1>.csv'.\n",
+    "\n",
+    "  Args:\n",
+    "      test: two-column array of contour points, stored as columns 'X' and 'Y'.\n",
+    "      lead_no: zero-based lead index; the file name uses lead_no+1.\n",
+    "      folder_name: output sub-folder; its first two characters become the 'Target' label.\n",
+    "      parent: directory that contains folder_name.\n",
+    "  \"\"\"\n",
+    "  #convert contour to dataframe\n",
+    "  df = pd.DataFrame(test, columns = ['X','Y'])\n",
+    "  df['Target']=folder_name[0:2]\n",
+    "  #convert to CSV (no figure is created here: the one made previously was never used or closed)\n",
+    "  df.to_csv('{parent}/{folder_name}/{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent,folder_name=folder_name),index=False)\n",
+    "\n",
+    "def scale_csv(test,lead_no,folder_name,parent):\n",
+    "  \"\"\"Min-max scale a lead's contour points and add them to '<parent>/Scaled_<lead_no+1>.csv'.\n",
+    "\n",
+    "  Args:\n",
+    "      test: two-column array of contour points.\n",
+    "      lead_no: zero-based lead index; the file name uses lead_no+1.\n",
+    "      folder_name: its first two characters become the 'Target' label.\n",
+    "      parent: directory in which the CSV file is kept.\n",
+    "  \"\"\"\n",
+    "  label=folder_name[0:2]\n",
+    "  #scale the points, then transpose so that 'X' and 'Y' each become one row\n",
+    "  scaled_points=MinMaxScaler().fit_transform(test)\n",
+    "  scaled_rows=pd.DataFrame(scaled_points, columns = ['X','Y']).T\n",
+    "  scaled_rows['Target']=label\n",
+    "  #an existing file is extended without a header, a new file is written with one\n",
+    "  csv_path='{parent}/Scaled_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
+    "  already_there=os.path.isfile(csv_path)\n",
+    "  scaled_rows.to_csv(csv_path, mode='a' if already_there else 'w', header=not already_there,index=False)\n",
+    "\n",
+    "def scale_csv_1D(test,lead_no,folder_name,parent):\n",
+    "  \"\"\"Scale a lead's contour, plot its first column and add it as a row to a per-lead CSV.\n",
+    "\n",
+    "  The plot is saved as '<parent>/<folder_name>/ID_Lead_<lead_no+1>_Signal.png' and the row\n",
+    "  goes to '<parent>/scaled_data_1D_<lead_no+1>.csv'.\n",
+    "\n",
+    "  Args:\n",
+    "      test: two-column array of contour points.\n",
+    "      lead_no: zero-based lead index; file names use lead_no+1.\n",
+    "      folder_name: output sub-folder; its first two characters become the 'Target' label.\n",
+    "      parent: directory that contains folder_name and the CSV file.\n",
+    "  \"\"\"\n",
+    "  label=folder_name[0:2]\n",
+    "  #scaling the data; only the first column is kept as the 1D signal\n",
+    "  scaled_points=MinMaxScaler().fit_transform(test)\n",
+    "  signal=pd.DataFrame(scaled_points[:,0], columns = ['X'])\n",
+    "\n",
+    "  fig6, ax6 = plt.subplots()\n",
+    "  ax6.invert_yaxis()\n",
+    "  ax6.plot(signal,linewidth=1,color='black',linestyle='solid')\n",
+    "  plt.close('all')\n",
+    "  plt.ioff()\n",
+    "  fig6.savefig('{parent}/{folder_name}/ID_Lead_{lead_no}_Signal.png'.format(folder_name=folder_name,lead_no=lead_no+1,parent=parent))\n",
+    "\n",
+    "  #one row per image: the signal values followed by the label\n",
+    "  signal_row=signal.T\n",
+    "  signal_row['Target']=label\n",
+    "  #an existing file is extended without a header, a new file is written with one\n",
+    "  csv_path='{parent}/scaled_data_1D_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
+    "  already_there=os.path.isfile(csv_path)\n",
+    "  signal_row.to_csv(csv_path, mode='a' if already_there else 'w', header=not already_there,index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### **RUN THE BELOW CELL TO GENERATE THE NECESSARY FILES**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2afba58-fbb2-4381-a8f6-29b8f58bd3aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import re\n",
+    "from skimage.filters import threshold_otsu,gaussian\n",
+    "from skimage import measure\n",
+    "import pandas as pd\n",
+    "import numpy as nm\n",
+    "from sklearn.preprocessing import MinMaxScaler\n",
+    "from skimage.io import imread\n",
+    "from skimage import color\n",
+    "from skimage.transform import resize\n",
+    "from numpy import asarray\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "\"\"\"#### **NOW WE HAVE BOTH CSV FILES AND CROPPED LEAD IMAGES (1-13) TO WORK ON. WE CAN PERFORM CNN ON 1D & 2D IMAGES, RUN DIFFERENT SUPERVISED CLASSIFICATION ALGORITHMS ON THE CSV DATA (KNN, LOGISTIC REGRESSION, SVM, ETC.) AND TRY DIFFERENT CLUSTERING TECHNIQUES WITHOUT THE TARGET TO CHECK PREDICTIONS**\n",
+    "\n",
+    "### **NOW PERFORM DATA PREPROCESSING/FEATURE EXTRACTION ON  ALL THE FILES IN THE ECG_IMAGES FOLDER**\n",
+    "\n",
+    "#### **FUNCTION TO EXTRACT IMAGE LEADS(1-13) (FEATURE EXTRACTION)**\n",
+    "\"\"\"\n",
+    "\n",
+    "\"\"\"####**FUNCTIONS FOR CSV CONVERSION AND SCALING**\"\"\"\n",
+    "\n",
+    "#extract_only signal from images\n",
+    "def extract_signal_leads(Leads,folder_name,parent):\n",
+    "  \"\"\"Binarise every lead image, trace its longest contour and export it as a 1D signal.\n",
+    "\n",
+    "  For each lead a thresholded image and a contour image are saved under\n",
+    "  '<parent>/<folder_name>/', then the contour is handed to scale_csv_1D.\n",
+    "\n",
+    "  Args:\n",
+    "      Leads: sequence of lead images, each passed to color.rgb2gray; index 12 is not resized.\n",
+    "      folder_name: name of the output sub-folder inside parent (must already exist).\n",
+    "      parent: directory that contains folder_name.\n",
+    "  \"\"\"\n",
+    "  #figures are only written to disk, so interactive display is switched off\n",
+    "  plt.ioff()\n",
+    "\n",
+    "  #looping through image list containing all leads from 1-13\n",
+    "  for lead_no,lead_image in enumerate(Leads):\n",
+    "    #converting to gray scale\n",
+    "    grayscale = color.rgb2gray(lead_image)\n",
+    "    #smoothing image\n",
+    "    blurred_image = gaussian(grayscale,sigma=0.7)\n",
+    "    #using otsu thresholding for getting threshold value\n",
+    "    global_thresh = threshold_otsu(blurred_image)\n",
+    "    #creating binary image based on threshold: pixels below it become True\n",
+    "    binary_global = blurred_image < global_thresh\n",
+    "\n",
+    "    #resize image (every lead except the one at index 12)\n",
+    "    if lead_no!=12:\n",
+    "      binary_global = resize(binary_global, (300, 450))\n",
+    "\n",
+    "    #save the pre-processed image, then release only its own figure\n",
+    "    fig1 , ax1 = plt.subplots()\n",
+    "    ax1.imshow(binary_global,cmap='gray')\n",
+    "    ax1.axis('off')\n",
+    "    ax1.set_title('pre-processed Leads {} image'.format(lead_no+1))\n",
+    "    fig1.savefig('{parent}/{folder_name}/Lead_{x}_preprocessed_Signal.png'.format(folder_name=folder_name,x=lead_no+1,parent=parent))\n",
+    "    plt.close(fig1)\n",
+    "\n",
+    "    #find contours; without any there is no signal to export for this lead.\n",
+    "    #skipping avoids a NameError on the first lead and avoids re-exporting the\n",
+    "    #previous lead's contour on later ones\n",
+    "    contours = measure.find_contours(binary_global,0.8)\n",
+    "    if len(contours)==0:\n",
+    "      print('No contour found for lead {} in {}; lead skipped'.format(lead_no+1,folder_name))\n",
+    "      continue\n",
+    "\n",
+    "    fig7 , ax7 = plt.subplots()\n",
+    "    ax7.invert_yaxis()\n",
+    "\n",
+    "    #keep only the longest contour(s); the last one of that length is the one exported\n",
+    "    longest = max(len(contour) for contour in contours)\n",
+    "    for contour in contours:\n",
+    "      if len(contour)==longest:\n",
+    "        test = resize(contour, (255, 2))\n",
+    "        ax7.plot(test[:, 1], test[:, 0],linewidth=1,color='black')\n",
+    "    ax7.axis('image')\n",
+    "    ax7.set_title('Contour {} image'.format(lead_no+1))\n",
+    "    #save the image, then release only this figure\n",
+    "    fig7.savefig('{parent}/{folder_name}/Lead_{x}_Contour_Signal.png'.format(folder_name=folder_name,x=lead_no+1,parent=parent))\n",
+    "    plt.close(fig7)\n",
+    "\n",
+    "    #convert_csv and scale_csv are alternative exporters that take the same arguments\n",
+    "    scale_csv_1D(test,lead_no,folder_name,parent)\n",
+    "\n",
+    "\"\"\"#### **FUNCTION FOR  IMAGE LEADS(1-13) PRE-PROCESSING**\"\"\"\n",
+    "def Convert_Image_Lead(image_file, parent_folder):\n",
+    "    \"\"\"Crop one ECG image into its 13 lead regions, save each crop and process the leads.\n",
+    "\n",
+    "    Args:\n",
+    "        image_file: file name of the ECG image located inside parent_folder.\n",
+    "        parent_folder: directory that contains the image. The crops are written to a\n",
+    "            sub-folder of it named after the image file without its extension.\n",
+    "    \"\"\"\n",
+    "    # Read the image using matplotlib.image.imread\n",
+    "    image = plt.imread('{parent}/{image_file}'.format(parent=str(parent_folder), image_file=str(image_file)))\n",
+    "\n",
+    "    # Drop the alpha channel when there is one, so every crop is plain RGB.\n",
+    "    # The crops were previously taken from the unstripped image, which left this step without effect.\n",
+    "    # The ndim check keeps a 2-D (single channel) image from raising IndexError here.\n",
+    "    if image.ndim == 3 and image.shape[2] == 4:\n",
+    "        image = image[:, :, :3]\n",
+    "\n",
+    "    # Crop boxes in pixels: leads 1-12 sit on a 3x4 grid, lead 13 is the long strip below it\n",
+    "    row_bands = [(300, 600), (600, 900), (900, 1200)]\n",
+    "    col_bands = [(150, 643), (646, 1135), (1140, 1625), (1630, 2125)]\n",
+    "    Leads = [image[top:bottom, left:right] for top, bottom in row_bands for left, right in col_bands]\n",
+    "    Leads.append(image[1250:1480, 150:2125])\n",
+    "\n",
+    "    # Folder_name to store lead_images: the file name without its extension.\n",
+    "    # splitext removes only the real extension, whatever it is; the previous\n",
+    "    # re.sub('.png', ...) pattern matched any character followed by 'png' anywhere in the name\n",
+    "    folder_name = os.path.splitext(str(image_file))[0]\n",
+    "    output_dir = os.path.join(str(parent_folder), folder_name)\n",
+    "    # Create the output folder once, before the loop\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "\n",
+    "    # Figures are only written to disk, so interactive display is switched off\n",
+    "    plt.ioff()\n",
+    "\n",
+    "    # Loop through leads and create separate images\n",
+    "    for lead_index, lead_image in enumerate(Leads, start=1):\n",
+    "        fig, ax = plt.subplots()\n",
+    "        ax.imshow(lead_image)\n",
+    "        ax.axis('off')\n",
+    "        ax.set_title('Leads {0}'.format(lead_index))\n",
+    "        # Save first, then close only this figure so other open figures are untouched\n",
+    "        fig.savefig(os.path.join(output_dir, 'Lead_{x}_Signal.png'.format(x=lead_index)))\n",
+    "        plt.close(fig)\n",
+    "\n",
+    "    extract_signal_leads(Leads, folder_name, parent_folder)\n",
+    "\n",
+    "\n",
+    "\n",
+    "def convert_csv(test,lead_no,folder_name,parent):\n",
+    "  \"\"\"Write a lead's contour points to '<parent>/<folder_name>/<lead_no+1>.csv'.\n",
+    "\n",
+    "  Args:\n",
+    "      test: two-column array of contour points, stored as columns 'X' and 'Y'.\n",
+    "      lead_no: zero-based lead index; the file name uses lead_no+1.\n",
+    "      folder_name: output sub-folder; its first two characters become the 'Target' label.\n",
+    "      parent: directory that contains folder_name.\n",
+    "  \"\"\"\n",
+    "  #convert contour to dataframe\n",
+    "  df = pd.DataFrame(test, columns = ['X','Y'])\n",
+    "  df['Target']=folder_name[0:2]\n",
+    "  #convert to CSV (no figure is created here: the one made previously was never used or closed)\n",
+    "  df.to_csv('{parent}/{folder_name}/{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent,folder_name=folder_name),index=False)\n",
+    "\n",
+    "def scale_csv(test,lead_no,folder_name,parent):\n",
+    "  \"\"\"Min-max scale a lead's contour points and add them to '<parent>/Scaled_<lead_no+1>.csv'.\n",
+    "\n",
+    "  Args:\n",
+    "      test: two-column array of contour points.\n",
+    "      lead_no: zero-based lead index; the file name uses lead_no+1.\n",
+    "      folder_name: its first two characters become the 'Target' label.\n",
+    "      parent: directory in which the CSV file is kept.\n",
+    "  \"\"\"\n",
+    "  label=folder_name[0:2]\n",
+    "  #scale the points, then transpose so that 'X' and 'Y' each become one row\n",
+    "  scaled_points=MinMaxScaler().fit_transform(test)\n",
+    "  scaled_rows=pd.DataFrame(scaled_points, columns = ['X','Y']).T\n",
+    "  scaled_rows['Target']=label\n",
+    "  #an existing file is extended without a header, a new file is written with one\n",
+    "  csv_path='{parent}/Scaled_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
+    "  already_there=os.path.isfile(csv_path)\n",
+    "  scaled_rows.to_csv(csv_path, mode='a' if already_there else 'w', header=not already_there,index=False)\n",
+    "\n",
+    "def scale_csv_1D(test,lead_no,folder_name,parent):\n",
+    "  \"\"\"Scale a lead's contour, plot its first column and add it as a row to a per-lead CSV.\n",
+    "\n",
+    "  The plot is saved as '<parent>/<folder_name>/ID_Lead_<lead_no+1>_Signal.png' and the row\n",
+    "  goes to '<parent>/scaled_data_1D_<lead_no+1>.csv'.\n",
+    "\n",
+    "  Args:\n",
+    "      test: two-column array of contour points.\n",
+    "      lead_no: zero-based lead index; file names use lead_no+1.\n",
+    "      folder_name: output sub-folder; its first two characters become the 'Target' label.\n",
+    "      parent: directory that contains folder_name and the CSV file.\n",
+    "  \"\"\"\n",
+    "  label=folder_name[0:2]\n",
+    "  #scaling the data; only the first column is kept as the 1D signal\n",
+    "  scaled_points=MinMaxScaler().fit_transform(test)\n",
+    "  signal=pd.DataFrame(scaled_points[:,0], columns = ['X'])\n",
+    "\n",
+    "  fig6, ax6 = plt.subplots()\n",
+    "  ax6.invert_yaxis()\n",
+    "  ax6.plot(signal,linewidth=1,color='black',linestyle='solid')\n",
+    "  plt.close('all')\n",
+    "  plt.ioff()\n",
+    "  fig6.savefig('{parent}/{folder_name}/ID_Lead_{lead_no}_Signal.png'.format(folder_name=folder_name,lead_no=lead_no+1,parent=parent))\n",
+    "\n",
+    "  #one row per image: the signal values followed by the label\n",
+    "  signal_row=signal.T\n",
+    "  signal_row['Target']=label\n",
+    "  #an existing file is extended without a header, a new file is written with one\n",
+    "  csv_path='{parent}/scaled_data_1D_{lead_no}.csv'.format(lead_no=lead_no+1,parent=parent)\n",
+    "  already_there=os.path.isfile(csv_path)\n",
+    "  signal_row.to_csv(csv_path, mode='a' if already_there else 'w', header=not already_there,index=False)\n",
+    "  \n",
+    "#load the different types of folders\n",
+    "normal_parent_dir = '../artifacts/ECG_Images/Normal Person ECG Images (284x12=3408)/'\n",
+    "abnormal_parent_dir = '../artifacts/ECG_Images/ECG Images of Patient that have abnormal heartbeat (233x12=2796)'\n",
+    "MI_parent_dir = '../artifacts/ECG_Images/ECG Images of Myocardial Infarction Patients (240x12=2880)'\n",
+    "MI_history_parent_dir = '../artifacts/ECG_Images/ECG Images of Patient that have History of MI (172x12=2064)'\n",
+    "\n",
+    "#the four ECG classes processed below. COVID-19 images are not covered yet\n",
+    "Types_ECG = {'Abnormal_hear_beat':abnormal_parent_dir,'MI':MI_parent_dir,'History_MI':MI_history_parent_dir,'Normal':normal_parent_dir}\n",
+    "\n",
+    "#only image files are processed. Every run also writes lead sub-folders and\n",
+    "#scaled_data_1D_*.csv files into these same folders, so they must be skipped on a re-run\n",
+    "IMAGE_EXTENSIONS = ('.png','.jpg','.jpeg')\n",
+    "\n",
+    "#parse all the files in the different folders and perform the steps defined above\n",
+    "#loop through folder/files and create separate images of different leads\n",
+    "for types,folder in Types_ECG.items():\n",
+    "  #sorted so that the rows appended to the CSV files come in a reproducible order\n",
+    "  for files in sorted(os.listdir(folder)):\n",
+    "    if not os.path.isfile(os.path.join(folder, files)):\n",
+    "      continue\n",
+    "    if not files.lower().endswith(IMAGE_EXTENSIONS):\n",
+    "      continue\n",
+    "    #the image is read inside Convert_Image_Lead, so the file is not opened here\n",
+    "    Convert_Image_Lead(files,folder)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}