Modified time series model

f00d7c27 · Hirunika R.A.S. · b9ac1201 · f00d7c27
Commit f00d7c27 authored Jan 25, 2023 by Hirunika R.A.S.
Hide whitespace changes
Inline Side-by-side

Showing with 108 additions and 0 deletions

BackEnd/time_series_models_builder.py BackEnd/time_series_models_builder.py +108 -0

No files found.
--- a/BackEnd/time_series_models_builder.py
+++ b/BackEnd/time_series_models_builder.py
@@ -61,7 +61,115 @@ class TimeSeriesModel:
      self.df[column] = pd.to_numeric(self.df[column] , errors = 'coerce')
    print(" =========== Non Numeric Values Set to Nan ===========")
  
+  def handle_Null_values(self , method = "fillMean"):
+    '''
+    Handle the missing values of every column of self.df in place.
+
+    AVAILABLE METHODS :-
+    dropna   - removes any rows that contain missing or null values
+    fillMean - fills the missing values with the mean of the column
+    ffill    - fills the missing values with the last non-NA/null value in the column
+    bfill    - fills the missing values with the next non-NA/null value in the column
+    fill0    - fills the missing values with 0
+
+    Any other method leaves self.df untouched and prints a warning.
+
+    Returns the number of null values remaining in each column
+    (the result of self.df.isnull().sum()).
+    '''
+    print(" =========== handling Null Values - Started ===========")
+
+    # Column-wise strategies; each one maps a column to its filled version.
+    fillers = {
+      "fillMean": lambda col: col.fillna(col.mean()),
+      "ffill": lambda col: col.ffill(),
+      "bfill": lambda col: col.bfill(),
+      "fill0": lambda col: col.fillna(0),
+    }
+    fill = fillers.get(method) if isinstance(method , str) else None
+
+    print("Applying For Columns -->" , end=" | ")
+    for colname in self.df.columns:
+      print(colname , end=" | ")
+      if fill is not None:
+        self.df[colname] = fill(self.df[colname])
+
+    if fill is None:
+      if isinstance(method , str) and method == "dropna":
+        # Rows have to be dropped on the whole frame: assigning a per-column
+        # dropna() result back re-aligns on the index and restores the NaNs.
+        print("Dropping rows with Null Values")
+        self.df = self.df.dropna()
+      else:
+        print(" ************ SOMETHING WENT WRONG ************")
+
+    print("\n=========== handling Null Values - Completed ===========")
+    return self.df.isnull().sum()
+
+  def convert_to_time_series(self , date_format = '%m/%d/%Y'):
+    '''
+    Turn self.df into a time-indexed, all-float DataFrame.
+
+    The first column of self.df is parsed as dates, becomes the index,
+    the rows are sorted by that index and every remaining column is
+    cast to float.
+
+    date_format - strptime-style format of the first column
+                  (default '%m/%d/%Y', e.g. 01/25/2023)
+
+    Raises whatever pd.to_datetime / astype raise when a value does not
+    match the format or cannot be converted to float.
+    '''
+    date_column = self.df.columns[0]
+    self.df[date_column] = pd.to_datetime(self.df[date_column], format=date_format)
+    self.df = self.df.set_index(date_column).sort_index().astype('float')
+    print(" =========== DataFrame Converted to a Time Series")
+
+  def view_past_data(self , district = "Kalpitiya"):
+    '''
+    Plot every value of one column of self.df as individual points.
+
+    district - name of the self.df column to plot (default "Kalpitiya")
+
+    Side effects: switches the global matplotlib style to
+    'fivethirtyeight' and opens the figure with plt.show().
+    Returns nothing.
+    '''
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+
+    plt.style.use('fivethirtyeight')
+    first_colour = sns.color_palette()[0]
+
+    self.df[district].plot(style='.',
+        figsize=(15, 5),
+        color=first_colour,
+        title='{} Data'.format(district))
+    plt.show()
+    
+  def perform_train_test_split(self):
+    '''
+    Split self.df chronologically at self.train_test_split_date.
+
+    Rows whose index is strictly before the split date are stored in
+    self.train; rows on or after it are stored in self.test.
+    '''
+    cutoff = self.train_test_split_date
+    timestamps = self.df.index
+    before_cutoff = timestamps < cutoff
+    from_cutoff_on = timestamps >= cutoff
+    self.train = self.df.loc[before_cutoff]
+    self.test = self.df.loc[from_cutoff_on]
+    print("=========== Time Series Splited as Train and Test datasets ===========")
+
+  def plot_train_test_split(self , district = "Kalpitiya"):
+    '''
+    Plot the train and test portions of one column on shared axes.
+
+    district - name of the column to plot (default "Kalpitiya")
+
+    Reads self.train, self.test and self.train_test_split_date; a dashed
+    black vertical line is drawn at the split date. Opens the figure
+    with plt.show() and returns nothing.
+    '''
+    import matplotlib.pyplot as plt
+
+    _, ax = plt.subplots(figsize=(15, 5))
+    self.train[district].plot(ax=ax, label='Training Set', title='Data Train/Test Split')
+    self.test[district].plot(ax=ax, label='Test Set')
+    ax.axvline(self.train_test_split_date, color='black', ls='--')
+    ax.legend(['Training Set', 'Test Set'])
+    plt.show()
  
+  def plot_week_data(self, starts_from , district = "Kalpitiya"):
+    '''
+    Plot seven days of one column of self.df.
+
+    starts_from - first day of the window (anything pd.Timestamp accepts)
+    district    - name of the column to plot (default "Kalpitiya")
+
+    The window is starts_from <= index < starts_from + 7 days. The
+    computed end date is printed, the figure is opened with plt.show()
+    and nothing is returned.
+    '''
+    import pandas as pd
+    import matplotlib.pyplot as plt
+
+    # Computed directly rather than via pd.date_range(..., closed='right'):
+    # the `closed` keyword was removed from date_range in pandas 2.0.
+    end_date = pd.Timestamp(starts_from) + pd.DateOffset(days=7)
+    print(end_date)
+
+    in_week = (self.df.index >= starts_from) & (self.df.index < end_date)
+    self.df[district].loc[in_week].plot(figsize=(15, 5), title='Week Of Data')
+    plt.show()
+  
+  def create_features(self , tempDf  , trainOrTest = "train"):
+    """
+    Derive calendar features from the index of tempDf, one frame per column.
+
+    For every column name in self.df, the matching column of tempDf is
+    copied into a new frame and extended with dayofweek, quarter, month,
+    year, dayofyear, dayofmonth and weekofyear taken from its index.
+
+    The frames are stored by column name in
+    self.feature_train_dataframes  when trainOrTest == "train",
+    self.features_test_dataframes  when trainOrTest == "test",
+    self.feature_dataframes        for any other value.
+    """
+    if(trainOrTest=="train"):
+      store_name = "feature_train_dataframes"
+    elif(trainOrTest=="test"):
+      store_name = "features_test_dataframes"
+    else:
+      store_name = "feature_dataframes"
+
+    for colname in self.df.columns:
+      series = tempDf[colname]
+      stamps = series.index
+      enriched = series.copy().to_frame().assign(
+        dayofweek=stamps.dayofweek,
+        quarter=stamps.quarter,
+        month=stamps.month,
+        year=stamps.year,
+        dayofyear=stamps.dayofyear,
+        dayofmonth=stamps.day,
+        weekofyear=stamps.isocalendar().week,
+      )
+      getattr(self , store_name)[colname] = enriched
+
+    print("=========== Creating Features Completed For {} Dataset===========".format(trainOrTest))