Commit 470d861e authored by Hirunika R.A.S.

Time series model completed

parent f00d7c27
......@@ -172,7 +172,104 @@ class TimeSeriesModel:
#return self.feature_dataframes
def plot_featured_data(self, feature='dayofweek', district="Monaragala"):
    """Show a box plot of one district's values grouped by a feature column.

    Args:
        feature: Name of the column of ``self.feature_dataframes[district]``
            used for the x axis.
        district: Key into ``self.feature_dataframes``; the column of the
            same name in that dataframe is plotted on the y axis.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    fig, ax = plt.subplots(figsize=(10, 8))
    # Draw on the axes created above explicitly rather than relying on
    # matplotlib's implicit "current axes" state.
    sns.boxplot(data=self.feature_dataframes[district], x=feature, y=district, ax=ax)
    ax.set_title('Showing {} Data by {}'.format(district, feature))
    plt.show()
def create_models(self, pause_seconds=3):
    """Train one XGBoost regressor per district and keep it on the instance.

    ``self.create_features`` is called for the train and the test split
    first; per the original author's note this builds dictionaries holding a
    feature dataframe for every district. Then, for every column of
    ``self.df``, an ``XGBRegressor`` is fitted on the features listed in
    ``FEATURES`` with the district column as target.

    Results are stored in:
        ``self.model_x_tests[district]``: the test feature matrix.
        ``self.ts_models[district]``: the fitted regressor.

    Args:
        pause_seconds: Seconds to wait after each district is trained. The
            default of 3 keeps the original pacing; pass 0 to disable it.
    """
    import time
    import xgboost as xgb

    # This creates two dictionaries, each containing feature dataframes for
    # all the districts.
    self.create_features(self.train, trainOrTest="train")
    self.create_features(self.test, trainOrTest="test")

    FEATURES = ['dayofyear', 'dayofweek', 'quarter', 'month', 'year']

    for district in self.df.columns:
        TARGET = district

        # NOTE(review): the attribute names differ in spelling
        # ("feature_train_..." vs "features_test_..."); they are populated
        # outside this block - presumably by create_features. Confirm.
        train_frame = self.feature_train_dataframes[district]
        test_frame = self.features_test_dataframes[district]

        X_train = train_frame[FEATURES]
        y_train = train_frame[TARGET]
        X_test = test_frame[FEATURES]
        y_test = test_frame[TARGET]

        self.model_x_tests[district] = X_test

        print("=========== Start Creating XGBooster for {} ===========".format(district))
        # 'reg:squarederror' is the current name of the objective formerly
        # called 'reg:linear' (deprecated alias); the loss is the same.
        reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
                               n_estimators=1000,
                               early_stopping_rounds=50,
                               objective='reg:squarederror',
                               max_depth=3,
                               learning_rate=0.01)
        print("=========== Model Created ===========")

        print("=========== Model Training : Started ===========")
        # The last eval_set entry (the test split) is the one XGBoost
        # monitors for early stopping.
        reg.fit(X_train, y_train,
                eval_set=[(X_train, y_train), (X_test, y_test)],
                verbose=100)
        print("=========== Model Training : Completed ===========")

        self.ts_models[district] = reg
        print("=========== Model Saved Successfully : {} District ===========".format(district))

        if pause_seconds > 0:
            time.sleep(pause_seconds)
def check_feature_importance(self, district="Monaragala"):
    """Show a horizontal bar chart of a district model's feature importances.

    Args:
        district: Key into ``self.ts_models`` selecting the fitted model.

    The model's ``feature_importances_`` are indexed by its
    ``feature_names_in_``, sorted ascending and plotted; the figure is
    displayed with ``plt.show()``. Nothing is returned.
    """
    import pandas as pd
    import matplotlib.pyplot as plt

    reg = self.ts_models[district]
    fi = pd.DataFrame(data=reg.feature_importances_,
                      index=reg.feature_names_in_,
                      columns=['importance'])
    fi.sort_values('importance').plot(kind='barh', title='Feature Importance')
    plt.show()
def forecaset_data(self, district="Monaragala"):
    """Plot a district's full series with the model's test-set predictions.

    Args:
        district: Key selecting the fitted model (``self.ts_models``), the
            test feature matrix (``self.model_x_tests``), the test dataframe
            (``self.features_test_dataframes``) and the full feature
            dataframe (``self.feature_dataframes``).

    Predictions are computed for the test rows only and left-joined onto the
    full dataframe by index, so rows outside the test split have no
    prediction. The figure is displayed with ``plt.show()``; nothing is
    returned.
    """
    import matplotlib.pyplot as plt

    reg = self.ts_models[district]
    X_test = self.model_x_tests[district].copy()
    test = self.features_test_dataframes[district].copy()
    df = self.feature_dataframes[district].copy()

    # Get the predictions for the testing dataset.
    test['prediction'] = reg.predict(X_test)
    df = df.merge(test[['prediction']], how='left', left_index=True, right_index=True)

    ax = df[[district]].plot(figsize=(15, 5))
    df['prediction'].plot(ax=ax, style='.')
    plt.legend(['Truth Data', 'Predictions'])
    ax.set_title('Raw Data and Prediction - {} District'.format(district))
    plt.show()
def forecast_week(self, starts_from, district="Monaragala"):
    """Plot one week of a district's true values against model predictions.

    Args:
        starts_from: Start of the window; anything ``pd.Timestamp`` accepts
            (for example a date string).
        district: Key selecting the fitted model and the district's
            dataframes on this instance.

    The window runs from ``starts_from`` to seven days later. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    import pandas as pd
    import matplotlib.pyplot as plt

    # Same end point the original derived from an 8-period daily range with
    # the left edge dropped (start + 7 days), without the ``closed`` keyword
    # that newer pandas releases no longer accept.
    start = pd.Timestamp(starts_from)
    end_date = start + pd.Timedelta(days=7)

    # Build the truth-plus-prediction frame here: the method previously read
    # a ``df`` name that was never defined in its scope.
    reg = self.ts_models[district]
    test = self.features_test_dataframes[district].copy()
    test['prediction'] = reg.predict(self.model_x_tests[district])
    df = self.feature_dataframes[district].merge(
        test[['prediction']], how='left', left_index=True, right_index=True)

    # NOTE(review): both bounds are exclusive, as in the original; for data
    # sampled once a day this shows six points - confirm that is intended.
    in_week = (df.index > start) & (df.index < end_date)
    week = df.loc[in_week]

    ax = week[district].plot(figsize=(15, 5), title='Week Of Data')
    week['prediction'].plot(ax=ax, style='.')
    plt.legend(['Truth Data', 'Prediction'])
    plt.show()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment