Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
22_23-J 21
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
22_23-J 21
22_23-J 21
Commits
f00d7c27
Commit
f00d7c27
authored
Jan 25, 2023
by
Hirunika R.A.S.
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Modified time series model
parent
b9ac1201
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
108 additions
and
0 deletions
+108
-0
BackEnd/time_series_models_builder.py
BackEnd/time_series_models_builder.py
+108
-0
No files found.
BackEnd/time_series_models_builder.py
View file @
f00d7c27
...
...
@@ -61,7 +61,115 @@ class TimeSeriesModel:
self
.
df
[
column
]
=
pd
.
to_numeric
(
self
.
df
[
column
]
,
errors
=
'coerce'
)
print
(
" =========== Non Numeric Values Set to Nan ==========="
)
def handle_Null_values(self, method="fillMean"):
    '''
    Fill or drop the missing values of ``self.df`` and report what is left.

    AVAILABLE METHODS :-
        dropna   - removes any rows that contain missing or null values
        fillMean - fills the missing values of a column with that column's mean
        ffill    - fills the missing values with the last non-NA/null value in the column
        bfill    - fills the missing values with the next non-NA/null value in the column
        fill0    - fills the missing values with 0

    Any other value leaves the data untouched and only prints a warning.

    Returns the number of null values remaining in each column
    (``self.df.isnull().sum()``).
    '''
    # Column-wise strategies; "dropna" is row-wise and handled separately below.
    fillers = {
        "fillMean": lambda column: column.fillna(column.mean()),
        "ffill": lambda column: column.ffill(),
        "bfill": lambda column: column.bfill(),
        "fill0": lambda column: column.fillna(0),
    }

    print(" =========== handling Null Values - Started ===========")
    print("Applying For Columns -->", end=" | ")
    for colname in self.df.columns:
        print(colname, end=" | ")

    if method == "dropna":
        print("Dropping rows with Null Values")
        # Rows must be dropped on the frame itself: assigning a shortened
        # column back re-aligns it on the index and restores the NaNs.
        self.df = self.df.dropna()
    elif method in fillers:
        fill = fillers[method]
        for colname in self.df.columns:
            self.df[colname] = fill(self.df[colname])
    else:
        print(" ************ SOMETHING WENT WRONG ************")

    print("\n=========== handling Null Values - Completed ===========")
    return self.df.isnull().sum()
def convert_to_time_series(self):
    """
    Turn ``self.df`` into a date-indexed, all-float time series.

    The first column is parsed as dates written month/day/year
    (``%m/%d/%Y``) and becomes the index; the rows are then sorted by that
    index and every remaining column is cast to float.  ``self.df`` is
    replaced with the result; nothing is returned.
    """
    date_column = self.df.columns[0]
    self.df[date_column] = pd.to_datetime(self.df[date_column], format='%m/%d/%Y')
    self.df = self.df.set_index(date_column)
    self.df = self.df.sort_index()
    self.df = self.df.astype('float')
    print(" =========== DataFrame Converted to a Time Series")
def view_past_data(self, district="Kalpitiya"):
    """
    Show a dot plot of the full history of one column of ``self.df``.

    district : name of the column to plot (defaults to "Kalpitiya").

    Opens a matplotlib window via ``plt.show()``; nothing is returned.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    # The palette is read before the style switch, exactly as before, so the
    # dots keep the colour they had.
    color_pal = sns.color_palette()
    # NOTE(review): style.use changes matplotlib's process-wide style, so
    # plots drawn after this call inherit it too - confirm that is intended.
    plt.style.use('fivethirtyeight')
    self.df[district].plot(
        style='.',
        figsize=(15, 5),
        color=color_pal[0],
        title='{} Data'.format(district),
    )
    plt.show()
def perform_train_test_split(self):
    """
    Cut ``self.df`` in two at ``self.train_test_split_date``.

    Rows whose index is strictly before the split date are stored in
    ``self.train``; rows on or after it are stored in ``self.test``.
    Nothing is returned.
    """
    cutoff = self.train_test_split_date
    frame = self.df

    earlier = frame.index < cutoff
    self.train = frame.loc[earlier]

    on_or_after = frame.index >= cutoff
    self.test = frame.loc[on_or_after]

    print("=========== Time Series Splited as Train and Test datasets ===========")
def plot_train_test_split(self, district="Kalpitiya"):
    """
    Plot the training and test parts of one column on a shared axis.

    district : name of the column to plot (defaults to "Kalpitiya").

    Reads ``self.train`` and ``self.test`` and marks
    ``self.train_test_split_date`` with a dashed vertical line.  Opens a
    matplotlib window via ``plt.show()``; nothing is returned.
    """
    import matplotlib.pyplot as plt

    _, ax = plt.subplots(figsize=(15, 5))
    self.train[district].plot(ax=ax, label='Training Set', title='Data Train/Test Split')
    self.test[district].plot(ax=ax, label='Test Set')
    ax.axvline(self.train_test_split_date, color='black', ls='--')
    ax.legend(['Training Set', 'Test Set'])
    plt.show()
def plot_week_data(self, starts_from, district="Kalpitiya"):
    """
    Plot seven days of one column of ``self.df``.

    starts_from : first day of the window (inclusive); any value pandas can
                  turn into a timestamp.
    district    : name of the column to plot (defaults to "Kalpitiya").

    The plotted window is ``starts_from <= index < starts_from + 7 days``.
    The computed end date is printed.  Opens a matplotlib window via
    ``plt.show()``; nothing is returned.
    """
    import pandas as pd
    import matplotlib.pyplot as plt

    # Same value as date_range(start, periods=8, freq='D', closed='right')[-1],
    # written as plain arithmetic because pandas 2.0 removed the `closed`
    # keyword from date_range.
    end_date = pd.Timestamp(starts_from) + pd.Timedelta(days=7)
    print(end_date)

    in_week = (self.df.index >= starts_from) & (self.df.index < end_date)
    self.df[district].loc[in_week].plot(figsize=(15, 5), title='Week Of Data')
    plt.show()
def create_features(self, tempDf, trainOrTest="train"):
    """
    Build calendar features from the index of ``tempDf``, one frame per column.

    For every column name in ``self.df.columns`` a new frame is made that
    holds that column of ``tempDf`` plus dayofweek, quarter, month, year,
    dayofyear, dayofmonth and weekofyear taken from the index.

    trainOrTest selects where the frames are stored, keyed by column name:
        "train"        -> self.feature_train_dataframes
        "test"         -> self.features_test_dataframes
        anything else  -> self.feature_dataframes
    """
    # Resolve the destination attribute name once; the attribute itself is
    # looked up per column, inside the loop.
    if trainOrTest == "train":
        target_name = "feature_train_dataframes"
    elif trainOrTest == "test":
        target_name = "features_test_dataframes"
    else:
        target_name = "feature_dataframes"

    for colname in self.df.columns:
        series = tempDf[colname]
        stamps = series.index

        featured = series.copy().to_frame()
        featured['dayofweek'] = stamps.dayofweek
        featured['quarter'] = stamps.quarter
        featured['month'] = stamps.month
        featured['year'] = stamps.year
        featured['dayofyear'] = stamps.dayofyear
        featured['dayofmonth'] = stamps.day
        featured['weekofyear'] = stamps.isocalendar().week

        getattr(self, target_name)[colname] = featured

    print("=========== Creating Features Completed For {} Dataset===========".format(trainOrTest))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment