-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathForecast_App.py
172 lines (142 loc) · 8.97 KB
/
Forecast_App.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
############################################################################
### ###
## Time Series Forecast Application on Streamlit using Facebook Prophet ##
### ###
## Built by Philip Mathew ##
############################################################################
import streamlit as st
import pandas as pd
import numpy as np
import fbprophet
import altair as alt
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import base64
from fbprophet.plot import plot_plotly, plot_components_plotly
# CSV Download link generation function
def download_link(object_to_download, download_filename, download_link_text):
if isinstance(object_to_download,pd.DataFrame):
object_to_download = object_to_download.to_csv()
# strings <-> bytes conversions
b64 = base64.b64encode(object_to_download.encode()).decode()
return f'<a href="data:file/txt;base64,{b64}" download="{download_filename}">{download_link_text}</a>'
# main function
def main():
# title & header
st.title("Time Series Forecast Application")
st.markdown("Built by [Philip Mathew](https://www.linkedin.com/in/philipmathew97/)")
st.write("Data Science web application for automated forecasting of univariate time-series data.")
st.write("Univariate time-series dataset (csv) can be uploaded below and forecast period can be inputed for automated forecasting using facebook prophet library.")
st.markdown("Prophet forecasts time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is also robust to missing data and shifts in the trend, and typically handles outliers well.")
st.markdown("---")
st.header('Input Time Series Dataset')
# Streamlit file upload - csv only
data_file = st.file_uploader("Upload CSV with two columns (Date/Time & Variable):",type=['csv'])
df_comb = None
if data_file is not None:
df = pd.read_csv(data_file) # Reading csv
columns = list(df.columns.values) # Extracting column names
if len(columns) > 2 or len(columns) < 2 : # Limiting csv's to two columns for forecast
st.write("Error: Please input a csv file with only 2 columns, namely date/time & variable to forecast.")
st.stop()
# Extracting respective column names from df
elif len(columns) == 2 :
val = df.select_dtypes(include=np.number).columns.tolist() # Extracting variable column
date = columns
date.remove(val[0]) # Extracting date/time column
df = df[[date[0],val[0]]] # Reording df columns with date on left
df_fr = df
df_fr['ds'] = df_fr[date[0]] # Column 'ds' for Prophet to use
df_fr['y'] = df_fr[val[0]] # Column 'y' for Prophet to use
st.write('Columns', date[0], '&', val[0], ' were detected in dataset')
df = df.rename(columns={date[0]:'index'}).set_index('index')
st.subheader("Input Dataset")
st.dataframe(df[val[0]])
st.subheader("Summary Statistics")
st.write(df[val[0]].describe())
ca = alt.Chart(df.reset_index()).encode(x = alt.X('index:T', axis=alt.Axis(title=
date[0])),y=val[0]).interactive()
st.subheader("Time Series Plot")
st.altair_chart(ca.mark_line(color='#4267B2').properties(width=1100,height=400))
st.markdown("---")
# Forecast frequency selection
st.header('Select Forecast Frequency')
freq = st.selectbox('Select the frequency of forecast:', ['Day', 'Week', 'Month', 'Quarter', 'Year', '1 Second', '1 Minute', '1 Hour'])
if freq == 'Day':
freq = 'D'
elif freq == 'Week':
freq = 'W'
elif freq == 'Month':
freq = 'M'
elif freq == 'Quarter':
freq = 'Q'
elif freq == 'Year':
freq = 'Y'
elif freq == '1 Second':
freq = 1
elif freq == '1 Minute':
freq = 60
elif freq == '1 Hour':
freq = 3600
st.markdown("---")
# Forecast duration selection
st.header('Select Forecast Period')
dur = st.number_input('Enter number of periods (of frequency) to forecast in future:', min_value = 1, step = 1)
st.markdown("---")
# Forecast Button
if st.button("Forecast"):
if data_file is not None:
st.spinner()
with st.spinner(text='Prophet is forecasting....'):
# Data-Prep: (In testing)
# Applying Box-Cox Transform to value column and assigning to new column y
# Box-Cox Transform evaluates a set of lambda coefficients (λ) and selects the
# value that achieves the best approximation of normality
# Box-Cox will select the λ that maximizes the log-likelihood function
#df_fr['y'], lam = boxcox(df_fr[val[0]])
# Initiating Prophet:
ml = fbprophet.Prophet() # Creating an instance of Prophet object
ml.fit(df_fr) # Fitting prophet object to dataset
# Making future datapoints based on forecast duration input (dur):
future_points = ml.make_future_dataframe(periods= dur, freq = freq)
# Forecasting:
forecast = ml.predict(future_points)
# Components of forecast
comp = plot_components_plotly(ml, forecast)
fig = plot_plotly(ml, forecast)
# Applying Inverse Box-Cox Transform
#forecast[['yhat','yhat_upper','yhat_lower','trend','trend_lower','trend_upper']] = forecast[['yhat','yhat_upper','yhat_lower', 'trend','trend_lower','trend_upper']].apply(lambda x: inv_boxcox(x, lam))
# Reshaping historical + forecast:
future_forecast = forecast.tail(dur)
future_forecast = future_forecast[['ds','yhat','yhat_upper','yhat_lower']]
future_forecast['Type'] = 'Forecast'
future_forecast = future_forecast.rename(columns={'ds':'index', 'yhat':val[0]}).set_index('index')
df_fr['yhat_upper'] = 0
df_fr['yhat_lower'] = 0
df_fr['Type'] = 'Historic'
df_fr = df_fr.rename(columns={date[0]:'index'}).set_index('index')
df_fr = df_fr[[val[0],'yhat_upper','yhat_lower','Type']]
df_comb = df_fr.append(future_forecast)
# Plotting Forecast
rng = ['#4267B2','firebrick']
dom = ['Historic', 'Forecast']
base = alt.Chart(df_comb.reset_index()).encode(x = alt.X('index:T', axis=alt.Axis(title= date[0])))
ml_plot = base.mark_line(interpolate='monotone').encode(y=val[0],color=alt.Color('Type', scale=alt.Scale(domain=dom, range=rng)),).interactive()
area = base.mark_area(opacity=0.5,color='grey').encode(alt.Y('yhat_upper', axis=alt.Axis(title='')), alt.Y2('yhat_lower')).interactive()
# Processing complete
st.success('Done')
st.markdown("---")
st.header('Time Series Forecast')
# Plotting
st.plotly_chart(fig)
st.subheader("Forecast Components")
st.plotly_chart(comp)
st.subheader("Time Series Plot with Forecast")
st.altair_chart(alt.layer(ml_plot,area).properties(width=1100,height=400))
# Download link for forecast (csv)
tmp_download_link = download_link(df_comb, 'Data_w_forecast.csv', 'Click here to download your forecast!')
st.subheader("Download Forecast (csv file)")
st.markdown(tmp_download_link, unsafe_allow_html=True)
st.markdown("---")
if __name__ == '__main__':
main()