forked from netrasagarmal/Price-Prediction-SIH-2020
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
158 lines (124 loc) · 5.08 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#SARIMA MODEL
#STEP 1
#Importing the libraries
import csv
import warnings
import itertools
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
# Select only the columns needed for the forecast.
fields = ['Modal_Price', 'Price_Date']
df = pd.read_csv("pure_cotton.csv", skipinitialspace=True, usecols=fields)
# Parse the date column; values that cannot be parsed become NaT instead of
# raising (errors='coerce').
df.Price_Date = pd.to_datetime(df.Price_Date, errors='coerce')
# Rows whose date could not be parsed cannot be placed on the time axis, so
# drop them before the column becomes the index.
df = df.dropna(subset=['Price_Date'])
df = df.set_index('Price_Date')
data = df.copy()
# The 'MS' string groups the data in buckets by start of the month
y = data['Modal_Price'].resample('MS').mean()
# Months with no observations are NaN after resampling; bfill() fills each
# gap with the next available monthly mean.
y = y.bfill()
#----------------------------------------------------------------------------
#STEP 2
# Plot first and label afterwards: pandas sets the x label to the index name
# while plotting, so labels applied before the plot call would be overwritten.
ax = y.plot(figsize=(15, 6))
ax.set_title('Time Series for cotton prices')
ax.set_xlabel('Years')
ax.set_ylabel('Price in Rs.')
plt.show()
#----------------------------------------------------------------------------
#STEP 3 & 4
# Define the p, d and q parameters; range(0, 2) lets each take the value 0 or 1
p = d = q = range(0, 2)
# Generate all different combinations of p, d and q triplets
pdq = list(itertools.product(p, d, q))
# Generate all different combinations of seasonal P, D and Q triplets,
# each with a seasonal period of 12 (monthly data)
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in itertools.product(p, d, q)]
print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))
warnings.filterwarnings("ignore") # specify to ignore warning messages

# Grid search: fit every (order, seasonal_order) pair and keep the fit with
# the lowest AIC, so that `results` is the best candidate rather than merely
# the last combination that happened to fit.
best_aic = np.inf
best_order = None
best_seasonal_order = None
results = None
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            candidate = mod.fit()
        except Exception as exc:
            # Some combinations cannot be estimated; report and move on
            # instead of hiding the failure.
            print('SARIMA{}x{}12 - skipped: {}'.format(param, param_seasonal, exc))
            continue
        print('SARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, candidate.aic))
        # A NaN AIC never compares as smaller, so degenerate fits are ignored.
        if candidate.aic < best_aic:
            best_aic = candidate.aic
            best_order = param
            best_seasonal_order = param_seasonal
            results = candidate

if results is None:
    raise RuntimeError('No SARIMA parameter combination could be fitted.')
print('Selected SARIMA{}x{}12 - AIC:{}'.format(best_order, best_seasonal_order, best_aic))

# plot_diagnostics returns the figure; title the figure as a whole so the
# title of the last diagnostic subplot is not overwritten.
diagnostics_fig = results.plot_diagnostics(figsize=(15, 12))
diagnostics_fig.suptitle('Diagnosis')
plt.show()
#----------------------------------------------------------------------------
#Static Forecast -- Mostly used for a month prediction
# One-step-ahead (dynamic=False) in-sample predictions starting at a
# user-supplied date, e.g. 2020-01-01.
enter = input("Enter the date : ")
forecast_start = pd.to_datetime(enter)
if pd.isna(forecast_start):
    raise ValueError('No valid date entered: {!r}'.format(enter))
# y is indexed by month-start timestamps, so snap the entered date to the
# first day of its month before using it as a prediction start.
forecast_start = forecast_start.to_period('M').to_timestamp()
if forecast_start < y.index[0] or forecast_start > y.index[-1]:
    raise ValueError('Date must lie between {:%Y-%m-%d} and {:%Y-%m-%d}'.format(
        y.index[0], y.index[-1]))
pred = results.get_prediction(start=forecast_start, dynamic=False)
pred_ci = pred.conf_int()
ax = y['1990':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7)
# Shade the band between the lower and upper confidence bounds.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Crop Price')
plt.legend()
plt.show()
#STEP 5
y_forecasted = pred.predicted_mean
# Take the observations for exactly the predicted dates so the two series
# always line up, whatever start date was entered.
y_truth = y.reindex(y_forecasted.index)
#STEP 6
# Mean absolute error and mean absolute percentage error of the forecast
from sklearn.metrics import mean_absolute_error
static_mae = mean_absolute_error(y_truth, y_forecasted)
# Percentage error is undefined where the observed price is zero; those
# points become inf/NaN and are left out of the mean.
static_pct_err = ((y_truth - y_forecasted).abs() / y_truth.abs())
static_pct_err = static_pct_err.replace([np.inf, -np.inf], np.nan).dropna()
static_mape = static_pct_err.mean() * 100
print('Static forecast - MAE: {:.2f}, MAPE: {:.2f}%'.format(static_mae, static_mape))
#----------------------------------------------------------------------------
#Dynamic Forecast -- validated against the observed data of the same period
# From dynamic_start onwards each prediction is built on earlier predictions
# (dynamic=True) rather than on the observed values.
dynamic_start = pd.to_datetime('2020-01-01')
# `full_results` is not an accepted get_prediction argument in current
# statsmodels, so it is no longer passed.
pred_dynamic = results.get_prediction(start=dynamic_start, dynamic=True)
pred_dynamic_ci = pred_dynamic.conf_int()
ax = y['1990':].plot(label='observed', figsize=(20, 15))
pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)
# Shade the band between the lower and upper confidence bounds.
ax.fill_between(pred_dynamic_ci.index,
                pred_dynamic_ci.iloc[:, 0],
                pred_dynamic_ci.iloc[:, 1], color='k', alpha=.25)
# Highlight the forecast window from dynamic_start to the last observation.
ax.fill_betweenx(ax.get_ylim(), dynamic_start, y.index[-1],
                 alpha=.1, zorder=-1)
ax.set_xlabel('Date')
ax.set_ylabel('CROP PRICE')
plt.legend()
plt.show()
# Extract the predicted and true values of our time series
y_forecasted = pred_dynamic.predicted_mean
y_truth = y[dynamic_start:]
from sklearn.metrics import mean_absolute_error
dynamic_mae = mean_absolute_error(y_truth, y_forecasted)
# Percentage error is undefined where the observed price is zero; those
# points become inf/NaN and are left out of the mean.
dynamic_pct_err = ((y_truth - y_forecasted).abs() / y_truth.abs())
dynamic_pct_err = dynamic_pct_err.replace([np.inf, -np.inf], np.nan).dropna()
dynamic_mape = dynamic_pct_err.mean() * 100
print('Dynamic forecast - MAE: {:.2f}, MAPE: {:.2f}%'.format(dynamic_mae, dynamic_mape))
#----------------------------------------------------------------------------
# Out-of-sample forecast -- predicts beyond the end of the observed series
# Ask the fitted model for the next 20 steps after the last observation
pred_uc = results.get_forecast(steps=20)
# Confidence bounds for those forecasts (first column lower, second upper)
pred_ci = pred_uc.conf_int()
# Persist the confidence bounds to disk
pred_ci.to_csv("predicted_cotton_price_dynamic.csv")
# Draw the observed series, then overlay the forecast mean on the same axes
ax = y.plot(figsize=(20, 15), label='observed')
pred_uc.predicted_mean.plot(label='Forecast', ax=ax)
# Shade the region between the lower and upper confidence bounds
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, alpha=.25, color='k')
ax.set(xlabel='Date', ylabel='CROP PRICE')
plt.legend()
plt.show()
#----------------------------------------------------------------------------