# 패키지 선언
import pandas as pd
import numpy as np
import FinanceDataReader as fdr
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
ㅤ
# S&P 500 index (NYSE): fetch the 'US500' series with FinanceDataReader for
# the 2020-01-01 .. 2022-01-01 range passed below.
sp = fdr.DataReader('US500', '2020-01-01', '2022-01-01')
# Bare expression: shown as the cell output when run in a notebook.
sp
Output
| Close | Open | High | Low | Volume | Change |
---|
Date | | | | | | |
---|
2020-01-02 | 3257.85 | 3244.67 | 3258.14 | 3235.53 | 0.0 | 0.0084 |
---|
2020-01-03 | 3234.85 | 3226.36 | 3246.15 | 3222.34 | 0.0 | -0.0071 |
---|
2020-01-06 | 3246.28 | 3217.55 | 3246.84 | 3214.64 | 0.0 | 0.0035 |
---|
2020-01-07 | 3237.18 | 3241.86 | 3244.91 | 3232.43 | 0.0 | -0.0028 |
---|
2020-01-08 | 3253.05 | 3238.59 | 3267.07 | 3236.67 | 0.0 | 0.0049 |
---|
... | ... | ... | ... | ... | ... | ... |
---|
2021-12-27 | 4791.19 | 4733.99 | 4791.49 | 4733.99 | 0.0 | 0.0138 |
---|
2021-12-28 | 4786.36 | 4795.49 | 4807.02 | 4780.04 | 0.0 | -0.0010 |
---|
2021-12-29 | 4793.06 | 4788.64 | 4804.06 | 4778.08 | 0.0 | 0.0014 |
---|
2021-12-30 | 4778.73 | 4794.23 | 4808.93 | 4775.33 | 0.0 | -0.0030 |
---|
2021-12-31 | 4766.18 | 4775.21 | 4786.83 | 4765.75 | 0.0 | -0.0026 |
---|
2. Data Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Columns to which scaling is applied.
scale_cols = ['Close']
# Scaled values of the selected columns.
# NOTE(review): the scaler is fitted on the full series here, before the
# train/test split further down, so the test period influences the scaling --
# consider fitting on the training rows only.
scaled = scaler.fit_transform(sp[scale_cols])
# Wrap the scaled array in a DataFrame again.
# NOTE(review): no index is passed, so the new frame does not carry sp's Date index.
df_scaled = pd.DataFrame(scaled, columns=scale_cols)
df_scaled
Output
| Close |
---|
0 | 0.399290 |
---|
1 | 0.390291 |
---|
2 | 0.394763 |
---|
3 | 0.391202 |
---|
4 | 0.397412 |
---|
... | ... |
---|
500 | 0.999268 |
---|
501 | 0.997378 |
---|
502 | 1.000000 |
---|
503 | 0.994393 |
---|
504 | 0.989482 |
---|
3. Train set / Test set 분할
from sklearn.model_selection import train_test_split
# Hold out the final 28 rows as the test set; every earlier row is training data.
train, test = df_scaled[:-28], df_scaled[-28:]
print(train.shape, test.shape)
Output
(477, 1) (28, 1)
ㅤ
# Inputs and targets are the same training rows offset by 7:
# row k of y_train is row k + 7 of train.
X_train, y_train = train[:-7], train[7:]
print(X_train.shape, y_train.shape)
Output
(470, 1) (470, 1)
ㅤ
# Same 7-row offset for the held-out rows:
# row k of y_test is row k + 7 of test.
X_test, y_test = test[:-7], test[7:]
print(X_test.shape, y_test.shape)
Output
(21, 1) (21, 1)
ㅤ
def make_dataset(data, label, window_size=7):
    """Build sliding-window samples and their matching labels.

    Sample ``i`` is the row slice ``data[i:i + window_size]`` and its label is
    row ``i`` of ``label`` (selected by position). Callers that want the label
    to be the row following each window therefore pass ``label`` already
    offset by ``window_size`` rows relative to ``data``.

    Args:
        data: Row-indexable table of inputs (DataFrame, Series, ndarray or
            nested list).
        label: Row-indexable table of targets with at least
            ``len(data) - window_size`` rows.
        window_size: Number of consecutive rows in each sample.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays, each holding
        ``len(data) - window_size`` entries. Both are empty arrays when
        ``data`` has no more than ``window_size`` rows.
    """
    # Convert once so pandas objects, arrays and lists are all indexed by position.
    data_values = np.asarray(data)
    label_values = np.asarray(label)

    # NOTE: the window starting at len(data) - window_size is deliberately not
    # emitted, which keeps the sample count at len(data) - window_size.
    n_samples = len(data_values) - window_size

    features = [data_values[start:start + window_size] for start in range(n_samples)]
    labels = [label_values[start] for start in range(n_samples)]
    return np.array(features), np.array(labels)
ㅤ
# Train dataset: turn the offset training rows into (window, label) samples.
X_train, y_train = make_dataset(X_train, y_train, 7)
# Create the train / validation sets (80% / 20%).
# random_state pins the shuffle so the split is identical on every run.
# NOTE(review): the windows are shuffled before splitting, so validation
# windows overlap in time with training windows -- confirm this is intended.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)
# Test dataset.
X_test, y_test = make_dataset(X_test, y_test, 7)
X_test.shape, y_test.shape
Output
((14, 7, 1), (14, 1))
4. Modeling - LSTM Model
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from keras.layers import LSTM

# Stop training once the monitored metric has not improved for 10 epochs.
earlystopping = EarlyStopping(patience=10, verbose=1)
# Write the model to model.h5 whenever val_loss improves.
checkpoint = ModelCheckpoint(
    'model.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# Two stacked LSTM layers followed by two dense layers ending in a single output.
model = tf.keras.models.Sequential([
    # return_sequences=True hands the full sequence to the second LSTM layer.
    tf.keras.layers.LSTM(102, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    tf.keras.layers.LSTM(56, return_sequences=False),
    tf.keras.layers.Dense(28),
    tf.keras.layers.Dense(1),
])

model.compile(loss='mean_squared_error', optimizer='adam')
# Both callbacks are passed: early stopping ends the run, and the checkpoint
# ensures the best model seen on the validation data is saved to disk.
hist = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=5,
    validation_data=(X_val, y_val),
    callbacks=[earlystopping, checkpoint],
)
5. Valid Loss plot 생성
# Plot the training and validation loss recorded for each epoch.
str_plt_style = 'bmh'
plt.style.use([str_plt_style])
plt.rcParams.update({"figure.figsize": (8, 6), "font.size": 11})
plt.title('S&P500_Loss')
plt.plot(hist.history['loss'], label='train loss')
plt.plot(hist.history['val_loss'], label='valid loss')
plt.legend()
plt.show()
2차 프로젝트 - Valid Loss plot
6. S&P500 지수 예측 plot 생성
# Predict on the test windows, then map predictions and targets back to the
# original (unscaled) values.
y_pred = scaler.inverse_transform(model.predict(X_test))
y_test = scaler.inverse_transform(y_test)
ㅤ
# Plot the actual test values against the model's predictions.
str_plt_style = 'bmh'
plt.style.use([str_plt_style])
plt.rcParams.update({"figure.figsize": (16, 9), "font.size": 11})
plt.title('S&P500')
plt.plot(y_test, label='actual')
plt.plot(y_pred, label='prediction')
plt.legend()
plt.show()
2차 프로젝트 - S&P500 plot
7. Shiftin' plot 생성
# Frame with one row per test index from position 14 onward; the former index
# becomes a regular column and the predictions are stored alongside it.
result = pd.DataFrame(index=test.index[14:]).reset_index()
result['y_pred'] = y_pred
ㅤ
# Plot the actual values against the predictions moved one step earlier.
str_plt_style = 'bmh'
plt.style.use([str_plt_style])
plt.rcParams.update({"figure.figsize": (16, 9), "font.size": 11})
plt.title('S&P500')
plt.plot(y_test, label='actual')
# shift(-1) moves every prediction up by one row; the last entry becomes NaN.
shift = result['y_pred'].shift(-1).values
plt.plot(shift, label="shiftin'", color='orchid')
plt.legend()
plt.show()
2차 프로젝트 - S&P500 Shiftin' plot
훈련 결과, 예측 값이 실제 값에 대해 shifting되는 경향을 보임
Network가 Test Data를 mimicking하는 것으로 추측
(Shiftin' plot을 통해 추세선의 유사성 확인 가능)
➟ Multi-Step Forecast 기법을 활용한 Seq2Seq 모델로 문제 해결 시도 예정
8. 정확도 측정
from sklearn.metrics import mean_squared_error
# RMSE: square root of the mean squared error between y_test and y_pred.
# Both arrays were inverse-transformed earlier in the file, so the error is
# measured on the original (unscaled) values.
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
# MAPE
def MAPE(y_test, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Args:
        y_test: Actual values (array-like).
        y_pred: Predicted values, broadcastable against ``y_test``.

    Returns:
        ``mean(|y_test - y_pred| / |y_test|) * 100`` over all elements.
        Entries of ``y_test`` equal to zero are not special-cased and follow
        NumPy's division semantics (``inf`` / ``nan``).
    """
    # Coerce so plain lists work as well as arrays.
    actual = np.asarray(y_test)
    predicted = np.asarray(y_pred)
    return np.mean(np.abs((actual - predicted) / actual)) * 100
ㅤ
# Report both metrics rounded to two decimals; MAPE is printed as a percentage.
print('RMSE =',round(RMSE,2))
print('MAPE =',round(MAPE(y_test, y_pred),2),'%')
RMSE = 107.38
MAPE = 1.98 %
댓글
댓글 쓰기