# Assumed imports for the code below: pandas/numpy for data handling and
# pyupbit imported as `py` (py.get_ohlcv is used to download the OHLCV data)
import pandas as pd
import numpy as np
import pyupbit as py

tickers = ['USDT-BTC','USDT-ETH','USDT-XRP','USDT-ADA','USDT-LTC']
interval = 'minute60'
ㅤ
from tqdm import tqdm
coin_set = []
for ticker in tqdm(tickers):
coin = py.get_ohlcv(ticker=ticker,count=20000,interval=interval,to='2022-01-01')
coin_set.append(coin)
ㅤ
BTC = coin_set[0]
ETH = coin_set[1]
XRP = coin_set[2]
ADA = coin_set[3]
LTC = coin_set[4]
# Fetch the BITCOIN price data
BTC
Output
|  | open | high | low | close | volume | value |
|---|---|---|---|---|---|---|
| 2019-09-06 21:00:00+00:00 | 10837.073450 | 10892.000000 | 10825.000000 | 10826.240000 | 13.028313 | 141488.509446 |
| 2019-09-06 22:00:00+00:00 | 10826.240000 | 10851.990000 | 10763.968517 | 10803.422690 | 26.329976 | 284421.274696 |
| 2019-09-06 23:00:00+00:00 | 10803.410000 | 10850.000000 | 10803.410000 | 10827.649995 | 11.192036 | 121318.460494 |
| 2019-09-07 00:00:00+00:00 | 10833.520000 | 10879.270000 | 10833.520000 | 10843.160000 | 7.257861 | 78828.225009 |
| 2019-09-07 01:00:00+00:00 | 10841.006105 | 10856.282376 | 10827.649995 | 10855.177197 | 5.160667 | 55951.010868 |
| ... | ... | ... | ... | ... | ... | ... |
| 2022-05-01 04:00:00+00:00 | 38366.850000 | 38531.871679 | 38366.850000 | 38531.871679 | 0.017207 | 661.545067 |
| 2022-05-01 05:00:00+00:00 | 38453.599000 | 38453.599000 | 38379.200000 | 38379.200000 | 0.019895 | 764.321204 |
| 2022-05-01 06:00:00+00:00 | 38379.200000 | 38445.961000 | 38332.268000 | 38382.931000 | 0.153709 | 5905.244381 |
| 2022-05-01 07:00:00+00:00 | 38301.414000 | 38422.006800 | 38259.735000 | 38399.064000 | 0.520722 | 19982.987904 |
| 2022-05-01 08:00:00+00:00 | 38370.906000 | 38370.906000 | 37654.172000 | 37666.710000 | 2.583532 | 97615.410157 |
# Restrict the date range and keep only the (hourly) closing prices
BTC = BTC[BTC.index >= '2020-01-01']
BTC = BTC[BTC.index < '2022-01-01']
BTC = BTC[['close']]
BTC
Output
|  | close |
|---|---|
| 2020-01-01 00:00:00+00:00 | 7385.000 |
| 2020-01-01 02:00:00+00:00 | 7385.000 |
| 2020-01-01 03:00:00+00:00 | 7355.000 |
| 2020-01-01 06:00:00+00:00 | 7440.000 |
| 2020-01-01 10:00:00+00:00 | 7420.000 |
| ... | ... |
| 2021-12-31 19:00:00+00:00 | 48013.895 |
| 2021-12-31 20:00:00+00:00 | 48009.388 |
| 2021-12-31 21:00:00+00:00 | 48067.146 |
| 2021-12-31 22:00:00+00:00 | 48014.406 |
| 2021-12-31 23:00:00+00:00 | 48130.977 |
2. Data Scaling
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# Column name for the scaled data (the original 'close' column is renamed 'Close')
scale_col = ['Close']
# Fit the scaler and rescale the closing prices to the [0, 1] range
BTC_scaled = scaler.fit_transform(BTC)
# Rebuild a DataFrame from the scaled array
BTC_scaled = pd.DataFrame(BTC_scaled)
BTC_scaled.columns = scale_col
BTC_scaled
Output
|  | Close |
|---|---|
| 0 | 0.045203 |
| 1 | 0.045203 |
| 2 | 0.044733 |
| 3 | 0.046064 |
| 4 | 0.045751 |
| ... | ... |
| 14861 | 0.681685 |
| 14862 | 0.681614 |
| 14863 | 0.682519 |
| 14864 | 0.681693 |
| 14865 | 0.683519 |
3. Train set / Test set split
from sklearn.model_selection import train_test_split
train = BTC_scaled[:-72]
test = BTC_scaled[-72:]
print(train.shape,test.shape)
Output
(14794, 1) (72, 1)
ㅤ
X_train=train[:-24]
y_train=train[24:]
print(X_train.shape,y_train.shape)
Output
(14770, 1) (14770, 1)
ㅤ
X_test=test[:-24]
y_test=test[24:]
print(X_test.shape,y_test.shape)
Output
(48, 1) (48, 1)
ㅤ
# window_size=24: each sample covers one full day (00:00-24:00) of hourly BITCOIN data
def make_dataset(data, label, window_size=24):
    feature_list = []
    label_list = []
    for i in range(len(data) - window_size):
        # 24-hour input window paired with the label that immediately follows it
        feature_list.append(np.array(data[i:i + window_size]))
        label_list.append(np.array(label.iloc[i]))
    return np.array(feature_list), np.array(label_list)
ㅤ
# train dataset
X_train, y_train = make_dataset(X_train,y_train,24)
# Split off a validation set from the training data
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size = 0.2)
# test dataset
X_test,y_test = make_dataset(X_test,y_test,24)
X_test.shape,y_test.shape
Output
((24, 24, 1), (24, 1))
For the test set, each sample consists of 24 hours of data, which forms the middle dimension,
and since Close is the only value being predicted, the feature dimension is one, giving the shapes (24, 24, 1) and (24, 1) above.
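To see where these shapes come from, here is a minimal sanity check of make_dataset on dummy data (the toy series and the window size of 5 are illustrative assumptions, not part of the project):
# Hypothetical toy series: 30 increasing 'Close' values
toy = pd.DataFrame({'Close': np.arange(30, dtype=float)})
# Mirror the real setup: labels are shifted forward by the window size
toy_X, toy_y = make_dataset(toy[:-5], toy[5:], window_size=5)
print(toy_X.shape, toy_y.shape)    # (20, 5, 1) (20, 1)
print(toy_X[0].ravel(), toy_y[0])  # window [0. 1. 2. 3. 4.] -> label [5.]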
4. Modeling - LSTM Model
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from keras.layers import LSTM
earlystopping = EarlyStopping(patience=10,verbose=1)
checkpoint = ModelCheckpoint('model.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(192, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    tf.keras.layers.LSTM(96, return_sequences=False),
    tf.keras.layers.Dense(48),
    tf.keras.layers.Dense(1)
])
ㅤ
model.compile(loss='mean_squared_error', optimizer='adam')
# Note: the ModelCheckpoint defined above is not passed here; add it to callbacks to save the best weights
hist = model.fit(X_train, y_train, epochs=100, batch_size=5, validation_data=(X_val, y_val), callbacks=[earlystopping])
5. Generating the Valid Loss plot
import matplotlib.pyplot as plt
str_plt_style = 'bmh'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (10,6)
plt.rcParams["font.size"]=11
plt.title('Valid Loss')
plt.plot(hist.history['loss'],label='train loss')
plt.plot(hist.history['val_loss'],label='valid loss')
plt.legend()
plt.show()
Project 2 - Valid Loss plot (BITCOIN)
6. Generating the hourly BITCOIN price prediction plot
y_pred=model.predict(X_test)
# Invert the scaling to recover the original price scale
y_pred = scaler.inverse_transform(y_pred)
y_test = scaler.inverse_transform(y_test)
ㅤ
str_plt_style = 'bmh'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (16,9)
plt.rcParams["font.size"]=11
plt.title('BITCOIN')
plt.plot(y_test,label='actual')
plt.plot(y_pred,label='prediction')
plt.legend()
plt.show()
Project 2 - BITCOIN plot
7. Generating the Shiftin' plot
# Index by the 24 predicted rows (test rows 24-47) so the frame's length matches y_pred
result = pd.DataFrame(index=test.index[24:48])
result.reset_index(inplace=True)
result['y_pred'] = y_pred.flatten()
ㅤ
str_plt_style = 'bmh'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (16,9)
plt.rcParams["font.size"]=11
plt.title('BITCOIN')
plt.plot(y_test,label='actual')
shift = result.y_pred.shift(-1).values
plt.plot(shift,label="shiftin'",color='orchid')
plt.legend()
plt.show()
Project 2 - BITCOIN Shiftin' plot
After training, BITCOIN's predictions likewise tend to be shifted relative to the actual values.
The network appears to be mimicking the test data
(the Shiftin' plot makes the similarity of the trend lines visible).
➟ We plan to tackle this with a Seq2Seq model using multi-step forecasting.
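As a rough idea of what that could look like (a hypothetical sketch, not code used in this project; the layer sizes and the 6-hour horizon are assumptions), the encoder LSTM reads a 24-hour window and a decoder emits several future hours at once, so the model can no longer score well by simply repeating the last observed value:
import numpy as np
import tensorflow as tf

# Build multi-step targets: each 24-hour window is paired with the next `horizon` hours
def make_multistep_dataset(series, window_size=24, horizon=6):
    X, y = [], []
    for i in range(len(series) - window_size - horizon + 1):
        X.append(series[i:i + window_size])
        y.append(series[i + window_size:i + window_size + horizon])
    return np.array(X)[..., None], np.array(y)[..., None]

horizon = 6
seq2seq = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(96, input_shape=(24, 1)),              # encoder: summarize the 24-hour window
    tf.keras.layers.RepeatVector(horizon),                      # repeat the summary for each output step
    tf.keras.layers.LSTM(96, return_sequences=True),            # decoder: one hidden state per future hour
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1))   # one Close value per future hour
])
seq2seq.compile(loss='mean_squared_error', optimizer='adam')
Training data for such a model would come from the scaled series, e.g. make_multistep_dataset(BTC_scaled['Close'].values), instead of the single-step pairs built above.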
8. Accuracy measurement
from sklearn.metrics import mean_squared_error
# RMSE
MSE = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
# MAPE
def MAPE(y_test, y_pred):
    return np.mean(np.abs((y_test - y_pred) / y_test)) * 100
ㅤ
print('RMSE =', round(RMSE, 2))
print('MAPE =', round(MAPE(y_test, y_pred), 2), '%')
Output
RMSE = 730.03
MAPE = 1.44 %