[Python/파이썬] Numpy Pandas Matplotlib Seaborn Sklearn - 3. 신용등급 MinMaxScaler plot


1. 엑셀 파일 불러오기

# 패키지 선언
import numpy as npimport matplotlib
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# TC_EN_AREA_CRISIS_INFO.csv 분기별 데이터 추출 및 변환 코드

df1=pd.read_excel('C:/fintech6/_project1/_src/sample_01.xlsx', index_col=0) # 2020-1Q
df2=pd.read_excel('C:/fintech6/_project1/_src/sample_02.xlsx', index_col=0) # 2020-2Q
df3=pd.read_excel('C:/fintech6/_project1/_src/sample_03.xlsx', index_col=0) # 2020-3Q

*TC_EN_AREA_CRISIS_INFO_sample(신용등급).csv



















(출처: 경기지역경제포털, KED신용등급, https://bigdata-region.kr/#/dataset/6f393bec-a1e1-4e09-8075-8c53e19d51f5)

2. 업종코드 선택

# I : 숙박 및 음식점업
code1 = df1['INDUTY_LCLAS_CODE'] == 'I'
df_code1=df1[code1]
code2 = df2['INDUTY_LCLAS_CODE'] == 'I'
df_code2=df2[code2]
code3 = df3['INDUTY_LCLAS_CODE'] == 'I'
df_code3=df3[code3]

3. 신용등급 구간별(A등급/B등급/C등급/D등급) 합계 list 생성

# 2020-1Q
AAA = df1['CREDT_GRAD_NM'] == 'AAA'
subset_AAA=df1[code1 & AAA]
AAp = df1['CREDT_GRAD_NM'] == 'AA+'
subset_AAp=df1[code1 & AAp]
AA = df1['CREDT_GRAD_NM'] == 'AA'
subset_AA=df1[code1 & AA]
AAm = df1['CREDT_GRAD_NM'] == 'AA-'
subset_AAm=df1[code1 & AAm]
Ap = df1['CREDT_GRAD_NM'] == 'A+'
subset_Ap=df1[code1 & Ap]
A = df1['CREDT_GRAD_NM'] == 'A'
subset_A=df1[code1 & A]
Am = df1['CREDT_GRAD_NM'] == 'A-'
subset_Am=df1[code1 & Am]
lengA=len(subset_AAA)+len(subset_AAp)+len(subset_AA)+len(subset_AAm)+len(subset_Ap)+len(subset_A)+len(subset_Am)
BBBp = df1['CREDT_GRAD_NM'] == 'BBB+'
subset_BBBp=df1[code1 & BBBp]
BBB = df1['CREDT_GRAD_NM'] == 'BBB'
subset_BBB=df1[code1 & BBB]
BBBm = df1['CREDT_GRAD_NM'] == 'BBB-'
subset_BBBm=df1[code1 & BBBm]
BBp = df1['CREDT_GRAD_NM'] == 'BB+'
subset_BBp=df1[code1 & BBp]
BB = df1['CREDT_GRAD_NM'] == 'BB'
subset_BB=df1[code1 & BB]
BBm = df1['CREDT_GRAD_NM'] == 'BB-'
subset_BBm=df1[code1 & BBm]
Bp = df1['CREDT_GRAD_NM'] == 'B+'
subset_Bp=df1[code1 & Bp]
B = df1['CREDT_GRAD_NM'] == 'B'
subset_B=df1[code1 & B]
Bm = df1['CREDT_GRAD_NM'] == 'B-'
subset_Bm=df1[code1 & Bm]
lengB=len(subset_BBBp)+len(subset_BBB)+len(subset_BBBm)+len(subset_BBp)+len(subset_BB)+len(subset_BBm)+len(subset_Bp)+len(subset_B)+len(subset_Bm)
CCCp = df1['CREDT_GRAD_NM'] == 'CCC+'
subset_CCCp=df1[code1 & CCCp]
CCC = df1['CREDT_GRAD_NM'] == 'CCC'
subset_CCC=df1[code1 & CCC]
CCCm = df1['CREDT_GRAD_NM'] == 'CCC-'
subset_CCCm=df1[code1 & CCCm]
CC = df1['CREDT_GRAD_NM'] == 'CC'
subset_CC=df1[code1 & CC]
C = df1['CREDT_GRAD_NM'] == 'C'
subset_C=df1[code1 & C] 
lengC=len(subset_CCCp)+len(subset_CCC)+len(subset_CCCm)+len(subset_CC)+len(subset_C)
D = df1['CREDT_GRAD_NM'] == 'D'
subset_D=df1[code1 & D]
lengD=len(subset_D)
data1=[lengA,lengB,lengC,lengD] # 2020-1Q 신용등급 구간별 합계 list

#2020-2Q
AAA = df2['CREDT_GRAD_NM'] == 'AAA'
subset2_AAA=df2[code2 & AAA]
AAp = df2['CREDT_GRAD_NM'] == 'AA+'
subset2_AAp=df2[code2 & AAp]
AA = df2['CREDT_GRAD_NM'] == 'AA'
subset2_AA=df2[code2 & AA]
AAm = df2['CREDT_GRAD_NM'] == 'AA-'
subset2_AAm=df2[code2 & AAm]
Ap = df2['CREDT_GRAD_NM'] == 'A+'
subset2_Ap=df2[code2 & Ap]
A = df2['CREDT_GRAD_NM'] == 'A'
subset2_A=df2[code2 & A]
Am = df2['CREDT_GRAD_NM'] == 'A-'
subset2_Am=df2[code2 & Am]
lengA=len(subset2_AAA)+len(subset2_AAp)+len(subset2_AA)+len(subset2_AAm)+len(subset2_Ap)+len(subset2_A)+len(subset2_Am)
BBBp = df2['CREDT_GRAD_NM'] == 'BBB+'
subset2_BBBp=df2[code2 & BBBp]
BBB = df2['CREDT_GRAD_NM'] == 'BBB'
subset2_BBB=df2[code2 & BBB]
BBBm = df2['CREDT_GRAD_NM'] == 'BBB-'
subset2_BBBm=df2[code2 & BBBm]
BBp = df2['CREDT_GRAD_NM'] == 'BB+'
subset2_BBp=df2[code2 & BBp]
BB = df2['CREDT_GRAD_NM'] == 'BB'
subset2_BB=df2[code2 & BB]
BBm = df2['CREDT_GRAD_NM'] == 'BB-'
subset2_BBm=df2[code2 & BBm]
Bp = df2['CREDT_GRAD_NM'] == 'B+'
subset2_Bp=df2[code2 & Bp]
B = df2['CREDT_GRAD_NM'] == 'B'
subset2_B=df2[code2 & B]
Bm = df2['CREDT_GRAD_NM'] == 'B-'
subset2_Bm=df2[code2 & Bm]
lengB=len(subset2_BBBp)+len(subset2_BBB)+len(subset2_BBBm)+len(subset2_BBp)+len(subset2_BB)+len(subset2_BBm)+len(subset2_Bp)+len(subset2_B)+len(subset2_Bm)
CCCp = df2['CREDT_GRAD_NM'] == 'CCC+'
subset2_CCCp=df2[code2 & CCCp]
CCC = df2['CREDT_GRAD_NM'] == 'CCC'
subset2_CCC=df2[code2 & CCC]
CCCm = df2['CREDT_GRAD_NM'] == 'CCC-'
subset2_CCCm=df2[code2 & CCCm]
CC = df2['CREDT_GRAD_NM'] == 'CC'
subset2_CC=df2[code2 & CC]
C = df2['CREDT_GRAD_NM'] == 'C'
subset2_C=df2[code2 & C]
lengC=len(subset2_CCCp)+len(subset2_CCC)+len(subset2_CCCm)+len(subset2_CC)+len(subset2_C)
D = df2['CREDT_GRAD_NM'] == 'D'
subset2_D=df2[code2 & D]
lengD=len(subset2_D)
data2=[lengA,lengB,lengC,lengD] # 2020-2Q 신용등급 구간별 합계 list

#2020-3Q
AAA = df3['CREDT_GRAD_NM'] == 'AAA'
subset_AAA=df3[code3 & AAA]
AAp = df3['CREDT_GRAD_NM'] == 'AA+'
subset_AAp=df3[code3 & AAp]
AA = df3['CREDT_GRAD_NM'] == 'AA'
subset_AA=df3[code3 & AA]
AAm = df3['CREDT_GRAD_NM'] == 'AA-'
subset_AAm=df3[code3 & AAm]
Ap = df3['CREDT_GRAD_NM'] == 'A+'
subset_Ap=df3[code3 & Ap]
A = df3['CREDT_GRAD_NM'] == 'A'
subset_A=df3[code3 & A]
Am = df3['CREDT_GRAD_NM'] == 'A-'
subset_Am=df3[code3 & Am]
lengA=len(subset_AAA)+len(subset_AAp)+len(subset_AA)+len(subset_AAm)+len(subset_Ap)+len(subset_A)+len(subset_Am)
BBBp = df3['CREDT_GRAD_NM'] == 'BBB+'
subset_BBBp=df3[code3 & BBBp]
BBB = df3['CREDT_GRAD_NM'] == 'BBB'
subset_BBB=df3[code3 & BBB]
BBBm = df3['CREDT_GRAD_NM'] == 'BBB-'
subset_BBBm=df3[code3 & BBBm]
BBp = df3['CREDT_GRAD_NM'] == 'BB+'
subset_BBp=df3[code3 & BBp]
BB = df3['CREDT_GRAD_NM'] == 'BB'
subset_BB=df3[code3 & BB]
BBm = df3['CREDT_GRAD_NM'] == 'BB-'
subset_BBm=df3[code3 & BBm]
Bp = df3['CREDT_GRAD_NM'] == 'B+'
subset_Bp=df3[code3 & Bp]
B = df3['CREDT_GRAD_NM'] == 'B'
subset_B=df3[code3 & B]
Bm = df3['CREDT_GRAD_NM'] == 'B-'
subset_Bm=df3[code3 & Bm]
lengB=len(subset_BBBp)+len(subset_BBB)+len(subset_BBBm)+len(subset_BBp)+len(subset_BB)+len(subset_BBm)+len(subset_Bp)+len(subset_B)+len(subset_Bm)
CCCp = df3['CREDT_GRAD_NM'] == 'CCC+'
subset_CCCp=df3[code3 & CCCp]
CCC = df3['CREDT_GRAD_NM'] == 'CCC'
subset_CCC=df3[code3 & CCC]
CCCm = df3['CREDT_GRAD_NM'] == 'CCC-'
subset_CCCm=df3[code3 & CCCm]
CC = df3['CREDT_GRAD_NM'] == 'CC'
subset_CC=df3[code3 & CC]
C = df3['CREDT_GRAD_NM'] == 'C'
subset_C=df3[code3 & C]
lengC=len(subset_CCCp)+len(subset_CCC)+len(subset_CCCm)+len(subset_CC)+len(subset_C)
D = df3['CREDT_GRAD_NM'] == 'D'
subset_D=df3[code3 & D]
lengD=len(subset_D)
data3=[lengA,lengB,lengC,lengD] # 2020-3Q 신용등급 구간별 합계 list


4. DataFrame 생성

data={'2020-1Q':data1,'2020-2Q':data2,'2020-3Q':data3}
_df = pd.DataFrame(data,index=['A등급','B등급','C등급','D등급']) # Grouped Barplot 생성
df=_df.transpose() # MinMaxScaler plot 생성
df
Output
	A등급	B등급	C등급	D등급
2020-1Q	313	7247	2758	650
2020-2Q	372	7961	2850	662
2020-3Q	391	8389	3064	624

5. Min-Max 정규화

# 등급(column)별로 추출
dfA=df[['A등급']]
dfB=df[['B등급']]
dfC=df[['C등급']]
dfD=df[['D등급']]

minmax=MinMaxScaler()

# 등급(column)별로 Min-Max 정규화
minmaxdfA=minmax.fit_transform(dfA)
minmaxdfB=minmax.fit_transform(dfB)
minmaxdfC=minmax.fit_transform(dfC)
minmaxdfD=minmax.fit_transform(dfD)

# DataFrame Join
mmdfA=pd.DataFrame(minmaxdfA)
mmdfB=pd.DataFrame(minmaxdfB)
mmdfC=pd.DataFrame(minmaxdfC)
mmdfD=pd.DataFrame(minmaxdfD)
mm=pd.concat([mmdfA,mmdfB,mmdfC,mmdfD],axis=1,join='inner')

6. 새로운 DataFrame 생성

mm.index = ['2020-1Q','2020-2Q','2020-3Q']
mm.columns = ['A등급','B등급','C등급','D등급']
mm
Output
	A등급   B등급	  C등급	   D등급
2020-1Q	0.00000	0.000000  0.000000 0.684211
2020-2Q	0.75641	0.625219  0.300654 1.000000
2020-3Q	1.00000	1.000000  1.000000 0.000000

7. MinMaxScaler plot 생성

# MinMaxScaler plot_ratingA 생성
mm[['A등급']].plot(kind="line",color='powderblue',marker='o')

# 스타일 지정
str_plt_style = 'seaborn-darkgrid'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["font.size"]=12

# 맑은고딕체 출력
font_path = "C:/Windows/Fonts/MALGUNBD.TTF"
font = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font)

# y축 눈금 제거
plt.ylabel("Y Label")
ax = plt.gca()
ax.axes.yaxis.set_visible(False)

# 저장
image_file='C:/fintech6/_project1/_img/I_ratingA.png' 
plt.savefig(image_file,dpi=400)

# MinMaxScaler plot_ratingB 생성
mm[['B등급']].plot(kind="line",color='skyblue',marker='o')

# 스타일 지정
str_plt_style = 'seaborn-darkgrid'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["font.size"]=12

# 맑은고딕체 출력
font_path = "C:/Windows/Fonts/MALGUNBD.TTF"
font = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font)

# y축 눈금 제거
plt.ylabel("Y Label")
ax = plt.gca()
ax.axes.yaxis.set_visible(False)

# 저장
image_file='C:/fintech6/_project1/_img/I_ratingB.png' 
plt.savefig(image_file,dpi=400)

# MinMaxScaler plot_ratingC 생성
mm[['C등급']].plot(kind="line",color='steelblue',marker='o')

# 스타일 지정
str_plt_style = 'seaborn-darkgrid'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["font.size"]=12

# 맑은고딕체 출력
font_path = "C:/Windows/Fonts/MALGUNBD.TTF"
font = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font)

# y축 눈금 제거
plt.ylabel("Y Label")
ax = plt.gca()
ax.axes.yaxis.set_visible(False)

# 저장
image_file='C:/fintech6/_project1/_img/I_ratingC.png' 
plt.savefig(image_file,dpi=400)

# MinMaxScaler plot_ratingD 생성
mm[['D등급']].plot(kind="line",color='midnightblue',marker='o')

# 스타일 지정
str_plt_style = 'seaborn-darkgrid'
plt.style.use([str_plt_style])
plt.rcParams["figure.figsize"] = (4,3)
plt.rcParams["font.size"]=12

# 맑은고딕체 출력
font_path = "C:/Windows/Fonts/MALGUNBD.TTF"
font = font_manager.FontProperties(fname=font_path).get_name()
rc('font', family=font)

# y축 눈금 제거
plt.ylabel("Y Label")
ax = plt.gca()
ax.axes.yaxis.set_visible(False)

# 저장
image_file='C:/fintech6/_project1/_img/I_ratingD.png' 
plt.savefig(image_file,dpi=400)


1차 프로젝트 - MinMaxScaler plot

댓글

이 블로그의 인기 게시물

[Python/파이썬] LSTM FinanceDataReader tensorflow keras sklearn - 1. LSTM 모델을 활용한 S&P500 예측

[Python/파이썬] Numpy Pandas Matplotlib Seaborn Sklearn - 2. 신용등급 Grouped Barplot