데모¶
라이브러리 import 및 설정¶
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import rcParams
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import seaborn as sns
import warnings
# Notebook-wide plotting and display defaults.
rcParams['figure.figsize'] = (16, 8)
plt.style.use('fivethirtyeight')
# Use the fully qualified option name: the bare 'max_columns' pattern also
# matches 'styler.render.max_columns' on newer pandas releases, which makes
# set_option raise OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_columns', 100)
pd.set_option("display.precision", 4)
# NOTE(review): this silences every warning for the rest of the session,
# including scikit-learn convergence and deprecation warnings.
warnings.simplefilter('ignore')
학습데이터 로드¶
03-pandas-eda.ipynb에서 생성한 feature.csv
피처파일 사용
# Input data location and the build sub-directories that receive the
# generated features, validation/test predictions and the submission.
data_dir = Path('..') / 'data' / 'dacon-dku'
feature_dir, val_dir, tst_dir, sub_dir = (
    Path('../build') / sub_folder
    for sub_folder in ('feature', 'val', 'tst', 'sub')
)

# Raw competition files.
trn_file = data_dir.joinpath('train.csv')
tst_file = data_dir.joinpath('test.csv')
sample_file = data_dir.joinpath('sample_submission.csv')

# Experiment constants.
target_col = 'class'
n_fold = 5
n_class = 3
seed = 42

# Every output file name is derived from the algorithm and feature-set names.
algo_name = 'lrcv'
feature_name = 'polyfeature'
model_name = '_'.join((algo_name, feature_name))

feature_file = feature_dir.joinpath(feature_name + '.csv')
p_val_file = val_dir.joinpath(model_name + '.val.csv')
p_tst_file = tst_dir.joinpath(model_name + '.tst.csv')
sub_file = sub_dir.joinpath(model_name + '.csv')
Polynomial Feature 생성¶
# Load the base feature table (the first CSV column becomes the index)
# and take a quick look at its size and leading rows.
base_feature_path = feature_dir.joinpath('feature.csv')
df = pd.read_csv(base_feature_path, index_col=0)
del base_feature_path
print(df.shape)
df.head()
(400000, 20)
z | redshift | dered_u | dered_g | dered_r | dered_i | dered_z | nObserve | airmass_u | class | d_dered_u | d_dered_g | d_dered_r | d_dered_i | d_dered_z | d_dered_ig | d_dered_zg | d_dered_rz | d_dered_iz | d_obs_det | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | ||||||||||||||||||||
0 | 16.9396 | -8.1086e-05 | 23.1243 | 20.2578 | 18.9551 | 17.6321 | 16.9089 | 2.9444 | 1.1898 | 0.0 | -0.1397 | -0.0790 | -0.0544 | -0.0403 | -0.0307 | -2.6257 | -3.3488 | 2.0462 | 0.7232 | -15.0556 |
1 | 13.1689 | 4.5061e-03 | 14.9664 | 14.0045 | 13.4114 | 13.2363 | 13.1347 | 0.6931 | 1.2533 | 1.0 | -0.0857 | -0.0574 | -0.0410 | -0.0322 | -0.0343 | -0.7683 | -0.8698 | 0.2767 | 0.1016 | -0.3069 |
2 | 15.3500 | 4.7198e-04 | 16.6076 | 15.6866 | 15.4400 | 15.3217 | 15.2961 | 1.0986 | 1.0225 | 0.0 | -0.1787 | -0.1388 | -0.0963 | -0.0718 | -0.0540 | -0.3649 | -0.3905 | 0.1440 | 0.0257 | -0.9014 |
3 | 19.6346 | 5.8143e-06 | 25.3536 | 20.9947 | 20.0873 | 19.7947 | 19.5552 | 1.6094 | 1.2054 | 0.0 | -0.3070 | -0.1941 | -0.1339 | -0.1003 | -0.0795 | -1.2000 | -1.4395 | 0.5321 | 0.2395 | -1.3906 |
4 | 17.9826 | -3.3247e-05 | 23.7714 | 20.4338 | 18.8630 | 18.1903 | 17.8759 | 2.6391 | 1.1939 | 0.0 | -0.6820 | -0.2653 | -0.1794 | -0.1339 | -0.1067 | -2.2436 | -2.5579 | 0.9871 | 0.3144 | -9.3609 |
# Expand the standardized predictors into degree-2 polynomial features.
#
# The target column must be excluded from the names handed to the transformer
# as well as from the data: the transformer maps names to inputs by position,
# so leaving the target in the name list shifts every later name by one and
# labels a real predictor with the target's name.
input_cols = df.columns.drop(target_col)
scaler = StandardScaler()
poly = PolynomialFeatures(2)
X = poly.fit_transform(scaler.fit_transform(df[input_cols]))
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when the installed version provides it and fall back otherwise.
if hasattr(poly, 'get_feature_names_out'):
    feature_names = list(poly.get_feature_names_out(input_cols))
else:
    feature_names = poly.get_feature_names(input_cols)
feature_names
['1',
'z',
'redshift',
'dered_u',
'dered_g',
'dered_r',
'dered_i',
'dered_z',
'nObserve',
'airmass_u',
'class',
'd_dered_u',
'd_dered_g',
'd_dered_r',
'd_dered_i',
'd_dered_z',
'd_dered_ig',
'd_dered_zg',
'd_dered_rz',
'd_dered_iz',
'z^2',
'z redshift',
'z dered_u',
'z dered_g',
'z dered_r',
'z dered_i',
'z dered_z',
'z nObserve',
'z airmass_u',
'z class',
'z d_dered_u',
'z d_dered_g',
'z d_dered_r',
'z d_dered_i',
'z d_dered_z',
'z d_dered_ig',
'z d_dered_zg',
'z d_dered_rz',
'z d_dered_iz',
'redshift^2',
'redshift dered_u',
'redshift dered_g',
'redshift dered_r',
'redshift dered_i',
'redshift dered_z',
'redshift nObserve',
'redshift airmass_u',
'redshift class',
'redshift d_dered_u',
'redshift d_dered_g',
'redshift d_dered_r',
'redshift d_dered_i',
'redshift d_dered_z',
'redshift d_dered_ig',
'redshift d_dered_zg',
'redshift d_dered_rz',
'redshift d_dered_iz',
'dered_u^2',
'dered_u dered_g',
'dered_u dered_r',
'dered_u dered_i',
'dered_u dered_z',
'dered_u nObserve',
'dered_u airmass_u',
'dered_u class',
'dered_u d_dered_u',
'dered_u d_dered_g',
'dered_u d_dered_r',
'dered_u d_dered_i',
'dered_u d_dered_z',
'dered_u d_dered_ig',
'dered_u d_dered_zg',
'dered_u d_dered_rz',
'dered_u d_dered_iz',
'dered_g^2',
'dered_g dered_r',
'dered_g dered_i',
'dered_g dered_z',
'dered_g nObserve',
'dered_g airmass_u',
'dered_g class',
'dered_g d_dered_u',
'dered_g d_dered_g',
'dered_g d_dered_r',
'dered_g d_dered_i',
'dered_g d_dered_z',
'dered_g d_dered_ig',
'dered_g d_dered_zg',
'dered_g d_dered_rz',
'dered_g d_dered_iz',
'dered_r^2',
'dered_r dered_i',
'dered_r dered_z',
'dered_r nObserve',
'dered_r airmass_u',
'dered_r class',
'dered_r d_dered_u',
'dered_r d_dered_g',
'dered_r d_dered_r',
'dered_r d_dered_i',
'dered_r d_dered_z',
'dered_r d_dered_ig',
'dered_r d_dered_zg',
'dered_r d_dered_rz',
'dered_r d_dered_iz',
'dered_i^2',
'dered_i dered_z',
'dered_i nObserve',
'dered_i airmass_u',
'dered_i class',
'dered_i d_dered_u',
'dered_i d_dered_g',
'dered_i d_dered_r',
'dered_i d_dered_i',
'dered_i d_dered_z',
'dered_i d_dered_ig',
'dered_i d_dered_zg',
'dered_i d_dered_rz',
'dered_i d_dered_iz',
'dered_z^2',
'dered_z nObserve',
'dered_z airmass_u',
'dered_z class',
'dered_z d_dered_u',
'dered_z d_dered_g',
'dered_z d_dered_r',
'dered_z d_dered_i',
'dered_z d_dered_z',
'dered_z d_dered_ig',
'dered_z d_dered_zg',
'dered_z d_dered_rz',
'dered_z d_dered_iz',
'nObserve^2',
'nObserve airmass_u',
'nObserve class',
'nObserve d_dered_u',
'nObserve d_dered_g',
'nObserve d_dered_r',
'nObserve d_dered_i',
'nObserve d_dered_z',
'nObserve d_dered_ig',
'nObserve d_dered_zg',
'nObserve d_dered_rz',
'nObserve d_dered_iz',
'airmass_u^2',
'airmass_u class',
'airmass_u d_dered_u',
'airmass_u d_dered_g',
'airmass_u d_dered_r',
'airmass_u d_dered_i',
'airmass_u d_dered_z',
'airmass_u d_dered_ig',
'airmass_u d_dered_zg',
'airmass_u d_dered_rz',
'airmass_u d_dered_iz',
'class^2',
'class d_dered_u',
'class d_dered_g',
'class d_dered_r',
'class d_dered_i',
'class d_dered_z',
'class d_dered_ig',
'class d_dered_zg',
'class d_dered_rz',
'class d_dered_iz',
'd_dered_u^2',
'd_dered_u d_dered_g',
'd_dered_u d_dered_r',
'd_dered_u d_dered_i',
'd_dered_u d_dered_z',
'd_dered_u d_dered_ig',
'd_dered_u d_dered_zg',
'd_dered_u d_dered_rz',
'd_dered_u d_dered_iz',
'd_dered_g^2',
'd_dered_g d_dered_r',
'd_dered_g d_dered_i',
'd_dered_g d_dered_z',
'd_dered_g d_dered_ig',
'd_dered_g d_dered_zg',
'd_dered_g d_dered_rz',
'd_dered_g d_dered_iz',
'd_dered_r^2',
'd_dered_r d_dered_i',
'd_dered_r d_dered_z',
'd_dered_r d_dered_ig',
'd_dered_r d_dered_zg',
'd_dered_r d_dered_rz',
'd_dered_r d_dered_iz',
'd_dered_i^2',
'd_dered_i d_dered_z',
'd_dered_i d_dered_ig',
'd_dered_i d_dered_zg',
'd_dered_i d_dered_rz',
'd_dered_i d_dered_iz',
'd_dered_z^2',
'd_dered_z d_dered_ig',
'd_dered_z d_dered_zg',
'd_dered_z d_dered_rz',
'd_dered_z d_dered_iz',
'd_dered_ig^2',
'd_dered_ig d_dered_zg',
'd_dered_ig d_dered_rz',
'd_dered_ig d_dered_iz',
'd_dered_zg^2',
'd_dered_zg d_dered_rz',
'd_dered_zg d_dered_iz',
'd_dered_rz^2',
'd_dered_rz d_dered_iz',
'd_dered_iz^2']
# Wrap the expanded matrix in a frame that shares the original row index,
# then attach the target column taken from the base feature table.
df_poly = pd.DataFrame(X, index=df.index, columns=feature_names)
df_poly[target_col] = df[target_col]
df_poly.head()

# Persist the feature set, release both in-memory frames, and read the
# saved file back so later cells work from exactly what is on disk.
df_poly.to_csv(feature_file)
del df_poly, df
df = pd.read_csv(feature_file, index_col=0)
print(df.shape)
df.head()
(400000, 210)
1 | z | redshift | dered_u | dered_g | dered_r | dered_i | dered_z | nObserve | airmass_u | class | d_dered_u | d_dered_g | d_dered_r | d_dered_i | d_dered_z | d_dered_ig | d_dered_zg | d_dered_rz | d_dered_iz | z^2 | z redshift | z dered_u | z dered_g | z dered_r | z dered_i | z dered_z | z nObserve | z airmass_u | z class | z d_dered_u | z d_dered_g | z d_dered_r | z d_dered_i | z d_dered_z | z d_dered_ig | z d_dered_zg | z d_dered_rz | z d_dered_iz | redshift^2 | redshift dered_u | redshift dered_g | redshift dered_r | redshift dered_i | redshift dered_z | redshift nObserve | redshift airmass_u | redshift class | redshift d_dered_u | redshift d_dered_g | ... | class d_dered_z | class d_dered_ig | class d_dered_zg | class d_dered_rz | class d_dered_iz | d_dered_u^2 | d_dered_u d_dered_g | d_dered_u d_dered_r | d_dered_u d_dered_i | d_dered_u d_dered_z | d_dered_u d_dered_ig | d_dered_u d_dered_zg | d_dered_u d_dered_rz | d_dered_u d_dered_iz | d_dered_g^2 | d_dered_g d_dered_r | d_dered_g d_dered_i | d_dered_g d_dered_z | d_dered_g d_dered_ig | d_dered_g d_dered_zg | d_dered_g d_dered_rz | d_dered_g d_dered_iz | d_dered_r^2 | d_dered_r d_dered_i | d_dered_r d_dered_z | d_dered_r d_dered_ig | d_dered_r d_dered_zg | d_dered_r d_dered_rz | d_dered_r d_dered_iz | d_dered_i^2 | d_dered_i d_dered_z | d_dered_i d_dered_ig | d_dered_i d_dered_zg | d_dered_i d_dered_rz | d_dered_i d_dered_iz | d_dered_z^2 | d_dered_z d_dered_ig | d_dered_z d_dered_zg | d_dered_z d_dered_rz | d_dered_z d_dered_iz | d_dered_ig^2 | d_dered_ig d_dered_zg | d_dered_ig d_dered_rz | d_dered_ig d_dered_iz | d_dered_zg^2 | d_dered_zg d_dered_rz | d_dered_zg d_dered_iz | d_dered_rz^2 | d_dered_rz d_dered_iz | d_dered_iz^2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
0 | 1.0 | 0.0002 | -0.1093 | 1.8170 | 0.1270 | 0.9086 | 0.0208 | 0.0011 | 1.6087 | 0.1200 | 0.0 | 0.0063 | 0.1783 | -0.0002 | 0.0008 | -0.0981 | -0.0626 | 0.0367 | 0.0156 | -1.3511 | 3.4181e-08 | -2.0203e-05 | 0.0003 | 2.3488e-05 | 0.0002 | 3.8455e-06 | 2.0594e-07 | 0.0003 | 2.2191e-05 | 2.2235e-05 | 1.1669e-06 | 3.2960e-05 | -3.7490e-08 | 1.4942e-07 | -1.8144e-05 | -1.1575e-05 | 6.7832e-06 | 2.8794e-06 | -0.0002 | 0.0119 | -0.1986 | -0.0139 | -0.0993 | -0.0023 | -0.0001 | -0.1758 | -0.0131 | -0.0131 | -0.0007 | -0.0195 | ... | -0.0118 | -0.0075 | 0.0044 | 0.0019 | -0.1625 | 3.9834e-05 | 1.1252e-03 | -1.2798e-06 | 5.1009e-06 | -6.1941e-04 | -3.9513e-04 | 2.3156e-04 | 9.8297e-05 | -0.0085 | 0.0318 | -3.6150e-05 | 1.4408e-04 | -0.0175 | -1.1161e-02 | 0.0065 | 2.7766e-03 | -0.2409 | 4.1119e-08 | -1.6389e-07 | 1.9901e-05 | 1.2695e-05 | -7.4398e-06 | -3.1582e-06 | 2.7398e-04 | 6.5318e-07 | -7.9317e-05 | -5.0598e-05 | 2.9652e-05 | 1.2587e-05 | -1.0920e-03 | 9.6316e-03 | 6.1442e-03 | -3.6007e-03 | -1.5285e-03 | 0.1326 | 3.9195e-03 | -2.2970e-03 | -9.7505e-04 | 0.0846 | 1.3461e-03 | 5.7142e-04 | -0.0496 | 2.4256e-04 | -0.0210 | 1.8255 |
1 | 1.0 | -0.0719 | -0.1007 | -2.4251 | -0.2656 | -2.8534 | -0.1751 | -0.1054 | -0.8761 | 0.6658 | 1.0 | 0.0081 | 0.2144 | 0.0001 | 0.0007 | 0.0190 | 0.0156 | -0.0133 | -0.0071 | 0.5650 | 5.1738e-03 | 7.2464e-03 | 0.1744 | 1.9104e-02 | 0.2052 | 1.2597e-02 | 7.5809e-03 | 0.0630 | -4.7893e-02 | -1.4200e-02 | -5.8349e-04 | -1.5419e-02 | -8.7153e-06 | -5.1123e-05 | -1.3637e-03 | -1.1239e-03 | 9.5460e-04 | 5.0978e-04 | -0.0406 | 0.0101 | 0.2443 | 0.0268 | 0.2875 | 0.0176 | 0.0106 | 0.0883 | -0.0671 | -0.0199 | -0.0008 | -0.0216 | ... | 0.0037 | 0.0031 | -0.0026 | -0.0014 | 0.1115 | 6.5806e-05 | 1.7389e-03 | 9.8290e-07 | 5.7656e-06 | 1.5379e-04 | 1.2675e-04 | -1.0766e-04 | -5.7493e-05 | 0.0046 | 0.0460 | 2.5973e-05 | 1.5236e-04 | 0.0041 | 3.3493e-03 | -0.0028 | -1.5193e-03 | 0.1211 | 1.4681e-08 | 8.6118e-08 | 2.2971e-06 | 1.8932e-06 | -1.6081e-06 | -8.5874e-07 | 6.8457e-05 | 5.0517e-07 | 1.3475e-05 | 1.1105e-05 | -9.4327e-06 | -5.0373e-06 | 4.0156e-04 | 3.5943e-04 | 2.9622e-04 | -2.5161e-04 | -1.3437e-04 | 0.0107 | 2.4412e-04 | -2.0736e-04 | -1.1074e-04 | 0.0088 | 1.7613e-04 | 9.4060e-05 | -0.0075 | 5.0230e-05 | -0.0040 | 0.3192 |
2 | 1.0 | -0.0302 | -0.1082 | -1.5717 | -0.1600 | -1.4768 | -0.0822 | -0.0444 | -0.4286 | -1.3179 | 0.0 | 0.0013 | 0.0650 | -0.0015 | 0.0002 | 0.0444 | 0.0308 | -0.0170 | -0.0099 | 0.4877 | 9.1298e-04 | 3.2708e-03 | 0.0475 | 4.8338e-03 | 0.0446 | 2.4830e-03 | 1.3416e-03 | 0.0130 | 3.9822e-02 | -1.9476e-03 | -3.9487e-05 | -1.9652e-03 | 4.4024e-05 | -5.3424e-06 | -1.3413e-03 | -9.2915e-04 | 5.1426e-04 | 2.9778e-04 | -0.0147 | 0.0117 | 0.1701 | 0.0173 | 0.1599 | 0.0089 | 0.0048 | 0.0464 | 0.1427 | -0.0070 | -0.0001 | -0.0070 | ... | 0.0029 | 0.0020 | -0.0011 | -0.0006 | 0.0314 | 1.7078e-06 | 8.4993e-05 | -1.9040e-06 | 2.3106e-07 | 5.8012e-05 | 4.0186e-05 | -2.2242e-05 | -1.2879e-05 | 0.0006 | 0.0042 | -9.4759e-05 | 1.1499e-05 | 0.0029 | 2.0000e-03 | -0.0011 | -6.4096e-04 | 0.0317 | 2.1228e-06 | -2.5761e-07 | -6.4677e-05 | -4.4803e-05 | 2.4797e-05 | 1.4359e-05 | -7.1063e-04 | 3.1262e-08 | 7.8488e-06 | 5.4370e-06 | -3.0093e-06 | -1.7425e-06 | 8.6238e-05 | 1.9706e-03 | 1.3651e-03 | -7.5553e-04 | -4.3748e-04 | 0.0217 | 9.4561e-04 | -5.2337e-04 | -3.0305e-04 | 0.0150 | 2.8967e-04 | 1.6773e-04 | -0.0083 | 9.7124e-05 | -0.0048 | 0.2379 |
3 | 1.0 | 0.0517 | -0.1091 | 2.9763 | 0.1733 | 1.6769 | 0.1172 | 0.0758 | 0.1352 | 0.2544 | 0.0 | -0.0033 | -0.0366 | -0.0026 | -0.0005 | -0.0083 | -0.0024 | -0.0061 | -0.0021 | 0.4242 | 2.6757e-03 | -5.6442e-03 | 0.1540 | 8.9650e-03 | 0.0867 | 6.0617e-03 | 3.9204e-03 | 0.0070 | 1.3162e-02 | -6.1500e-03 | -1.7133e-04 | -1.8939e-03 | -1.3410e-04 | -2.6646e-05 | -4.2725e-04 | -1.2163e-04 | -3.1351e-04 | -1.0655e-04 | 0.0219 | 0.0119 | -0.3248 | -0.0189 | -0.1830 | -0.0128 | -0.0083 | -0.0148 | -0.0278 | 0.0130 | 0.0004 | 0.0040 | ... | 0.0010 | 0.0003 | 0.0007 | 0.0002 | -0.0504 | 1.0971e-05 | 1.2127e-04 | 8.5865e-06 | 1.7062e-06 | 2.7358e-05 | 7.7883e-06 | 2.0075e-05 | 6.8228e-06 | -0.0014 | 0.0013 | 9.4914e-05 | 1.8860e-05 | 0.0003 | 8.6090e-05 | 0.0002 | 7.5418e-05 | -0.0155 | 6.7204e-06 | 1.3354e-06 | 2.1412e-05 | 6.0957e-06 | 1.5712e-05 | 5.3400e-06 | -1.0997e-03 | 2.6535e-07 | 4.2547e-06 | 1.2112e-06 | 3.1221e-06 | 1.0611e-06 | -2.1851e-04 | 6.8222e-05 | 1.9422e-05 | 5.0061e-05 | 1.7014e-05 | -0.0035 | 5.5290e-06 | 1.4251e-05 | 4.8436e-06 | -0.0010 | 3.6734e-05 | 1.2485e-05 | -0.0026 | 4.2431e-06 | -0.0009 | 0.1799 |
4 | 1.0 | 0.0201 | -0.1092 | 2.1535 | 0.1381 | 0.8461 | 0.0457 | 0.0284 | 1.2717 | 0.1560 | 0.0 | -0.0093 | -0.1596 | -0.0039 | -0.0013 | -0.0741 | -0.0376 | 0.0068 | 0.0007 | -0.6113 | 4.0534e-04 | -2.1983e-03 | 0.0434 | 2.7804e-03 | 0.0170 | 9.1963e-04 | 5.7181e-04 | 0.0256 | 3.1404e-03 | -1.3188e-02 | -1.8663e-04 | -3.2124e-03 | -7.9125e-05 | -2.5252e-05 | -1.4909e-03 | -7.5793e-04 | 1.3661e-04 | 1.3497e-05 | -0.0123 | 0.0119 | -0.2351 | -0.0151 | -0.0924 | -0.0050 | -0.0031 | -0.1389 | -0.0170 | 0.0715 | 0.0010 | 0.0174 | ... | 0.0485 | 0.0247 | -0.0044 | -0.0004 | 0.4004 | 8.5934e-05 | 1.4791e-03 | 3.6433e-05 | 1.1627e-05 | 6.8645e-04 | 3.4898e-04 | -6.2902e-05 | -6.2147e-06 | 0.0057 | 0.0255 | 6.2708e-04 | 2.0013e-04 | 0.0118 | 6.0068e-03 | -0.0011 | -1.0697e-04 | 0.0975 | 1.5446e-05 | 4.9295e-06 | 2.9103e-04 | 1.4796e-04 | -2.6668e-05 | -2.6348e-06 | 2.4024e-03 | 1.5732e-06 | 9.2880e-05 | 4.7219e-05 | -8.5109e-06 | -8.4088e-07 | 7.6673e-04 | 5.4834e-03 | 2.7877e-03 | -5.0247e-04 | -4.9644e-05 | 0.0453 | 1.4173e-03 | -2.5545e-04 | -2.5238e-05 | 0.0230 | 4.6043e-05 | 4.5490e-06 | -0.0041 | 4.4944e-07 | -0.0004 | 0.3737 |
5 rows × 210 columns
# Remove the target column from the frame and keep its first 320000 values
# as the labels; the remaining columns are the model inputs.
y = df.pop(target_col).values[:320000]
# Rows before position 320000 form the training matrix, the rest the test matrix.
trn, tst = np.split(df.values, [320000])
feature_name = list(df.columns)
print(y.shape, trn.shape, tst.shape)
(320000,) (320000, 209) (80000, 209)
Stratified K-Fold Cross Validation¶
# Shuffled, class-stratified folds; the fixed seed keeps the split repeatable.
cv = StratifiedKFold(n_fold, random_state=seed, shuffle=True)
Logistic Regression 모델 학습¶
# p_val collects out-of-fold class probabilities for every training row;
# p_tst accumulates the test-set probabilities averaged over the folds.
p_val = np.zeros((trn.shape[0], n_class))
p_tst = np.zeros((tst.shape[0], n_class))
for i, (i_trn, i_val) in enumerate(cv.split(trn, y), 1):
    print(f'training model for CV #{i}')
    # The explicit multi_class='multinomial' argument is deprecated since
    # scikit-learn 1.5 and scheduled for removal. With the default solver and
    # more than two classes the estimator already fits a multinomial model,
    # so the argument is dropped to stay compatible with future releases.
    # NOTE(review): max_iter is left at the library default and warnings are
    # silenced at the top of the notebook, so a ConvergenceWarning would go
    # unnoticed here - confirm the solver actually converges.
    clf = LogisticRegression()
    clf.fit(trn[i_trn], y[i_trn])
    # Each validation row is predicted by the one model that did not see it.
    p_val[i_val, :] = clf.predict_proba(trn[i_val])
    # Each fold contributes an equal share to the averaged test prediction.
    p_tst += clf.predict_proba(tst) / n_fold
training model for CV #1
training model for CV #2
training model for CV #3
training model for CV #4
training model for CV #5
# Out-of-fold accuracy in percent: the predicted class is the column with
# the highest probability.
print(f'{accuracy_score(y, p_val.argmax(axis=1)) * 100:.4f}%')
89.6659%
# Sanity check: one row per sample and one column per class in both arrays.
print(np.shape(p_val), np.shape(p_tst))
(320000, 3) (80000, 3)
# Store both probability matrices as comma-separated text with six decimals.
np.savetxt(p_val_file, p_val, delimiter=',', fmt='%.6f')
np.savetxt(p_tst_file, p_tst, delimiter=',', fmt='%.6f')
피처 중요도 시각화¶
# Shape of the coefficient matrix of the most recently fitted fold model.
np.shape(clf.coef_)
(3, 209)
# Plot the coefficients stored in row 1 of coef_, sorted in ascending order.
# `clf` is the model left over from the last CV fold, so the chart reflects
# that single fit rather than an average over folds.
imp = (
    pd.DataFrame({'feature': df.columns, 'importance': clf.coef_[1]})
    .sort_values('importance')
    .set_index('feature')
)
imp.plot.barh(figsize=(8, 32))
<matplotlib.axes._subplots.AxesSubplot at 0x7fa3b82d9390>
제출 파일 생성¶
# Start from the sample submission so the output keeps its index and layout.
sub = pd.read_csv(filepath_or_buffer=sample_file, index_col=0)
print(sub.shape)
sub.head()
(80000, 1)
class | |
---|---|
id | |
320000 | 0 |
320001 | 0 |
320002 | 0 |
320003 | 0 |
320004 | 0 |
# The submitted label is the class with the highest fold-averaged probability.
sub[target_col] = p_tst.argmax(axis=1)
sub.head()
class | |
---|---|
id | |
320000 | 2 |
320001 | 0 |
320002 | 2 |
320003 | 0 |
320004 | 2 |
# Count how many test rows were assigned to each predicted class.
sub[target_col].value_counts()
2 42870
0 31013
1 6117
Name: class, dtype: int64
# Write the completed submission, index included, to the submission path.
sub.to_csv(path_or_buf=sub_file)