{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 선형회귀 데모"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 라이브러리 import 및 설정"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-10T00:33:44.310645Z",
     "start_time": "2020-09-10T00:33:43.068990Z"
    }
   },
   "outputs": [],
   "source": [
    "# Reload edited modules automatically and render matplotlib figures inline.\n",
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-10T00:33:46.678666Z",
     "start_time": "2020-09-10T00:33:44.312685Z"
    }
   },
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "from matplotlib import rcParams\n",
    "import numpy as np\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from tqdm.notebook import tqdm\n",
    "import warnings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-10T00:33:46.701863Z",
     "start_time": "2020-09-10T00:33:46.682640Z"
    }
   },
   "outputs": [],
   "source": [
    "# Plot defaults: 16x8 figures drawn with the 'fivethirtyeight' style.\n",
    "rcParams['figure.figsize'] = (16, 8)\n",
    "plt.style.use('fivethirtyeight')\n",
    "\n",
    "# Use the fully qualified option name. The bare 'max_columns' pattern also\n",
    "# matches 'styler.render.max_columns' on pandas >= 1.4 and raises OptionError.\n",
    "pd.set_option('display.max_columns', 100)\n",
    "pd.set_option(\"display.precision\", 4)\n",
    "\n",
    "# NOTE(review): this silences every warning for the rest of the session.\n",
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 학습데이터 로드"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-10T00:33:46.723070Z",
     "start_time": "2020-09-10T00:33:46.705861Z"
    }
   },
   "outputs": [],
   "source": [
    "# Input directory, training CSV path, and the seed constant\n",
    "# (where the seed is consumed is outside this part of the notebook).\n",
    "data_dir = Path('../input/')\n",
    "trn_file = data_dir / 'train.csv'\n",
    "seed = 42"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-10T00:33:47.696753Z",
     "start_time": "2020-09-10T00:33:46.725061Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(320000, 19)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "
\n", " | u | \n", "g | \n", "r | \n", "i | \n", "z | \n", "redshift | \n", "dered_u | \n", "dered_g | \n", "dered_r | \n", "dered_i | \n", "dered_z | \n", "nObserve | \n", "nDetect | \n", "airmass_u | \n", "airmass_g | \n", "airmass_r | \n", "airmass_i | \n", "airmass_z | \n", "class | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
0 | \n", "23.2640 | \n", "20.3368 | \n", "19.0095 | \n", "17.6724 | \n", "16.9396 | \n", "-8.1086e-05 | \n", "23.1243 | \n", "20.2578 | \n", "18.9551 | \n", "17.6321 | \n", "16.9089 | \n", "18 | \n", "18 | \n", "1.1898 | \n", "1.1907 | \n", "1.1890 | \n", "1.1894 | \n", "1.1902 | \n", "0 | \n", "
1 | \n", "15.0521 | \n", "14.0620 | \n", "13.4524 | \n", "13.2684 | \n", "13.1689 | \n", "4.5061e-03 | \n", "14.9664 | \n", "14.0045 | \n", "13.4114 | \n", "13.2363 | \n", "13.1347 | \n", "1 | \n", "1 | \n", "1.2533 | \n", "1.2578 | \n", "1.2488 | \n", "1.2510 | \n", "1.2555 | \n", "1 | \n", "
2 | \n", "16.7864 | \n", "15.8254 | \n", "15.5363 | \n", "15.3935 | \n", "15.3500 | \n", "4.7198e-04 | \n", "16.6076 | \n", "15.6866 | \n", "15.4400 | \n", "15.3217 | \n", "15.2961 | \n", "2 | \n", "2 | \n", "1.0225 | \n", "1.0241 | \n", "1.0210 | \n", "1.0217 | \n", "1.0233 | \n", "0 | \n", "
3 | \n", "25.6606 | \n", "21.1887 | \n", "20.2212 | \n", "19.8949 | \n", "19.6346 | \n", "5.8143e-06 | \n", "25.3536 | \n", "20.9947 | \n", "20.0873 | \n", "19.7947 | \n", "19.5552 | \n", "4 | \n", "3 | \n", "1.2054 | \n", "1.2061 | \n", "1.2049 | \n", "1.2051 | \n", "1.2057 | \n", "0 | \n", "
4 | \n", "24.4534 | \n", "20.6992 | \n", "19.0424 | \n", "18.3242 | \n", "17.9826 | \n", "-3.3247e-05 | \n", "23.7714 | \n", "20.4338 | \n", "18.8630 | \n", "18.1903 | \n", "17.8759 | \n", "13 | \n", "12 | \n", "1.1939 | \n", "1.1943 | \n", "1.1937 | \n", "1.1938 | \n", "1.1941 | \n", "0 | \n", "