{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "PROJECT_COVID19_GLOBAL_WK2.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "yHXugnGnhrw8", "colab_type": "text" }, "source": [ "\n", "# STEP 1: GET THAT DATA\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "RS7-uEhshdqr", "colab_type": "code", "colab": {} }, "source": [ "import pandas as pd\n", "import numpy as np\n", "## GLOBAL WK 2\n", "train_file = \"https://raw.githubusercontent.com/danielcaraway/data/master/covid19-global-forecasting-week-2/train.csv\"\n", "test_file = \"https://raw.githubusercontent.com/danielcaraway/data/master/covid19-global-forecasting-week-2/test.csv\"\n", "sub_file = \"https://raw.githubusercontent.com/danielcaraway/data/master/covid19-global-forecasting-week-2/submission.csv\"\n", "\n", "train = pd.read_csv(train_file)\n", "test = pd.read_csv(test_file)\n", "sub = pd.read_csv(sub_file)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "9wOUzHzthxnL", "colab_type": "text" }, "source": [ "# STEP 2: PREP THAT DATA\n", "\n", "* Deal with states + countries\n", "* Deal with datetimes\n", "* Deal with categoricals (LabelEncoder)\n" ] }, { "cell_type": "code", "metadata": { "id": "a_zKr-NojL8W", "colab_type": "code", "colab": {} }, "source": [ "# Fixed seed so the 500-row sample is the same on every Restart & Run All.\n", "SEED = 42\n", "subset = train.sample(n=500, random_state=SEED)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ptt28DTJiD_L", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 391 }, "outputId": "40a9030f-a5f8-4d59-a10c-a1ea06c70238" }, "source": [ "subset" ], "execution_count": 14, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdProvince_StateCountry_RegionDateConfirmedCasesFatalities
21523241NaNBurkina Faso2020-03-020.00.0
1509422847CaliforniaUS2020-03-080.00.0
42486425Inner MongoliaChina2020-02-1568.00.0
1267519204NaNQatar2020-01-250.00.0
36965601GuizhouChina2020-01-221.00.0
.....................
1585124012IowaUS2020-02-020.00.0
1844927936NaNUkraine2020-02-260.00.0
1874628403GibraltarUnited Kingdom2020-01-240.00.0
690610443NaNEswatini2020-03-040.00.0
1365120656NaNSlovenia2020-03-17275.01.0
\n", "

500 rows × 6 columns

\n", "
" ], "text/plain": [ " Id Province_State ... ConfirmedCases Fatalities\n", "2152 3241 NaN ... 0.0 0.0\n", "15094 22847 California ... 0.0 0.0\n", "4248 6425 Inner Mongolia ... 68.0 0.0\n", "12675 19204 NaN ... 0.0 0.0\n", "3696 5601 Guizhou ... 1.0 0.0\n", "... ... ... ... ... ...\n", "15851 24012 Iowa ... 0.0 0.0\n", "18449 27936 NaN ... 0.0 0.0\n", "18746 28403 Gibraltar ... 0.0 0.0\n", "6906 10443 NaN ... 0.0 0.0\n", "13651 20656 NaN ... 275.0 1.0\n", "\n", "[500 rows x 6 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 14 } ] }, { "cell_type": "code", "metadata": { "id": "he2i8tyriiRc", "colab_type": "code", "colab": {} }, "source": [ "# train = subset.copy()\n", "def use_country(state, country):\n", "    \"\"\"Return `state`, or `country` when `state` is missing (NaN/None).\"\"\"\n", "    return country if pd.isna(state) else state\n", "\n", "# Vectorized equivalent of applying use_country row by row: a missing\n", "# Province_State is replaced by the Country_Region of the same row.\n", "train['Province_State'] = train['Province_State'].fillna(train['Country_Region'])\n", "test['Province_State'] = test['Province_State'].fillna(test['Country_Region'])" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "CZ0PINFLkfuG", "colab_type": "code", "colab": {} }, "source": [ "# NOTE: train and test are encoded independently, so their dummy columns\n", "# only line up where both frames contain the same category values.\n", "train_d = pd.get_dummies(train)\n", "test_d = pd.get_dummies(test)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "5PKR15uJlgjk", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 427 }, "outputId": "70aacba2-5213-44d7-a6e7-b0233bb2f396" }, "source": [ "train_d" ], "execution_count": 23, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdConfirmedCasesFatalitiesProvince_State_AfghanistanProvince_State_AlabamaProvince_State_AlaskaProvince_State_AlbertaProvince_State_AlgeriaProvince_State_AngolaProvince_State_AnhuiProvince_State_Antigua and BarbudaProvince_State_ArgentinaProvince_State_ArizonaProvince_State_ArmeniaProvince_State_Australian Capital TerritoryProvince_State_AzerbaijanProvince_State_BahamasProvince_State_BangladeshProvince_State_BarbadosProvince_State_BeijingProvince_State_BelarusProvince_State_BelizeProvince_State_BhutanProvince_State_Bosnia and HerzegovinaProvince_State_BrazilProvince_State_British ColumbiaProvince_State_BruneiProvince_State_BulgariaProvince_State_Burkina FasoProvince_State_Cabo VerdeProvince_State_CaliforniaProvince_State_CambodiaProvince_State_CameroonProvince_State_Cayman IslandsProvince_State_ChadProvince_State_Channel IslandsProvince_State_ChileProvince_State_ColombiaProvince_State_ColoradoProvince_State_Congo (Brazzaville)...Date_2020-02-17Date_2020-02-18Date_2020-02-19Date_2020-02-20Date_2020-02-21Date_2020-02-22Date_2020-02-23Date_2020-02-24Date_2020-02-25Date_2020-02-26Date_2020-02-27Date_2020-02-28Date_2020-02-29Date_2020-03-01Date_2020-03-02Date_2020-03-03Date_2020-03-04Date_2020-03-05Date_2020-03-06Date_2020-03-07Date_2020-03-08Date_2020-03-09Date_2020-03-10Date_2020-03-11Date_2020-03-12Date_2020-03-13Date_2020-03-14Date_2020-03-15Date_2020-03-16Date_2020-03-17Date_2020-03-18Date_2020-03-19Date_2020-03-20Date_2020-03-21Date_2020-03-22Date_2020-03-23Date_2020-03-24Date_2020-03-25Date_2020-03-26Date_2020-03-27
215232410.00.00000000000000000000000000100000000000...0000000000000010000000000000000000000000
15094228470.00.00000000000000000000000000001000000000...0000000000000000000010000000000000000000
4248642568.00.00000000000000000000000000000000000000...0000000000000000000000000000000000000000
12675192040.00.00000000000000000000000000000000000000...0000000000000000000000000000000000000000
369656011.00.00000000000000000000000000000000000000...0000000000000000000000000000000000000000
......................................................................................................................................................................................................................................................
15851240120.00.00000000000000000000000000000000000000...0000000000000000000000000000000000000000
18449279360.00.00000000000000000000000000000000000000...0000000001000000000000000000000000000000
18746284030.00.00000000000000000000000000000000000000...0000000000000000000000000000000000000000
6906104430.00.00000000000000000000000000000000000000...0000000000000000100000000000000000000000
1365120656275.01.00000000000000000000000000000000000000...0000000000000000000000000000010000000000
\n", "

500 rows × 446 columns

\n", "
" ], "text/plain": [ " Id ConfirmedCases ... Date_2020-03-26 Date_2020-03-27\n", "2152 3241 0.0 ... 0 0\n", "15094 22847 0.0 ... 0 0\n", "4248 6425 68.0 ... 0 0\n", "12675 19204 0.0 ... 0 0\n", "3696 5601 1.0 ... 0 0\n", "... ... ... ... ... ...\n", "15851 24012 0.0 ... 0 0\n", "18449 27936 0.0 ... 0 0\n", "18746 28403 0.0 ... 0 0\n", "6906 10443 0.0 ... 0 0\n", "13651 20656 275.0 ... 0 0\n", "\n", "[500 rows x 446 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 23 } ] }, { "cell_type": "markdown", "metadata": { "id": "iVsj_VlQiE5D", "colab_type": "text" }, "source": [ "# STEP 3: MODEL THAT DATA\n", "* GridSearchCV\n", "* XGBRegressor" ] }, { "cell_type": "code", "metadata": { "id": "UhsWpEMtlwyX", "colab_type": "code", "colab": {} }, "source": [ "from sklearn.model_selection import GridSearchCV\n", "import time\n", "param_grid = {'n_estimators': [1000]}\n", "\n", "def gridSearchCV(model, X_Train, y_Train, param_grid, cv=10, scoring='neg_mean_squared_error'):\n", "    \"\"\"Grid-search `model` over `param_grid` and return the fitted GridSearchCV.\n", "\n", "    model      -- estimator to tune\n", "    X_Train    -- numeric feature frame\n", "    y_Train    -- target values aligned with X_Train\n", "    param_grid -- dict of parameter name -> list of candidate values\n", "    cv         -- number of cross-validation folds\n", "    scoring    -- scikit-learn scoring name used to rank candidates\n", "\n", "    Prints the elapsed wall time, the best parameters and the best CV score.\n", "    \"\"\"\n", "    start = time.time()\n", "    grid_cv = GridSearchCV(model, param_grid, cv=cv, scoring=scoring)\n", "    grid_cv.fit(X_Train, y_Train)\n", "    print('GridSearchCV took %.1f s' % (time.time() - start))\n", "    print('Best params:', grid_cv.best_params_)\n", "    print('Best score: %.4f' % grid_cv.best_score_)\n", "    return grid_cv" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "wXMtglrSmBv-", "colab_type": "code", "colab": {} }, "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", "le = LabelEncoder()\n", "\n", "def to_model_frame(df):\n", "    \"\"\"Return a copy of `df` with State/Country column names and an integer Date.\n", "\n", "    The modelling cells below address the columns as State and Country, and\n", "    XGBoost needs a numeric date, so Date becomes its proleptic ordinal day.\n", "    \"\"\"\n", "    out = df.rename(columns={'Province_State': 'State', 'Country_Region': 'Country'})\n", "    out['Date'] = pd.to_datetime(out['Date']).map(pd.Timestamp.toordinal)\n", "    return out\n", "\n", "X_Train = to_model_frame(train)\n", "X_Test = to_model_frame(test)\n", "y1_Train = X_Train['ConfirmedCases']\n", "y2_Train = X_Train['Fatalities']" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Lb10QHCblxns", "colab_type": "code", "colab": {} }, "source": [ "from xgboost import XGBRegressor\n", "\n", "# Global baseline. Keep only real features: X_Train also holds Id and both\n", "# targets, which must not be fed to the model, and its text columns have to\n", "# be label-encoded before XGBoost can use them.\n", "X_Train_enc = X_Train[['State', 'Country', 'Date']].copy()\n", "X_Train_enc['State'] = le.fit_transform(X_Train_enc['State'])\n", "X_Train_enc['Country'] = le.fit_transform(X_Train_enc['Country'])\n", "\n", "model = XGBRegressor()\n", "model1 = gridSearchCV(model, X_Train_enc, y1_Train, param_grid, 10, 'neg_mean_squared_error')\n", "model2 = gridSearchCV(model, X_Train_enc, y2_Train, param_grid, 10, 'neg_mean_squared_error')" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "U3ik8M6_mthO", "colab_type": "code", "colab": {} }, "source": [ "countries = X_Train['Country'].unique()\n", "\n", "# One forecast frame per region; concatenated once after the loop instead of\n", "# growing df_out inside it.\n", "frames = []\n", "\n", "for country in countries:\n", "    # Only the states that belong to this country: pairing every state with\n", "    # every country produces empty frames that cannot be fitted.\n", "    states = X_Train.loc[X_Train.Country == country, 'State'].unique()\n", "    for state in states:\n", "        train_mask = (X_Train.Country == country) & (X_Train.State == state)\n", "        test_mask = (X_Test.Country == country) & (X_Test.State == state)\n", "        if not test_mask.any():\n", "            continue  # nothing to forecast for this region\n", "\n", "        X_Train_CS = X_Train.loc[train_mask, ['State', 'Country', 'Date']].copy()\n", "        y1_Train_CS = X_Train.loc[train_mask, 'ConfirmedCases']\n", "        y2_Train_CS = X_Train.loc[train_mask, 'Fatalities']\n", "\n", "        X_Test_CS = X_Test.loc[test_mask, ['State', 'Country', 'Date']].copy()\n", "        X_Test_CS_Id = X_Test.loc[test_mask, 'ForecastId']\n", "\n", "        # State and Country are constant within one region; encoding them\n", "        # just makes the columns numeric for XGBoost.\n", "        for col in ('State', 'Country'):\n", "            X_Train_CS[col] = le.fit_transform(X_Train_CS[col])\n", "            X_Test_CS[col] = le.fit_transform(X_Test_CS[col])\n", "\n", "        model1 = XGBRegressor(n_estimators=1000)\n", "        model1.fit(X_Train_CS, y1_Train_CS)\n", "        y1_pred = model1.predict(X_Test_CS)\n", "\n", "        model2 = XGBRegressor(n_estimators=1000)\n", "        model2.fit(X_Train_CS, y2_Train_CS)\n", "        y2_pred = model2.predict(X_Test_CS)\n", "\n", "        frames.append(pd.DataFrame({'ForecastId': X_Test_CS_Id, 'ConfirmedCases': y1_pred, 'Fatalities': y2_pred}))\n", "    # Done for state loop\n", "# Done for country Loop\n", "\n", "if frames:\n", "    df_out = pd.concat(frames, axis=0)\n", "else:\n", "    df_out = pd.DataFrame({'ForecastId': [], 'ConfirmedCases': [], 'Fatalities': []})" ], "execution_count": 0, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "TVJbKWdBoeH_", "colab_type": "text" 
}, "source": [ "# SIDEQUEST: More on XGBoost" ] }, { "cell_type": "code", "metadata": { "id": "gX7RGdhno2_T", "colab_type": "code", "colab": {} }, "source": [ "# Independent copies, so the sidequest leaves train/test untouched.\n", "b_train, b_test = train.copy(), test.copy()" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "SiOUvGMWo8sk", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 391 }, "outputId": "d424cf92-efdf-47df-fc19-f6c4c0d56630" }, "source": [ "# Split the two targets off; everything that remains is the feature frame.\n", "b_train_y1 = b_train.loc[:, 'ConfirmedCases']\n", "b_train_y2 = b_train.loc[:, 'Fatalities']\n", "b_train_X = b_train.drop(columns=['ConfirmedCases', 'Fatalities'])\n", "b_train_X" ], "execution_count": 36, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdProvince_StateCountry_RegionDate
21523241Burkina FasoBurkina Faso2020-03-02
1509422847CaliforniaUS2020-03-08
42486425Inner MongoliaChina2020-02-15
1267519204QatarQatar2020-01-25
36965601GuizhouChina2020-01-22
...............
1585124012IowaUS2020-02-02
1844927936UkraineUkraine2020-02-26
1874628403GibraltarUnited Kingdom2020-01-24
690610443EswatiniEswatini2020-03-04
1365120656SloveniaSlovenia2020-03-17
\n", "

500 rows × 4 columns

\n", "
" ], "text/plain": [ " Id Province_State Country_Region Date\n", "2152 3241 Burkina Faso Burkina Faso 2020-03-02\n", "15094 22847 California US 2020-03-08\n", "4248 6425 Inner Mongolia China 2020-02-15\n", "12675 19204 Qatar Qatar 2020-01-25\n", "3696 5601 Guizhou China 2020-01-22\n", "... ... ... ... ...\n", "15851 24012 Iowa US 2020-02-02\n", "18449 27936 Ukraine Ukraine 2020-02-26\n", "18746 28403 Gibraltar United Kingdom 2020-01-24\n", "6906 10443 Eswatini Eswatini 2020-03-04\n", "13651 20656 Slovenia Slovenia 2020-03-17\n", "\n", "[500 rows x 4 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 36 } ] }, { "cell_type": "code", "metadata": { "id": "Tq_rsp9-pbce", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 427 }, "outputId": "3c6edcbf-e5d7-4967-b920-db6f87ad5496" }, "source": [ "# One-hot encode the text columns (Province_State, Country_Region and the\n", "# Date strings); the numeric Id column passes through unchanged.\n", "b_train_X_d = pd.get_dummies(data=b_train_X)\n", "b_train_X_d" ], "execution_count": 37, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IdProvince_State_AfghanistanProvince_State_AlabamaProvince_State_AlaskaProvince_State_AlbertaProvince_State_AlgeriaProvince_State_AngolaProvince_State_AnhuiProvince_State_Antigua and BarbudaProvince_State_ArgentinaProvince_State_ArizonaProvince_State_ArmeniaProvince_State_Australian Capital TerritoryProvince_State_AzerbaijanProvince_State_BahamasProvince_State_BangladeshProvince_State_BarbadosProvince_State_BeijingProvince_State_BelarusProvince_State_BelizeProvince_State_BhutanProvince_State_Bosnia and HerzegovinaProvince_State_BrazilProvince_State_British ColumbiaProvince_State_BruneiProvince_State_BulgariaProvince_State_Burkina FasoProvince_State_Cabo VerdeProvince_State_CaliforniaProvince_State_CambodiaProvince_State_CameroonProvince_State_Cayman IslandsProvince_State_ChadProvince_State_Channel IslandsProvince_State_ChileProvince_State_ColombiaProvince_State_ColoradoProvince_State_Congo (Brazzaville)Province_State_Congo (Kinshasa)Province_State_Connecticut...Date_2020-02-17Date_2020-02-18Date_2020-02-19Date_2020-02-20Date_2020-02-21Date_2020-02-22Date_2020-02-23Date_2020-02-24Date_2020-02-25Date_2020-02-26Date_2020-02-27Date_2020-02-28Date_2020-02-29Date_2020-03-01Date_2020-03-02Date_2020-03-03Date_2020-03-04Date_2020-03-05Date_2020-03-06Date_2020-03-07Date_2020-03-08Date_2020-03-09Date_2020-03-10Date_2020-03-11Date_2020-03-12Date_2020-03-13Date_2020-03-14Date_2020-03-15Date_2020-03-16Date_2020-03-17Date_2020-03-18Date_2020-03-19Date_2020-03-20Date_2020-03-21Date_2020-03-22Date_2020-03-23Date_2020-03-24Date_2020-03-25Date_2020-03-26Date_2020-03-27
21523241000000000000000000000000010000000000000...0000000000000010000000000000000000000000
1509422847000000000000000000000000000100000000000...0000000000000000000010000000000000000000
42486425000000000000000000000000000000000000000...0000000000000000000000000000000000000000
1267519204000000000000000000000000000000000000000...0000000000000000000000000000000000000000
36965601000000000000000000000000000000000000000...0000000000000000000000000000000000000000
......................................................................................................................................................................................................................................................
1585124012000000000000000000000000000000000000000...0000000000000000000000000000000000000000
1844927936000000000000000000000000000000000000000...0000000001000000000000000000000000000000
1874628403000000000000000000000000000000000000000...0000000000000000000000000000000000000000
690610443000000000000000000000000000000000000000...0000000000000000100000000000000000000000
1365120656000000000000000000000000000000000000000...0000000000000000000000000000010000000000
\n", "

500 rows × 444 columns

\n", "
" ], "text/plain": [ " Id Province_State_Afghanistan ... Date_2020-03-26 Date_2020-03-27\n", "2152 3241 0 ... 0 0\n", "15094 22847 0 ... 0 0\n", "4248 6425 0 ... 0 0\n", "12675 19204 0 ... 0 0\n", "3696 5601 0 ... 0 0\n", "... ... ... ... ... ...\n", "15851 24012 0 ... 0 0\n", "18449 27936 0 ... 0 0\n", "18746 28403 0 ... 0 0\n", "6906 10443 0 ... 0 0\n", "13651 20656 0 ... 0 0\n", "\n", "[500 rows x 444 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 37 } ] }, { "cell_type": "code", "metadata": { "id": "Vv0a2ZuWocvS", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "outputId": "fdbbce36-bc64-4add-c863-eda6b82407cc" }, "source": [ "import xgboost as xgb\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import cross_val_score, KFold\n", "from sklearn.metrics import mean_squared_error\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "# load_boston was removed from scikit-learn and was never used here, so the\n", "# import is gone; the model is evaluated on the COVID frames built above.\n", "SEED = 42  # fixed so the split, the folds and the scores are reproducible\n", "\n", "x, y = b_train_X_d, b_train_y1\n", "xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.15, random_state=SEED)\n", "\n", "# reg:squarederror is the replacement for the deprecated reg:linear objective.\n", "xgbr = xgb.XGBRegressor(objective='reg:squarederror', random_state=SEED)\n", "print(xgbr)\n", "\n", "xgbr.fit(xtrain, ytrain)\n", "\n", "# - cross validation (no scoring given, so the estimator's own score, R^2, is used)\n", "scores = cross_val_score(xgbr, xtrain, ytrain, cv=5)\n", "print(\"Mean cross-validation score: %.2f\" % scores.mean())\n", "\n", "kfold = KFold(n_splits=10, shuffle=True, random_state=SEED)\n", "kf_cv_scores = cross_val_score(xgbr, xtrain, ytrain, cv=kfold )\n", "print(\"K-fold CV average score: %.2f\" % kf_cv_scores.mean())\n", "\n", "ypred = xgbr.predict(xtest)\n", "mse = mean_squared_error(ytest, ypred)\n", "print(\"MSE: %.2f\" % mse)\n", "print(\"RMSE: %.2f\" % np.sqrt(mse))\n", "\n", "x_ax = range(len(ytest))\n", "plt.scatter(x_ax, ytest, s=5, color=\"blue\", label=\"original\")\n", "plt.plot(x_ax, ypred, lw=0.8, color=\"red\", label=\"predicted\")\n", "plt.title(\"XGBRegressor: held-out ConfirmedCases\")\n", "plt.xlabel(\"test sample\")\n", "plt.ylabel(\"ConfirmedCases\")\n", "plt.legend()\n", 
"plt.show()\n" ], "execution_count": 40, "outputs": [ { "output_type": "stream", "text": [ "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", " colsample_bynode=1, colsample_bytree=1, gamma=0,\n", " importance_type='gain', learning_rate=0.1, max_delta_step=0,\n", " max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n", " n_jobs=1, nthread=None, objective='reg:linear', random_state=0,\n", " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n", " silent=None, subsample=1, verbosity=1)\n", "[22:54:12] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:12] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:13] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:13] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor 
of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:13] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:14] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "Mean cross-validation score: -70.40\n", "[22:54:14] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n", "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:14] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is 
not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:15] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:15] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:16] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:16] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:16] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of 
reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:17] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:17] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n" ], "name": "stdout" }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", " if getattr(data, 'base', None) is not None and \\\n" ], "name": "stderr" }, { "output_type": "stream", "text": [ "[22:54:18] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n", "K-fold CV average score: -107.07\n", "MSE: 36809.57\n", "RMSE: 191.86\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZRU9Zn/8ffD2ogKyBZDo00iAf2B\nSIMIgkTDxH1EPZqEZBIcnTBJzC/JmBOjmTHOzyxjTnLUeJI4g4poTqJJTEzUMYoLHowICIiKuIAG\npRGkRUAW24bu5/fHvdVUV1d113Kr61bV53VOna671L1PLf3UU9/7vd9r7o6IiFSHHqUOQEREuo+S\nvohIFVHSFxGpIkr6IiJVRElfRKSK9Cp1AJ0ZMmSI19XVlToMEZGysmrVqnfdfWi6ZbFO+nV1daxc\nubLUYYiIlBUzezPTMjXviIhUESV9EZEqoqQvIlJFYt2mL9Vp//79NDQ00NTUVOpQyl5NTQ21tbX0\n7t271KFITCjpS+w0NDRw2GGHUVdXh5mVOpyy5e5s376dhoYGRo0aVepwJCbUvCOx09TUxODBg5Xw\nC2RmDB48WL+YpB0lfYklJfxo6HWUVEr6cdbcDB9+WOooRKSCKOnH2T33wG23lToKKdCTTz7Jueee\nC8D999/P9ddfn3HdnTt38qtf/Srnffznf/4nP/vZz/KOUaqHkn6c7dsX3CSWWlpacn7Meeedx1VX\nXZVxeb5JXyRbSvpxtn+/mndKZOPGjYwdO5YvfOELHHvssVx00UXs27ePuro6vvvd71JfX88f/vAH\nFi1axLRp06ivr+fiiy9mz549ADz88MOMHTuW+vp6/vSnP7Vtd+HChXz9618H4J133uGCCy5gwoQJ\nTJgwgaVLl3LVVVfx+uuvc8IJJ/Cd73wHgJ/+9KeceOKJHH/88Vx77bVt2/rRj37EJz7xCWbMmMGr\nr77aja+OlDN12Yyz5ubgJiXx6quvcvvttzN9+nQuvfTStgp88ODBrF69mnfffZcLL7yQxx57jP79\n+/OTn/yEG264gSuvvJIvf/nLPPHEExxzzDF89rOfTbv9b3zjG3zyk5/kvvvuo6WlhT179nD99dez\ndu1a1qxZA8CiRYtYv349K1aswN0577zzWLJkCf379+eee+5hzZo1HDhwgPr6eiZNmtRtr42ULyX9\nOFOlHzjzTHj33ei2N2QIPPxwl6uNHDmS6dOnA/BP//RP3HzzzQBtSXzZsmWsW7eubZ3m5mamTZvG\nK6+8wqhRoxg9enTbY+fPn99h+0888QR33XUXAD179mTAgAHs2LGj3TqLFi1i0aJFTJw4EYA9e/aw\nfv16du/ezQUXXMAhhxwCBM1GItlQ0o8z9d4JZJGgiyG1u2Niun///kBw8tOnP/1p7r777nbrJar0\nKLg7V199Nf/6r//abv5NN90U2T6kuqhNP87271fzTgm99dZbPPPMMwD89re/ZcaMGe2WT506laef\nfpoNGzYAsHfvXl577TXGjh3Lxo0bef311wE6fCkkzJo1i1tuuQUIDgrv2rWLww47jN27d7etc8YZ\nZ7BgwYK2YwWbN29m27ZtzJw5kz//+c988MEH7N69mwceeCDaJy8VS0k/zlTpl9SYMWP45S9/ybHH\nHsuOHTv46le/2m750KFDWbhwIXPmzOH4449va9qpqalh/vz5nHPOOdTX1zNs2LC02//5z3/O4sWL\nGT9+PJMmTWLdunUMHjyY6dOnM27cOL7zne9w+umn8/nPf55p06Yxfvx4LrroInbv3k19fT2f/exn\nmTBhAmeddRYnnnhid7wkUgHM3UsdQ0aTJ0/2qr6IyhVXQGMj/PrXpY6kW7388ssce+yxJY1h48aN\nnHvuuaxdu7akcUQhDq+ndC8zW+Xuk9MtU6UfZ6r0RSRiSvpxp
t47JVNXV1cRVb5IKiX9OFM/fRGJ\nmJJ+nKnSF5GIdZn0zWyBmW0zsw6/dc3s22bmZjYknDYzu9nMNpjZC2ZWn7TuXDNbH97mRvs0KpQq\nfRGJWDaV/kLgzNSZZjYSOB14K2n2WcDo8DYPuCVc9wjgWuAkYApwrZkNKiTwqqBKX0Qi1mXSd/cl\nwHtpFt0IXAkk9/mcDdzlgWXAQDM7EjgDeNTd33P3HcCjpPkikRTqvRN7Z599Njt37ux0ne9///s8\n9thjeW0/eVhmkSjkNQyDmc0GNrv78ymnqo8ANiVNN4TzMs1Pt+15BL8SOOqoo/IJr3LojNzYcnfc\nnYceeqjLda+77rpuiEgkOzkfyDWzQ4DvAd+PPhxw9/nuPtndJw8dOrQYuygfzc2Qx5jtEo0bbriB\ncePGMW7cOG666SY2btzImDFj+NKXvsS4cePYtGkTdXV1vBsOBveDH/yAMWPGMGPGDObMmdN2UZNL\nLrmEe++9Fwi6gl577bXU19czfvx4XnnlFQBWrFjBtGnTmDhxIieffLKGSpaiyaf3zseBUcDzZrYR\nqAVWm9lHgM3AyKR1a8N5meZLZ/bvhz59Sh1FWWhthXfegahOMF+1ahV33HEHy5cvZ9myZdx6663s\n2LGD9evX87WvfY2XXnqJo48+um39Z599lj/+8Y88//zz/PWvf6WzM8mHDBnC6tWr+epXv9r2xTB2\n7FieeuopnnvuOa677jq+973vRfNERFLk3Lzj7i8CbYOJhIl/sru/a2b3A183s3sIDtrucvctZvYI\n8OOkg7enA1cXHH2la25W0s9CayucdhosXQonnwyLF0OPAjsj/+1vf+OCCy5oG1Hzwgsv5KmnnuLo\no49m6tSpHdZ/+umnmT17NjU1NdTU1PCP//iPGbd94YUXAjBp0qS2C6zs2rWLuXPnsn79esyM/fv3\nF/YERDLIpsvm3cAzwBgzazCzyzpZ/SHgDWADcCvwNQB3fw/4AfBseLsunCedaW0tPHtVgcbGIOEf\nOBD8bWws3r4SXwKF6Nu3LxCMoX/gwAEArrnmGk477TTWrl3LAw88QFNTU8H7kUDUvwLLXTa9d+a4\n+5Hu3tvda9399pTlde7+bnjf3f1yd/+4u49395VJ6y1w92PC2x3RPxWpVsOGBRV+r17B3wyDWubk\nlFNO4c9//jP79u1j79693HfffZxyyikZ158+fXpbst6zZw8PPvhgTvvbtWsXI0YEfRsWLlxYSOiS\nJPErsLYWTj01mK52uohKOXCHlAt6yEFmQZNOY2OQ8KN4qerr67nkkkuYMmUKAP/yL//CoEGZTy05\n8cQTOe+88zj++OMZPnw448ePZ8CAAVnv78orr2Tu3Ln88Ic/5Jxzzik4fgmk+xU4fHipoyotDa0c\nZ5MmBW36Tz4JYZNANSjXoYD37NnDoYceyr59+5g5cybz58+nvr6+6wcWWbm+nlFwDyr8xPGeJ5+s\njvqps6GVVenHXd++wQHdKkr65WrevHmsW7eOpqYm5s6dG4uEX+2K8Suw3Cnpx12fPsFZuYcdVupI\npAu//e1vSx2CpNGjh5p0kqlrSNwlKv0qE+dmx3Ki11FSKenHXd++VTf+Tk1NDdu3b1fCKpC7s337\ndmpqakodisSImnfiLtG8U0Vqa2tpaGigsZgd7qtETU0NtbW1pQ5DYkRJP65aW4OjTlXYvNO7d29G\njRpV6jBEKpKad+IqMe5OFVb6IlI8Svpx1dwMvXtXZaUvIsWjpB9XiUq/Cg/kikjxKOnHVaLSV/OO\niERIST+ukit9Ne+ISESU9ONKlb6IFIGSflyp0heRIlDSj6vk3juq9EUkIkr6cbV/v5p3RCRy2Vwu\ncYGZbTOztUnzfmpmr5jZC2Z2n5kNTFp2tZltMLNXzeyMpPlnhvM2mNlV0T+VCpO4Pq6ad0QkQtlU\n+guBM1PmPQqMc/fjgdcIL
3JuZscBnwP+T/iYX5lZTzPrCfwSOAs4DpgTriuZqNIXkSLI5hq5S4D3\nUuYtcvcD4eQyIDGi02zgHnf/0N3/TnCB9CnhbYO7v+HuzcA94bqSiSp9ESmCKNr0LwX+Gt4fAWxK\nWtYQzss0vwMzm2dmK81sZVWPspio9HUgV0QiVFDSN7N/Bw4Av4kmHHD3+e4+2d0nDx06NKrNlp9E\npa/mHRGJUN5DK5vZJcC5wCw/eLWLzcDIpNVqw3l0Ml/SSa701bwjIhHJq9I3szOBK4Hz3H1f0qL7\ngc+ZWV8zGwWMBlYAzwKjzWyUmfUhONh7f2GhV7jkNn1V+iISkS4rfTO7GzgVGGJmDcC1BL11+gKP\nWnB5+WXu/hV3f8nMfg+sI2j2udzdW8LtfB14BOgJLHD3l4rwfCpHcu8dVfoiEpEuk767z0kz+/ZO\n1v8R8KM08x8CHsopumqmSl9EikBn5MaV+umLSBEo6ceV+umLSBEo6ceV+umLSBEo6ceV+umLSBEo\n6ceV+umLSBEo6cdVotLv3VtJX0Qio6QfV4lKv0cPaDvhWUSkMEr6cZWo9EVEIqSkH1eJSl9EJEJK\n+nGlSl9EikBJP65U6YtIESjpx5UqfREpAiX9uFKlLyJFoKQfV/v3H6z0zaC1tbTxiEhFUNKPq+bm\ng5W+zsoVkYgo6cdVcqWvQddEJCJK+nGVXOnr6lkiEpEuk76ZLTCzbWa2NmneEWb2qJmtD/8OCueb\nmd1sZhvM7AUzq096zNxw/fVmNrc4T6eCqNIXkSLIptJfCJyZMu8q4HF3Hw08Hk4DnEVwMfTRwDzg\nFgi+JAiurXsSMAW4NvFFIRmkVvpK+iISgS6TvrsvAd5LmT0buDO8fydwftL8uzywDBhoZkcCZwCP\nuvt77r4DeJSOXySSKrjovA7kihTioYdgx45SRxEb+bbpD3f3LeH9rcDw8P4IYFPSeg3hvEzzOzCz\neWa20sxWNjY25hlehVHzjkj+7r4b1q8vdRSxUfCBXHd3ILKxf919vrtPdvfJQ4cOjWqz5U0HckXy\n19QU3ATIP+m/EzbbEP7dFs7fDIxMWq82nJdpvmRDlb5I/j74ILgJkH/Svx9I9MCZC/wlaf6Xwl48\nU4FdYTPQI8DpZjYoPIB7ejhPsqEDuSL5U6XfTq+uVjCzu4FTgSFm1kDQC+d64PdmdhnwJvCZcPWH\ngLOBDcA+4J8B3P09M/sB8Gy43nXunnpwWDLRgVyR/KnSb6fLpO/uczIsmpVmXQcuz7CdBcCCnKKT\ngJp3RPKnSr8dnZFbDnQgVyR/qvTbUdKPo9QLoavSF8mfKv12lPTjqKUFevY8OK0DuSL5++ADJf0k\nSvpxlDzuDuhArkghmprUvJNEST+OksfdATXviBRClX47SvpxlFrp60CuSH7cg4JJlX4bJf04UqUv\nEo0PP4R+/VTpJ1HSj6N0bfpK+iK5++ADGDRIlX4SJf04Sq301bwjkp+mpiDpq9Jvo6QfR6r0RaKh\nSr8DJf04SlfpK+mL5E6VfgdK+nGkfvoi0VCl34GSfhyp945INJqaYMAAFU1JlPTjSP30RaLxwQdB\nl83U8ayqmJJ+HKnSF4lGUxPU1JQ6ilhR0o+jdJW+kr5I7hKVvrRR0o+jdJW+mndEcqdKv4OCkr6Z\n/ZuZvWRma83sbjOrMbNRZrbczDaY2e/MrE+4bt9wekO4vC6KJ1CR1E9fJBqq9DvIO+mb2QjgG8Bk\ndx8H9AQ+B/wEuNHdjwF2AJeFD7kM2BHOvzFcT9JJrfR79Qq+CEQkN4lKv0cPaG0tdTSxUGjzTi+g\nn5n1Ag4BtgCfAu4Nl98JnB/enx1OEy6fZWZW4P4rU2qlr5dJJD+JSr+mRidohfJO+u6+GfgZ8BZB\nst8FrAJ2uvuBcLUGYER4fwSwKXzsgXD9wanbNbN5ZrbSzFY2NjbmG155S630RSQ/iUpfSb9
NIc07\ngwiq91HAR4H+wJmFBuTu8919srtPHjp0aKGbK0+plb6I5CdR6ffrp7NyQ4U07/wD8Hd3b3T3/cCf\ngOnAwLC5B6AW2Bze3wyMBAiXDwC2F7D/yqVKXyQaqvQ7KCTpvwVMNbNDwrb5WcA6YDFwUbjOXOAv\n4f37w2nC5U+46zS5tFTpi0RDlX4HhbTpLyc4ILsaeDHc1nzgu8AVZraBoM3+9vAhtwODw/lXAFcV\nEHdlS1fp62CuSO5U6XfQq+tVMnP3a4FrU2a/AUxJs24TcHEh+6sa6Sp9/SgSyZ0q/Q50Rm4cpav0\ne/aElpbSxCNSrlTpd6CkH0fpKn2dlSuSO1X6HSjpx1G6Sl/DK4vkTpV+B0r6caRKXyQaiaSvSr+N\nkn4cpav0lfRFctfaGoy7o0q/jZJ+HKWr9NW8I5I/VfptlPTjSJW+SLRU6bdR0o8jVfoi0aqpUaUf\nUtKPI1X6ItHq10+VfkhJP47Ue0ekcMlnsavSb6OkH0fqpy9SuObmoFgCVfpJlPTjSJW+SOGSr4+r\nSr+Nkn4cZar0lfRFspc4MQtU6SdR0o+jlpZggLVkffuqeUckF6r001LSj6vU8fPVvCOSG1X6aSnp\nlwsdyBXJjSr9tJT0y4UqfZHcJFf6NTX6/wkVlPTNbKCZ3Wtmr5jZy2Y2zcyOMLNHzWx9+HdQuK6Z\n2c1mtsHMXjCz+mieQpVQ0hfJTXKlb6arz4UKrfR/Djzs7mOBCcDLBNe+fdzdRwOPc/BauGcBo8Pb\nPOCWAvddXdS8I5Kb5Epf2uSd9M1sADCT8MLn7t7s7juB2cCd4Wp3AueH92cDd3lgGTDQzI7MO/Jq\no0pfJDfJlb60KaTSHwU0AneY2XNmdpuZ9QeGu/uWcJ2twPDw/ghgU9LjG8J57ZjZPDNbaWYrGxsb\nCwivwqjSF8mNKv20Ckn6vYB64BZ3nwjs5WBTDgDu7kBODWnuPt/dJ7v75KFDhxYQXoVRpS+SG1X6\naRWS9BuABndfHk7fS/Al8E6i2Sb8uy1cvhkYmfT42nCeJMt0sElJXyQ3qZW+DuYCBSR9d98KbDKz\nMeGsWcA64H5gbjhvLvCX8P79wJfCXjxTgV1JzUCSkG7cHVDzjkiuUit9FU5A0ERTiP8L/MbM+gBv\nAP9M8EXyezO7DHgT+Ey47kPA2cAGYF+4rqRKN+4O6AMrkqvUSj9xycQqb+cvKOm7+xpgcppFs9Ks\n68DlheyvKqjSF4lGaqWvSyYCOiM3flTpi0QjU6Vf5ZT04yZTpa+kL5IbVfppKenHTaZKX807IrlR\npZ+Wkn7cqNIXiYYq/bSU9ONGlb5INFTpp6WkHzeZKv1evYIraolIdlTpp6WkHzeZKn3Q2YQiuVCl\nn5aSftxkqvRFJDepJ2Kp0geU9OOns0o/9bq5IpJZayv07HlwWpU+oKQfP6r0RYpDlT6gpB8/nVX6\nIpI/VfqAkn78dFbp60CuSP5U6QNK+vHTVaWvxC+Sn5oaVfoo6cdPZ5V+795w4ED3xiNSjtIVR/36\nqdJHST9+Oqv0NRSDSHbSFU+q9AEl/fjprNLXUAwi2Ul3sRRV+oCSfvyo0hcpXFNTx4uiq9IHIkj6\nZtbTzJ4zswfD6VFmttzMNpjZ78JLKWJmfcPpDeHyukL3XZH278+c9FXpB558Ep5/vtRRSJyp0s8o\nikr/m8DLSdM/AW5092OAHcBl4fzLgB3h/BvD9SRVc3Pm5h1V+oFFi2Dp0lJHIXGmSj+jgpK+mdUC\n5wC3hdMGfAq4N1zlTuD88P7scJpw+axwfUnWWaWvpB/Yti24iWSiSj+jQiv9m4ArgdZwejCw090T\n/QobgBHh/RHAJoBw+a5w/XbMbJ6ZrTSzlY2NjQWGV4Y6q/TVvBNQ0peuZKr0lfTzT/pmdi6wzd1X\nRRgP7j7f3Se7++ShQ4dGuenyoEq/a0r60pV0lX6PHsE
gbFWuVwGPnQ6cZ2ZnAzXA4cDPgYFm1ius\n5muBzeH6m4GRQIOZ9QIGANsL2H9lUqXftfffh2r8FSjZS1fpC1BApe/uV7t7rbvXAZ8DnnD3LwCL\ngYvC1eYCfwnv3x9OEy5/wl1jCnSgSr9rNTWwe3epo5A4S72AirQpRj/97wJXmNkGgjb728P5twOD\nw/lXAFcVYd/lT713OrdvHxxySKmjkLhLvVSitCmkeaeNuz8JPBnefwOYkmadJuDiKPZX0dRPv3ON\njTBsGDQ0BOMQ9YrkIyyVRpV+RjojN25U6Xdu2zYYOhSGDIF33y11NBJXnVX6Vd6qrKQfN1216Vd7\npb9tW1DpDxumHjySWaZKX/9DSvqx01Xvnagr/S9+EbZujXabxaSkL9nIVOnrrFwl/djp7t47zz8P\nf/97tNssJiV9yUamSl9n5Srpx05399NvaAhu5UJJX7KhSj8jJf246c5Kf+9e2LEDNm2KbpvFlui9\nM2yYTtCSzFTpZ6T+bnHTVe+dKCv9hoaD3R/LRaLS//BDVfqSmSr9jJT046azSr9fv6A6j0pDA0yb\nVl5Jf/t2OOIIJX3pnCr9jNS8EzetrcHAUOmMHBltU0xDA0ydWl5Jv7UVevYM+uor6UsmqvQzUtIv\nJ0cdBW+9Fd32Ghpg9OjyqXyST6rRiWrSGVX6GSnpl5Ooe+80NEBtbfDLoqUluu0Wy65dMGBAqaOQ\ncqBKPyMl/XJz6KHRjTCZSPof+Qi880402yymxEHchH79ggHYRFKp0s9ISb/c1NXBxo3RbGvr1iDh\n19aWR7t+mPRbW4PvKFe3Tckk02B8qvSV9MvOqFHRJf0DB4KDorW15dFXf9s2WocO47TTgpAfWDaM\n1q06mCs5UKWvpF92Ro2KZtiE5MvJlUul39jInn7DWLo0+L568Z1h7NqgSl9yoEpfSb/sRNW8s3lz\nkOyhfJL+tm0c9vFhnHxy8Mv90I8NY2CzKn3JgSp9nZxVdqKq9Bsagn7/EPwtk6Rvw4exeHE4GsPi\nYdhbb5Y6KiknqvTzr/TNbKSZLTazdWb2kpl9M5x/hJk9ambrw7+DwvlmZjeb2QYze8HM6qN6EhWj\npSVoY+/MiBHRJOhEz50Ct9l2ULU7rksRHsjt0QOGDwcbrkHXJEeq9Atq3jkAfNvdjwOmApeb2XEE\n17593N1HA49z8Fq4ZwGjw9s84JYC9p27FSuiHcKgGDobdyehV69o+tQnJ/1DD83rtWltpe2g6qmn\nBtNFlbhqVoLOypV0OhvKpKZGST/fB7r7FndfHd7fDbwMjABmA3eGq90JnB/enw3c5YFlwEAzOzLv\nyHN11VXwxBPdtru8dPZhTTZwIOzcWdi+kpM+gFnOWbuxkbaDqkuXdkPvyV274PDDD05reGVJp7Pr\n46p5J5oDuWZWB0wElgPD3X1LuGgrMDy8PwJI7hfYEM5L3dY8M1tpZisbo8oi7rB6dXCLs2wqfYim\nXX/TpvZJP48EOmwYbQdVTz65/XlTbfbuhXnzCos1mdnB+4MH6zq50lFn18dV807hSd/MDgX+CHzL\n3d9PXubuDuTU2uvu8919srtPHpr8U74Qf/87fPzj8U/62Vb6dXWFJ/2334Yjk35o5dGDxwwWLw4e\n9uST7fNxm1WrYOHCwsfJSZxTkKxnz25oU5Kyo0q/UwUlfTPrTZDwf+Pufwpnv5Notgn/JsrHzcDI\npIfXhvOKb9UqOP/86E5qKpZcKv1Cn8uBA+2/YPLsttl2UDVdwofgWEr//sFlGQuxfTsMGZJ+Wbcc\nRZaykWul//rr8NprxY8rJgrpvWPA7cDL7n5D0qL7gbnh/bnAX5LmfynsxTMV2JXUDFRcq1fDpElB\n+0Ocx5jprkr/ww87frkUq6/+8uVw2WXB30IkrpiVauBAeP/9jvOleuVa6d9yC/ziF8WPKyYKqfSn\nA18EPmVma8Lb2cD
1wKfNbD3wD+E0wEPAG8AG4FbgawXsOzeJpD9pEjz3XLftNmfdVem//Xb79nwo\nXl/99evhC18IKv5CpA62lqCDuZKqs0o/Xe+dp5+GZcuKH1dM5H1ylrv/Dcj0o35WmvUduDzf/eXN\nPRhYbPhwqK8PmnrOPLPbw8hKtpX+kUfClgJ+JKX23IHiVPpbtwZJefx4WLu2sG11lfRHjy5s+1I5\nOqv0U7s8J0as7ds3uF70oEHFj6/EKn8Yhk2bgouPQJD043wwN9tKv0eP4ABmvm3Z3ZX0V6yAk04K\n/tEOPxzeey//banSl2x1VumneuaZoOvZKacEFX8VqPykv3p1kOwBPvax4KBNXGVb6UNwYlK+3RXT\nJf3DD4++bXzFCpgyJbh/4omwcmX+20o9MStBJ2hJqs4q/VRLlsDMmcFtyZLixhUTlZ/0V60K2vIh\nqJAHDQp6gsRRtpU+FDbwWmof/WRR9oRZvvxg0j/ppMIO5qrSl2zlUuk/9RStJ89g2zEn46r0K0Ry\npQ/B/bgezM2l0i/kBK10lT4EXSJz/fXw6qtw990d57e2Bj1uhofn5k2ZUtjBXCV9yVa2lX5TE753\nL6ddNJgRxx7OuhdbaX1/T/HjK7HKT/oNDcGAYglxbtfPtdLPN+lv3gwf/WjH+fm0699wA3zve0G/\n/2SvvQaf+MTB6aOOgjffzP+XRKbmHSV9SZVNpe8Oy5fzwfiT2oYSeXTPNHY9/Ez3xFhClZ303347\nSG7JZw5NmhQ0+cRRrpV+vs07zc1Bb4VUuSb9998Pqvfzz4f//d/2yxIHcRPMgi+qN/McCrmpKf0/\nspK+pOqq0u/dO/hfW7KEfmfMbBtKpPHYmQx8ofLb9Ss76ac27UDQtS+uZ991R6Wf8sXSbmjkXJP+\nb34T9MH/ylfgf/6n/bLk9vyEKVPyb9fPdMpvMQ5AS3nrqtJPnJW7ZAn2yZltQ4n88MkZ2N+e6r44\nS6T6kn7PnsFQwrt2lSamzuRS6ed7UfAtW9qadjoMjTwihxO03OGOO+CSS2DMmCD2N944uDzda3/S\nSfm16zc1pf9lAp2M/1D5uvVaBuWkq0q/piYoFN59F4488uBQIkOHwJ49FT8gW2Un/eSeO8kmTozn\nwdxcKv1Esst1wLGkg7ipQyPv6J9Dpb9sWfCrKTEezpe/DLfeGtxvagpOgOnfv/1jTjwRnn02t3gT\ngaYdwjPUo0c01xgoI91+LYNykk2lv3Rp+twwZUp+n9EyUtlJf+NGOProjvMnTYrnwdxcKn2Aj3wk\n97GEkpJ+6tDIRxyfQ9L/7/8OmnUSEu36zc2wZg2ccELHxwwcGFRS+/fnFnOmnjsJQ4bEtxtukXT7\ntQzKSTaV/iOPBH3zU1VBf3cVfD4AAApRSURBVP3KTfqJ3h5hRdzup3Bce/DkUulDft02k/rodxga\nedDA7M6afe89eOklmDHj4Lw+feDcc+G++zoexE02blzw2Fxk6rmTUIUnaGV1LYNqlU2lv2iRkn7F\nSWpT7vBTeMyxsG5daeNLJ9dKP58TtFL66LcbGjnRZNRVI/Fdd8HcuR3b0xNNPOkO4ibkc5JWV5V+\nFfbgyepaBtUqm0rfLH0rwEc/GnyWUrsgV5DKTvphm12Hn8I7egVv/J6YnYjRHZV+phOzEo44Ihh4\nKhN3+PWv4YtfTB9Pnz7w1FNw3HHpH5/PSVpK+ml1eS2DMlCUg9HZVPqf/GTmFy6uLQERyXuUzdhb\nvRouvhg4+FN46dKkn8ITJ8LXvtZ5s0GRucO+fXDIIeHnb/ly+Pa3s9/AqFHwwx/mNpDZ8uXtT1ZL\nNXJkEMMRR6RfvnMnTJgQtM+n85WvBCdspV7lKmHChKA0zeV5/u1v8OMfZ14+bBjcdlu3HoDr8N5B\n8EWY7lgGBM1qN99cfkdca2vhW9/KnCBvvRVeeSWvTbvDH35/8HSaz3wmoi+wNWu6T
vqnnJJ5+cyZ\n8B//EYwOW0qzZ6dvgiqQeYz7e02ePNlX5jlIV+vK1TSOOIFhH+nRds3vRCcQM4JqttCrOSXvrzXI\nh4MGZffBbW2Ff/u3YMThcePgxhuDyo0pU4JMko2WluCbLJeeK/37B71oMtm8ORgDvzMTJ8KAAemX\nuXfd2+all3I78mgG06Zl/hW0Z09hg7nlKO1798FeuOaa4Ast+eLtELw/n/40XHpp57+y4mjBgqBN\n9NJLgZT/owcfCJL+FVfkten33gvqspZW6NkD/vCHzLVGTvr0CT4vmf4Rt28P/scyfTEcOBCMuFnq\n3HjMMXl/XsxslbtPTrvQ3WN7mzRpkuejpcV95kz3Xr2Cvy0teW2mqPvbujVYH4K/W7cWN0aJTsb3\n7te/dp87t+MDfvQj92uu6c4Qo7Nnj3t9vftrr7X7nJ8/dYu3Hn+8e2Nj3ptubW3/f9PaGmHcVQ5Y\n6RnyaskTe2e3fJN+dyfUfPaXzQe+pSXYVnf+M5Rin+Um43vX2uo+Z4777353cOUVK9xnzHBvbi5J\nrLnI+N4/+6z79Om+dVNz+Dlv9f+1s/29ex4p3j4rTHc/z86SfrcfyDWzM83sVTPbYGZXFWMf3d2d\nLdP+OjtI1VXvi3Qn3xT7DMzuOOGnEs4izfjemcGvfgX/9V9BG/6ePcExjoULc+uVVQKdvveTJ8O5\n5zLsv6/j5JPhmz1+wZ6PjmHgZ04veL+VcDC6K7E7kS7Tt0ExbkBP4HXgY0Af4HnguEzr51vpu3f9\nzZq6vBjTqU0+ucSU+uvh7be73l5X0129Lul+sWSzzWxf10zNYPk8j2z3me/rUlAMS5Z466xZvm/O\npd664I6ixRTl69Dle3/ggPunPuUtv7zFP6w/yVv3fRDZa1/Ic4hiH1G+jum2kc//VaG/DIhL8w4w\nDXgkafpq4OpM6xeS9DuTmnz27y9sOl0bfjZJu7OYDhxoP71lS+fbyybGro49pDZbpMbQ1fPu6nV9\n++30H/5CXutC38t0r0sUMSw86hr/nX3GZ57SWpSYon4dUt/r1OmWFnd/8033kSPdX3wxste+kOcQ\nxevUHZ+nrl7bfPJLV+KU9C8Cbkua/iLwi5R15gErgZVHHXVU7s82C6kJ+cUXC5tO14afmkBTk3bq\nY7qqBrraXjYxZnPsobMKpavn3dX6W7Z0bAvP9b3IdZ/5vC6RxNCz1aG1aDEV43Xoqjpt+2BHuM9C\nnkMUr1N3fJ66em3zyS9dKaukn3wrVqWfmkDTffvnMp3Nz8SuDtzmemA31+eQTQy5vk5dPYd066f+\nbM3neUT5Xub7Whb78xT1a1/oc0r3WSn2PrvjdeqOz1MUzzNXcUr6sWjecS+83S6fNrdC2xoLfQ7F\n2Eeh6+f7PAp5fD7twsWOoRht1VE/p1Lssztep2I8p2I8z1x0lvS79eQsM+sFvAbMAjYDzwKfd/e0\nI3AVcnKWiEi16uzkrG4dhsHdD5jZ14FHCHryLMiU8EVEJHrdPvaOuz8EPNTd+xURkUoeZVNERDpQ\n0hcRqSJK+iIiVURJX0SkisR6PH0zawTeLGATQ4B3IwqnWBRjNBRjNBRjdEoZ59HunvYKUbFO+oUy\ns5WZ+qrGhWKMhmKMhmKMTlzjVPOOiEgVUdIXEakilZ7055c6gCwoxmgoxmgoxujEMs6KbtMXEZH2\nKr3SFxGRJEr6IiJVpCKTfndcfD0fZrbAzLaZ2dqkeUeY2aNmtj78O6iE8Y00s8Vmts7MXjKzb8Yt\nxjCeGjNbYWbPh3H+v3D+KDNbHr7vvzOzPiWOs6eZPWdmD8YxvjCmjWb2opmtMbOV4by4vd8Dzexe\nM3vFzF42s2lxitHMxoSvX+L2vpl9K04xJqu4pG9mPYFfAmcBxwFzzOy40kbVZiFwZsq8q4DH3X00\n8Hg4XSoHgG+7+3HAVODy8LWLU4wAHwKfcvcJw
AnAmWY2FfgJcKO7HwPsAC4rYYwA3wReTpqOW3wJ\np7n7CUl9yuP2fv8ceNjdxwITCF7T2MTo7q+Gr98JwCRgH3BfnGJsJ9PVVcr1Ro5X5ypBfHXA2qTp\nV4Ejw/tHAq+WOsak2P4CfDrmMR4CrAZOIjj7sVe6z0EJ4qol+Ef/FPAgYHGKLynOjcCQlHmxeb+B\nAcDfCTudxDHGlLhOB56Oc4wVV+kDI4BNSdMN4by4Gu7uW8L7W4HhpQwmwczqgInAcmIYY9h0sgbY\nBjwKvA7sdPcD4Sqlft9vAq4EWsPpwcQrvgQHFpnZKjObF86L0/s9CmgE7gibym4zs/7EK8ZknwPu\nDu/HMsZKTPply4OSoOR9aM3sUOCPwLfc/f3kZXGJ0d1bPPg5XQtMAcaWOKQ2ZnYusM3dV5U6lizM\ncPd6gubQy81sZvLCGLzfvYB64BZ3nwjsJaWZJAYxAhAeozkP+EPqsrjECJWZ9DcDI5Oma8N5cfWO\nmR0JEP7dVspgzKw3QcL/jbv/KZwdqxiTuftOYDFBc8nA8DrMUNr3fTpwnpltBO4haOL5OfGJr427\nbw7/biNoh55CvN7vBqDB3ZeH0/cSfAnEKcaEs4DV7v5OOB3HGCsy6T8LjA57SvQh+Ll1f4lj6sz9\nwNzw/lyCdvSSMDMDbgdedvcbkhbFJkYAMxtqZgPD+/0Ijju8TJD8LwpXK1mc7n61u9e6ex3B5+8J\nd/9CXOJLMLP+ZnZY4j5Be/RaYvR+u/tWYJOZjQlnzQLWEaMYk8zhYNMOxDPGyjuQGx40ORt4jaCd\n999LHU9SXHcDW4D9BBXMZQRtvY8D64HHgCNKGN8Mgp+gLwBrwtvZcYoxjPN44LkwzrXA98P5HwNW\nABsIfmL3jcF7firwYBzjC+N5Pry9lPhfieH7fQKwMny//wwMimGM/YHtwICkebGKMXHTMAwiIlWk\nEpt3REQkAyV9EZEqoqQvIlJFlPRFRKqIkr6ISBVR0hcRqSJK+iIiVeT/A97l6ixD+9+aAAAAAElF\nTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "cell_type": "code", "metadata": { "id": "pFnP1qFeog-a", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 228 }, "outputId": "d505e31f-7f73-4e9a-b1d6-c4863d86e653" }, "source": [ "boston.data" ], "execution_count": 29, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n", " 4.9800e+00],\n", " [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n", " 9.1400e+00],\n", " [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n", " 4.0300e+00],\n", " ...,\n", " [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n", " 5.6400e+00],\n", " [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,\n", " 6.4800e+00],\n", " [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n", " 7.8800e+00]])" ] }, "metadata": { "tags": [] }, "execution_count": 29 } ] }, { "cell_type": "code", "metadata": { "id": "uCcfjYYyomuX", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 33 }, "outputId": "69d7f95c-a5a2-420f-a8ea-fdf2d44dfb1c" }, "source": [ "boston.data.shape" ], "execution_count": 31, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(506, 13)" ] }, "metadata": { "tags": [] }, "execution_count": 31 } ] }, { "cell_type": "code", "metadata": { "id": "Apv7AXA5orcv", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 33 }, "outputId": "4b8996ff-a5e4-494e-eb7e-d29e027023ae" }, "source": [ "boston.target.shape" ], "execution_count": 39, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(506,)" ] }, "metadata": { "tags": [] }, "execution_count": 39 } ] }, { "cell_type": "code", "metadata": { "id": "313NadqlppNV", "colab_type": "code", "colab": {} }, "source": [ "" ], "execution_count": 0, "outputs": [] } ] }