{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "IST718_FinalProject_Colab_Keras_Categorical.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RIKbaZtyhAiO",
        "colab_type": "text"
      },
      "source": [
        "# STEP 1: Import ALL the things\n",
        "[link to tutorial](https://www.tensorflow.org/tutorials/structured_data/feature_columns)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "sTc2g5uFgoF3",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "7a3f0888-eb93-4408-f0a0-1a5f7e12221c"
      },
      "source": [
        "%tensorflow_version 2.x\n",
        "from __future__ import absolute_import, division, print_function, unicode_literals\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import tensorflow as tf\n",
        "from tensorflow import feature_column\n",
        "from tensorflow.keras import layers\n",
        "from sklearn.model_selection import train_test_split"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "TensorFlow 2.x selected.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W1tOvP0mgtmp",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 191
        },
        "outputId": "9ea282c4-1d3f-46d8-c46b-c5dafd48b703"
      },
      "source": [
        "url = 'https://storage.googleapis.com/applied-dl/heart.csv'\n",
        "df = pd.read_csv(url)\n",
        "df.head()"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>age</th>\n",
              "      <th>sex</th>\n",
              "      <th>cp</th>\n",
              "      <th>trestbps</th>\n",
              "      <th>chol</th>\n",
              "      <th>fbs</th>\n",
              "      <th>restecg</th>\n",
              "      <th>thalach</th>\n",
              "      <th>exang</th>\n",
              "      <th>oldpeak</th>\n",
              "      <th>slope</th>\n",
              "      <th>ca</th>\n",
              "      <th>thal</th>\n",
              "      <th>target</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>63</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>145</td>\n",
              "      <td>233</td>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>150</td>\n",
              "      <td>0</td>\n",
              "      <td>2.3</td>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>fixed</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>67</td>\n",
              "      <td>1</td>\n",
              "      <td>4</td>\n",
              "      <td>160</td>\n",
              "      <td>286</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>108</td>\n",
              "      <td>1</td>\n",
              "      <td>1.5</td>\n",
              "      <td>2</td>\n",
              "      <td>3</td>\n",
              "      <td>normal</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>67</td>\n",
              "      <td>1</td>\n",
              "      <td>4</td>\n",
              "      <td>120</td>\n",
              "      <td>229</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>129</td>\n",
              "      <td>1</td>\n",
              "      <td>2.6</td>\n",
              "      <td>2</td>\n",
              "      <td>2</td>\n",
              "      <td>reversible</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>37</td>\n",
              "      <td>1</td>\n",
              "      <td>3</td>\n",
              "      <td>130</td>\n",
              "      <td>250</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>187</td>\n",
              "      <td>0</td>\n",
              "      <td>3.5</td>\n",
              "      <td>3</td>\n",
              "      <td>0</td>\n",
              "      <td>normal</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>41</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>130</td>\n",
              "      <td>204</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>172</td>\n",
              "      <td>0</td>\n",
              "      <td>1.4</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>normal</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   age  sex  cp  trestbps  chol  ...  oldpeak  slope  ca        thal  target\n",
              "0   63    1   1       145   233  ...      2.3      3   0       fixed       0\n",
              "1   67    1   4       160   286  ...      1.5      2   3      normal       1\n",
              "2   67    1   4       120   229  ...      2.6      2   2  reversible       0\n",
              "3   37    1   3       130   250  ...      3.5      3   0      normal       0\n",
              "4   41    0   2       130   204  ...      1.4      1   0      normal       0\n",
              "\n",
              "[5 rows x 14 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QU0C9tHNhWV-",
        "colab_type": "text"
      },
      "source": [
        "# STEP 2: Split into test and train (and validation)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cEZRTp4ng2c_",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "train, test = train_test_split(df, test_size=0.2)\n",
        "train, val = train_test_split(train, test_size=0.2)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ns7Ib2x0hdkR",
        "colab_type": "text"
      },
      "source": [
        "# STEP 3: Create an INPUT PIPELINE using `tf.data`"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hDAYPP5ChcCZ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# A utility method to create a tf.data dataset from a Pandas Dataframe\n",
        "def df_to_dataset(dataframe, shuffle=True, batch_size=32):\n",
        "  dataframe = dataframe.copy()\n",
        "  labels = dataframe.pop('target')\n",
        "  # WTF IS HAPPENING HERE\n",
        "  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))\n",
        "  if shuffle:\n",
        "    ds = ds.shuffle(buffer_size=len(dataframe))\n",
        "  ds = ds.batch(batch_size)\n",
        "  return ds\n",
        "\n",
        "batch_size = 5 # A small batch sized is used for demonstration purposes\n",
        "train_ds = df_to_dataset(train, batch_size=batch_size)\n",
        "val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)\n",
        "test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4EIcI7IEhU7T",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 70
        },
        "outputId": "51c5d695-bb45-4895-9f24-223b5cd9cc80"
      },
      "source": [
        "for feature_batch, label_batch in train_ds.take(1):\n",
        "  print('Every feature:', list(feature_batch.keys()))\n",
        "  print('A batch of ages:', feature_batch['age'])\n",
        "  print('A batch of targets:', label_batch )"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Every feature: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']\n",
            "A batch of ages: tf.Tensor([65 68 51 52 54], shape=(5,), dtype=int32)\n",
            "A batch of targets: tf.Tensor([0 0 1 0 1], shape=(5,), dtype=int32)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "YFtV-V34lGXP",
        "colab_type": "text"
      },
      "source": [
        "# STEP 4: Create and Pick Feature Columns"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "FTFcxZ7yhoL6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# We will use this batch to demonstrate several types of feature columns\n",
        "# A utility method to create a feature column\n",
        "# and to transform a batch of data\n",
        "\n",
        "example_batch = next(iter(train_ds))[0]\n",
        "\n",
        "def demo(feature_column):\n",
        "  feature_layer = layers.DenseFeatures(feature_column)\n",
        "  print(feature_layer(example_batch).numpy())"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "bOqD2u_nlehC",
        "colab_type": "text"
      },
      "source": [
        "### NUMERIC COLUMNS"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "u19XL8Q0iP-6",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 230
        },
        "outputId": "2ced923a-8186-4a78-fbf9-068d21e04f6c"
      },
      "source": [
        "age = feature_column.numeric_column(\"age\")\n",
        "demo(age)"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:Layer dense_features is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "[[66.]\n",
            " [67.]\n",
            " [53.]\n",
            " [48.]\n",
            " [48.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "jc5r-WKPlkBD",
        "colab_type": "text"
      },
      "source": [
        "### BUCKETIZED COLUMNS"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6_EuEhBFiiW2",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 230
        },
        "outputId": "050baa9f-b7f9-4c85-831a-5a5648001137"
      },
      "source": [
        "age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])\n",
        "demo(age_buckets)"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:Layer dense_features_1 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n",
            " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n",
            " [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]\n",
            " [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n",
            " [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "u5RhLs20lnUl",
        "colab_type": "text"
      },
      "source": [
        "### CATEGORICAL COLUMNS"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZEoaS44Zji9s",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 335
        },
        "outputId": "cb5adda8-6f4f-4a63-85ad-25e74bf20307"
      },
      "source": [
        "thal = feature_column.categorical_column_with_vocabulary_list(\n",
        "      'thal', ['fixed', 'normal', 'reversible'])\n",
        "\n",
        "thal_one_hot = feature_column.indicator_column(thal)\n",
        "demo(thal_one_hot)"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:Layer dense_features_2 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/feature_column/feature_column_v2.py:4267: IndicatorColumn._variable_shape (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.\n",
            "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/feature_column/feature_column_v2.py:4322: VocabularyListCategoricalColumn._num_buckets (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.\n",
            "[[1. 0. 0.]\n",
            " [0. 1. 0.]\n",
            " [0. 1. 0.]\n",
            " [0. 0. 1.]\n",
            " [0. 0. 1.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lo2NnWW8l9UW",
        "colab_type": "text"
      },
      "source": [
        "### EMBEDDING COLUMNS (best for columns with many values)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9yfyh65ImGQy",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 317
        },
        "outputId": "66c21701-4c43-43f0-b438-dd9865e4cfea"
      },
      "source": [
        "thal_embedding = feature_column.embedding_column(thal, dimension=8)\n",
        "demo(thal_embedding)"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:Layer dense_features_3 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "[[ 0.02936197  0.31758815 -0.3563057   0.31574684 -0.421228    0.10793252\n",
            "  -0.13682729  0.2326817 ]\n",
            " [ 0.3237073  -0.4747037   0.04366031  0.0928458   0.27537555 -0.5144811\n",
            "   0.07425166  0.20759253]\n",
            " [ 0.3237073  -0.4747037   0.04366031  0.0928458   0.27537555 -0.5144811\n",
            "   0.07425166  0.20759253]\n",
            " [ 0.00689469  0.01643167  0.03457681  0.10944679  0.09321592  0.14874297\n",
            "  -0.09204395 -0.69555455]\n",
            " [ 0.00689469  0.01643167  0.03457681  0.10944679  0.09321592  0.14874297\n",
            "  -0.09204395 -0.69555455]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "q6BkjyWYmZsV",
        "colab_type": "text"
      },
      "source": [
        "### HASHED FEATURE COLUMNS"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_n4ESZPhmcKi",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 282
        },
        "outputId": "d463cb3e-ff28-44bb-9ea2-4afd8ab46871"
      },
      "source": [
        "thal_hashed = feature_column.categorical_column_with_hash_bucket(\n",
        "      'thal', hash_bucket_size=1000)\n",
        "demo(feature_column.indicator_column(thal_hashed))"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:Layer dense_features_4 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/feature_column/feature_column_v2.py:4322: HashedCategoricalColumn._num_buckets (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.\n",
            "[[0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hmERLs6Xmd93",
        "colab_type": "text"
      },
      "source": [
        "### CROSSED FEATURE COLUMNS"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jh27uUy0mgN_",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 282
        },
        "outputId": "74bf1d46-907c-4b05-d675-5a49560c3973"
      },
      "source": [
        "crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)\n",
        "demo(feature_column.indicator_column(crossed_feature))"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:Layer dense_features_5 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
            "\n",
            "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
            "\n",
            "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
            "\n",
            "WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/feature_column/feature_column_v2.py:4322: CrossedColumn._num_buckets (from tensorflow.python.feature_column.feature_column_v2) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.\n",
            "[[0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]\n",
            " [0. 0. 0. ... 0. 0. 0.]]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4bJTAmxDjuAu",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "feature_columns = []\n",
        "\n",
        "# numeric cols\n",
        "for header in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'slope', 'ca']:\n",
        "  feature_columns.append(feature_column.numeric_column(header))\n",
        "\n",
        "# bucketized cols\n",
        "age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])\n",
        "feature_columns.append(age_buckets)\n",
        "\n",
        "# indicator cols\n",
        "thal = feature_column.categorical_column_with_vocabulary_list(\n",
        "      'thal', ['fixed', 'normal', 'reversible'])\n",
        "thal_one_hot = feature_column.indicator_column(thal)\n",
        "feature_columns.append(thal_one_hot)\n",
        "\n",
        "# embedding cols\n",
        "thal_embedding = feature_column.embedding_column(thal, dimension=8)\n",
        "feature_columns.append(thal_embedding)\n",
        "\n",
        "# crossed cols\n",
        "crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)\n",
        "crossed_feature = feature_column.indicator_column(crossed_feature)\n",
        "feature_columns.append(crossed_feature)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fEyM-FfCmvrf",
        "colab_type": "text"
      },
      "source": [
        "# STEP 5: Create a feature LAYER\n",
        "\n",
        "After we've defined our feature columns, we will use a `DenseFeatures` layer to input them into our Keras model"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2QUKJhYTk8OB",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "feature_layer = tf.keras.layers.DenseFeatures(feature_columns)\n",
        "batch_size = 32\n",
        "train_ds = df_to_dataset(train, batch_size=batch_size)\n",
        "val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)\n",
        "test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2uvzMxPhnBB2",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 227
        },
        "outputId": "46c5802b-1a3d-4722-b730-25948d052654"
      },
      "source": [
        "model = tf.keras.Sequential([\n",
        "  feature_layer,\n",
        "  layers.Dense(128, activation='relu'),\n",
        "  layers.Dense(128, activation='relu'),\n",
        "  layers.Dense(1)\n",
        "])\n",
        "\n",
        "model.compile(optimizer='adam',\n",
        "              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
        "              metrics=['accuracy'])\n",
        "\n",
        "model.fit(train_ds,\n",
        "          validation_data=val_ds,\n",
        "          epochs=5)"
      ],
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Train for 7 steps, validate for 2 steps\n",
            "Epoch 1/5\n",
            "7/7 [==============================] - 1s 151ms/step - loss: 1.2195 - accuracy: 0.6062 - val_loss: 0.6267 - val_accuracy: 0.7347\n",
            "Epoch 2/5\n",
            "7/7 [==============================] - 0s 9ms/step - loss: 0.5732 - accuracy: 0.6788 - val_loss: 0.5630 - val_accuracy: 0.7143\n",
            "Epoch 3/5\n",
            "7/7 [==============================] - 0s 8ms/step - loss: 0.6204 - accuracy: 0.7150 - val_loss: 0.5220 - val_accuracy: 0.7551\n",
            "Epoch 4/5\n",
            "7/7 [==============================] - 0s 10ms/step - loss: 0.5884 - accuracy: 0.6995 - val_loss: 0.5647 - val_accuracy: 0.7143\n",
            "Epoch 5/5\n",
            "7/7 [==============================] - 0s 10ms/step - loss: 0.5835 - accuracy: 0.6943 - val_loss: 0.5326 - val_accuracy: 0.7347\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<tensorflow.python.keras.callbacks.History at 0x7f412c8eb518>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 23
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lTv6yijpnD75",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 52
        },
        "outputId": "7604b025-e393-4db5-d86c-fc3fea370497"
      },
      "source": [
        "loss, accuracy = model.evaluate(test_ds)\n",
        "print(\"Accuracy\", accuracy)"
      ],
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "2/2 [==============================] - 0s 4ms/step - loss: 0.5216 - accuracy: 0.8197\n",
            "Accuracy 0.8196721\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "XPiqjlZAnGQi",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}