{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sentiment Analysis, V1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nltk\n",
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from nltk.tokenize import word_tokenize\n",
    "from nltk.probability import FreqDist\n",
    "import matplotlib.pyplot as plt\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import PorterStemmer\n",
    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "## PRE-PROCESSING\n",
    "### TOKENIZATION\n",
    "#### Intro to tokenization\n",
    "Tokenization is simply breaking text down into \"tokens\", which in this case are words!\n",
    "Here is a sample sentence. It can have punctuation, CAPS!, etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Kendra', 'loves', 'cats', '.', 'In', 'fact', ',', 'she', 'has', 'TEN', 'cats', '.', 'If', 'she', 'did', \"n't\", 'have', 'a', 'house', ',', 'a', 'husband', 'and', 'a', 'graduate', 'degree', 'in', 'data', 'science', ',', 'she', \"'d\", 'be', 'a', 'cat', 'lady', '!']\n"
     ]
    }
   ],
   "source": [
    "# A sample sentence mixing punctuation, capitals and contractions.\n",
    "example_text = (\n",
    "    \"Kendra loves cats. In fact, she has TEN cats. \"\n",
    "    \"If she didn't have a house, a husband and a graduate degree in data science, \"\n",
    "    \"she'd be a cat lady!\"\n",
    ")\n",
    "# Split the sentence into word and punctuation tokens, then show them.\n",
    "tokenized_example = word_tokenize(example_text)\n",
    "print(tokenized_example)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FREQUENCY DISTRIBUTIONS\n",
    "After turning the words into tokens, we can start to treat them as data.\n",
    "For example, we can use a FREQUENCY DISTRIBUTION to count the words."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_items([('Kendra', 1), ('loves', 1), ('cats', 2), ('.', 2), ('In', 1), ('fact', 1), (',', 3), ('she', 3), ('has', 1), ('TEN', 1), ('If', 1), ('did', 1), (\"n't\", 1), ('have', 1), ('a', 4), ('house', 1), ('husband', 1), ('and', 1), ('graduate', 1), ('degree', 1), ('in', 1), ('data', 1), ('science', 1), (\"'d\", 1), ('be', 1), ('cat', 1), ('lady', 1), ('!', 1)])\n"
     ]
    }
   ],
   "source": [
    "# Build a frequency distribution: a mapping from each token to the number of times it occurs.\n",
    "fdist = FreqDist(tokenized_example)\n",
    "# items() exposes the (token, count) pairs, e.g. ('cats', 2).\n",
    "print(fdist.items())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('a', 4)]\n"
     ]
    }
   ],
   "source": [
    "# We can use the same Frequency Distribution to find the most common word.\n",
    "# most_common(1) returns a one-element list holding the top (token, count) pair.\n",
    "print(fdist.most_common(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.02702702702702703\n"
     ]
    }
   ],
   "source": [
    "# We can use the Frequency Distribution to find the frequency of specific words.\n",
    "# freq() is the token's count divided by the total number of tokens (here 1 / 37).\n",
    "# The match is exact: \"cat\" and \"cats\" are counted as different tokens.\n",
    "print(fdist.freq(\"cat\")) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEqCAYAAAAYtVcKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZhcZZn38e/dnbUTkkBCkhYJYUmiDCLSrYA6CqgjKuIMA264MSqOuCvK66gouDs6ysgIiswobgMqjiSCCCIgsnYQWYSQCEjArCxJJ52t0/f7x/NUd6VyTnVVdZ2uOl2/z3XVlao6T526q1Jddz27uTsiItK62hodgIiINJYSgYhIi1MiEBFpcUoEIiItTolARKTFKRGIiLS4cY0OoFqzZs3y+fPn1/TYLVu2MHny5EzKZ1W2WeJQzK0Vh2LOfxylli5dut7d90486O65unR1dXmtenp6MiufVdlmiUMxt1Ycijn/cZQCejzle1VNQyIiLU6JQESkxSkRiIi0OCUCEZEWl3kiMLN2M/ujmS1JODbRzC4xsxVmdquZzc86HhER2dVo1Ag+ANyXcuztwJPufhDwdeDLoxCPiIgUyTQRmNnTgVcB300p8hrg+/H6z4CXmJllEcuajVtZumobqzdszeL0IiK5lXWN4BvAx4CBlOP7ACsB3L0f2ADMzCKQL115P1+48Umuf2BtFqcXEckt84w2pjGz44FXuvvpZnY0cIa7H19S5h7gOHd/NN7+C3CEu68vKXcacBpAZ2dn1+LFi6uO57L7N/Gjuzdx/IIOTj1sWkWP6evro6Ojo6FlmyUOxdxacSjm/MdRqru7e6m7dyceTJtpNtIL8EXgUeBhYDXQB/ywpMxVwFHx+jhgPTE5pV1qnVl8zZ9X+35nLvFTLryl4seM9ZmHillx1KNss8SRx5izPncxGjGz2N0/7u5Pd/f5wOuBa939TSXFLgfeGq+fFMtkUkVZOGcPAJat6c3i9CIiuTXq8wjM7BwzOyHevAiYaWYrgA8D/y+r591nxmQmjTPW9W7jyc3bs3oaEZHcGZXVR939OuC6eP2sovu3AiePRgxtbca+08ax/IkdPLCmlyMOyKRPWkQkd1pqZvG+00Lee0DNQyIig1oqEcybHhKB+glERIa0ZCJ4YPWmBkciItI8WisRTBuqEWQ0OElEJHdaKhHMmNTGjI7xbNiyg7W92xodjohIU2ipRGBmLJwd5hOow1hEJGipRACwcO5UAJatViIQEYEWTASL5qhGICJSrOUSwdBSExo5JCICLZwIlq/pZWBAI4dERFouEew5ZQJ77zGRvu07eeypLY0OR0Sk4VouEYD6CUREirVkItCS1CIiQ1oyESyKQ0gf0BBSEZHWTAQLNHJIRGRQayaC2aFG8Jd1m+jfOdDgaEREGqslE8Eek8azz4zJbO8f4K9P9DU6HBGRhmrJRACwaG4cOaR+AhFpcS2bCDRySEQkaOFEEEcOKRGISItr4URQmFSmkUMi0tpaNhEcNHsqbQYPrd/Mtv6djQ5HRKRhWjYRTBrfzvyZU9g54Dy4bnOjwxERaZiWTQRQ3DykfgIRaV2ZJQIzm2Rmt5nZn8zsXjM7O6HM28xsnZndGS/vyCqeJIUOY+1WJiKtbFyG594GHOvum8xsPHCjmV3p7reUlLvE3d+bYRypFs5Vh7GISGaJwN0dKHzDjo+XptoJRstRi4hk3EdgZu1mdiewFrja3W9NKPbPZnaXmf3MzPbNMp5S82dNYXy78cgTffRt7x/NpxYRaRoWfrhn/CRmM4BfAO9z93uK7p8JbHL3bWb2LuB17n5swuNPA04D6Ozs7Fq8eHFNcfT19dHR0bHLfR/6zXoe2dDPl18yk4P2Gj9s+WrOXY+yzRKHYm6tOBRz/uMo1d3dvdTduxMPuvuoXICzgDPKHG8HNgx3nq6uLq9VT0/Pbve998d3+H5n
LvFLbn+kovLVnLseZZslDsXcWnEo5vzHUQro8ZTv1SxHDe0dawKY2WTgZcD9JWU6i26eANyXVTxpFsWRQ8vVTyAiLSrLUUOdwPfNrJ3QF3Gpuy8xs3MImely4P1mdgLQDzwBvC3DeBIt1CY1ItLishw1dBfwnIT7zyq6/nHg41nFUAktRy0ira6lZxYD7LtnB5PGt7F641Y29O1odDgiIqOu5RNBW5uxYHasFaxVrUBEWk/LJwLQmkMi0tqUCIBFc+MmNeonEJEWpESAtq0UkdamRMDQyKFlq3sLk9tERFqGEgEwd9ok9pg4jif7drB+0/ZGhyMiMqqUCAAzG1ySWjOMRaTVKBFE6icQkValRBAV1hzSEFIRaTVKBNHCog5jEZFWokQQDU0q26SRQyLSUpQIollTJzJzygQ2bevnbxu2NjocEZFRo0RQREtNiEgrUiIooiWpRaQVKREU0RBSEWlFSgRFFmoIqYi0ICWCIgvmFGYXb2LngEYOiUhrUCIoMn3yeDqnT2Jb/wArn+hrdDgiIqNCiaCE+glEpNUoEZQY7CfQyCERaRFKBCVUIxCRVqNEUGJwLoESgYi0CCWCEgfNnooZPLhuMzs0ckhEWkBmicDMJpnZbWb2JzO718zOTigz0cwuMbMVZnarmc3PKp5KdUwYx7y9OugfcFb19jc6HBGRzGVZI9gGHOvuzwYOA44zsyNLyrwdeNLdDwK+Dnw5w3gqtmB2aB5auVGJQETGvnFZndjDWs6b4s3x8VLa1vIa4DPx+s+A88zMvMHrQC+aO5Vr7lvDTSu3ss/SRyt6zPjefroyjktEJAuZJQIAM2sHlgIHAf/l7reWFNkHWAng7v1mtgGYCazPMq7hPGPuNABueWwbt/z0TxU9ZsbENo5/kdPWZlmGJiJSdzYaP77NbAbwC+B97n5P0f33AMe5+6Px9l+AI9x9fcnjTwNOA+js7OxavHhxTXH09fXR0dExbLntO52f3NPL45t3MG5c+7Dlb3tsG1v6nfNfOYvZU4bPrZXGUUv5ZijbLHHkMeZmiUMx5z+OUt3d3UvdvTvxoLuPygU4Czij5L6rgKPi9XGEmoCVO09XV5fXqqenJ5Pyb7zwZt/vzCV+zZ9XNzSOZinbLHHkMeZmiUMx5z+OUkCPp3yvZjlqaO9YE8DMJgMvA+4vKXY58NZ4/STg2hhwrmgSmojkWZZ9BJ3A92M/QRtwqbsvMbNzCJnpcuAi4AdmtgJ4Anh9hvFkZlHRqqUiInmT5aihu4DnJNx/VtH1rcDJWcUwWhbG2cjLtD6RiOSQZhbXwYLZYaG6Fes20b9zoMHRiIhUR4mgDvaYNJ5ZHW1s7x/gr9rHQERyRomgTuZNGw/AcnUYi0jOKBHUybzpobtl2Wp1GItIvigR1Mm+MRFo+WoRyRslgjqZNy3WCJQIRCRnlAjqZJ9p4zCDh9ZvZlv/zkaHIyJSMSWCOpnYbsyfOYWdA85D6zc3OhwRkYopEdRRYeN7TSwTkTxRIqijwlIT6jAWkTxRIqijBYXF5zSEVERyRImgjhbNVY1ARPKn6kRgZnua2aFZBJN382dOYXy7sfLJPvq2a79jEcmHihKBmV1nZtPMbC/gDuBCM/uPbEPLnwnj2jhg1lTcYcVaNQ+JSD5UWiOY7u4bgROBi939COCl2YWVX1qSWkTyptJEMM7MOoHXAksyjCf3FsYlqdVPICJ5UWkiOJuwv/AKd7/dzA4AlmcXVn4tHOwwVtOQiORDpTuUrXL3wQ5id39QfQTJNJdARPKm0hrBNyu8r+Xtu1cHk8a3sWrDVjZs2dHocEREhlW2RmBmRwHPB/Y2sw8XHZoGtGcZWF61txkLZu/B3Y9tYPmaXrrn79XokEREyhquRjABmEpIGHsUXTYCJ2UbWn4tKKw5pOYhEcmBsjUCd78euN7Mvufufx2lmHKv0E+wXB3GIpIDlXYWTzSz7wDzix/j7sdmEVTeaS6BiORJpYngp8AFwHcB
7boyDI0cEpE8qTQR9Lv7+dWc2Mz2BS4G5gAOfMfdzy0pczTwS+CheNdl7n5ONc/TjDqnT2KPieN4fPN21m/axqypExsdkohIqkqHjy42s9PNrNPM9ipchnlMP/ARdz8YOBJ4j5kdnFDu9+5+WLzkPgkAmNlgh/EDah4SkSZXaSJ4K/BR4CZgabz0lHuAu69y9zvi9V7gPmCf2kPNFy1JLSJ5UVHTkLvvP5InMbP5wHOAWxMOH2VmfwL+Bpzh7veO5LmaxcLCJjUaOSQiTc7cffhCZm9Jut/dL67gsVOB64HPu/tlJcemAQPuvsnMXgmc6+4LEs5xGnAaQGdnZ9fixYuHjTlJX18fHR0dmZQvLXv32m185vonWTRzPF84dmbD4mhE2WaJI48xN0scijn/cZTq7u5e6u7diQfdfdgLYTmJwuVC4EHgZxU8bjxhsboPV/g8DwOzypXp6uryWvX09GRWvrTs2o1bfb8zl/ghZ/3aBwYGGhZHI8o2Sxx5jLlZ4lDM+Y+jFNDjKd+rlTYNva/4tpnNAP633GPMzICLgPvcPXGBOjObC6xxdzez5xH6LB6vJKZmN2vqBPaaMoEnNm9n1YatPG3G5EaHJCKSqNLho6U2A8P1G7wAeDNwt5ndGe/7N2AegLtfQFim4t1m1g9sAV4fM1fumRkL50zllgef4IE1vUoEItK0KkoEZraYMBcAwmJzzwQuLfcYd78RsGHKnAecV0kMebRozh6DieDoRbMbHY6ISKJKawRfLbreD/zV3R/NIJ4xZWipCY0cEpHmVdE8Ag+Lz91PWHl0T2B7lkGNFQu11ISI5EBFicDMXgvcBpxM2Lf4VjPTMtTDWDg7rkK6tpeBgTHR9SEiY1ClTUOfAJ7r7msBzGxv4BrgZ1kFNhZM7xjP3GmTWL1xKyuf7GO/mVMaHZKIyG4qXWKirZAEosereGxL05LUItLsKv0y/7WZXWVmbzOztwG/Aq7ILqyxY+HsuPic+glEpEkNt2fxQcAcd/+omZ0IvDAeuhn4UdbBjQWDNQKtOSQiTWq4PoJvAB8H8LBO0GUAZvaseOzVmUY3BgxtW6kagYg0p+Gahua4+92ld8b75mcS0RhT2JfgL+s2sWPnQIOjERHZ3XCJYEaZY1ozoQIdE8Yxb68Odux0Hl6/udHhiIjsZrhE0GNm7yy908zeQdicRiqwMNYKlql5SESa0HB9BB8EfmFmpzD0xd8NTAD+KcvAxpKFc/bgmvvWhm0rD210NCIiuyqbCNx9DfB8MzsGOCTe/St3vzbzyMaQoW0rNXJIRJpPpfsR/A74XcaxjFlac0hEmplmB4+CA/aeQnub8fDjm9m6Y2ejwxER2YUSwSiYOK6d+TM7GHBYsVbNQyLSXJQIRslQP4Gah0SkuSgRjJKhfgLVCESkuSgRjJJF6jAWkSalRDBKtBy1iDQrJYJRst9eHUxob+Oxp7awZYfWHBKR5qFEMErGtbdxYNybYOXG/gZHIyIyRIlgFC2Kaw49okQgIk1EiWAUFfoJVm5QIhCR5pFZIjCzfc3sd2b2ZzO718w+kFDGzOw/zWyFmd1lZodnFU8zWDg7JIJHlAhEpIlUtNZQjfqBj7j7HWa2B7DUzK529z8XlXkFsCBejgDOj/+OSYVJZWoaEpFmklkicPdVwKp4vdfM7gP2AYoTwWuAi93dgVvMbIaZdcbHjjn7zJhMx4R2ntq6k3OvWc64dhv2MR0T2jmwXaOMRCQ7WdYIBpnZfOA5wK0lh/YBVhbdfjTeNyYTQVub8czOaSz965N8/ZoHKn7c6w6eyouOzDAwEWlpFn6MZ/gEZlOB64HPu/tlJceWAF9y9xvj7d8CZ7p7T0m504DTADo7O7sWL15cUyx9fX10dHRkUr7Ssn/dsIPr/tJL+/jxw5Zdvamfmx/dxuFzxvGJF82qaxxZlm2WOPIYc7PEoZjzH0ep7u7upe7enXjQ3TO7AOOBq4APpxz/NvCGotvLgM5y5+zq
6vJa9fT0ZFY+i7L3r9ro+525xJ93zpUNjaPass0SRx5jbpY4FHP+4ygF9HjK92qWo4YMuAi4z93/I6XY5cBb4uihI4ENPkb7B2qx/6wpjGsz1m7eSd92dTCLSDay7CN4AfBm4G4zuzPe92/APAB3vwC4AnglsALoA07NMJ7cmTCujQP2nsIDazaxYu0mDn36jEaHJCJjUJajhm4Eyg6LidWV92QVw1iwYM4ePLBmE8tW9yoRiEgmNLO4yWn5ahHJmhJBk9OGNiKSNSWCJqctLkUka0oETW7eXh1MaINVG7ayYcuORocjImOQEkGTa28z9pkW+vSXq1YgIhlQIsiBedNDIlimRCAiGVAiyIF5gzUCdRiLSP0pEeTAvtPDukTa+F5EsqBEkAOFpiGNHBKRLCgR5MCsyW1MnTiOxzdvZ/2mbY0OR0TGGCWCHDAzFsSN7x9Q85CI1JkSQU5oqQkRyYoSQU4UlppYppFDIlJnSgQ5oaUmRCQrSgQ5Mbj43Orewm5uIiJ1oUSQE7OmTmDPjvH0butn9catjQ5HRMYQJYKcMLOhfgKNHBKROlIiyBH1E4hIFpQIcmSoRqCRQyJSP0oEOaIagYhkQYkgRxbODolg+dpeBgY0ckhE6kOJIEemd4xnzrSJbN0xwMon+xodjoiMEUoEOaORQyJSb0oEOaM1h0Sk3jJLBGb232a21szuSTl+tJltMLM74+WsrGIZSxbO1ZpDIlJf4zI89/eA84CLy5T5vbsfn2EMY06haUgb2YtIvWRWI3D3G4Ansjp/q1owO+xL8Jd1m9ixc6DB0YjIWGBZLmBmZvOBJe5+SMKxo4GfA48CfwPOcPd7U85zGnAaQGdnZ9fixYtriqevr4+Ojo5MymdVNqn8u69Yx9rNO/nGy2ex77RxZcs2S8zNXlZx1F62WeLIY8xZn7tYd3f3UnfvTjzo7pldgPnAPSnHpgFT4/VXAssrOWdXV5fXqqenJ7PyWZVNKv/2793m+525xBf/6bFRiyOP710eY26WOBRz/uMoBfR4yvdqw0YNuftGd98Ur18BjDezWY2KJ0+Kl6QWERmphiUCM5trZhavPy/G8nij4smTwUSgkUMiUgeZjRoys58ARwOzzOxR4NPAeAB3vwA4CXi3mfUDW4DXx+qLDGOh5hKISB1llgjc/Q3DHD+PMLxUqnTA3lNobzMefnwzW3fsZNL49kaHJCI5ppnFOTRpfDvzZ3Yw4LBirZqHRGRklAhyqrAk9fK1ah4SkZFRIsipBbO1SY2I1IcSQU5pkxoRqRclgpzSctQiUi9KBDk1f2YHE9rbeOypLWza1t/ocEQkx5QIcmpcexsHxgXotBKpiIyEEkGOLZoTEoH6CURkJJQIcmzBHI0cEpGRUyLIMW1bKSL1oESQY4sGt61UIhCR2ikR5Ng+MybTMaGddb3beHLz9kaHIyI5pUSQY21tNthPoOYhEamVEkHOLZytkUMiMjJKBDmnfgIRGSklgpwb2rZSQ0hFpDZKBDk3uPjc2l60wZuI1EKJIOdm7zGR6ZPH81TfDtb1bmt0OCKSQ0oEOWdmLIxLTaifQERqoUQwBmhJahEZCSWCMWBw28o16jAWkeopEYwBgzUCNQ2JSA2UCMaAQiJYvqaXAY0cEpEqKRGMAXtNmcCsqRPZvH0n6/t2NjocEcmZzBKBmf23ma01s3tSjpuZ/aeZrTCzu8zs8KxiaQWL5oaRQ49s0LaVIlKdLGsE3wOOK3P8FcCCeDkNOD/DWMa8QvPQyo1KBCJSnXFZndjdbzCz+WWKvAa42MN02FvMbIaZdbr7qqxiGssKm9Rc/9etfOr/Eithu1m3biP/t7L+ZbM891iPuVniUMzNGQd9m+nqqvjUFbMslyWIiWCJux+ScGwJ8CV3vzHe/i1wprv3JJQ9jVBroLOzs2vx4sU1xdPX10dHR0cm5bMqW2n5h57awRlXP17xOUUkfw6c0c5XXrZ3TY/t7u5e6u7dSccyqxHU
k7t/B/gOQHd3t3fVmBKXLl1KNY+tpnxWZSst3wXs+bQ13HzXMubNm1fReR955JFMymZ57rEec7PEoZibM46Nax+r6rujUo1MBI8B+xbdfnq8T2r0kmfOYUbfo3R1za+o/NIJj2dSNstzj/WYmyUOxdykcSzNptbfyOGjlwNviaOHjgQ2qH9ARGT0ZVYjMLOfAEcDs8zsUeDTwHgAd78AuAJ4JbAC6ANOzSoWERFJl+WooTcMc9yB92T1/CIiUhnNLBYRaXFKBCIiLU6JQESkxSkRiIi0uExnFmfBzNYBf63x4bOA9RmVz6pss8ShmFsrDsWc/zhK7efuydOS3b1lLkBPVuWzKtsscSjm1opDMec/jmouahoSEWlxSgQiIi2u1RLBdzIsn1XZZolDMbdWHIo5/3FULHedxSIiUl+tViMQEZESSgQiIi1OiUBEpMXlYoeykTKzPYEFwKTCfe5+QwWP6wSecPdtZcpMBua5+7Iq49nX3e+q9DFjWdL7YWa/A9I6sNzdX1Jyji+7+5lmdrK7/zTDcKXJ1PI3mEEML3D3Pwx3X7Ma853FZvYO4AOEHdDuBI4Ebnb3Yyt47DXAgcDP3f2MhOOvBr4KTHD3/c3sMOAcdz8hoex1wAmE5LsUWAv8wd0/XMVrmevuqxPu3xs4EziYXZPdbq/RzF4A3Onum83sTcDhwLnunjhb28yeD8yn6EeDu1+cUO63CV/Ou91XdOw6yrwfZpa0H9+RwMeAte7+3JLz3Q0cCix198OTnrOo7GLSkwxJ/39pryftNZrZQuB8YI67H2JmhwInuPvnEsoacApwgLufY2bzgLnufltJubKvy93vKHd8OFXG3AF8hPAF/E4zWwAscvclZc5f6WdpEvB24O/Y9fP8LwllK/obNLMTy712d78s4dxzgC8AT3P3V5jZwcBR7n5RQtk7Sj93SfcVHVsK/DfwY3d/slxso6EVagQfAJ4L3OLux5jZMwj/ucNy95fGP9KDU4p8BngecF0sf6eZ7Z9Sdrq7b4yJ6WJ3/7SZVVsjuAh4VcL9PwIuicf+FXgrsC7lHOcDzzazZxP+kL8LXAy8uLSgmf2AkAjvBHbGuz2WL5SZBHQQNiDaE7B4aBqwT5nXUvb9cPelRc/xYuBThC+Ff3X3KxPO92vgSWCqmW0sfhnhdD6t6L6vxn9PBOYCP4y33wCsKT1xja/xQuCjwLfj67nLzH4M7PalCnwLGACOBc4BeoGfEz63xb4W/50EdAN/irEcCvQARxXF3Ev5ZDct4e5qYv4fQgIvPOdjwE+BxERQyWepyA+A+4GXE96PU4D7Ul7KZ6jsb/DV8d/ZwPOBa+PtY4CbgN0SAfA9wuv8RLz9AOHvbDARmNlR8Xx7m1nxj7ppQHtKzACvI2zGdbuZ9cTn+Y0X/TIvOR/x9f1HPPYmd/9h6fFatUIi2OruW80MM5vo7veb2aJKHxz/Y+5NObzD3TeEXDH0kJSy42JT02sZ+mBVxd2TkgDATHe/yMw+4O7XA9eb2e0pZfvd3c3sNcB58XFvTynbDRxc/OFM8C7gg8DTCF8MhTdjI3BemccN+36Y2cuBTwLbgM+7++/STubuHwU+ama/dPfXlHle4nuEmX3N3buLDi2Of5SlanmNHe5+W8lnoz+l7BHufriZ/THG96SZTUiI+5gY92XA4e5+d7x9COELsbjsHvHYZ4FVhC/XQs2jsw4xH+jurzOzN8Tn67OSB5ao5LNUcJC7n2xmr3H378dk9PuUshX9Dbr7qQBm9psYx6p4u5PwhZ9klrtfamYfj+foN7OdJWUmAFMJ36V7FN2/ETgp7QW6+wrgE2b2KeB4Qu1gp5n9D6GG/kTJ+UpNKXOsaq2QCB41sxnA/wFXm9mT1L5oXal7zeyNQHusGr+f8OsiydnAVcCN7n67mR0ALK9THDviv6vM7FXA34C9Usr2xg/2m4G/N7M24haiCe4h/GJO3Uva3c8FzjWz97n7N6uI+RzKvB8x
ke0N/Dtwc7xvsJqd1gwyXBIoMcXMDnD3B+P59yfhD6zG17jezA4kfimZ2Umkv487zKy9qOzehBpCmkWFJBDju8fMnplS9gR3f3bR7fPN7E/AWSOMeXtsmy+UPZCQsNMM+1kqUvg8PxWT3GrCL/kk1fwNQuiLKo5hDTAvpexmM5vJ0Gs8EthQXKDoh9f30ppX08Smt1MJW/b+nFCzfyGhtnKYu5+d9lh3/3Y1zzUsz2ABo2a9EJo/TiC0J9bjfB3A54HbCVXzzwOTUsq+oJL7aozjeGA6cAjwO8Kv1hNSys4FPgz8fbw9D3hLStnfEZpbrgIuL1xSyr4HmFF0e0/g9BG8puvi8yddrk0o30v4FbYxXu8tur4x5TmOAx6Jz3U98DDw8oRyL4qXI6uI/wDgGsJ+3I8BNxJWf0wqe0p8bx+Nn6FlwMllzv0TQpPe0fFyIfCTlLI3xfO3E0YJngLcVIeYXxbfs3WEL7CHgaPLxFzNZ+kd8fPzIuBBQv/Ruyr4G7yd0IyV+DcYy58XY3hbvFwJfDOl7OHAH4Cn4r8PAIemlC38aLmC8EV+bdLntKj8UuC3wBuBiSXHLqv176bWy5jvLG4W1XYmVXHeduD97v71Kh6zH7DA3a+JnX7t7t6bUG63fgMYalopKXunux9Wct8f3f05KTFU3CGYJTObCDwj3rzfE0aIxeo6wFPu/qEKz9vu7jvNbArQlvT+lpR/BvASQvPNb909rU288N69m/BFCXADcL67b00oOx84F3gB4ZftH4APuvvDCWX3d/eHimMu3JcSx0xCB74R+uBSl0eu8rO023OWi6NaZvZPFL137v6LlHKTgPcS+ip6CTXTb6a8z78h9B+cQVE/nbufmXLuwZpoM1AiGAELoyzOYPeREMcWlSl0Jn0QKP6yngb8k+9aba81jtvc/XkVln0ncBqwl7sfGKvTF3jK6J4qYrib8GupUI1uB+5y979LKf9TQofgGynqEHT3D8TjH3P3r8TruwwJNbMvuPu/jSDWY939WksZSeIJI0ji4yYROpjns+v/9zkJZR8hdGBfQvhlmPqHFptVHnX3bWZ2NKHz92J3f6riF1UHKT9Wlrr7biO4Yn/AsCOdRiGOqwm1p6fi7T2B/3X3l1z/hhQAABDhSURBVNchjksJtcofxbveSKj1npxQdqm7d5nZXe5+aLzvdt99dFvZUYIeO4NHWyv0EWTpp8AFhGp6aSdSQU2dSVX6g5mdR/jS2Vy405Pb0d9DGGVxayyz3Mx2aX+19BEnSSNwCn4NXGJmhbbLd8X70gzXIfh64Cvx+scJ73XBcUDNiYDwa/BawkiS4tdp8XZiIiD0M20gVOvLtYdDqGUcT3i/LzKzJYQvqBsTyv4c6Dazgwgjdi4HfkxoO96NhSHAnwH2Y9eEdEBC2b2Bd7J78vqXojLPINTMppckx2kU1dZKVDTSycxudPcXJnymdvss1RjHrOKE6aGjfbf+BDN7KD7/Onc/IuVcpQ5x9+IRg78zsz+nlK20n67wHbCI8F5dHm+/GhhxEq2VEsHI9Lv7+eUK+Ag6k6pQaJIp/mXqhD/SUtvcfXthlIWZjaPkS9/jiJMqnUn48n93vH01IUGmGa5D0FKuJ92uVm/8ZXYP4bUXzjdc9fjp7n5cJU/g7n3ApcCl8VfquYQ29aQhhQMeRqScSBjJ9U2LI4hSXAR8iJCQ0n6AFPySkGCvKVN2ESFpzWBomCWEL/d3pjym0pFOL4z/VvKZqiWOATOb5+6PwGCzZ9KoobRh3eXcYWZHuvst8dxHEPoCk3zOzKYThmR/k5C8PpgQx9nxXDcQRn71xtufAX5VQ4x1oURQAzMrZPrFZnY68AuKfiF6GPpVqs/M/p3d28SHndg2HI/DCit0vZn9GzDZzF4GnA4srkMMA4Q5CmUTY5HvxC/ITxJ+FU0lzBUYPGXK9aTb1Zoa/y38KvslIRkM96vsJjN7lheN2Ckntou/jlCD6SEMlU2yw8Iw
zLcw9AWYNpILYIMnz6VI0pHWTl3g7r8EfmlmR7n7zRWet9qRTsOqMY5PADea2fWE/8O/JzR91iw2czrh/+Cm2MznhBrY/SkPO5kwAu4e4Jj4HfFV0v+25gDbi25vj/c1hPoIalBUzSz+ZTr4RqZU0avqTKowjqrbGy0MF3078A+E+K8CvluuDbvCWBYAX2T32c27vRex/ETgnwlNFuOHiof2djMbADbFGCcTRrIQb09y93JflJXGfAPwqqJfZXsAv3L3F6WU/zNwEPAQIfEXmjcOTSj7MPBHQq3gcnffXFqmqOzBhM/Eze7+EwvDWF/r7l9OKf8lQs3iMnb9AbJbU6CZfY4wSuiKtOcvKlvNjN5TCEnucOD7hGbOT3odlveodiCBmc0idFrDMJ3WFT7/fuWOJ9XsLWFgRNJ9Rcc+QfhhUOio/kfgUnevaLJrvSkRjICZvRb4tYcZsp8i/FF8NuUPsqLOpCqf/9PxamJ7o7u/KeExJxK+7IZr4642lhuBTxM6xF9NGB/d5u5J49Uxs18z1N4+2GTh7l+Lx1P/iOoY8zJCB/e2eHsioYM7ccJh2hdEyhfDNHffmFR+pCysw5QQRuKSIr2EuRHbCM1xqf08w3XgJ5SveKRTNWqIYx927y8Zdi2xerIwN+Noj8tFxBrB9e7+rDKP6SLMG4Aweqlcc2CmlAhGoPClbmYvBD5LqAqeldQZZWa3uPuRZnYV8J+EzqSfufuBdYij4l+2FoZCHksYcngJIZGlzR6tJoZCoru78OFPG+kRj93j7oeUOd+Ih9YOJ+VX2SXu/sU6nLuaX9eFGuYu0mpTNcSyF7svupg0bPOP7v6cos/1eOD37n5kSbl24F53f0bpOeoUb0VxxLJfJtRM7mWoaco9Zb2orJjZWwgDGAo1opMJs+F/MMzjZrPr/8sjmQVZhvoIRqbwS/ZVwIXu/qtYFU9SUWdSjSpub3T3U+Mf1isIa+v8l5ld7e7vGGEM22Kz03Izey9hQtLUMuWHa2+fXa7pK6nZq1ru/nkzu5LQrgxwah1/lVWzXk7xMheTCF8iaTPDAYgjU0qTTNIw1qRFF28i/JIvVdGMXg/zI5YVd9LWWTUzi/+RMNO6rjXcarn7xRaWJynUyk5097QRRpjZCYS1o55GmDA3j/B5SRxunTUlgpF5zMJwyZcBX45NC2l7PFTbmVSNi4HbzKz4l+330wq7+474BeiE9vd/JMzmrJqZ/cDd30wYWtlBmOL/WcIfxFsTyhc64sYBp5rZgyS3t7cTEslIRwiVFZvxRrRqZ4qK18tx98dL7vqGhdUp05rVLiC818cQRmadRHondzWLLhY68D/FUAd+YgyEmb/3mtlt7DpkuR6/xIcbSFDsQUIfU0MTAUD84k/98i/xWUJSvibWfo4BdmvKHS1qGhoBC7NyjwPu9jAevxN4lrv/JqFsVZ1JNcRSUXujmb2CUJU+mrC0wqWEVQ9rah6KHagvJUzVP5qSL+7SEVSVdsSNRtNQlixO8ovNdqcTftXeljKQoPh1thFqCO/2lMmGRU0mhX+nAle6+98nlL3d3Z9rZncShnxuM7N7PWWiXxWvr+KZwlWcM6kGODi0N2Xww8+BZxOWayjuOH9/rXGMBjPrcffu2LfwHHcfMLM/pf2fZ001ghHwMFb8sqLbq0hfVKvNzPYs6Uyq5/t/Z3zucfH8adX2txD6Bt5Vp+r0BYQ/wgMYWpnTi/7d5YsvqWM1RaY1gVFQza/arxVd7yes25M21BRgS/y3z8yeBjxO+oqiFS+6aGaJv/6TmpxG8oVfRi2TrS4vKpcnT8UEfgPwIzNbS1HNarSpRjBKau1MqvDc7yOM2FlD6LdIHdYYy89haAbobe6+tg4xnO/u7x6+ZMXn26u0NpEnNszw2BGe+1OEfqaXAP9FSLgXpo3QKnrciwmLE/7a3bcnHP9I0c1JhMld96V0cCfNPt9AmC/xER/BOjrVDH6Ixxu+Q1m1LKzntBUGlwaf
DvwooZlwdOJRIhg9FsaLFzqTri3XmVTleVcQqv3DfojM7GRC38R1DE3A+ai7/6wesUhgwwyPLSk7nZDIC1901xN22dpQWjbhsRMJ8yqGLVuteO6r3P3ohGOfJayW+mPC5+j1hI1n7iA0a+32mCqet+JhvVbFLoGSTolgDLAwrvxllbTzxzbJlxVqARZmhF7TqLbJsWq44bElZX9OWO6i0MH/ZuDZ7p64KF4cmno6oU/ICctFJ64+OhKxaet2dz8o4dhu7dkWV6AdaVt3NcN6Y6f6scB1hf62at770ZZSk4Iy8ztGg/oIxoYHgevM7Ffs2mGWNMSyraQp6HHSRzpJ7apZjuJAd//nottnx87dNBcT1t8pbJLzRsJw1d1WxaxG0YguCKO29mbX9auK9VmYUFmoSZ5EaOqA5C+6ilU5rDdph7IRLXWRJa9tHa/MKRGMDY/Ey4R4KefXFia1/STefh1hMw2pgyqGxxbbYmYv9LgyqYXVRbcklCuoZlXMahxfdL0fWFOmlnkKYSG9bxFe7y3Am2J7/XtHGkgVw3qr3aFMEqhpaAwxs444kmm4cv9M2KgEwozNxI05pHqVDo8tecxhhGah6fGuJ4G3uvtdKc/xQ8IqpcWrYr7H3d9SY8xlJ681c6d9HML9CXZdO+uz9W4mG+uUCMYAC5vfXARMdfd5ZvZswvDQ0xscmlQgdoaeROhsnUHoZN5thJHtuirmIkItEOKs1JJaQjXPX7yI4jxCIrIYyyOesISzhU2ZzgfmuPshFvbfPcHd02bWSxNTIhgDzOxWwhfJ5WkdZs3aSSWDI4yeIjSFpI4wqqW2UWUcFwK/8LhSaZx8+I/u/q6EstcDHwW+3YhOWjP7hrt/0MwWk7xOk0YNVUF9BGOEu68s6TDbWXK8KTupBKhww5uiWdeJW1vWIY4j3X1wAxh3v9LMvpJStsPdbyv5zI148cIqFObffHUUn3PMUiIYG1aa2fMBt7Cg3AdIX+BMmk9VG96w69aW3yFsrJO6tWUV/mZmnwR+GG+fQlglN8n6mJAKG9OcRPqs+rpz96Xxag+wxcPGSIWVUSeOVhxjhZqGxgALG3OcS1jzx4DfAO9v5k4+2W2E0QLCMODhRhgNrsNkZh8jfAl+0+qwblXsNC6e2HYDcHbS58jMDiAkoecT+hQeAk4ZafNUtczsFuCl7r4p3p5KWDvr+aMZR94pEeSYme3r7itTjh3v7ktGOyapXK1t/rFP6BuE0TKvdveHRqt9PmFhuMmEeSiboT7Lg1cZz53ufthw90l5ahrKt6vN7Dh3f7j4TjM7lbDYmRJBExvBr+dTCVtbfj4mgf0ZajOvWZxl/jHK76tdujBcYb/nN1N+v+esbDazw+O8A8ysm/JzMCSBagQ5ZmavJPwyfJW7L4/3fZww0/QV7v5oI+OTfLEq9tWudmG4rMQv/ksY6svoBF5X1IcgFVCNIMfc/Qoz2wZcaWaFzWWeB7zI43LXMvZYdltbznT3i8zsAx6Wmb7ezG5PKVvxrngZ2x94DmH+w4nAEYxwiYtWpESQc+7+29gUdB1hav2xmlU55lW9tWWFCltErrKwFebfypw3aVe879Uhhmp9yt1/amHPhWMIw0nPJyQEqZCahnKsaJKYEYbM7WDX/Qg0SaxFmNlSd+8a4TmOJ2ynuS9D+2qf7e6JG79Y2FmtsDBc6q54WbKhje6/SNgp8Mf1GEHValQjyDFNEmtNlry15Yj+luP4+wVxpNkGwq/rsqpYGC5L1ewbLilUIxDJmbj/ROEPt7C15Vfd/YERnvc2d3/eCMMbVVbFvuGSTolAJGfixjSFbTALNYHdFqmr4bxfJyxodwlF++cWhmbK2KWmIZH8+T+GFqmr58CAwiSss+O/Rqh5HJtcXMYKJQKR/KlokboaLGFo8AHx+kYzO8zdy+2YJjmnThWR/LnJzJ6VwXm7CBPJOoGnAe8CXg5cGNc1kjFKfQQiOVHrInVVnP8G4JUlC7j9itAZ
u7TWjW+k+alpSCQ/jh++yIjMJiSWgh2EHci2xBnsMkYpEYjkxCgs8fwj4FYz+2W8/Wrgx2Y2Bfhzxs8tDaSmIREZFBdxe0G8+Qd372lkPDI6lAhERFqcRg2JiLQ4JQIRkRanRCAtzcw+YWb3mtldZnanmWW2fLGZXRfb4EWaikYNScsys6MIQzIPd/dtZjYLmNDgsERGnWoE0so6gfXuvg3A3de7+9/M7Cwzu93M7jGz75iZweAv+q+bWY+Z3WdmzzWzy8xsuZl9LpaZb2b3m9mPYpmfxRUyd2Fm/2BmN5vZHWb20zh5CzP7kpn9OdZQvjqK74W0MCUCaWW/AfY1swfM7Ftm9uJ4/3nu/lx3PwSYzK4Tuba7ezdwAWHj9vcAhwBvM7OZscwi4Fvu/kxgI3B68ZPGmscngZe6++FAD/Dh+Ph/Av4uzhL+XAavWWQ3SgTSsuJSCl3AacA64BIzextwjJndGpd0OBb4u6KHFXbruhu4191XxRrFg4SdvQBWuvsf4vUfAi8seeojgYOBP5jZnYRN4vcjbAizFbjIzE4E+ur2YkXKUB+BtDR330nY7/m6+MX/LuBQoNvdV5rZZwj7AhcUlloYYNflGAYo2hug9GlKbhtwtbu/oTQeM3se8BLgJOC9aAloGQWqEUjLMrNFZrag6K7DgGXx+vrYbn9SDaeeFzuiAd4I3Fhy/BbgBWZ2UIxjipktjM833d2vAD4EPLuG5xapmmoE0sqmAt80sxmELR9XEJqJngLuAVYDt9dw3mXAe8zsvwlr9JxffNDd18UmqJ/EPXYh9Bn0Ar+MO5AZ8OEanlukalpiQqSOzGw+sCR2NIvkgpqGRERanGoEIiItTjUCEZEWp0QgItLilAhERFqcEoGISItTIhARaXFKBCIiLe7/A1RoLwwvUBY7AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the sentence's tokens by frequency (at most the 30 most common tokens).\n",
    "# cumulative=False plots each token's own count rather than a running total.\n",
    "fdist.plot(30, cumulative=False)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### But... the word \"a\" doesn't really tell us much about that sentence, does it?\n",
    "Good note! \"a\" is what is considered to be a STOPWORD: a very common word that carries little meaning on its own."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STOPWORDS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"haven't\", 'why', 'so', \"mightn't\", 'your', \"won't\", 'm', 'for', \"isn't\", 'between', 'this', 'didn', \"you're\", 'ain', 'not', \"it's\", 'itself', 'what', 'some', 'now', 'll', 'by', 'but', 'nor', 'ourselves', 'those', 'yours', 'of', 'she', 'to', 'doesn', 'until', \"needn't\", 'herself', 'is', 'our', 'does', 'then', 'from', 'haven', \"hadn't\", 'or', \"she's\", 'over', \"didn't\", 'are', 'will', \"shan't\", 'very', 'too', 'had', 'off', 'through', 'he', 're', 'under', 'myself', 'when', 'don', 'each', 'be', 'ours', 'most', 'o', 'y', 'am', 'hers', 'which', 'can', 'yourselves', 'wasn', 'd', 'there', \"shouldn't\", 'wouldn', 'whom', 'into', 'an', 'in', 'a', 'just', 'you', 'them', \"that'll\", 'theirs', \"weren't\", 'they', 'it', 'been', 'having', 'that', 'themselves', 'and', 'about', 'further', \"wouldn't\", \"you'll\", 'as', 'my', \"wasn't\", 'needn', \"doesn't\", 'its', 'won', 'again', 'where', 'mustn', 'than', \"you'd\", 'both', \"should've\", 'weren', 'being', 'shouldn', 'me', 'ma', 'were', 'hasn', \"hasn't\", 'more', 'own', 'with', 'yourself', 'no', \"couldn't\", 'shan', 'on', 'hadn', 'should', 's', 'only', \"mustn't\", 'while', 'aren', 't', 'we', 've', 'these', 'doing', 'how', 'same', 'him', 'few', 'i', 'below', 'who', 'her', 'the', 'before', 'have', 'all', 'couldn', 'his', 'do', 'after', 'above', 'at', 'other', 'any', 'if', 'their', 'such', \"aren't\", 'against', 'up', 'because', 'did', \"you've\", 'has', 'was', 'once', 'here', 'out', 'mightn', 'down', 'during', 'himself', 'isn', \"don't\"}\n"
     ]
    }
   ],
   "source": [
    "# Load NLTK's English stop word list as a set so membership checks are fast.\n",
    "# NOTE(review): requires the NLTK 'stopwords' corpus to be available locally — confirm it is downloaded.\n",
    "stop_words = set(stopwords.words(\"english\"))\n",
    "print(stop_words)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### REMOVING STOPWORDS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tokenized text: ['Kendra', 'loves', 'cats', '.', 'In', 'fact', ',', 'she', 'has', 'TEN', 'cats', '.', 'If', 'she', 'did', \"n't\", 'have', 'a', 'house', ',', 'a', 'husband', 'and', 'a', 'graduate', 'degree', 'in', 'data', 'science', ',', 'she', \"'d\", 'be', 'a', 'cat', 'lady', '!']\n",
      "Filterd text: ['Kendra', 'loves', 'cats', '.', 'In', 'fact', ',', 'TEN', 'cats', '.', 'If', \"n't\", 'house', ',', 'husband', 'graduate', 'degree', 'data', 'science', ',', \"'d\", 'cat', 'lady', '!']\n"
     ]
    }
   ],
   "source": [
    "# Keep only the tokens that are not in the NLTK stop word set.\n",
    "# NOTE: the comparison is case-sensitive and stop_words is all lowercase, so\n",
    "# capitalized tokens such as 'In' and 'If' (and punctuation) are kept.\n",
    "filtered_text = [token for token in tokenized_example if token not in stop_words]\n",
    "print(\"Tokenized text:\", tokenized_example)\n",
    "print(\"Filterd text:\", filtered_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEqCAYAAAAPl8fDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZhcZZn+8e+djaSTsGahDYRAEnEQAekeQHAUHEVcGRlQEJFhxLiggz+UQVxAXEYdHR2VYVMYRIFxg4GEXYZFRJZuhiWATMImQSQJW0I6W6ef3x/nVHelqe46Vd2nqqvr/lxXXak65zx13iTd9dS7KyIwMzPrb0y9C2BmZiOTE4SZmZXkBGFmZiU5QZiZWUlOEGZmVpIThJmZlTSu3gUYTtOmTYs5c+ZUFbt27VomTZpU9b0d73jHO74R4zs7O1dGxPSSJyNi1Dza2tqiWh0dHVXHOt7xjnd8o8YDHTHAZ6qbmMzMrCQnCDMzK8kJwszMSnKCMDOzknJLEJImSrpL0n2SHpR0RolrtpD0C0lLJd0paU7RuVPT449Iente5TQzs9LyrEGsB94SEXsCewGHSNqv3zUfAV6IiHnA94FvA0jaDTgSeC1wCHCWpLE5ltXMzPrJLUGkI6heTl+OTx/91xY/FPhp+vzXwN9KUnr8vyJifUQ8DiwF9smjnF0buul88gUeXrkhj7c3M2tYufZBSBor6V5gOXBDRNzZ75JZwFMAEdENvARsV3w8tSw9NuzuefJF/v7s2/n5/avzeHszs4alqMGGQZK2Bi4HPh0Ri4uOLwYOiYhl6etHgX2BrwB3RMTP0+PnA9dExK9LvPcCYAFAa2tr28KFCysq2/NrN/HRRSuYPB5+euhMkgpM5bq6umhpaakq1vGOd7zj6xXf3t7eGRHtJU8ONINuuB/AacDn+h27DnhD+nwcsBIQcCpwaqnrBntUM5O6p6cndj/t2tjplEWxfNW6iuMLGnkmpeMd7/jmjaceM6klTU9rDkiaBLwN+GO/y64Ejk2fHw78T1rgK4Ej01FOOwPzgbtyKifzZk4BYMlyNzOZmRXk2QfRCtwk6X7gbpI+iEWSvirpvek15wPbSVoKnAR8HiAiHgR+CTwEXAucEBGb8irovOlJgnh0+ctlrjQzax65reYaEfcDry9x/LSi5+uAIwaI/wbwjbzKV2x+bw3CCcLMrMAzqYF5M5IEsdQJwsyslxMEMH/GVMA1CDOzYk4QwKytJzFhLKxYvZ6XujbWuzhmZiOCEwQwZoyYNTXpjlm6wiOZzMzACaLXDlumCcLNTGZmgBNEr0KCWPKsE4SZGThB9Nqht4nJCcLMDJwgerkGYWa2OSeI1PZTxjJujHj6xbV0beiud3HMzOrOCSI1bozYedpkAB5dvqbOpTEzqz8niCK9M6o91NXMzAmi2Pw0QbgfwszMCWIzc70mk5lZLyeIIoU1mZwgzMycIDazy/TJSPDk812s785t+wkzs4bgBFFk4vixzN62hU09wRMru+pdHDOzuspzy9EdJd0k6SFJD0o6scQ1J0u6N30slrRJ0rbpuSckPZCe68irnP0VdpdzM5OZNbs8axDdwGcjYjdgP+AESbsVXxAR34mIvSJiL+BU4JaIeL7okoPS8+05lnMz3p/azCyRW4KIiGci4p70+WrgYWDWICFHAZfmVZ6sXIMwM0soIvK/iTQHuBXYPSJWlTjfAiwD5hVqEJIeB14AAjg3Is4b4L0XAAsAWltb2xYuXFhVGbu6umhpaWHJ8xv4/I3PM3urcXz/4GkVx1fL8Y53vOPrEd/e3t45YCtNROT6AKYAncBhg1zzAWBhv2Oz0j9nAPcBbyp3r7a2tqhWR0dHRESsWrshdjplUcz/4tWxsXtTxfFDvb/jHe94x9cyHuiIAT5Tcx3FJGk88Bvg4oi4bJBLj6Rf81JEPJ3+uRy4HNgnr3IWmzpx
PK1bTWRDdw9PvbC2Frc0MxuR8hzFJOB84OGI+N4g120FvBm4oujYZElTC8+Bg4HFeZW1v3meUW1mlmsN4gDgGOAtRUNZ3ynp45I+XnTd+4DrI6J4CdWZwG2S7gPuAq6KiGtzLOtmnCDMzGBcXm8cEbcBynDdhcCF/Y49BuyZS8EyKCQID3U1s2bmmdQlFNZketQ1CDNrYk4QJRQ3MUUNhgGbmY1EThAlbDt5AttNnsCaDZt45qV19S6OmVldOEEMYG5vP4SbmcysOTlBDGC+RzKZWZNzghhAXz+ERzKZWXNyghiAd5czs2bnBDGAeUV9EB7JZGbNyAliADO33IKpW4zjxa6NPLdmQ72LY2ZWc04QA5DUN5LpWTczmVnzcYIYRO9IphVOEGbWfJwgBtE7kulZj2Qys+bjBDGI+TNdgzCz5uUEMYh505Ohru6DMLNm5AQxiFnbTGLi+DEsX72el9ZurHdxzMxqygliEGPHiF2meckNM2tOeW45uqOkmyQ9JOlBSSeWuOZASS8V7Th3WtG5QyQ9ImmppM/nVc5yCv0Q3hvCzJpNbjvKAd3AZyPinnR/6U5JN0TEQ/2u+11EvLv4gKSxwH8AbwOWAXdLurJEbO7mTffucmbWnHKrQUTEMxFxT/p8NfAwMCtj+D7A0oh4LCI2AP8FHJpPSQfXO5LJNQgzazKqxTpDkuYAtwK7R8SqouMHAr8hqSX8GfhcRDwo6XDgkIg4Pr3uGGDfiPhUifdeACwAaG1tbVu4cGFVZezq6qKlpeUVx5et6ubE61Yyo2UsZ79resXxQ72/4x3veMfnGd/e3t4ZEe0lT0ZErg9gCtAJHFbi3JbAlPT5O4El6fPDgZ8UXXcMcGa5e7W1tUW1Ojo6Sh7f0L0p5p56Vcz5/KJYs35jxfFDvb/jHe94x+cZD3TEAJ+puY5ikjSepIZwcURcViI5rYqIl9PnVwPjJU0DngZ2LLp0h/RYzY0fO4Y50yYTAY+tWFOPIpiZ1UWeo5gEnA88HBHfG+Ca7dPrkLRPWp7ngLuB+ZJ2ljQBOBK4Mq+yluPd5cysGeU5iukAkqahByTdmx77AjAbICLOIWlK+oSkbmAtcGRa5emW9CngOmAscEFEPJhjWQfVtzeERzKZWfPILUFExG2AylxzJnDmAOeuBq7OoWgVm+cahJk1Ic+kzqB4dzkzs2bhBJHB3OlTkODJ57rY0N1T7+KYmdWEE0QGE8ePZcdtWtjUEzzxnEcymVlzcILIyCOZzKzZOEFkNM/7U5tZk3GCyGie96c2sybjBJFRXw3CcyHMrDk4QWRUSBCPrVzDpp78Fzg0M6s3J4iMpk4cz/ZbTmRDdw9PPd9V7+KYmeXOCaIC3hvCzJqJE0QF5k73jGozax5OEBVwDcLMmokTRAUK+1Mv9aquZtYEnCAqMH/mVCCpQUQNtmo1M6snJ4gKbDt5AttOnsCaDZt45qV19S6OmVmu8txRbkdJN0l6SNKDkk4scc3Rku6X9ICk2yXtWXTuifT4vZI68ipnpbw3hJk1izxrEN3AZyNiN2A/4ARJu/W75nHgzRHxOuBrwHn9zh8UEXtFRHuO5ayI94Yws2aR545yzwDPpM9XS3oYmAU8VHTN7UUhdwA75FWe4eJVXc2sWdSkD0LSHOD1wJ2DXPYR4Jqi1wFcL6lT0oL8SleZviYmj2Qys9FNeY/GkTQFuAX4RkRcNsA1BwFnAW+MiOfSY7Mi4mlJM4AbgE9HxK0lYhcACwBaW1vbFi5cWFU5u7q6aGlpKXvdc12bWHDVCqZMEBe+dwaSKoof6v0d73jHO34449vb2zsHbMaPiNwewHjgOuCkQa7ZA3gUePUg13wF+Fy5+7W1tUW1Ojo6Ml3X09MTrz3t2tjplEWxYvW6iuOHen/HO97xjh/OeKAjBvhMzXMUk4DzgYcj4nsDXDMbuAw4JiL+r+j4ZElTC8+Bg4HFeZW1EpKY634IM2sCuXVSAwcAxwAPSLo3PfYF
YDZARJwDnAZsB5yVNtV0R1LVmQlcnh4bB1wSEdfmWNaKzJ8xhfueepEly19mv122q3dxzMxykecoptsAlbnmeOD4EscfA/Z8ZcTIUOioftQ1CDMbxTyTugrze+dCeCSTmY1eThBV8GxqM2sGThBV2GGbFrYYN4ZnV61n1bqN9S6OmVkunCCqMHaM2GW6axFmNrpVnCAkbSNpjzwK00h6l9x41gnCzEanTAlC0s2StpS0LXAP8GNJJec2NIvefogVThBmNjplrUFsFRGrgMOAiyJiX+Ct+RVr5OsdyfSsRzKZ2eiUNUGMk9QKvB9YlGN5GoZrEGY22mVNEGeQrKm0NCLulrQLsCS/Yo18O203mXFjxLIX1rJ2w6Z6F8fMbNhlnUn9TET0dkxHxGPN3gcxYdwYdtquhUdXrOFR1yLMbBTKWoP4UcZjTWX+jKmAh7qa2eg0aA1C0huA/YHpkk4qOrUlMDbPgjWCeTOmwINJgthxWr1LY2Y2vMrVICYAU0gSydSixyrg8HyLNvLNn+k1mcxs9Bq0BhERtwC3SLowIp6sUZkaxtzi2dS7Ta1zaczMhlfWTuotJJ0HzCmOiYi35FGoRjF3+hQkeOK5Ljb2TKl3cczMhlXWBPEr4BzgJ4DHdKYmTRjLDttM4qnn1/KXl/3PYmajS9ZRTN0RcXZE3BURnYXHYAGSdpR0k6SHJD0o6cQS10jSDyUtlXS/pL2Lzh0raUn6OLbCv1fNFEYyLVvVXeeSmJkNr6wJYqGkT0pqlbRt4VEmphv4bETsBuwHnCBpt37XvAOYnz4WAGcDpO99OrAvsA9wuqRtMpa1pgozqp0gzGy0ydrEVPgGf3LRsQB2GSggIp4Bnkmfr5b0MDALeKjoskNJ1nYK4A5JW6dLehwI3BARzwNIugE4BLg0Y3lrppAgOp9Zz09vf6Kq95Bg2w1OMGY2smRKEBGx81BuImkO8Hrgzn6nZgFPFb1elh4b6PiI85rtkyamJc9v5PQrH6z6feZuM453v3m4SmVmNnRKvryXuUj6cKnjEXFRhtgpwC3ANyLisn7nFgHfiojb0tc3AqeQ1CAmRsTX0+NfBtZGxHdLvP8CkuYpWltb2xYuXFj271NKV1cXLS0tFcdFBIuWdLHsxXWMGze+qntf+2gXYwWXHDaTcWNU1XtUW37HO97xzR3f3t7eGRHtJU9GRNkHybIahcePgceAX2eIG0+yyN9JA5w/Fziq6PUjQCtwFHDuQNcN9Ghra4tqdXR0VB071Pj9v3lj7HTKoljy7Oq63N/xjnd888YDHTHAZ2qmTuqI+HTR46PA3iQzrAckScD5wMMRMdDCflcCH05HM+0HvBRJ38V1wMHp7nXbAAenx0alwoxsr+lkZiNJ1k7q/tYA5folDgCOAR6QdG967AvAbICIOAe4GngnsBToAo5Lzz0v6WvA3WncVyPtsB6N5k2fws2PrGDp8tXA9vUujpkZkDFBSFpIMmoJkkX6/gr45WAxkfQrDNqgnlZvThjg3AXABVnK1+hcgzCzkShrDaK4c7gbeDIiluVQnqZUGCq7xAnCzEaQrH0QtwB/JFnJdRtgQ56FajbzpidDZR9d8TI9PeVHlZmZ1UKmBCHp/cBdwBEk+1LfKanpl/seLlu1jGfriWNYt7GHp19cW+/imJkB2ZuYvgj8dUQsB5A0Hfgt8Ou8CtZsdtxyHC+u25BsPrRt9eOhzcyGS9a1mMYUkkPquQpiLYNZU5Nc7c2HzGykyFqDuFbSdfSthfQBkiGqNkx23DL5r/BIJjMbKcrtST0PmBkRJ0s6DHhjeuoPwMV5F66ZzNoy2eLbI5nMbKQo10z07yT7TxMRl0XESRFxEnB5es6GSXENIjKsj2VmlrdyCWJmRDzQ/2B6bE4uJWpSW20xhq0mjWf1um6Wr15f7+KYmZVNEFsPcm7ScBak2Uli/gzPqDazkaNcguiQ9NH+ByUdDwy65ahVrndG9bMeyWRm9VduFNNngMsl
HU1fQmgHJgDvy7NgzaiQIJaucA3CzOpv0AQREc8C+0s6CNg9PXxVRPxP7iVrQn01CCcIM6u/rFuO3gTclHNZmt78mX1rMpmZ1ZtnQ48gr9pqIi0TxrLy5Q28sMbrIZpZfTlBjCCS3A9hZiNGbglC0gWSlktaPMD5kyXdmz4WS9okadv03BOSHkjPdeRVxpFo3nT3Q5jZyJBnDeJC4JCBTkbEdyJir4jYCzgVuKXftqIHpefbcyzjiDPPu8uZ2QiRW4KIiFuBrPtIH0XfQoBNrbcG4VVdzazOlOe6P5LmAIsiYvdBrmkBlgHzCjUISY8DL5Dsg31uRJw3SPwCYAFAa2tr28KFC6sqa1dXFy0t1e/DMFzxf17dzaevXcm0SWM4990zan5/xzve8c0V397e3jlgS01E5PYgWa9pcZlrPgAs7HdsVvrnDOA+4E1Z7tfW1hbV6ujoqDp2OOM3dm+K+V+4OnY6ZVGsXrex5vd3vOMd31zxQEcM8Jk6EkYxHUm/5qWIeDr9cznJyrH71KFcdTFu7Bh2mT4ZgEfdD2FmdVTXBCFpK+DNwBVFxyZLmlp4DhwMlBwJNVrNLcyodoIwszrKuqNcxSRdChwITJO0DDgdGA8QEeekl70PuD4i1hSFziRZ/6lQvksi4tq8yjkSeVVXMxsJcksQEXFUhmsuJBkOW3zsMWDPfErVGHony3kkk5nV0Ujog7B+5s9I1mRyDcLM6skJYgSaM62FMYI/Pd/Fuo2b6l0cM2tSThAj0BbjxjJnu8n0BDy+ck35ADOzHDhBjFAeyWRm9eYEMUJ5JJOZ1ZsTxAjlkUxmVm9OECOURzKZWb05QYxQc2cky208vnIN3Zt66lwaM2tGThAjVMuEcczaehIbNwVPPt9V7+KYWRNyghjBCv0Q3l3OzOrBCWIEK4xketT7U5tZHThBjGB9NQiPZDKz2nOCGMHmF/andg3CzOrACWIEmze9b6hrT09+W8OamZXiBDGCbdUynulTt2Ddxh6efnFtvYtjZk0mtwQh6QJJyyWV3A1O0oGSXpJ0b/o4rejcIZIekbRU0ufzKmMjmDfdS26YWX3kWYO4EDikzDW/i4i90sdXASSNBf4DeAewG3CUpN1yLOeI1tsP4QRhZjWWW4KIiFuB56sI3QdYGhGPRcQG4L+AQ4e1cA2kdyST12Qysxqrdx/EGyTdJ+kaSa9Nj80Cniq6Zll6rCnN86quZlYnishvdIykOcCiiNi9xLktgZ6IeFnSO4EfRMR8SYcDh0TE8el1xwD7RsSnBrjHAmABQGtra9vChQurKmtXVxctLS1VxeYZ/8K6TRy/cAUt48VFh85AUk3v73jHO350x7e3t3dGRHvJkxGR2wOYAyzOeO0TwDTgDcB1RcdPBU7N8h5tbW1RrY6Ojqpj84zv6emJ151+bex0yqJ49qW1Nb+/4x3v+NEdD3TEAJ+pdWtikrS90q/DkvYhae56DrgbmC9pZ0kTgCOBK+tVznqTxPyZyXwI7y5nZrWU5zDXS4E/ALtKWibpI5I+Lunj6SWHA4sl3Qf8EDgyTWjdwKeA64CHgV9GxIN5lbMReKirmdXDuLzeOCKOKnP+TODMAc5dDVydR7kaUWGoq0cymVkt1XsUk2Uw1yOZzKwOnCAawHwnCDOrAyeIBvCqrSYxafxYVr68gRe7NtS7OGbWJJwgGsCYMfKEOTOrOSeIBtG35IYThJnVhhNEg3ANwsxqzQmiQbgGYWa15gTRIAojmR51gjCzGnGCaBCzt21hwtgxPP3iWtas7653ccysCThBNIhxY8ew87TJADy6wrUIM8ufE0QD6e2HeNYJwszy5wTRQHpHMrkGYWY14ATRQFyDMLNacoJoIIVVXd0HYWa14ATRQHaeNpkxgiefW8O6jZvqXRwzG+WcIBrIFuPGstN2k+kJeOK5NfUujpmNcnnuKHeBpOWSFg9w/mhJ90t6QNLtkvYsOvdEevxeSR15lbERzZ3ufggzq408
axAXAocMcv5x4M0R8Trga8B5/c4fFBF7RUR7TuVrSIV+CK/JZGZ5y3PL0VslzRnk/O1FL+8AdsirLKOJ96c2s1oZKX0QHwGuKXodwPWSOiUtqFOZRiTXIMysVhQR+b15UoNYFBG7D3LNQcBZwBsj4rn02KyIeFrSDOAG4NMRcesA8QuABQCtra1tCxcurKqsXV1dtLS0VBVby/i13T186PLljBNccthMxo5RTe/veMc7fnTFt7e3dw7YlB8RuT2AOcDiQc7vATwKvHqQa74CfC7L/dra2qJaHR0dVcfWOn7/b94YO52yKB5dvrou93e84x0/euKBjhjgM7VuTUySZgOXAcdExP8VHZ8saWrhOXAwUHIkVLOa670hzKwG8hzmeinwB2BXScskfUTSxyV9PL3kNGA74Kx+w1lnArdJug+4C7gqIq7Nq5yNaL53lzOzGshzFNNRZc4fDxxf4vhjwJ6vjLACbz9qZrUwUkYxWQVcgzCzWnCCaEDFNYienvxGoZlZc3OCaEBbt0xg2pQtWLtxE39+aW29i2Nmo5QTRIOaNyPZftQjmcwsL04QDWr+jKkAPOoEYWY5cYJoUN5dzszy5gTRoOZ7f2ozy5kTRIPqq0GsLixJYmY2rJwgGtT0qVuw5cRxrFrXzYqX19e7OGY2CjlBNChJffMh3A9hZjlwgmhghZFM7ocwszw4QTQwj2Qyszw5QTSwed5dzsxy5ATRwAr7U3s2tZnlwQmigc3aehKTxo9l5cvrWb2hp97FMbNRxgmigY0ZI+amazI9vaq7zqUxs9Em1wQh6QJJyyWV3DJUiR9KWirpfkl7F507VtKS9HFsnuVsZIWRTMucIMxsmOVdg7gQOGSQ8+8A5qePBcDZAJK2BU4H9gX2AU6XtE2uJW1QhZFMTzlBmNkwy23LUYCIuFXSnEEuORS4KJK1Iu6QtLWkVuBA4IaIeB5A0g0kiebSPMvbiAoJ4u4/r+fL/12yopbJihWr+O+nHO94xzdq/I6vXseMqROrfo9SlPc6PmmCWBQRu5c4twj4VkTclr6+ETiFJEFMjIivp8e/DKyNiO+WeI8FJLUPWltb2xYuXFhVObu6umhpaakqtp7xy9d088mrV+LVmMya2w/ePo0dtqz8O397e3tnRLSXOpdrDaIWIuI84DyA9vb2aGtrq+p9Ojs7qTa23vE/a13Jzfc8xOzZs6u+/5/+9CfHO97xDRx/4L57s1XL+Krfo5R6J4ingR2LXu+QHnuapBZRfPzmmpWqwbxx/jQmrZpMW9ucqt+jc8Jzjne84xs4friTA9R/mOuVwIfT0Uz7AS9FxDPAdcDBkrZJO6cPTo+ZmVmN5FqDkHQpSU1gmqRlJCOTxgNExDnA1cA7gaVAF3Bceu55SV8D7k7f6quFDmszM6uNvEcxHVXmfAAnDHDuAuCCPMplZmbl1buJyczMRignCDMzK8kJwszMSnKCMDOzknKfSV1LklYAT1YZPg1YOYTbO97xjnd8I8bvFBHTS56JCD+SJNnheMc73vHNGD/Qw01MZmZWkhOEmZmV5ATR5zzHO97xjm/S+JJGVSe1mZkNH9cgzMysJCcIMzMryQnCzMxKqveGQSNKuh/28xGxvsK4bYAdI+L+fEpmBZJuggF3WI2I+NsM7/HtiDhF0hER8avhLaHVkqRJwOyIeKQO9z4gIn5f7lgjcyd1EUm/BeYCv4mIz5W59mbgvSRJthNYDvw+Ik4aYhm2j4i/ZLx2Oske3rsBvbuVR8RbMsYfANwbEWskfQjYG/hBRGSejS5pf2AORV82IuKiDHE39v8wL3WsRFypfVn3A/4ZWB4Rf53h3g8AewCdEbF3uesHeZ+q/g5F1wo4GtglIr4qaTawfUTcVSZu0DJHxD0Z7/9q4GxgZkTsLmkP4L2R7gWfIb4F+CzJB/RHJc0Hdo2IRRnjJwIfAV7L5j+//5gx/j3Ad4EJEbGzpL1I9o55b5m4wwY7HxGXZbz/Pf1/fkod
GyS+k2RLg0si4oUsMbXmGkSRiHhr+ku7W4bLt4qIVZKOBy6KiNMlDUcN4nzgXRmvvRj4RXr9x4FjgRUV3OtsYE9Je5L8ov8EuAh4c5ZgST8jSaj3ApvSw5G+x0AxE4EWkk2ktgGUntoSmFXunhHRWfRebwa+TPLh8vGIuCZLuYFrgReAKZJWFRcvuUVsOVjwUP8ORc4CeoC3AF8FVgO/AcoluX9L/5wItAP3pWXYA+gA3pDx/j8GTgbOBYiI+yVdAmRKEMB/knw5KtzvaeBXQKYEAfwM+CPwdpK//9HAwxljAb4C7EO6HXFE3Ctp5wxx70n/nAHsD/xP+vog4HZg0AQh6Q1p3HRJxV8ItwTGZiw7wAdINkm7W1IHyb/n9ZHhW3u/+wIQEd9Lz30oIn5eQTkG5ATRT/qf82CGS8elTVLvB744jPfPmhwAtouI8yWdGBG3ALdIurtsVJ/uiAhJhwJnpu/1kQri24HdsvxAF/kY8BngVSQfLoUP11XAmVneQNLbgS8B64FvRMRNFdyfiDgZOFnSFRFxaCWxqSH/HVL7RsTekv43LdcLkiaUC4qIgwAkXQbsHREPpK93J/nQzKolIu5KvhP16q4gfm5EfEDSUWm5utTvzcqYFxFHSDo0In6aJqffVRC/MSJe6nfLsj+LEXEcgKTrSX5+n0lftwIXZrjvBGAKyefn1KLjq4DDM5U8KcdS4IuSvgy8m6Q2sUnSf5LU5AfbRXPqIOcmZy1DOU4Q1TuDZJ/s2yLibkm7AEtqXIaN6Z/PSHoX8Gdg2wriV0s6FTgG+BtJY0i3hM1oMbA98EzWgIj4AfADSZ+OiB9VcC8A0gQ4HfgO8If0WG+VPmvzSnptNclhyH+HIhsljSX9UEubDHsqiN+1kBzSci2W9FcVxK+UNLfo/odTwf8lsCHtAyjEzyVJ2lkVfn5fTJPbX0i+1Wf1oKQPAmPT5q1/IqkBZLVjITmkngVmlwsq+jJ2YSXNsaWkzXrHkWy9/BuSVoE3ktRq9hqkDGcMcu7coZSpmBNE9Z6JiD0KLyLiMUnfq3EZvi5pK5LmoR+RVHH/XwXxHwA+CPxjRPwlbQP/TgXx04CHJN1F0QdDuTbgVI+krSPiRejt6D8qIs4qE7cGeBSl7qUAABCXSURBVJnkm1r/b2tB0lwzKEmr6fumWfj6GWRvYnpT+rSS2lopPwQuB2ZI+gbJ3+dLFcTfL+knQKE54WigkmbOE0hm4L5G0tPA4+l7ZHU6SXPdjpIuBg4A/qGC+PPS//cvAVeSfCv/cgXxnyapva8HLiH5wpa1eQzgRknXAZemrz8A/LaC+C5J3+GVfShZ+wA7gRdJmpU/XzQ45s60f7Du3EldpaF2UA3D/ccC/xQR3x/i++wEzI+I36adjmMjYnXG2JJ9Fek3rHKx90bEXv2O/W9EvD7LvespbQIAeDEiKknIpd7rNcDfkiSnGyMicxt82hfyCaCQsG4Fzo6IdRnjd46IxyVNBsZExOrCsQrKsB3JIAEBd0RE5iWnS92r0vsPlaT3UfTvFxGXVxB7PUkf4Oco6gOMiFMyxu8SEY9VWOSacoKoUFEH1WeA4g/nLYH3RcSeNSzLXRGxzxDiPwosALaNiLlpNf2crKNwhqIwkqjQf5EmvPsj4rVl4v45Iv41fb7ZMFVJ/xIRX8iz3P3KMhE4jFeO4vpqxvi5wLKIWC/pQJJO5osKtaq8DfAlpzMiSo0UKxVf1SisYbz/DcAR/Wqh/xURb88SP1SFskq6v9CaIOnuciPpSnUwFyt0No8EbmKq3LB0UA2T30s6k+RbzJrCwQra4U8gGQVyZxq3RFLZNuB+TTSbnSJDE03qWuAXkgrtpR9Lj5VzJPCv6fNTSUbNFBwC1CxBAP8NvETSUV3R3JnUb4B2SfNIRhJdSdJU8s4swWkzxFeAndg8Qe1SJu41JM0iW2nzIZ9bUtRUkkFVo7CG8f7TipNp2smf5ef3cZKf3xURsW8F9+uv2j7A
wufGriT/Vlemr98DZEquteIEUaHh7KAaBoUmmuJvrJna4VPrI2JDYRSIpHFkGwUy2AiKrE4hSQqfSF/fQDLMthwN8LzU67ztEBGHDCG+JyK60w/JMyPiR4URTRmdT9Ln1EnfMOMsdiUZNbM1fUM+IfmA/2gF71PVKKxhvH+PpNkR8SfobS7N8vObZShsFqX6AD+T4f5nAEi6lWQU2ur09VeAq4apbMPCCaJ6Q+qgGg6F4Y5DcIukLwCTJL0N+CSwcOglKy8iekjmYZxdaegAz0u9ztvtkl5XPJKoQhvTIaIfpu+DspJRZC9F9rkfvSLiCuAKSW+IiD9UGl+kqlFYw3j/LwK3SbqF5MvB35A0mdbKESSjGBcDB0nalmTiXtbfoZnAhqLXG9JjI4b7IKo01A6qId57WNow02GtHwEOJvkFuw74SYXzGqqS9nd8k1fOAi/XPNJDMopJwCSgq3AKmBgRlXzADomkh4B5JKN/1tPXxLbHoIF98buR/Oz8ISIuVTLJ6/0R8e2M8d8imZh1GZuPIss6k3qoM5mPJhn5szfwU9JRWJFx+ZKh3j99j2kkneRQYSf5UJUaVFHJQAtJXySZR1XoGP874JcR8S/DW9LqOUFUqdoOqmG69+np05JtmBHxoYzvcxhwVVS49tRwkHQbyTDJ75OU+ziSkTSnlYkbMSOd0iaNV6hV06OSdalK3D7zMMtfkcxk/iBFM5kj4sQKyjCUUVjDcf9ZvLIP5tas8UMh6T7gwEiXyUhrELdExOsqeI82knkPkIyiqqSJMXdOEFWSdEdE7JeOo/4hSQfVryNibg3LcCvwrqI2zKkkH/hvGjyyN/4/SforbiWpDV0bEZXMpK1aUYJ9oPALlWUESy2HEuetqLN0M+VqUcN4//+NiNcXvuRIGg/8LiL2yxA7FngwIl5Tj/un8d8mqcE8SF/TVkS2eThDJunDJIMiCjWmI0hm9v+swveZweY1qD8NWyGHyH0Q1auqg2qYDakNMyKOS38p3wEcBfyHpBsi4vjhLWZJ69MmriWSPkWyjs+UDHEzBmtiG0lDBDNoL3o+keQDppKZ8KSjZ/o30WQaZssQZjJHxCZJjxR3EldhqDOp/45kNnnNa8CQLEqpZA2lQo3tsIh4KGu8pPeSrKv1KpLFPmeT1KgGHepdS04Q1RtqB9VwuAi4S1JxG+ZPK3mDiNgo6RqSb7KT0vfILUFI+llEHEMyRLSFZHmEr5H8kh2b4S3GkiSSWo9YGnYR8Vy/Q/+uZHbtoM1sBZLOIfk3PIhkBNjhVDZMsjCT+cv0zWTOdO/UNiTLXdzF5sOss36DH+pM6sdIOvXrkiAA0oSQOSn08zWS/pPfpjWpg4BMzcO14iamKg21g2oYy1F1G6akd5BU0Q8kWRHzlySrSebWzJR27L4VuCa97+YrrQ2+QNloa2Iq/nuMIalRfCIyTrYsapop/DkFuCYi/iaP8pa4f1Uz6QeoAfYueVLBIIvfAHsCN7J5J/0/ZYmvN0kdEdGe9mW8PiJ6JN2X9f+/FlyDqN4YSdv066Cqx7/nvSQLrI1Ly1FJlf/DJH0PH6thNf0ckl/oXehbCTWK/izX/t7wNYci/1b0vBt4gmRUS1Zr0z+7JL0KeA5ozRosqWRtIWsTVblEMIjhmih2ZVFsI3oxTeq3AhdLWk5RTWwkcA2iSsPVQTXEMnyaZCTQsyQTpSoaZpm+x0z6Zr7eFRHLh72gpe97dkR8ovyVr4jbtlwto1koWSb6RySjiP6DJMH+uNxIsKL4zxa9nEgyee3hCoa5lppR/xLJnhSfjTLrDA11kEUaU7cd5YZKyRpY60h+b48GtgIuLtH0WDdOEEOQjmMvdFD9TyUdVMN0/6Uks1mr+oGSdARJv8nN9E00Ojkifj1shbQBpYMcTqdvsbhbSHZEe6mK99qCZB5IxbH93uO6iDgw4/VfA5aRLA8ikmVQ5gL3kDSVDfo+kh4hWY9rfdH974+IXTPev6od5Sw7
J4gGlo6Df1u1fQZp2+fbCrUGJTNhfzuS2kBHs7QNfTF9AwuOAfaMiEG3xCyKn0gy+/2NJN/kb6OC1VxLvN82wN0RMS/j9a9oL1e6Sm+WtvQBJor9IiK+mfH+nSRf0G4u9P1JWhwRu2eJr5cBal5ARWuZ1YT7IBrbY8DNkq5i8066rEM9x/RrUnqOpLPUamNuRPx90eszJN1bQfxFJOsXFTYt+iDJNp5HZAlWsqJu4YNqLMlGTFmHyELS9/F+oFDjPJykyQRKfwBuJiK+kY6gK3SqH1fhRLFSO8pVsuFSXcTwrGVWE04Qje1P6WNC+qjUtXrlhilXD1PZrLy1kt4YEbcBhdVZ15aJKbZ7RBTvn35TOkosq3cXPe8Gnq2wNno08AOSVV0DuAP4UNov8KksbxDJsiCZdwHsZ6g7ylkZbmIaBSS1RERX+StLxv49yU5gkMxizbxhig1N2mb+U5LOSYAXgGMjItOucJJ+TrIK7B3p632BEyLiw2XiBp2M1yiDAJRscPVFNl9L7GvVNrHZKzlBNDAlmxedD0yJiNmS9iQZsvrJOhfNMkg7ZQ8n6djdmmQEUJQbZlrUNDSeZKhoYVjzbOCP/WoVpeILS3wojXkhfb418KfIuBy2pFeTrMY7MyJ2V7K/8nsjopJtP20Ec4JoYJLuJPmAubKSTrpG6iQbzSRdS7In8T0U7ecQEf82YBADLxJYFJ9psUBJPwYuj4ir09fvAP4uIj6WMf4W4GTg3Fp2Ekv694j4jKSFlF7LyqOYhon7IBpcRDzVr5Ou7MYxjdRJNspVteFQIQFogC1LK3ir/SKid4OeiLhG0r8OFtBPS0Tc1e/nrxaLPRbmGn23Bvdqak4Qje0pSfsDkS66dyKQebllq7uhbjhUvGXpecAVVLBlKfBnSV8Cfp6+PppkVeKsVqZJqrBh0OEks/pzFRGd6dMOYG0km08VVpjdIu/7NxM3MTUwJZul/IBkbSMB1wP/1CidjM2qqA9hHDCfZLhyNRsO3RPJlp//TPJB+aNK1gNLO6uLJ+rdCpyR9edH0i4kiWl/kn6Mx4GjszZxDZWkO4C3RsTL6espJGuJ7V+L+zcDJ4gGJGnHiHhqgHPvjohFtS6TZTeMfQh3Av9OMpLnPRHxeI36APovtjeJZP7MGqjdkuuFSXnljln13MTUmG6QdEhEPFF8UNJxJEsnO0GMYMP4Dfs4ki1Lv5Emh53pa58vK505/89Uvq96/8X2riCp/RxDZYvtDdUaSXuncymQ1E5l80isDNcgGpCkd5J8c3xXRCxJj51KMpP2HRGxrJ7ls8agIe6rPhyL7Q1FmhB+QV+/SSvwgaI+Chsi1yAaUERcLWk9cI2kwgY/+wBvinT5cRv9NPQtS7eLiPMlnRjJ0t23SLq7giIMaUfDYbAz8HqSuRyHAfuSYYkPy84JokFFxI1pk9LNJMsLvMUzSJvOULcsLWz5+YySrUv/XGF8qR0NL6wgfqi+HBG/krQ1ya563yWZuLdvDcswqrmJqQEVTXQTybC+jWy+H4QnujUpSZ0R0Zbx2ncDvwN2pG9f9TMiIvMmPEp2xSsstlfRjoZDVRixJembwAMRcUklo7isPNcgGpAnuhkMuGVppt/pdM7A/HTE20sk38ArNsTF9obqaUnnAm8Dvp0uXeLViIeRaxBmDSrdD6TwC1zYsvS7EfF/GePvioh9cipe7tLF+g4hqT0skdQKvC4irq9z0UYNJwizBpVuGPT3wBz6ag5lF/sriv8+yYJ/v6BoL+TCsFEzNzGZNa7/pm+xv2oGKBQmlJ2R/imSGkm5eRDWJJwgzBpXVYv9FVlE32AH0uerJO0VEZXsbGejlDt0zBrX7ZJeN4T4NpIJcq3Aq4CPAW8Hfpyu72RNzn0QZg1mGBf7uxV4Z7/F7q4i6fjtLLfxkI1+bmIyazzvLn9JJjNIEkvBRpLd4damM/WtyTlBmDWYYVzs72LgTklXpK/fA1wiaTLw0DDdwxqYm5jM
mli64N0B6cvfR0RHPctjI4sThJmZleRRTGZmVpIThJmZleQEYVaCpC9KelDS/ZLulZTbEtKSbk77AsxGFI9iMutH0htIhpLuHRHrJU0DJtS5WGY15xqE2Su1AisjYj1ARKyMiD9LOk3S3ZIWSzpPkqC3BvB9SR2SHpb015Iuk7RE0tfTa+ZI+qOki9Nrfp2uRroZSQdL+oOkeyT9Kp28hqRvSXoordF8t4b/FtbEnCDMXul6YEdJ/yfpLElvTo+fGRF/HRG7A5PYfMLahohoB84BrgBOAHYH/kHSduk1uwJnRcRfAauATxbfNK2pfAl4a0TsDXQAJ6Xx7wNem86S/noOf2ezV3CCMOsnXXqiDVgArAB+IekfgIMk3ZkudfEW4LVFYYVd2B4AHoyIZ9IayGMkO7YBPBURv0+f/xx4Y79b7wfsBvxe0r3AscBOJBv6rAPOl3QY0DVsf1mzQbgPwqyEiNhEst/3zWlC+BiwB9AeEU9J+grJPtAFhaUpeth8+YoeivZq6H+bfq8F3BARR/Uvj6R9gL8FDgc+hZfkthpwDcKsH0m7SppfdGgv4JH0+cq0X+DwKt56dtoBDvBB4LZ+5+8ADpA0Ly3HZEmvTu+3VURcDfw/YM8q7m1WMdcgzF5pCvAjSVuTbOW5lKS56UVgMfAX4O4q3vcR4ARJF5CsdXR28cmIWJE2ZV2a7q8MSZ/EauCKdAc5ASdVcW+zinmpDbMakDQHWJR2cJs1BDcxmZlZSa5BmJlZSa5BmJlZSU4QZmZWkhOEmZmV5ARhZmYlOUGYmVlJThBmZlbS/wcrBv6wkNgMRQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "fdist_filtered = FreqDist(filtered_text)\n",
    "fdist_filtered.plot(30,cumulative=False)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### But wait, doesn't the word \"cat\" appear more than twice?\n",
    "Oh! That's because we have both \"cat\" AND \"cats\", which the computer counts as two different words!\n",
    "Introducing..."
   ]
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEMMING"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "ps = PorterStemmer()\n",
    "stemmed_words=[]\n",
    "for w in filtered_text:\n",
    "    stemmed_words.append(ps.stem(w))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEnCAYAAACzCdQdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deZydZX338c83k8ky2SCEhDGEhCWJRRRkRsClCqiIdaFSUCgi2gfjY7G1Ijy0VgG3V2vr0haqgIBIC2hVKCQii8giUoQZGiFhSyQqhCWEhGyTbTK/54/7PsnJcGbmnDPnPtt836/XeeWc+76v6/xmMnN+c93XpojAzMysv1G1DsDMzOqTE4SZmRXkBGFmZgU5QZiZWUFOEGZmVtDoWgdQSdOmTYs5c+aUVXbz5s2MHz9+WO/fLHXUQwyuw3VkXUc9xFAPdXR3d6+OiL0LnoyIpnl0dHREubq6usou22x11EMMrsN1ZF1HPcRQD3UAXTHAZ6pvMZmZWUFOEGZmVpAThJmZFeQEYWZmBTlBmJlZQZklCEnjJD0g6TeSlkr6YoFrxkr6oaTlkn4taU7eub9Ljz8h6V1ZxWlmZoVl2YLYChwbEYcChwHHSzqq3zX/B1gbEQcB3wK+BiDpYOAU4DXA8cC3JbVkGKuZmfWT2US5dHztxvRla/rov7b4CcCF6fMfAxdLUnr8BxGxFVghaTlwBPA/lY7z0WfXc+b3H2Tbtm2Mue2OsutpaRF/euAYOjoqGJyZWQ0pMtwPIv2rvxs4CPj3iDiv3/klwPER8Uz6+rfAkSRJ4/6I+M/0+BXAzyLixwXeYwGwAKC9vb1j4cKFJcW4fM12zrvjpRK/ssIO3KOFf3pn4QmJxerp6aGtra2mddRDDK7DdWRdRz3EUA91dHZ2dkdEZ8GTA82gq+QD2AO4Ezik3/ElwL55r38LTAMuBj6cd/wK4KSh3qecmdRbtvfGM2t74mf3/DqeWdtT1uPRZ9fF7PMWxfy//2n09fWVHEO+Ws+qrJcYXIfryLqOeoihHupgkJnUVVmLKSJelnQnSX/CkrxTK4FZwDOSRgNTgJfyjufsmx6ruLGjW5i5x3ieb0v+LcfMPcYzbeJYVm/cysqXN7PvnsP7a8DMrB5kOYppb0l7pM/HA+8EHu932U3AGenzk4BfpBntJuCUdJTT/sBc4IGsYq2E+ftMBODJFzbUOBIzs8rIchRTO3CnpIeBB4HbI2KRpC9Jen96zRXAXmkn9NnA3wJExFLgv4BHgVuAsyJiR4axDtu8GZMAeOL5jUNcaWbWGLIcxfQw8PoCx8/Pe74FOHmA8l8FvppVfJWWSxBuQZhZs/BM6grZ1YJwgjCz5uAEUSHzZiR9EMtf3MiOvuyGDpuZVYsTRIVMGtfKtLZRbOvt4/cvbap1OGZmw+YEUUGzJiddOu6HMLNm4ARRQftNaQU8ksnMmoMTRAXt5xaEmTURJ4gK2m+KE4SZNQ8niAqaOXk0EqxYvYmtvXU9r8/MbEhOEBU0tkXMntpGb1+wYrVHMplZY3OCqDBPmDOzZuEEUWHz9/GSG2bWHJwgKmzXmkwe6mpmjc0JosLcgjCzZuEEUWFz9prA6FHiD2t66NnWW+twzMzK5gRRYWNGj+KAvScQActX+TaTmTUuJ4gMeCSTmTWDLLccnSXpTkmPSloq6dMFrjlX0uL0sUTSDklT03O/k/RIeq4rqzizMD9NEMvcgjCzBpbZjnJAL/DZiHhI0iSgW9LtEfFo7oKI+GfgnwEkvQ/4TESsyavjmIhYnWGMmZi3j1sQZtb4MmtBRMRzEfFQ+nwD8Bgwc5AipwLXZRVPNXn7UTNrBorIfvczSXOAe4BDImJ9gfNtwDPAQbkWhKQVwFoggEsj4rIB6l4ALABob2/vWLhwYVkx9vT00NbWVlbZ/nXsiODD17/Atj64+oTpTBhTfB6uZBy1Ku86
XEcj1FEPMdRDHZ2dnd0R0VnwZERk+gAmAt3AiYNc8yFgYb9jM9N/pwO/Ad461Ht1dHREubq6usouW6iO9/zbPTH7vEXx4IqXahpHLcq7DtfRCHXUQwz1UAfQFQN8pmY6iklSK/AT4JqIuH6QS0+h3+2liFiZ/rsKuAE4Iqs4s+AZ1WbW6LIcxSTgCuCxiPjmINdNAd4G3Jh3bELasY2kCcBxwJKsYs3CfPdDmFmDy3IU05uB04FHJC1Oj30O2A8gIi5Jj30AuC0i8tfHngHckOQYRgPXRsQtGcZacZ4LYWaNLrMEERH3AiriuquAq/odewo4NJPAqmSe12QyswbnmdQZedWUcUwcO5qXNm1j9cattQ7HzKxkThAZkcS8GRMBtyLMrDE5QWRo59Lf7ocwswbkBJGhudPTjmoPdTWzBuQEkSFvHmRmjcwJIkM7J8s9vyE3O9zMrGE4QWRo2sQxTJ0whg1be3l+/ZZah2NmVhIniAxJYu70ZCSTJ8yZWaNxgsiY+yHMrFE5QWRs15IbHslkZo3FCSJjbkGYWaNygsjYvOm5/ak30NfnkUxm1jicIDI2pa2VGZPHsmV7H0+v7al1OGZmRXOCqAIv/W1mjcgJogq8eZCZNSIniCrI7Q3hNZnMrJFkueXoLEl3SnpU0lJJny5wzdGS1klanD7Ozzt3vKQnJC2X9LdZxVkNuRbEMrcgzKyBZLnlaC/w2Yh4KN1fulvS7RHxaL/rfhkR780/IKkF+HfgncAzwIOSbipQtiEclM6m/u2LG9m+o4/WFjfczKz+ZfZJFRHPRcRD6fMNwGPAzCKLHwEsj4inImIb8APghGwizd6EsaOZNXU823cEv1u9aegCZmZ1QNVYZVTSHOAe4JCIWJ93/GjgJySthGeBcyJiqaSTgOMj4sz0utOBIyPiUwXqXgAsAGhvb+9YuHBhWTH29PTQ1tZWVtli6viHe9fS9dxWzj5qCm+eNb5mcVSjvOtwHY1QRz3EUA91dHZ2dkdEZ8GTEZHpA5gIdAMnFjg3GZiYPv8TYFn6/CTg8rzrTgcuHuq9Ojo6olxdXV1lly2mjq/97LGYfd6i+Matj9c0jmqUdx2uoxHqqIcY6qEOoCsG+EzN9Ga4pFaSFsI1EXF9geS0PiI2ps9vBlolTQNWArPyLt03Pdawdi254ZFMZtYYshzFJOAK4LGI+OYA1+yTXoekI9J4XgIeBOZK2l/SGOAU4KasYq2G3PajngthZo0iy1FMbya5NfSIpMXpsc8B+wFExCUkt5I+KakX2AyckjZ5eiV9CrgVaAGujIilGcaauQP2nkDLKPG7lzaxZfsOxrW21DokM7NBZZYgIuJeQENcczFw8QDnbgZuziC0mhjX2sKcvdr47YubWL5qI4fMnFLrkMzMBuUB+VXkpb/NrJE4QVTRvBnuqDazxuEEUUXzvGifmTUQJ4gq8rLfZtZInCCqaM5ebYxpGcXKlzezYcv2WodjZjYoJ4gqGt0yigPThfuWrXI/hJnVNyeIKps3I00Q7ocwszrnBFFlu/oh3IIws/rmBFFl3n7UzBqFE0SVzd+5/agThJnVNyeIKpu5x3jaxrTw4oatrN20rdbhmJkNyAmiykaNEnPTkUy+zWRm9cwJogY8o9rMGoETRA24H8LMGoETRA3sbEF4qKuZ1bEsd5SbJelOSY9KWirp0wWuOU3Sw5IekXSfpEPzzv0uPb5YUldWcdZCfgsi2R/JzKz+ZLmjXC/w2Yh4SNIkoFvS7RHxaN41K4C3RcRaSe8GLgOOzDt/TESszjDGmpg+aSyTx41m3ebtvLhhK9Mnj6t1SGZmr5BZCyIinouIh9LnG4DHgJn9rrkvItamL+8H9s0qnnoiyf0QZlb3VI1bHJLmAPcAh0TE+gGuOQd4dUScmb5eAawFArg0Ii4boNwCYAFAe3t7x8KFC8uKsaenh7a2trLKllPHpd3ruO2pzXz00Em8b96EmsWRRXnX4ToaoY56iKEe
6ujs7OyOiM6CJyMi0wcwEegGThzkmmNIWhh75R2bmf47HfgN8Nah3qujoyPK1dXVVXbZcur4/n0rYvZ5i+LcHy2uaRxZlHcdrqMR6qiHGOqhDqArBvhMzXQUk6RW4CfANRFx/QDXvA64HDghIl7KHY+Ilem/q4AbgCOyjLXadi7a5+1HzaxOZTmKScAVwGMR8c0BrtkPuB44PSKezDs+Ie3YRtIE4DhgSVax1kIuQSx/YQN9fR7JZGb1J8tRTG8GTgcekbQ4PfY5YD+AiLgEOB/YC/h2kk/ojeRe2AzghvTYaODaiLglw1irbuqEMUybOJbVG7ey8uXNzJo6vHuQZmaVllmCiIh7AQ1xzZnAmQWOPwUc+soSzWX+PhNZvXwrT76wwQnCzOqOZ1LX0K5+CA91NbP64wRRQzs3D3reCcLM6o8TRA3N3bmqq0cymVn9cYKooXkzkn0hlr+4kd4dfTWOxsxsdyUnCEl7pnMXbJgmjWtl5h7j2dbbx+/X9NQ6HDOz3RSVICTdJWmypKnAQ8B3JRWc22ClybUi3A9hZvWm2BbElEjWUDoRuDoijgTekV1YI8c8L9pnZnWq2AQxWlI78EFgUYbxjDjzpicJYpk7qs2szhSbIL4I3Aosj4gHJR0ALMsurJHDy36bWb0qdib1cxGxs2M6Ip5yH0RlHDR9IhKsWL2Jrb07ah2OmdlOxbYgLirymJVoXGsLc/aawI6+4KkXN9U6HDOznQZtQUh6I/AmYG9JZ+edmgy0ZBnYSDJ3+kRWrN7Eky9sGBlb6plZQxiqBTGGZMOf0cCkvMd64KRsQxs5cv0QT7ofwszqyKAtiIi4G7hb0lUR8fsqxTTi7Fy07/mNHDtt0AVwzcyqpthO6rGSLgPm5JeJiGOzCGqk2a0FccjkGkdjZpYoNkH8CLiEZGtQD7WpsDl7TaC1RfxhTQ9beifWOhwzM6D4UUy9EfGdiHggIrpzj8EKSJol6U5Jj0paKunTBa6RpH+TtFzSw5IOzzt3hqRl6eOMEr+uhjJm9Cj2nzYBgGfWO/+aWX0oNkEslPSXktolTc09hijTC3w2Ig4GjgLOknRwv2veDcxNHwuA7wCkdV8AHAkcAVwgac8iY21IuX6IP6zfXuNIzMwSxSaIM4BzgfuA7vTRNViBiHguIh5Kn28AHgNm9rvsBJK1nSIi7gf2SJf0eBdwe0SsiYi1wO3A8UXG2pBymwf9YV1vjSMxM0soIrJ/E2kOcA9wSLroX+74IuAf0/2rkXQHcB5wNDAuIr6SHv8CsDkivl6g7gUkrQ/a29s7Fi5cWFaMPT09tLUNb1/o4dTx65Vb+Kf7Xua1e7dw4dF71yyOSpR3Ha6jEeqohxjqoY7Ozs7uiOgseDIihnwAHyn0KLLsRJIWx4kFzi0C3pL3+g6gEzgH+Hze8S8A5wz1Xh0dHVGurq6usstWoo4VL26M2ectitdfeHNN46hEedfhOhqhjnqIoR7qALpigM/UYkcxvSHv+Tjg7ST7Qlw9WCFJrcBPgGsi4voCl6wEZuW93jc9tpKkFZF//K4iY21Is6a2MXb0KNZs7mPd5u1MGd9a65DMbIQrqg8iIv4q7/Fx4HCSlsGAJAm4AngsIgZa2O8m4CPpaKajgHUR8RzJyrHHpbvX7Qkclx5rWi2jxNx086BlnlFtZnWg2BZEf5uA/Ye45s3A6cAjkhanxz4H7AcQEZcANwN/AiwHeoCPpefWSPoy8GBa7ksRsabMWBvGvBmTWLJyPU+8sIHOOUMNEjMzy1ZRCULSQiDXm90C/BHwX4OViaTjedB1I9L7X2cNcO5K4Mpi4msWuZFM3n7UzOpBsS2I/NFDvcDvI+KZDOIZ0bz9qJnVk2L7IO4GHidZyXVPYFuWQY1Uucly3n7UzOpBUQlC0geBB4CTSfal/rUkL/ddYa+aMo7xo8VLm7axeuPWWodjZiNcsbeY/h54Q0SsApC0N/Bz4MdZBTYSSWK/KaN54qXtPPn8
BqYdNLbWIZnZCFbsUhujcskh9VIJZa0EsyYnOdv9EGZWa8W2IG6RdCtwXfr6QyRDVK3C9puS/Jd4dzkzq7Wh9qQ+CJgREedKOhF4S3rqf4Brsg5uJMq1IJ50R7WZ1dhQt4n+hWT/aSLi+og4OyLOBm5Iz1mF7WxBPL8htw6VmVlNDJUgZkTEI/0PpsfmZBLRCDdl7CimThjDhq29PLduS63DMbMRbKgEsccg58ZXMhBLSGJeuiaTO6rNrJaGShBdkj7e/6CkM0mW8LYMeMkNM6sHQ41i+hvgBkmnsSshdAJjgA9kGdhINjeXINxRbWY1NGiCiIgXgDdJOgY4JD3804j4ReaRjWDz98klCLcgzKx2ipoHERF3AndmHIul5k1P12RatYEdfUHLqEEXxTUzy4RnQ9ehKW2t7DN5HFu29/H0mp5ah2NmI5QTRJ3y0t9mVmuZJQhJV0paJWnJAOfPlbQ4fSyRtEPS1PTc7yQ9kp7ryirGejZvurcfNbPayrIFcRVw/EAnI+KfI+KwiDgM+Dvg7n7bih6Tnu/MMMa6tasF4ZFMZlYbmSWIiLgHKHYf6VPZtRCg4bkQZlZ7ynK9H0lzgEURccgg17QBzwAH5VoQklYAa0n2wb40Ii4bpPwCYAFAe3t7x8KFC8uKtaenh7a2trLKZlHHlt4+TrthFS2Ca06cQWsJI5mGG0e9fS9ch+vIoo56iKEe6ujs7Owe8E5NRGT2IFmvackQ13wIWNjv2Mz03+nAb4C3FvN+HR0dUa6urq6yy2ZVxx9/7Rcx+7xF8cTz66saRz1+L1yH66h0HfUQQz3UAXTFAJ+p9TCK6RT63V6KiJXpv6tIVo49ogZx1VxuTSZPmDOzWqhpgpA0BXgbcGPesQmSJuWeA8cBBUdCNbt57ocwsxoqdke5kkm6DjgamCbpGeACoBUgIi5JL/sAcFtEbMorOoNk/adcfNdGxC1ZxVnP5nsuhJnVUGYJIiJOLeKaq0iGw+Yfewo4NJuoGss8L9pnZjVUD30QNoAD9p5Ayyjx+5c2sWX7jlqHY2YjjBNEHRs7uoU5e7XRF7B8lVsRZlZdThB1zkt/m1mtOEHUuVw/hDuqzazanCDqnJfcMLNacYKoc/P28UgmM6sNJ4g6N3tqG2NaRrHy5c1s2LK91uGY2QjiBFHnRreM4sDc3hAeyWRmVeQE0QDm59Zkcj+EmVWRE0QD8PajZlYLThANIDeSaZk7qs2sipwgGoDnQphZLThBNICZe4ynbUwLL27YyppN22odjpmNEE4QDWDUKDF3hpfcMLPqcoJoEPO9u5yZVVlmCULSlZJWSSq4G5ykoyWtk7Q4fZyfd+54SU9IWi7pb7OKsZHMcwvCzKosyxbEVcDxQ1zzy4g4LH18CUBSC/DvwLuBg4FTJR2cYZwNYdf2ox7JZGbVkVmCiIh7gDVlFD0CWB4RT0XENuAHwAkVDa4B5W8/GhE1jsbMRgJl+WEjaQ6wKCIOKXDuaOAnwDPAs8A5EbFU0knA8RFxZnrd6cCREfGpAd5jAbAAoL29vWPhwoVlxdrT00NbW1tZZatRR0Tw0RtXsXF78N337s3U8S2ZxVHv3wvX4ToqUUc9xFAPdXR2dnZHRGfBkxGR2QOYAywZ4NxkYGL6/E+AZenzk4DL8647Hbi4mPfr6OiIcnV1dZVdtlp1nPyd+2L2eYvi7idWZRpHI3wvXIfr8M95ZeoAumKAz9SajWKKiPURsTF9fjPQKmkasBKYlXfpvumxEW/ePh7JZGbVU7MEIWkfSUqfH5HG8hLwIDBX0v6SxgCnADfVKs564pFMZlZNo7OqWNJ1wNHANEnPABcArQARcQnJraRPSuoFNgOnpM2dXkmfAm4FWoArI2JpVnE2kl1Lbngkk5llL7MEERGnDnH+YuDiAc7dDNycRVyNbN7ORfs20NcXjBqlGkdkZs3MM6kbyNQJY9h70lh6tu1g5cubax2OmTU5J4gGM9/9EGZWJU4QDWZu
uiaTl/42s6w5QTSYnS0Ibz9qZhlzgmgwu7Yf9UgmM8uWE0SDmTs9ucX02xc30rujr8bRmFkzc4JoMJPGtTJzj/Fs6+3j92t6ah2OmTUxJ4gGNC+3eZD7IcwsQ04QDWhe3tLfZmZZcYJoQJ4LYWbV4ATRgHYt2ueRTGaWHSeIBnTQ9ImMEqxYvYmtvTtqHY6ZNSkniAY0rrWF2XtNYEdf8NSLm2odjpk1KSeIBrVzJJP7IcwsI04QDSrXUf2Eh7qaWUacIBpUbqirO6rNLCuZJQhJV0paJWnJAOdPk/SwpEck3Sfp0Lxzv0uPL5bUlVWMjcxDXc0sa1m2IK4Cjh/k/ArgbRHxWuDLwGX9zh8TEYdFRGdG8TW0OdMm0Noi/rCmh55tvbUOx8yaUGYJIiLuAdYMcv6+iFibvrwf2DerWJpRa8soDpiWdFQv820mM8uAIiK7yqU5wKKIOGSI684BXh0RZ6avVwBrgQAujYj+rYv8sguABQDt7e0dCxcuLCvWnp4e2trayipbqzq+ef/L/OrpLZzVOZlj999VZrhxNOL3wnW4jkaMoR7q6Ozs7B7wTk1EZPYA5gBLhrjmGOAxYK+8YzPTf6cDvwHeWsz7dXR0RLm6urrKLlurOi6648mYfd6i+MqipRWNoxG/F67DdTRiDPVQB9AVA3ym1nQUk6TXAZcDJ0TES7njEbEy/XcVcANwRG0irG+5JTe8eZCZZaFmCULSfsD1wOkR8WTe8QmSJuWeA8cBBUdCjXTzvP2omWVodFYVS7oOOBqYJukZ4AKgFSAiLgHOB/YCvi0JoDeS+2AzgBvSY6OBayPilqzibGSzprYxrnUUz6/fwrqe7Uxpa611SGbWRDJLEBFx6hDnzwTOLHD8KeDQV5aw/lpGibnTJ/HIynU8uWoDb5gztdYhmVkT8UzqBjfPE+bMLCNOEA1u/j7eftTMsuEE0eDmzvD2o2aWDSeIBjffu8uZWUacIBpc+5RxTBo7mjWbtrF649Zah2NmTcQJosFJ2rX0t/shzKyCnCCawDz3Q5hZBpwgmoC3HzWzLDhBNAFvP2pmWXCCaAK5PohlL2zMrYZrZjZsThBNYNrEsew1YQwbtvby3LottQ7HzJqEE0STcEe1mVWaE0ST2NlR7X4IM6sQJ4gmkeuHcAvCzCrFCaJJ5EYyLfOSG2ZWIU4QTSK3aN+yVRvY4ZFMZlYBmSYISVdKWiWp4JahSvybpOWSHpZ0eN65MyQtSx9nZBlnM5gyvpX2KePYsr2PVZt21DocM2sCme0ol7oKuBi4eoDz7wbmpo8jge8AR0qaSrJFaScQQLekmyJibcbxNrS5Mybx3LotdD27lVlPv1x2PcvXbGf0MMq7DtfRCHXUQwyVrOM123cwrrVlWPX0p6wnVkmaAyyKiEMKnLsUuCsirktfP0Gyj/XRwNER8YlC1w2ks7Mzurq6yoqzu7ubjo6OssrWSx1f/emjfPeXK4b1/mbWmH5+9ts4aPrEkstJ6o6IzkLnsm5BDGUm8HTe62fSYwMdfwVJC4AFAO3t7XR3d5cVSE9PT9ll66WO17T18pq9x9CzrZdRo8q/e9jX1zes8q7DdTRCHfUQQyXrWPb4UtY9XeGP9IjI9AHMAZYMcG4R8Ja813eQ3FY6B/h83vEvAOcM9V4dHR1Rrq6urrLLNlsd9RCD63AdWddRDzHUQx1AVwzwmVrrUUwrgVl5r/dNjw103MzMqqTWCeIm4CPpaKajgHUR8RxwK3CcpD0l7Qkclx4zM7MqybQPQtJ1JB3O0yQ9QzIyqRUgIi4Bbgb+BFgO9AAfS8+tkfRl4MG0qi9FxJosYzUzs91lmiAi4tQhzgdw1gDnrgSuzCIuMzMbWq1vMZmZWZ1ygjAzs4KcIMzMrCAnCDMzKyjzpTaqSdKLwO/LLD4NWD3MEJqljnqIwXW4jqzrqIcY6qGO2RGxd6ETTZUghkNSVwywHslIq6MeYnAdriPrOuohhnqqoxDfYjIz
s4KcIMzMrCAniF0ucx11FYPrcB1Z11EPMdRTHa/gPggzMyvILQgzMyvICcLMzApygjAzs4JGbIKQ9OZijpVYZ7ukscOpw2pL0tfSf0+udSz1QtL+xRyz5jNiO6klPRQRhw91rMQ6fw4cCPwkIs4ZZnz7RMTzw6mjjPecDcyNiJ9LGg+MjogNJdbxJpJtZncuJR8RV5dQ/o6IePtQxwYpPxb4swIxfKnI8o8ArwO6h/OzkNY1rK8lvf5A4JmI2Crp6DS2qyPi5SLKDhp/RDxUZAyFfle6I6KjmPLp9QJOAw6IiC9J2g/YJyIeKKGOccD/AV4DjMsdj4i/KKGO24GTc9+/dEOyH0TEu4ooe+Jg5yPi+iLqODYifjFQXcXUUU2Z7gdRjyS9EXgTsLeks/NOTQZahlN3RLwj/UU4eDj1pK4A3lPMhelufBcBfwSMIfk6NkXE5GLfTNLHgQXAVJIkty9wCVDKh9l/pGUXAzvSwwEMmSDSX/42ks2l9gSUnpoMzCw2BuBGYB3QDWwtoVzOLcBaYKKk9fkhkmxhMuT3tIJfC8BPgE5JB5EMZbwRuJZko62hfCP9dxzJXu+/SWN5HdAFvHGwwpJeTfJhPKXfB9pk8j6gi/RtoA84FvgSsIHka3tDCXX8B/A48K60jtOAx0qMY1p+co2ItZKmF1n2fem/00k+Q36Rvj4GuA8o5sP9bWm59xU4F0XWQb/PrqRwxDfTcx+OiP8spp6hjLgEQfIBOpHka5+Ud3w9cNJwK083QVpagXqKSg6pi4FTgB+RfBB8BJhX4lueBRwB/Dp9/2Ul/OLkdAIHR3nN0k8AfwO8iuTDPfehup7k6yvWvhFxfBnvD0BEnAucK+nGiDihzGoq9bUA9EVEr6QPABdFxEWS/reYghFxDICk64HDI+KR9PUhwIVFVDEfeC+wB7t/oG0APl78lwDAkRFxeC729IN5TIl1HBQRJ0s6ISK+L+la4Jcl1tEnab+I+APsbDUX9fMaER9Ly9xG8nP+XPq6HbiqyDouyK9rGCYNcm7CMOveacQliIi4G7hb0lURUe7CfnUnIpZLaomIHcD30l/Evyuhiq0RsS1pAIGk0RT5i5NnCbAP8FyJ5YiIfwX+VdJfRcRFpZbPc9iuNWsAABAZSURBVJ+k1+Y+DMs1jORQya8FYLukU4Ez2PUh3VpiHfPzvx8RsUTSHw1VKCJuBG6U9MaI+J8S37O/7ZJaSH+mJO1N0qIoqY7035fTJPc8yV/zpfh74F5Jd5Mk7j8maTmXYlYuOaReAPYrpmChv/zz5VoBQ4mILw5y7tJi6ijGiEsQeXok/TOvvJ95bO1CKltP+tfYYkn/RPIBXeoAhLslfQ4YL+mdwF8CC0usYxrwqKQHyLu9ExHvL6GOPkl79LtHfGpEfLvI8m8BPippRRpD7tbQ64opLGkDuxJj7i//oLRbTG9Nnz446IXF+Rjwf4GvRsSKtHP4P0qs42FJlwO52w6nAQ+XUP5/JZ3FMO79A/8G3ABMl/RVktb650soD3BZ+vPweeAmkjsBXyilgoi4Je2bOSo99DcRUeoqqHdIuhW4Ln39IeDnRZbN/eU/n+T22k3p6/cBRffHVMtI7qS+DfghcA7JL+AZwIsRcV5NAytD2kx+geT22WeAKcC3I2J5CXWMIukAPI7kw/BW4PJSbhdJeluh42mrrdg6FkfEYf2O/W9EvL7I8rMHiKFqrUVJ30ufvhwRn6nW+w4k7RP5JJBLXPcA34mILUWW/xHJvf8/J+/ef0R8usQ4Xk3SpyXgjogoqf9A0v4RsWKoY0XUMxOYze6DGO4psY4PkPf9jIgbSix/D/Ce3CAQSZOAn0bEWwcvWV0jOUF0R0SHpIdzf11KejAiSuk0q7m02X51RJw2zHpOJPkBLadjt2Jyo4hyiSn9+h6OiNeUUMdbSEZjfS+9lTGx1A+RSkg/mE+kzBFVaR0rKHCrLyIOqECIxcbwvxHx+tzviqRW4JcRcdSQ
hdn5f7g0Il49zDgqMZrqayR/8S9l1y2uKLGVO2ySniD5Od+avh5L8nM+v5pxDGUk32LK3c98TtJ7gGdJRvA0lIjYIWm2pDERsW0YVb0P+Fb6l80PgVsioreYgv1uy+x2iiJvy+S5BfihpNx91E+kx4oi6QKSzvL5wPdI7tf/JzCsOS5l+m+GN6IKkq8lZxxwMiX+nCqZ33Mhr/yrudgkM6x7/+nP6BP5ncOlqPBoqj8l6ZMp+f8jL1m/GBFHllq+n6uBByTlWh5/Cnx/mHVW3EhuQbyXZATELJIhopOBCyOi1PvuNSfpapIhrjcBm3LHi+3wyqunFXg3yV9YbwFuj4gzKxhqMTGMIkkKueG1t5Pc6toxcKndyi8GXg88lLstld9KrCZJSyLikAzqLfWv5sdJbj12s2v4MRHxUpHlzyQZkvo6kqQ7ETg/Ii4pIYZ7SP5fHmD3n9Eh/3KXdALJB+j72XXPHpLRVD+IiPtKiONnJPMgNhZbJiuSOkh+zyC5TVXU6LRqGsktiJOBeyNiCXCMpKnA1ym9Y7Ye/DZ9jGLw4W+Diojt6S9QAONJfimrmiAiog/4Tvoox7aICEm5W1QVG/JXhmGPqNLuk91GkbQoSv29XRcRPys3hoi4PH16N1Dura2SOpP7vX8lR1P1kAzmuIPdB1L89TDrLVlEdEt6mrQVVG4LK0sjuQXxio7PUjpDm42kXMvhaOAu4L+A24q9zVTBOOYC/0Ay2TB/xExRH0ySzgHmAu9M6/kL4NoKDDctmaRHgYOAskZUpXXcmfeyF/gd8PWIeKKEOv6RZPLk9ez+oVjsTOrzCx0vpS+lElSZmdRnFDoeEVW9vSPp/SQTGV8FrCIZJvt4KX1t1TCSWxCjJO0ZEWsB0hZEQ30/JC1kkLkKJXa8fYSk7+ETNe6o/h5wAfAtkhmqH6O0Ibt7Az8mmZQ2HzgfeEeFYyzWu4dbQaST3YYpd788vz8jSGY1F2NT3vNxJJPnSh2BVKifah3JjO7PRsRTRVQz7JnUkUywGw/sV0qSzcCXSYba/jwdAHAM8OEaxlPQSG5BfAT4HMnsY0huOX01IkodY14zecNKTySZoJYb534q8EKpQywlzWDX0gcPRMSqigRaWgy50WWPRMRr848VWb7QSJea9EFUgqQpJAkzN/zxbuBLEbGuhjGNBW6NiKNLKPNl4BmSZUJEMvP/QOAh4JPF1DXc0VRpHe8juZU8JiL2l3QYyfez2qOYuiKiU9JvgNdHRJ+k30TEodWMYygN9RdzJUXE1ZK62PVX1IkR8WgtYypVbn6BpG9ERP5fhwvTr61oSlYv/TrJ7SUBF0k6NyJ+XKl4i7Q17aheJulTwEqSTtFBSfokyeS+AyTlTwKbBPwqk0ir40qSGeofTF+fTtLKGnThuP7SkXr9b82Ue4uojWStrlK8v9+H32XpnJfzlEzQLEYlZlJfSLKkzF0AEbFYUtWGDOd5WdJEkjkp10haxe4ttbowYhMEQJoQGiopDGCCpANyzXQls21L7Zz9PPCGXKshnT/wc5LbNZmT9B8RcTrJ0NA24K9JmuHHkkxiHMq1wM9I+h3+Nu/4hohYU+Fwq+nAiPizvNdfTEdqFU3SJSTf02OAy0lmMZeyiuoj7Lo91EJyG6/U5NIj6YPs+nk6CchN1Cv2NsawZ1ID2yNinaT8Y6Uu+VEJJ5B8/Z8huVU2hdK/p5kb0QmiiXwGuEvSUyR//c8mGSpailH9bim9RHX3C+mQ9CqSX5bvkow2+WyxhdNbLutIbq81k82S3hIR98LOOQ2bS6zjTektmYcj4ouSvkGSTIv13rznvSS3L0sdvHAa8K8kq7oGcD/w4bQ/4FODFdTu6xflFrn79/TfUv8QWirpz4GWdEDEX5OsxFpVEZHfWqi7+Q85ThBNIJL1ZeYCuZmqj5fR0XyLXrm+zM2VirEIlwB3kAyjzK2AGnn/1uI2QD34JPD9tC8CkqXIi2lR5csllJ40Cb8EtA9V
KB24Acl8g3yTJVFKyyxt3RZa4hrg3iGKV3L9or8iWbBvK8nP+q0kLdWqqPCk0syN2E7qZqNhbtST1vFn7Jpx/MsocX2ZSpD0nYj4ZLXft16lHcInkXTo7kHSSooSl+v4Aslk0LeT/OUdwHcjouDw1bxyuZnDIhmGuTZ9vgfwh4goelc5SfNI5rbMiIhDJL2OpF/iKyXU0RDrFzUTJ4gmoAE26qnF5B+rLEm3AC+TjPbJnwX9jQELDV7fWGBcKaOgJH0XuCEibk5fvxv404go+jamkuW1zwUuzZvhXtJMcw1j/SJJ/xIRfzPQ0PBqj2JqFL7F1BzK3qin0Zq8I9CwNkCCnRPM/pJkWYcg2Q+h6NVcgaMiYucGQRHxMyXLypeiLSIe6Nc5XGo/RqH1i64qsmxu+PrXS3zPEc0JojkMZ6OespfmsKqoxAZIV5P0I+Rmk/85yQfmyUWWf1bS59l9P4lnS4xhtZL9tXNLoJxEiT+vEfHVdCmYP04PfazY9Ysiojt92gVsTpd0ya00O7aUOEYS32JqAulyDIeRdNiVu1GP1ZG8oaWjSZYOeYryl+t4NCIOHurYIOWnsvtkvXuAL5bSSZ3ONbiMZC/ntSTLj5wWVd7VUdL9wDsiXawvnYtwW0S8qZpxNAq3IJrDhbUOwCruvUNfUrSHJB0VEfcDSDqS5C/poqSJoKTNgXL6DVG9GbiTZPj0JuDPgJJWHK6AcZG3kmtEbJTUVuUYGoYTRBOIiLuV7KQ2NyJ+nv7At9Q6LitfJf6yzmuFtJLcqsqtFLofyZpGxdazN/D/KG973v5DVG8kaQWdTm222Nwk6fDcQoWSOil9XsmI4VtMTUDSx0k2Xp8aEQemcyIuiYi3D1HUmpgG2H41p9gkpApsz1svQ1TThPBDdvWhtAMfyuujsDzVnClr2TmLZP7CeoCIWEbpa9RYk4mI36dJYDTwfPp8f5JlHkpZ7G+viLiCZJmKuyNZXrvYlWBzZgD5Ox5uS49V2/4kGxd9kmQzqicofqmPEccJojlsjbztRiWNxj/0tstPgB2SDiLpKJ5FsnZVsXbbnlfS6yl9e97cENULJV0I/Jrih6hW0hciYj3JZL9jSJb+KHdzqqbnBNEc7k5XxBwv6Z0kS5jfNEQZGzn60rWTTgQuiohzKWKpjTxfSZf6+CzJbabLSdb/KlpEfJVkHaW16eNjEfEPpdRRIbnJhu8hmU3+U2BMDeJoCO6kbg4/Ihnm+gjJIn03U8acCGta2yWdSrIpVG49pNZiCqbzBOZGxCKS21Jlb2CUdgwXtYtdhlZKupRkx8GvpbOx/YfyAPyNaQ6XAvdHxMkRcRKwkWHsAWxN52PAG0k2xFqRLgdf1MZYEbGD5loh94MkC/S9KyJeJrlVdm5tQ6pfHsXUBNJJSD8m+UV+K8lfiu8tZb0ds4FI+hZJi+OH5G1qE0XuaW2NywmiSaSrZf438AfgAxHhsd0G7LYq624ioqgl1NOZ+uTVkZvNXepIJmsw7oNoYP12+oKkudwC/Dpdr78h92G2isvfjnYcyRpMpYxCWsSuZb9Jn6+XdFhElLS7nTUWtyAaWKUmQtnII6k7IjqKvPZakiRzE0mSeC/wMMn+Iz+KiFJXdrUG4RZEA3MCsGJIOjzv5SiSD/tSfvf3BQ7PW+DuAuCnJP1d3YATRJNygjBrft9g163IXuB3FL/UNySz8vO3sN1OsjPcZkmlbm1rDcQJwqz5vZtk5dQ57PqdPwUodtvSa0j6tW5MX78PuFbSBODRCsZpdcZ9EGZNrhLblqaL3OX2K/9VRBS9XLg1LicIsyZX6t7PZjmeSW3W/O6T9NpaB2GNxy0IsyZVyW1LbWRygjBrUp4nY8PlBGFmZgW5D8LMzApygjAzs4KcIMwKkPT3kpZKeljSYklHZvhed6XzDMzqimdSm/Uj6Y0kC9IdHhFbJU3D21LaCOQWhNkrtQOrI2IrQESsjohnJZ0v6UFJSyRdJkmwswXwLUld
kh6T9AZJ10taJukr6TVzJD0u6Zr0mh9Lauv/xpKOk/Q/kh6S9CNJE9Pj/yjp0bRF8/Uqfi9sBHOCMHul24BZkp6U9G1Jb0uPXxwRb0hnJY8naWXkbIuITuAS4EbgLOAQ4KOS9kqvmQ98OyL+CFgP/GX+m6Ytlc8D74iIw4Eu4Oy0/AeA16RzF76Swdds9gpOEGb9pMtadwALgBeBH0r6KHCMpF+nE9COBV6TV+ym9N9HgKUR8VzaAnkKmJWeezoifpU+/0/gLf3e+ijgYOBXkhYDZwCzgXXAFuAKSScCPRX7Ys0G4T4IswIiYgdwF3BXmhA+AbwO6IyIpyVdSLI7W05u2es+dl8au49dv2f9Jx31fy3g9og4tX88ko4A3g6cBHyKJEGZZcotCLN+JM2XNDfv0GHAE+nz1Wm/wEllVL1f2gEO8OfAvf3O3w+8WdJBaRwTJM1L329KRNwMfAY4tIz3NiuZWxBmrzQRuEjSHiQb7Cwnud30MrAEeB54sIx6nwDOknQlyT4K38k/GREvpreyrpM0Nj38eWADcKOkcSStjLPLeG+zknmpDbMqkDQHWORlt62R+BaTmZkV5BaEmZkV5BaEmZkV5ARhZmYFOUGYmVlBThBmZlaQE4SZmRX0/wE+nukPuANd6gAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "fdist_stemmed = FreqDist(stemmed_words)\n",
    "fdist_stemmed.plot(30,cumulative=False)\n",
    "plt.show()"
   ]
  },
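  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### LEMMATIZATION\n",
    "Also, lemmatization! A side quest!\n",
    "Lemmatization reduces a word to its dictionary base form (its lemma), e.g. \"better\" becomes \"good\". Unlike stemming, which just strips suffixes (\"carrying\" becomes \"carri\"), the lemma is a real word (\"carry\")."
   ]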
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Lemmatized Word: carry\n",
      "Stemmed Word: carri\n"
     ]
    }
   ],
   "source": [
    "from nltk.stem.wordnet import WordNetLemmatizer\n",
    "lem = WordNetLemmatizer() \n",
    "from nltk.stem.porter import PorterStemmer\n",
    "stem = PorterStemmer()\n",
    "word = \"carrying\"\n",
    "# 'v' because carrying is a verb\n",
    "print(\"Lemmatized Word:\",lem.lemmatize(word,\"v\"))\n",
    "print(\"Stemmed Word:\",stem.stem(word))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Lemmatized Word: good\n",
      "Stemmed Word: better\n"
     ]
    }
   ],
   "source": [
    "word = \"better\"\n",
    "# 'a' because better is an adj\n",
    "print(\"Lemmatized Word:\",lem.lemmatize(word,\"a\"))\n",
    "print(\"Stemmed Word:\",stem.stem(word))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### PART-OF-SPEECH (POS) Tagging\n",
    "Another side quest!!\n",
    "POS tagging labels each token with its grammatical category: noun, pronoun, adjective, verb, adverb, etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('Kendra', 'NNP'), ('loves', 'VBZ'), ('cats', 'NNS'), ('.', '.'), ('In', 'IN'), ('fact', 'NN'), (',', ','), ('she', 'PRP'), ('has', 'VBZ'), ('TEN', 'VBN'), ('cats', 'NNS'), ('.', '.'), ('If', 'IN'), ('she', 'PRP'), ('did', 'VBD'), (\"n't\", 'RB'), ('have', 'VB'), ('a', 'DT'), ('house', 'NN'), (',', ','), ('a', 'DT'), ('husband', 'NN'), ('and', 'CC'), ('a', 'DT'), ('graduate', 'NN'), ('degree', 'NN'), ('in', 'IN'), ('data', 'NNS'), ('science', 'NN'), (',', ','), ('she', 'PRP'), (\"'d\", 'MD'), ('be', 'VB'), ('a', 'DT'), ('cat', 'JJ'), ('lady', 'NN'), ('!', '.')]\n"
     ]
    }
   ],
   "source": [
    "sent = \"Kendra loves cats. In fact, she has TEN cats. If she didn't have a house, a husband and a graduate degree in data science, she'd be a cat lady!\"\n",
    "Mytokens=nltk.word_tokenize(sent)\n",
    "MyTAGS = nltk.pos_tag(Mytokens)\n",
    "print(MyTAGS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "## MACHINE LEARNING\n",
    "### Count Vectorizer\n",
    "##### TESTING SMALL FILES FIRST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn\n",
    "from sklearn.feature_extraction.text import CountVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Kendra loves cats. In fact, she has TEN cats. If she didn't have a house, a husband and a graduate degree in data science, she'd be a cat lady!\""
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_file_names = []\n",
    "path=\"/Users/danielcaraway/Documents/IST_736_TextMining/SmallTextDocsK\"\n",
    "FileNameList=os.listdir(path)\n",
    "\n",
    "# METHOD A:\n",
    "# Getting a list of filenames\n",
    "ListOfCompleteFiles=[]\n",
    "for name in os.listdir(path):\n",
    "    next=path+ \"/\" + name\n",
    "    ListOfCompleteFiles.append(next)\n",
    "\n",
    "# METHOD B:\n",
    "# Getting the text from the filenames\n",
    "AllText_AllFiles=[]\n",
    "for file in ListOfCompleteFiles:\n",
    "    f=open(file)\n",
    "    content=f.read()\n",
    "    AllText_AllFiles.append(content)\n",
    "    f.close()\n",
    "    \n",
    "# AllText_AllFiles[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>and</th>\n",
       "      <th>be</th>\n",
       "      <th>cartoon</th>\n",
       "      <th>cat</th>\n",
       "      <th>cats</th>\n",
       "      <th>data</th>\n",
       "      <th>degree</th>\n",
       "      <th>didn</th>\n",
       "      <th>does</th>\n",
       "      <th>fact</th>\n",
       "      <th>...</th>\n",
       "      <th>of</th>\n",
       "      <th>one</th>\n",
       "      <th>sailors</th>\n",
       "      <th>science</th>\n",
       "      <th>she</th>\n",
       "      <th>so</th>\n",
       "      <th>spinach</th>\n",
       "      <th>ten</th>\n",
       "      <th>too</th>\n",
       "      <th>wow</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   and  be  cartoon  cat  cats  data  degree  didn  does  fact  ...  of  one  \\\n",
       "0    1   1        0    1     2     1       1     1     0     1  ...   0    0   \n",
       "1    0   0        1    0     1     0       0     0     1     0  ...   1    1   \n",
       "\n",
       "   sailors  science  she  so  spinach  ten  too  wow  \n",
       "0        0        1    3   0        0    1    0    0  \n",
       "1        1        0    0   1        2    0    1    1  \n",
       "\n",
       "[2 rows x 37 columns]"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# METHOD A (with filenames)\n",
    "# The vectorizer is handed file paths (input='filename') rather than the text itself.\n",
    "vec_filename = CountVectorizer(input='filename')\n",
    "transformed_files = vec_filename.fit_transform(ListOfCompleteFiles)\n",
    "# get_feature_names() was removed in scikit-learn 1.2; use get_feature_names_out() when it exists\n",
    "# and fall back to the old name on older releases.\n",
    "files_feature_names = list(\n",
    "    vec_filename.get_feature_names_out()\n",
    "    if hasattr(vec_filename, 'get_feature_names_out')\n",
    "    else vec_filename.get_feature_names()\n",
    ")\n",
    "# One row per file, one column per vocabulary term, cells hold the term counts.\n",
    "transformed_files_df = pd.DataFrame(transformed_files.toarray(), columns=files_feature_names)\n",
    "transformed_files_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>and</th>\n",
       "      <th>be</th>\n",
       "      <th>cartoon</th>\n",
       "      <th>cat</th>\n",
       "      <th>cats</th>\n",
       "      <th>data</th>\n",
       "      <th>degree</th>\n",
       "      <th>didn</th>\n",
       "      <th>does</th>\n",
       "      <th>fact</th>\n",
       "      <th>...</th>\n",
       "      <th>of</th>\n",
       "      <th>one</th>\n",
       "      <th>sailors</th>\n",
       "      <th>science</th>\n",
       "      <th>she</th>\n",
       "      <th>so</th>\n",
       "      <th>spinach</th>\n",
       "      <th>ten</th>\n",
       "      <th>too</th>\n",
       "      <th>wow</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 37 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   and  be  cartoon  cat  cats  data  degree  didn  does  fact  ...  of  one  \\\n",
       "0    1   1        0    1     2     1       1     1     0     1  ...   0    0   \n",
       "1    0   0        1    0     1     0       0     0     1     0  ...   1    1   \n",
       "\n",
       "   sailors  science  she  so  spinach  ten  too  wow  \n",
       "0        0        1    3   0        0    1    0    0  \n",
       "1        1        0    0   1        2    0    1    1  \n",
       "\n",
       "[2 rows x 37 columns]"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# METHOD B (with all text from files)\n",
    "# The vectorizer is handed the already-read strings (input='content').\n",
    "vec_content = CountVectorizer(input='content')\n",
    "transformed_text = vec_content.fit_transform(AllText_AllFiles)\n",
    "# get_feature_names() was removed in scikit-learn 1.2; use get_feature_names_out() when it exists\n",
    "# and fall back to the old name on older releases.\n",
    "content_feature_names = list(\n",
    "    vec_content.get_feature_names_out()\n",
    "    if hasattr(vec_content, 'get_feature_names_out')\n",
    "    else vec_content.get_feature_names()\n",
    ")\n",
    "# One row per document, one column per vocabulary term, cells hold the term counts.\n",
    "transformed_text_df = pd.DataFrame(transformed_text.toarray(), columns=content_feature_names)\n",
    "transformed_text_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### TESTING SMALL DATASET"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Word-level vectorizer that is given file paths, uses the built-in 'english' stop-word list,\n",
    "# and treats every run of ASCII letters as a token. Apostrophes therefore split contractions,\n",
    "# which is presumably why fragments such as 'm', 's' and 't' appear as columns further down.\n",
    "vec_removing_sw = CountVectorizer(\n",
    "    input='filename',\n",
    "    analyzer='word',\n",
    "    stop_words='english',\n",
    "    token_pattern='(?u)[a-zA-Z]+',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "# POSITIVE FILES\n",
    "# NOTE(review): absolute, machine-specific path; adjust for your environment.\n",
    "path = \"/Users/danielcaraway/Documents/IST_736_TextMining/AI_POS\"\n",
    "# Sorted because os.listdir() order is arbitrary; os.path.join replaces string concatenation,\n",
    "# and the comprehension avoids a loop variable named `next` that shadowed the builtin.\n",
    "pos_files = [\n",
    "    os.path.join(path, name)\n",
    "    for name in sorted(os.listdir(path))\n",
    "    if os.path.isfile(os.path.join(path, name))\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['advancements',\n",
       " 'artificial',\n",
       " 'cancer',\n",
       " 'care',\n",
       " 'child',\n",
       " 'day',\n",
       " 'daydream',\n",
       " 'dog',\n",
       " 'excited',\n",
       " 'free',\n",
       " 'given',\n",
       " 'grateful',\n",
       " 'gritty',\n",
       " 'groceries',\n",
       " 'grow',\n",
       " 'intelligence',\n",
       " 'kids',\n",
       " 'life',\n",
       " 'likely',\n",
       " 'love',\n",
       " 'm',\n",
       " 'nitty',\n",
       " 'order',\n",
       " 'pay',\n",
       " 'school',\n",
       " 'taken',\n",
       " 'taxes',\n",
       " 'thanks',\n",
       " 'time',\n",
       " 'world',\n",
       " 'yes']"
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the stop-word-removing vectorizer on the positive files only.\n",
    "pos_transformed = vec_removing_sw.fit_transform(pos_files)\n",
    "# get_feature_names() was removed in scikit-learn 1.2; use get_feature_names_out() when it exists\n",
    "# and fall back to the old name on older releases.\n",
    "pos_transformed_feature_names = list(\n",
    "    vec_removing_sw.get_feature_names_out()\n",
    "    if hasattr(vec_removing_sw, 'get_feature_names_out')\n",
    "    else vec_removing_sw.get_feature_names()\n",
    ")\n",
    "pos_transformed_df = pd.DataFrame(pos_transformed.toarray(), columns=pos_transformed_feature_names)\n",
    "# Label column: every row built from the positive files is tagged 'p'.\n",
    "pos_transformed_df['PoN'] = 'p'\n",
    "pos_transformed_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NEGATIVE FILES\n",
    "# NOTE(review): absolute, machine-specific path; adjust for your environment.\n",
    "path = \"/Users/danielcaraway/Documents/IST_736_TextMining/AI_NEG\"\n",
    "# Sorted because os.listdir() order is arbitrary; os.path.join replaces string concatenation,\n",
    "# and the comprehension avoids a loop variable named `next` that shadowed the builtin.\n",
    "neg_files = [\n",
    "    os.path.join(path, name)\n",
    "    for name in sorted(os.listdir(path))\n",
    "    if os.path.isfile(os.path.join(path, name))\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>artificial</th>\n",
       "      <th>brains</th>\n",
       "      <th>captcha</th>\n",
       "      <th>cars</th>\n",
       "      <th>children</th>\n",
       "      <th>dog</th>\n",
       "      <th>drive</th>\n",
       "      <th>going</th>\n",
       "      <th>hack</th>\n",
       "      <th>hate</th>\n",
       "      <th>...</th>\n",
       "      <th>jobs</th>\n",
       "      <th>melt</th>\n",
       "      <th>oh</th>\n",
       "      <th>right</th>\n",
       "      <th>s</th>\n",
       "      <th>t</th>\n",
       "      <th>terrified</th>\n",
       "      <th>took</th>\n",
       "      <th>trust</th>\n",
       "      <th>PoN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   artificial  brains  captcha  cars  children  dog  drive  going  hack  hate  \\\n",
       "0           1       0        0     0         0    0      0      0     0     0   \n",
       "1           1       0        1     1         0    0      1      0     1     0   \n",
       "2           1       0        0     0         0    0      0      0     0     1   \n",
       "3           1       0        0     0         0    1      0      0     0     0   \n",
       "4           1       1        0     0         1    0      0      1     0     0   \n",
       "\n",
       "   ...  jobs  melt  oh  right  s  t  terrified  took  trust  PoN  \n",
       "0  ...     2     0   1      1  1  0          0     1      0    n  \n",
       "1  ...     0     0   0      0  0  1          0     0      1    n  \n",
       "2  ...     0     0   0      0  0  0          0     0      0    n  \n",
       "3  ...     0     0   0      0  0  0          1     0      0    n  \n",
       "4  ...     0     1   0      0  0  0          0     0      0    n  \n",
       "\n",
       "[5 rows x 21 columns]"
      ]
     },
     "execution_count": 177,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-fit the same vectorizer object on the negative files only, so its vocabulary is\n",
    "# rebuilt from these files.\n",
    "neg_transformed = vec_removing_sw.fit_transform(neg_files)\n",
    "# get_feature_names() was removed in scikit-learn 1.2; use get_feature_names_out() when it exists\n",
    "# and fall back to the old name on older releases.\n",
    "neg_transformed_feature_names = list(\n",
    "    vec_removing_sw.get_feature_names_out()\n",
    "    if hasattr(vec_removing_sw, 'get_feature_names_out')\n",
    "    else vec_removing_sw.get_feature_names()\n",
    ")\n",
    "neg_transformed_df = pd.DataFrame(neg_transformed.toarray(), columns=neg_transformed_feature_names)\n",
    "# Label column: every row built from the negative files is tagged 'n'.\n",
    "neg_transformed_df['PoN'] = 'n'\n",
    "neg_transformed_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>advancements</th>\n",
       "      <th>artificial</th>\n",
       "      <th>cancer</th>\n",
       "      <th>care</th>\n",
       "      <th>child</th>\n",
       "      <th>day</th>\n",
       "      <th>daydream</th>\n",
       "      <th>dog</th>\n",
       "      <th>excited</th>\n",
       "      <th>free</th>\n",
       "      <th>...</th>\n",
       "      <th>hate</th>\n",
       "      <th>jobs</th>\n",
       "      <th>melt</th>\n",
       "      <th>oh</th>\n",
       "      <th>right</th>\n",
       "      <th>s</th>\n",
       "      <th>t</th>\n",
       "      <th>terrified</th>\n",
       "      <th>took</th>\n",
       "      <th>trust</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   advancements  artificial  cancer  care  child  day  daydream  dog  excited  \\\n",
       "0           1.0           1     0.0   0.0    0.0  0.0       0.0    1      1.0   \n",
       "1           0.0           1     0.0   1.0    1.0  0.0       1.0    0      1.0   \n",
       "2           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "3           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "4           0.0           1     1.0   0.0    1.0  1.0       0.0    0      0.0   \n",
       "0           NaN           1     NaN   NaN    NaN  NaN       NaN    0      NaN   \n",
       "1           NaN           1     NaN   NaN    NaN  NaN       NaN    0      NaN   \n",
       "2           NaN           1     NaN   NaN    NaN  NaN       NaN    0      NaN   \n",
       "3           NaN           1     NaN   NaN    NaN  NaN       NaN    1      NaN   \n",
       "4           NaN           1     NaN   NaN    NaN  NaN       NaN    0      NaN   \n",
       "\n",
       "   free  ...  hate  jobs  melt   oh  right    s    t  terrified  took  trust  \n",
       "0   0.0  ...   NaN   NaN   NaN  NaN    NaN  NaN  NaN        NaN   NaN    NaN  \n",
       "1   0.0  ...   NaN   NaN   NaN  NaN    NaN  NaN  NaN        NaN   NaN    NaN  \n",
       "2   0.0  ...   NaN   NaN   NaN  NaN    NaN  NaN  NaN        NaN   NaN    NaN  \n",
       "3   0.0  ...   NaN   NaN   NaN  NaN    NaN  NaN  NaN        NaN   NaN    NaN  \n",
       "4   1.0  ...   NaN   NaN   NaN  NaN    NaN  NaN  NaN        NaN   NaN    NaN  \n",
       "0   NaN  ...   0.0   2.0   0.0  1.0    1.0  1.0  0.0        0.0   1.0    0.0  \n",
       "1   NaN  ...   0.0   0.0   0.0  0.0    0.0  0.0  1.0        0.0   0.0    1.0  \n",
       "2   NaN  ...   1.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "3   NaN  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        1.0   0.0    0.0  \n",
       "4   NaN  ...   0.0   0.0   1.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "\n",
       "[10 rows x 49 columns]"
      ]
     },
     "execution_count": 183,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the positive rows on top of the negative rows.\n",
    "# DataFrame.append was removed in pandas 2.0; pd.concat performs the same row-wise stacking.\n",
    "# Columns that exist in only one frame come out as NaN for the other frame's rows, and each\n",
    "# frame keeps its own row labels (so the index repeats).\n",
    "combined = pd.concat([pos_transformed_df, neg_transformed_df], sort=False)\n",
    "combined"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>advancements</th>\n",
       "      <th>artificial</th>\n",
       "      <th>cancer</th>\n",
       "      <th>care</th>\n",
       "      <th>child</th>\n",
       "      <th>day</th>\n",
       "      <th>daydream</th>\n",
       "      <th>dog</th>\n",
       "      <th>excited</th>\n",
       "      <th>free</th>\n",
       "      <th>...</th>\n",
       "      <th>hate</th>\n",
       "      <th>jobs</th>\n",
       "      <th>melt</th>\n",
       "      <th>oh</th>\n",
       "      <th>right</th>\n",
       "      <th>s</th>\n",
       "      <th>t</th>\n",
       "      <th>terrified</th>\n",
       "      <th>took</th>\n",
       "      <th>trust</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   advancements  artificial  cancer  care  child  day  daydream  dog  excited  \\\n",
       "0           1.0           1     0.0   0.0    0.0  0.0       0.0    1      1.0   \n",
       "1           0.0           1     0.0   1.0    1.0  0.0       1.0    0      1.0   \n",
       "2           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "3           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "4           0.0           1     1.0   0.0    1.0  1.0       0.0    0      0.0   \n",
       "0           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "1           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "2           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "3           0.0           1     0.0   0.0    0.0  0.0       0.0    1      0.0   \n",
       "4           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "\n",
       "   free  ...  hate  jobs  melt   oh  right    s    t  terrified  took  trust  \n",
       "0   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "1   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "2   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "3   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "4   1.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "0   0.0  ...   0.0   2.0   0.0  1.0    1.0  1.0  0.0        0.0   1.0    0.0  \n",
       "1   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  1.0        0.0   0.0    1.0  \n",
       "2   0.0  ...   1.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "3   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        1.0   0.0    0.0  \n",
       "4   0.0  ...   0.0   0.0   1.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "\n",
       "[10 rows x 49 columns]"
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "combined = combined.fillna(0)\n",
    "combined"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare Train and Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>advancements</th>\n",
       "      <th>artificial</th>\n",
       "      <th>cancer</th>\n",
       "      <th>care</th>\n",
       "      <th>child</th>\n",
       "      <th>day</th>\n",
       "      <th>daydream</th>\n",
       "      <th>dog</th>\n",
       "      <th>excited</th>\n",
       "      <th>free</th>\n",
       "      <th>...</th>\n",
       "      <th>hate</th>\n",
       "      <th>jobs</th>\n",
       "      <th>melt</th>\n",
       "      <th>oh</th>\n",
       "      <th>right</th>\n",
       "      <th>s</th>\n",
       "      <th>t</th>\n",
       "      <th>terrified</th>\n",
       "      <th>took</th>\n",
       "      <th>trust</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   advancements  artificial  cancer  care  child  day  daydream  dog  excited  \\\n",
       "0           1.0           1     0.0   0.0    0.0  0.0       0.0    1      1.0   \n",
       "1           0.0           1     0.0   1.0    1.0  0.0       1.0    0      1.0   \n",
       "2           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "3           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "4           0.0           1     1.0   0.0    1.0  1.0       0.0    0      0.0   \n",
       "0           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "1           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "2           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "3           0.0           1     0.0   0.0    0.0  0.0       0.0    1      0.0   \n",
       "4           0.0           1     0.0   0.0    0.0  0.0       0.0    0      0.0   \n",
       "\n",
       "   free  ...  hate  jobs  melt   oh  right    s    t  terrified  took  trust  \n",
       "0   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "1   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "2   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "3   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "4   1.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "0   0.0  ...   0.0   2.0   0.0  1.0    1.0  1.0  0.0        0.0   1.0    0.0  \n",
       "1   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  1.0        0.0   0.0    1.0  \n",
       "2   0.0  ...   1.0   0.0   0.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "3   0.0  ...   0.0   0.0   0.0  0.0    0.0  0.0  0.0        1.0   0.0    0.0  \n",
       "4   0.0  ...   0.0   0.0   1.0  0.0    0.0  0.0  0.0        0.0   0.0    0.0  \n",
       "\n",
       "[10 rows x 49 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display, HTML\n",
    "display(combined)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
