{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HW7: Comparing MNB & SVM with Kaggle Sentiment Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## OVERVIEW"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### VECTORIZERS USED:\n",
    "    CountVectorizer\n",
    "    TfidfVectorizer\n",
    "\n",
    "### MODELS USED:\n",
    "    Multinomial Naive Bayes (MNB)\n",
    "    Support Vector Machines (SVM)\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "#### VECTORIZATION PARAMS:\n",
    "    Binary\n",
    "    Stopwords\n",
    "    Unigrams, Bigrams\n",
    "    Min & Max df\n",
    "---\n",
    "\n",
    "#### TODO:\n",
    "    Stemming?\n",
    "    Vadar + TextBlob"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FUNCTION & PACKAGE PARTY"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [],
   "source": [
    "## =======================================================\n",
    "## TOKENIZING\n",
    "## =======================================================\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
    "\n",
    "## =======================================================\n",
    "## VECTORIZING\n",
    "## =======================================================\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "## ----- VECTORIZORS\n",
    "unigram_bool_cv_v1 = CountVectorizer(encoding='latin-1', binary=True, min_df=5, stop_words='english')\n",
    "unigram_bool_cv_v2 = CountVectorizer(encoding='latin-1', binary=True, min_df=5, stop_words='english', \n",
    "                                     token_pattern=r'(?u)\\b[a-zA-Z]{2,}\\b' )\n",
    "\n",
    "unigram_cv = CountVectorizer(encoding='latin-1', binary=False, min_df=5, stop_words='english', \n",
    "                             token_pattern=r'(?u)\\b[a-zA-Z]{2,}\\b' )\n",
    "\n",
    "bigram_cv = CountVectorizer(encoding='latin-1', ngram_range=(1,2), min_df=5, stop_words='english')\n",
    "bigram_cv_v2 = CountVectorizer(encoding='latin-1', ngram_range=(1,2), min_df=5, stop_words='english', \n",
    "                               token_pattern=r'(?u)\\b[a-zA-Z]{2,}\\b')\n",
    "\n",
    "unigram_tv = TfidfVectorizer(encoding='latin-1', use_idf=True, min_df=5, stop_words='english')\n",
    "unigram_tv_v2 = TfidfVectorizer(encoding='latin-1', use_idf=True, min_df=5, stop_words='english', \n",
    "                                token_pattern=r'(?u)\\b[a-zA-Z]{2,}\\b')\n",
    "\n",
    "bigram_tv = TfidfVectorizer(encoding='latin-1', use_idf=True, ngram_range=(1,2), min_df=5, stop_words='english')\n",
    "bigram_tv_v2 = TfidfVectorizer(encoding='latin-1', use_idf=True, ngram_range=(1,2), min_df=5, stop_words='english', \n",
    "                               token_pattern=r'(?u)\\b[a-zA-Z]{2,}\\b')\n",
    "\n",
    "## =======================================================\n",
    "## MODELING\n",
    "## =======================================================\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.naive_bayes import BernoulliNB, MultinomialNB\n",
    "\n",
    "## ----- CLASSIFIERS\n",
    "mnb = MultinomialNB()\n",
    "svm = LinearSVC(C=1)\n",
    "\n",
    "def get_test_train_vec(X,y,vectorizer):\n",
    "    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)\n",
    "    X_train_vec = vectorizer.fit_transform(X_train)\n",
    "    X_test_vec = vectorizer.transform(X_test)\n",
    "    return X_train_vec, X_test_vec, y_train, y_test\n",
    "\n",
    "def run_classifier(X_train_vec, X_test_vec, y_train, y_test, labels, target_names, classifier):\n",
    "    clf = classifier\n",
    "    clf.fit(X_train_vec,y_train)\n",
    "    y_pred = clf.predict(X_test_vec)\n",
    "    report = classification_report(y_test, y_pred, target_names=target_names,output_dict=True)\n",
    "    score = clf.score(X_test_vec,y_test)\n",
    "    return clf, score, report\n",
    "    \n",
    "def get_model(X, y, labels, target_names, classifier, vec):\n",
    "    X_train_vec, X_test_vec, y_train, y_test = get_test_train_vec(X,y,vec)\n",
    "    model, score, report = run_classifier(X_train_vec, X_test_vec, y_train, y_test, labels, target_names, classifier)\n",
    "    return model, score, report\n",
    "    \n",
    "## =======================================================\n",
    "## VISUALIZING\n",
    "## =======================================================\n",
    "from tabulate import tabulate\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DATA GOES HERE:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import pandas as pd\n",
    "# train=pd.read_csv(\"kaggle-sentiment/train.tsv\", delimiter='\\t')\n",
    "# y=train['Sentiment'].values\n",
    "# X=train['Phrase'].values\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "df = pd.read_csv('../death_row_discritized.csv')\n",
    "\n",
    "def to_string(tokens):\n",
    "    try:\n",
    "        return \" \".join(eval(tokens))\n",
    "    except:\n",
    "        return \"error\"\n",
    "    \n",
    "column_name = 'time_spent'\n",
    "df['statement_string'] = df.apply(lambda x: to_string(x['last_statement']), axis=1)\n",
    "# y=df['vic_kid'].values\n",
    "y=df[column_name].values\n",
    "\n",
    "\n",
    "y = [value if type(value) == str else y[0] for value in y]\n",
    "y_labels = list(set(y))\n",
    "X=df['statement_string'].values\n",
    "y_labels\n",
    "\n",
    "\n",
    "def return_features(vec, model):\n",
    "    for i,feature_probability in enumerate(model.coef_):\n",
    "        print('============', column_name,': ', y_labels[i])\n",
    "        df1 = pd.DataFrame(sorted(zip(feature_probability, vec.get_feature_names()))[:10])\n",
    "        df2 = pd.DataFrame(sorted(zip(feature_probability, vec.get_feature_names()))[-10:])\n",
    "        df3 = pd.concat([df1, df2], axis=1)\n",
    "        print(tabulate(df3, tablefmt=\"fancy_grid\", headers=[\"Most\",\"Likely\",\"Least\",\"Likely\"], floatfmt=\".2f\"))\n",
    "\n",
    "def update_big_df(big_df, new_row):\n",
    "    big_df.append(new_row)\n",
    "    df = pd.DataFrame(big_df)\n",
    "    df = df.drop_duplicates()\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TASK 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 1 -- MNB & SVM with Vectorizer 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "metadata": {},
   "outputs": [],
   "source": [
    "big_df = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════╤═════════╤══════════════════════╕\n",
      "│    │   Most │ Likely     │   Least │ Likely               │\n",
      "╞════╪════════╪════════════╪═════════╪══════════════════════╡\n",
      "│  0 │  -7.99 │ especially │   -4.35 │ thank                │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  1 │  -7.29 │ brought    │   -4.32 │ im                   │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  2 │  -7.29 │ doesnt     │   -4.32 │ sorry                │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  3 │  -7.29 │ doing      │   -4.25 │ god                  │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  4 │  -7.29 │ faith      │   -4.20 │ know                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  5 │  -7.29 │ given      │   -4.10 │ want                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  6 │  -7.29 │ grace      │   -3.93 │ family               │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  7 │  -7.29 │ human      │   -3.59 │ love                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  8 │  -7.29 │ jack       │   -3.26 │ pronoun              │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  9 │  -7.29 │ joe        │   -3.20 │ first_person_pronoun │\n",
      "╘════╧════════╧════════════╧═════════╧══════════════════════╛\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013"
      ]
     },
     "execution_count": 205,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = unigram_bool_cv_v1\n",
    "classifier = mnb\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "\n",
    "df = update_big_df(big_df,{ 'classifier': 'mnb', 'vectorizer': 'V1', 'score': score})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 206,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════╤═════════╤═══════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely    │\n",
      "╞════╪════════╪══════════╪═════════╪═══════════╡\n",
      "│  0 │  -1.16 │ faith    │    0.80 │ today     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  1 │  -1.12 │ taking   │    0.85 │ change    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  2 │  -1.05 │ jack     │    0.86 │ someday   │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  3 │  -0.91 │ brought  │    0.93 │ shown     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  4 │  -0.86 │ thanks   │    0.94 │ happening │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  5 │  -0.85 │ brings   │    0.97 │ heaven    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  6 │  -0.82 │ guilty   │    1.00 │ send      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  7 │  -0.82 │ allah    │    1.04 │ david     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  8 │  -0.81 │ loves    │    1.06 │ strong    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  9 │  -0.80 │ john     │    1.20 │ committed │\n",
      "╘════╧════════╧══════════╧═════════╧═══════════╛\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581"
      ]
     },
     "execution_count": 206,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = unigram_bool_cv_v1\n",
    "classifier = svm\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "\n",
    "df = update_big_df(big_df,{ 'classifier': 'svm', 'vectorizer': 'V1', 'score': score})\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "NOTES: Very interesting!! MNB is very cluttered with numbers. SVM is not. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 2 -- MNB & SVM with Vectorizer 2\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 207,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════╤═════════╤══════════╕\n",
      "│    │   Most │ Likely     │   Least │ Likely   │\n",
      "╞════╪════════╪════════════╪═════════╪══════════╡\n",
      "│  0 │  -7.95 │ especially │   -4.34 │ like     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  1 │  -7.25 │ brought    │   -4.31 │ thank    │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  2 │  -7.25 │ doesnt     │   -4.28 │ im       │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  3 │  -7.25 │ doing      │   -4.28 │ sorry    │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  4 │  -7.25 │ faith      │   -4.21 │ god      │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  5 │  -7.25 │ given      │   -4.16 │ know     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  6 │  -7.25 │ grace      │   -4.05 │ want     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  7 │  -7.25 │ human      │   -3.89 │ family   │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  8 │  -7.25 │ jack       │   -3.55 │ love     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  9 │  -7.25 │ joe        │   -3.22 │ pronoun  │\n",
      "╘════╧════════╧════════════╧═════════╧══════════╛\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013"
      ]
     },
     "execution_count": 207,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = unigram_bool_cv_v2\n",
    "classifier = mnb\n",
    "\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "\n",
    "df = update_big_df(big_df,{ 'classifier': 'mnb', 'vectorizer': 'V2', 'score': score})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════╤═════════╤═══════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely    │\n",
      "╞════╪════════╪══════════╪═════════╪═══════════╡\n",
      "│  0 │  -1.18 │ faith    │    0.83 │ loved     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  1 │  -1.13 │ taking   │    0.85 │ change    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  2 │  -1.04 │ jack     │    0.88 │ someday   │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  3 │  -0.95 │ brought  │    0.89 │ happening │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  4 │  -0.90 │ guilty   │    0.92 │ shown     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  5 │  -0.86 │ brings   │    1.00 │ heaven    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  6 │  -0.85 │ allah    │    1.03 │ strong    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  7 │  -0.84 │ thanks   │    1.06 │ send      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  8 │  -0.80 │ john     │    1.06 │ david     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  9 │  -0.77 │ promise  │    1.19 │ committed │\n",
      "╘════╧════════╧══════════╧═════════╧═══════════╛\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013\n",
       "3        svm         V2  0.475771"
      ]
     },
     "execution_count": 208,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = unigram_bool_cv_v2\n",
    "classifier = svm\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "\n",
    "df = update_big_df(big_df,{ 'classifier': 'svm', 'vectorizer': 'V2', 'score': score})\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 3 -- MNB & SVM with Vectorizer 3\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════╤═════════╤══════════╕\n",
      "│    │   Most │ Likely     │   Least │ Likely   │\n",
      "╞════╪════════╪════════════╪═════════╪══════════╡\n",
      "│  0 │  -8.43 │ especially │   -4.33 │ im       │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  1 │  -7.73 │ doesnt     │   -4.25 │ yall     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  2 │  -7.73 │ given      │   -4.16 │ sorry    │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  3 │  -7.73 │ grace      │   -4.16 │ thank    │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  4 │  -7.73 │ jack       │   -4.11 │ god      │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  5 │  -7.73 │ joe        │   -3.93 │ want     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  6 │  -7.73 │ john       │   -3.91 │ family   │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  7 │  -7.73 │ lived      │   -3.73 │ know     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  8 │  -7.73 │ members    │   -3.13 │ love     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  9 │  -7.73 │ showed     │   -1.69 │ pronoun  │\n",
      "╘════╧════════╧════════════╧═════════╧══════════╛\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013\n",
       "3        svm         V2  0.475771\n",
       "4        mnb         V3  0.466960"
      ]
     },
     "execution_count": 209,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = unigram_cv\n",
    "classifier = mnb\n",
    "\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "\n",
    "df = update_big_df(big_df,{ 'classifier': 'mnb', 'vectorizer': 'V3', 'score': score})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════╤═════════╤═══════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely    │\n",
      "╞════╪════════╪══════════╪═════════╪═══════════╡\n",
      "│  0 │  -1.16 │ taking   │    0.75 │ fear      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  1 │  -1.11 │ faith    │    0.76 │ strong    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  2 │  -1.08 │ jack     │    0.77 │ change    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  3 │  -0.94 │ brought  │    0.79 │ david     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  4 │  -0.87 │ amen     │    0.81 │ send      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  5 │  -0.86 │ words    │    0.82 │ happening │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  6 │  -0.84 │ row      │    0.86 │ someday   │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  7 │  -0.83 │ best     │    0.87 │ world     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  8 │  -0.81 │ blessing │    0.94 │ waiting   │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  9 │  -0.77 │ guilty   │    1.27 │ committed │\n",
      "╘════╧════════╧══════════╧═════════╧═══════════╛\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013\n",
       "3        svm         V2  0.475771\n",
       "4        mnb         V3  0.466960\n",
       "5        svm         V3  0.506608"
      ]
     },
     "execution_count": 210,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = unigram_cv\n",
    "classifier = svm\n",
    "\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "\n",
    "df = update_big_df(big_df,{ 'classifier': 'svm', 'vectorizer': 'V3', 'score': score})\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 4 -- MNB & SVM with Vectorizer 4\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤═══════════════════════════════╤═════════╤═══════════════════════════════════════════╕\n",
      "│    │   Most │ Likely                        │   Least │ Likely                                    │\n",
      "╞════╪════════╪═══════════════════════════════╪═════════╪═══════════════════════════════════════════╡\n",
      "│  0 │  -9.13 │ especially                    │   -4.63 │ first_person_pronoun first_person_pronoun │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  1 │  -9.13 │ peace god                     │   -4.63 │ want                                      │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  2 │  -8.44 │ come pronoun                  │   -4.62 │ family                                    │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  3 │  -8.44 │ daughter first_person_pronoun │   -4.44 │ know                                      │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  4 │  -8.44 │ doesnt                        │   -4.16 │ love pronoun                              │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  5 │  -8.44 │ first_person_pronoun come     │   -4.08 │ pronoun first_person_pronoun              │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  6 │  -8.44 │ first_person_pronoun daughter │   -4.00 │ first_person_pronoun love                 │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  7 │  -8.44 │ given                         │   -3.83 │ love                                      │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  8 │  -8.44 │ given first_person_pronoun    │   -2.40 │ pronoun                                   │\n",
      "├────┼────────┼───────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  9 │  -8.44 │ god forgive                   │   -1.88 │ first_person_pronoun                      │\n",
      "╘════╧════════╧═══════════════════════════════╧═════════╧═══════════════════════════════════════════╛\n",
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════════════════════════╤═════════╤═════════════════════════════╕\n",
      "│    │   Most │ Likely                       │   Least │ Likely                      │\n",
      "╞════╪════════╪══════════════════════════════╪═════════╪═════════════════════════════╡\n",
      "│  0 │  -0.95 │ say pronoun                  │    0.49 │ like say                    │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  1 │  -0.86 │ soon                         │    0.52 │ first_person_pronoun got    │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  2 │  -0.81 │ statement                    │    0.52 │ hold                        │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  3 │  -0.74 │ theres                       │    0.52 │ praise                      │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  4 │  -0.66 │ blessing                     │    0.53 │ mama                        │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  5 │  -0.56 │ kids                         │    0.55 │ first_person_pronoun im     │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  6 │  -0.55 │ brought                      │    0.57 │ first_person_pronoun want   │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  7 │  -0.54 │ words                        │    0.62 │ good                        │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  8 │  -0.53 │ brought first_person_pronoun │    0.66 │ yall im                     │\n",
      "├────┼────────┼──────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  9 │  -0.47 │ first_person_pronoun heart   │    0.72 │ strong first_person_pronoun │\n",
      "╘════╧════════╧══════════════════════════════╧═════════╧═════════════════════════════╛\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013\n",
       "3        svm         V2  0.475771\n",
       "4        mnb         V3  0.466960\n",
       "5        svm         V3  0.506608\n",
       "6        mnb         V4  0.497797\n",
       "7        svm         V4  0.493392"
      ]
     },
     "execution_count": 211,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vec = bigram_cv\n",
    "\n",
    "# Run both classifiers with this vectorizer and record each score in the results\n",
    "# table. Pairing the label with the estimator keeps the recorded name in sync\n",
    "# with the object that was actually passed to get_model.\n",
    "for clf_label, classifier in (('mnb', mnb), ('svm', svm)):\n",
    "    # NOTE(review): y_labels fills two positional arguments of get_model -- confirm this is intended.\n",
    "    model, score, report = get_model(X, y, y_labels, y_labels, classifier, vec)\n",
    "    return_features(vec, model)\n",
    "    df = update_big_df(big_df, {'classifier': clf_label, 'vectorizer': 'V4', 'score': score})\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013\n",
       "3        svm         V2  0.475771\n",
       "4        mnb         V3  0.466960\n",
       "5        svm         V3  0.506608\n",
       "6        mnb         V4  0.497797\n",
       "7        svm         V4  0.493392"
      ]
     },
     "execution_count": 212,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the results table as returned by the most recent update_big_df call.\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 5 -- MNB & SVM with Vectorizer 5\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════════╤═════════╤═════════════════╕\n",
      "│    │   Most │ Likely       │   Least │ Likely          │\n",
      "╞════╪════════╪══════════════╪═════════╪═════════════════╡\n",
      "│  0 │  -8.72 │ especially   │   -4.49 │ pronoun pronoun │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  1 │  -8.03 │ come pronoun │   -4.46 │ sorry           │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  2 │  -8.03 │ doesnt       │   -4.46 │ thank           │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  3 │  -8.03 │ given        │   -4.41 │ god             │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  4 │  -8.03 │ god forgive  │   -4.22 │ want            │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  5 │  -8.03 │ grace        │   -4.21 │ family          │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  6 │  -8.03 │ jack         │   -4.03 │ know            │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  7 │  -8.03 │ joe          │   -3.75 │ love pronoun    │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  8 │  -8.03 │ john         │   -3.43 │ love            │\n",
      "├────┼────────┼──────────────┼─────────┼─────────────────┤\n",
      "│  9 │  -8.03 │ lived        │   -1.99 │ pronoun         │\n",
      "╘════╧════════╧══════════════╧═════════╧═════════════════╛\n",
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════════════╤═════════╤═════════════════╕\n",
      "│    │   Most │ Likely             │   Least │ Likely          │\n",
      "╞════╪════════╪════════════════════╪═════════╪═════════════════╡\n",
      "│  0 │  -1.12 │ say pronoun        │    0.59 │ apologize       │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  1 │  -0.84 │ brought            │    0.60 │ committed       │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  2 │  -0.78 │ soon               │    0.60 │ reason          │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  3 │  -0.76 │ know did           │    0.64 │ good            │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  4 │  -0.75 │ statement          │    0.64 │ pronoun support │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  5 │  -0.72 │ pronoun appreciate │    0.64 │ pronoun heart   │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  6 │  -0.63 │ theres             │    0.71 │ hold            │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  7 │  -0.61 │ taking             │    0.73 │ loved pronoun   │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  8 │  -0.59 │ blessing           │    0.77 │ yall im         │\n",
      "├────┼────────┼────────────────────┼─────────┼─────────────────┤\n",
      "│  9 │  -0.55 │ thanks             │    0.79 │ strong          │\n",
      "╘════╧════════╧════════════════════╧═════════╧═════════════════╛\n"
     ]
    }
   ],
   "source": [
    "vec = bigram_cv_v2\n",
    "\n",
    "# Run both classifiers with this vectorizer and record each score in the results\n",
    "# table. Pairing the label with the estimator keeps the recorded name in sync\n",
    "# with the object that was actually passed to get_model.\n",
    "for clf_label, classifier in (('mnb', mnb), ('svm', svm)):\n",
    "    # NOTE(review): y_labels fills two positional arguments of get_model -- confirm this is intended.\n",
    "    model, score, report = get_model(X, y, y_labels, y_labels, classifier, vec)\n",
    "    return_features(vec, model)\n",
    "    df = update_big_df(big_df, {'classifier': clf_label, 'vectorizer': 'V5', 'score': score})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>svm</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  classifier vectorizer     score\n",
       "0        mnb         V1  0.511013\n",
       "1        svm         V1  0.484581\n",
       "2        mnb         V2  0.511013\n",
       "3        svm         V2  0.475771\n",
       "4        mnb         V3  0.466960\n",
       "5        svm         V3  0.506608\n",
       "6        mnb         V4  0.497797\n",
       "7        svm         V4  0.493392\n",
       "8        mnb         V5  0.475771\n",
       "9        svm         V5  0.493392"
      ]
     },
     "execution_count": 214,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the results table as returned by the most recent update_big_df call.\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 6 -- MNB & SVM with Vectorizer 6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════╤═════════╤══════════════════════╕\n",
      "│    │   Most │ Likely     │   Least │ Likely               │\n",
      "╞════╪════════╪════════════╪═════════╪══════════════════════╡\n",
      "│  0 │  -6.65 │ especially │   -4.78 │ sorry                │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  1 │  -6.60 │ showed     │   -4.73 │ god                  │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  2 │  -6.59 │ touch      │   -4.72 │ know                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  3 │  -6.58 │ given      │   -4.67 │ yall                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  4 │  -6.58 │ taking     │   -4.66 │ want                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  5 │  -6.57 │ brought    │   -4.65 │ im                   │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  6 │  -6.57 │ john       │   -4.60 │ family               │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  7 │  -6.56 │ leave      │   -4.06 │ love                 │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  8 │  -6.55 │ night      │   -3.06 │ pronoun              │\n",
      "├────┼────────┼────────────┼─────────┼──────────────────────┤\n",
      "│  9 │  -6.54 │ thought    │   -2.61 │ first_person_pronoun │\n",
      "╘════╧════════╧════════════╧═════════╧══════════════════════╛\n",
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════╤═════════╤═══════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely    │\n",
      "╞════╪════════╪══════════╪═════════╪═══════════╡\n",
      "│  0 │  -1.42 │ taking   │    0.98 │ fear      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  1 │  -1.38 │ row      │    0.98 │ today     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  2 │  -1.37 │ years    │    0.99 │ david     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  3 │  -1.26 │ father   │    1.00 │ everybody │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  4 │  -1.19 │ ive      │    1.03 │ reason    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  5 │  -1.17 │ brought  │    1.04 │ committed │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  6 │  -1.16 │ ah       │    1.05 │ happening │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  7 │  -1.12 │ words    │    1.14 │ brother   │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  8 │  -1.00 │ allah    │    1.20 │ praise    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  9 │  -0.99 │ jack     │    1.35 │ strong    │\n",
      "╘════╧════════╧══════════╧═════════╧═══════════╛\n"
     ]
    }
   ],
   "source": [
    "vec = unigram_tv\n",
    "\n",
    "# Run both classifiers with this vectorizer and record each score in the results\n",
    "# table. Pairing the label with the estimator keeps the recorded name in sync\n",
    "# with the object that was actually passed to get_model.\n",
    "for clf_label, classifier in (('mnb', mnb), ('svm', svm)):\n",
    "    # NOTE(review): y_labels fills two positional arguments of get_model -- confirm this is intended.\n",
    "    model, score, report = get_model(X, y, y_labels, y_labels, classifier, vec)\n",
    "    return_features(vec, model)\n",
    "    df = update_big_df(big_df, {'classifier': clf_label, 'vectorizer': 'V6', 'score': score})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>svm</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>svm</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.453744</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   classifier vectorizer     score\n",
       "0         mnb         V1  0.511013\n",
       "1         svm         V1  0.484581\n",
       "2         mnb         V2  0.511013\n",
       "3         svm         V2  0.475771\n",
       "4         mnb         V3  0.466960\n",
       "5         svm         V3  0.506608\n",
       "6         mnb         V4  0.497797\n",
       "7         svm         V4  0.493392\n",
       "8         mnb         V5  0.475771\n",
       "9         svm         V5  0.493392\n",
       "10        mnb         V6  0.497797\n",
       "11        svm         V6  0.453744"
      ]
     },
     "execution_count": 216,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the results table as returned by the most recent update_big_df call.\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 7 -- MNB & SVM with Vectorizer 7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════╤═════════╤══════════╕\n",
      "│    │   Most │ Likely     │   Least │ Likely   │\n",
      "╞════╪════════╪════════════╪═════════╪══════════╡\n",
      "│  0 │  -6.67 │ especially │   -4.65 │ thank    │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  1 │  -6.59 │ showed     │   -4.62 │ god      │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  2 │  -6.59 │ taking     │   -4.62 │ sorry    │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  3 │  -6.58 │ given      │   -4.59 │ im       │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  4 │  -6.58 │ john       │   -4.59 │ yall     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  5 │  -6.58 │ touch      │   -4.58 │ know     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  6 │  -6.56 │ brought    │   -4.53 │ want     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  7 │  -6.54 │ leave      │   -4.43 │ family   │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  8 │  -6.54 │ night      │   -3.93 │ love     │\n",
      "├────┼────────┼────────────┼─────────┼──────────┤\n",
      "│  9 │  -6.54 │ soon       │   -2.92 │ pronoun  │\n",
      "╘════╧════════╧════════════╧═════════╧══════════╛\n",
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤══════════╤═════════╤═══════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely    │\n",
      "╞════╪════════╪══════════╪═════════╪═══════════╡\n",
      "│  0 │  -1.51 │ taking   │    0.98 │ send      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  1 │  -1.36 │ row      │    0.99 │ today     │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  2 │  -1.28 │ years    │    1.00 │ everybody │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  3 │  -1.25 │ father   │    1.00 │ fear      │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  4 │  -1.24 │ ive      │    1.07 │ reason    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  5 │  -1.14 │ brought  │    1.08 │ committed │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  6 │  -1.13 │ ah       │    1.11 │ happening │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  7 │  -1.11 │ jack     │    1.13 │ praise    │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  8 │  -1.09 │ words    │    1.18 │ brother   │\n",
      "├────┼────────┼──────────┼─────────┼───────────┤\n",
      "│  9 │  -1.02 │ guilty   │    1.28 │ strong    │\n",
      "╘════╧════════╧══════════╧═════════╧═══════════╛\n"
     ]
    }
   ],
   "source": [
    "vec = unigram_tv_v2\n",
    "\n",
    "# Run both classifiers with this vectorizer and record each score in the results\n",
    "# table. Pairing the label with the estimator keeps the recorded name in sync\n",
    "# with the object that was actually passed to get_model.\n",
    "for clf_label, classifier in (('mnb', mnb), ('svm', svm)):\n",
    "    # NOTE(review): y_labels fills two positional arguments of get_model -- confirm this is intended.\n",
    "    model, score, report = get_model(X, y, y_labels, y_labels, classifier, vec)\n",
    "    return_features(vec, model)\n",
    "    df = update_big_df(big_df, {'classifier': clf_label, 'vectorizer': 'V7', 'score': score})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>svm</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>svm</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.453744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V7</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>svm</td>\n",
       "      <td>V7</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   classifier vectorizer     score\n",
       "0         mnb         V1  0.511013\n",
       "1         svm         V1  0.484581\n",
       "2         mnb         V2  0.511013\n",
       "3         svm         V2  0.475771\n",
       "4         mnb         V3  0.466960\n",
       "5         svm         V3  0.506608\n",
       "6         mnb         V4  0.497797\n",
       "7         svm         V4  0.493392\n",
       "8         mnb         V5  0.475771\n",
       "9         svm         V5  0.493392\n",
       "10        mnb         V6  0.497797\n",
       "11        svm         V6  0.453744\n",
       "12        mnb         V7  0.511013\n",
       "13        svm         V7  0.475771"
      ]
     },
     "execution_count": 218,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 8 -- MNB & SVM with Vectorizer 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤═════════════════════════════╤═════════╤═══════════════════════════════════════════╕\n",
      "│    │   Most │ Likely                      │   Least │ Likely                                    │\n",
      "╞════╪════════╪═════════════════════════════╪═════════╪═══════════════════════════════════════════╡\n",
      "│  0 │  -7.15 │ especially                  │   -5.35 │ first_person_pronoun first_person_pronoun │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  1 │  -7.15 │ peace god                   │   -5.34 │ im                                        │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  2 │  -7.11 │ first_person_pronoun come   │   -5.34 │ want                                      │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  3 │  -7.11 │ lord pronoun                │   -5.30 │ family                                    │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  4 │  -7.11 │ guilty first_person_pronoun │   -5.09 │ pronoun first_person_pronoun              │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  5 │  -7.11 │ years pronoun               │   -4.99 │ love pronoun                              │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  6 │  -7.10 │ showed                      │   -4.97 │ first_person_pronoun love                 │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  7 │  -7.09 │ people first_person_pronoun │   -4.77 │ love                                      │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  8 │  -7.09 │ jesus first_person_pronoun  │   -3.76 │ pronoun                                   │\n",
      "├────┼────────┼─────────────────────────────┼─────────┼───────────────────────────────────────────┤\n",
      "│  9 │  -7.09 │ going first_person_pronoun  │   -3.31 │ first_person_pronoun                      │\n",
      "╘════╧════════╧═════════════════════════════╧═════════╧═══════════════════════════════════════════╛\n",
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤════════════════════════════╤═════════╤═════════════════════════════╕\n",
      "│    │   Most │ Likely                     │   Least │ Likely                      │\n",
      "╞════╪════════╪════════════════════════════╪═════════╪═════════════════════════════╡\n",
      "│  0 │  -1.18 │ taking                     │    0.87 │ everybody                   │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  1 │  -1.06 │ death row                  │    0.90 │ love pronoun                │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  2 │  -1.05 │ years                      │    0.90 │ good                        │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  3 │  -1.03 │ say pronoun                │    0.93 │ kill pronoun                │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  4 │  -1.01 │ row                        │    0.93 │ brother                     │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  5 │  -0.99 │ kids                       │    0.94 │ david                       │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  6 │  -0.99 │ ive                        │    1.00 │ strong                      │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  7 │  -0.98 │ thank first_person_pronoun │    1.18 │ strong first_person_pronoun │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  8 │  -0.96 │ thanks                     │    1.19 │ praise                      │\n",
      "├────┼────────┼────────────────────────────┼─────────┼─────────────────────────────┤\n",
      "│  9 │  -0.94 │ first_person_pronoun heart │    1.27 │ first_person_pronoun want   │\n",
      "╘════╧════════╧════════════════════════════╧═════════╧═════════════════════════════╛\n"
     ]
    }
   ],
   "source": [
    "vec = bigram_tv\n",
    "classifier = mnb\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "df = update_big_df(big_df,{ 'classifier': 'mnb', 'vectorizer': 'V8', 'score': score})\n",
    "\n",
    "classifier = svm\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "df = update_big_df(big_df,{ 'classifier': 'svm', 'vectorizer': 'V8', 'score': score})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>svm</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>svm</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.453744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V7</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>svm</td>\n",
       "      <td>V7</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V8</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>svm</td>\n",
       "      <td>V8</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   classifier vectorizer     score\n",
       "0         mnb         V1  0.511013\n",
       "1         svm         V1  0.484581\n",
       "2         mnb         V2  0.511013\n",
       "3         svm         V2  0.475771\n",
       "4         mnb         V3  0.466960\n",
       "5         svm         V3  0.506608\n",
       "6         mnb         V4  0.497797\n",
       "7         svm         V4  0.493392\n",
       "8         mnb         V5  0.475771\n",
       "9         svm         V5  0.493392\n",
       "10        mnb         V6  0.497797\n",
       "11        svm         V6  0.453744\n",
       "12        mnb         V7  0.511013\n",
       "13        svm         V7  0.475771\n",
       "14        mnb         V8  0.493392\n",
       "15        svm         V8  0.475771"
      ]
     },
     "execution_count": 220,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## TEST 9 -- MNB & SVM with Vectorizer 9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤═══════════════╤═════════╤══════════════╕\n",
      "│    │   Most │ Likely        │   Least │ Likely       │\n",
      "╞════╪════════╪═══════════════╪═════════╪══════════════╡\n",
      "│  0 │  -6.99 │ especially    │   -5.11 │ sorry        │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  1 │  -6.94 │ years pronoun │   -5.09 │ im           │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  2 │  -6.93 │ pronoun time  │   -5.09 │ yall         │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  3 │  -6.92 │ showed        │   -5.08 │ god          │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  4 │  -6.91 │ given         │   -5.04 │ know         │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  5 │  -6.91 │ taking        │   -5.00 │ want         │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  6 │  -6.91 │ touch         │   -4.93 │ family       │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  7 │  -6.91 │ john          │   -4.66 │ love pronoun │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  8 │  -6.91 │ pronoun wont  │   -4.43 │ love         │\n",
      "├────┼────────┼───────────────┼─────────┼──────────────┤\n",
      "│  9 │  -6.90 │ god forgive   │   -3.41 │ pronoun      │\n",
      "╘════╧════════╧═══════════════╧═════════╧══════════════╛\n",
      "============ time_spent :  10_or_less\n",
      "╒════╤════════╤═════════════╤═════════╤══════════════╕\n",
      "│    │   Most │ Likely      │   Least │ Likely       │\n",
      "╞════╪════════╪═════════════╪═════════╪══════════════╡\n",
      "│  0 │  -1.34 │ know did    │    0.90 │ everybody    │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  1 │  -1.33 │ taking      │    0.91 │ love pronoun │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  2 │  -1.14 │ ive         │    0.92 │ happening    │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  3 │  -1.08 │ death row   │    0.92 │ good         │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  4 │  -1.06 │ tell family │    0.94 │ fear         │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  5 │  -1.06 │ father      │    0.94 │ reason       │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  6 │  -1.05 │ brought     │    1.04 │ brother      │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  7 │  -1.05 │ years       │    1.10 │ praise       │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  8 │  -1.03 │ thanks      │    1.11 │ pronoun dont │\n",
      "├────┼────────┼─────────────┼─────────┼──────────────┤\n",
      "│  9 │  -1.00 │ row         │    1.28 │ strong       │\n",
      "╘════╧════════╧═════════════╧═════════╧══════════════╛\n"
     ]
    }
   ],
   "source": [
    "vec = bigram_tv_v2\n",
    "classifier = mnb\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "df = update_big_df(big_df,{ 'classifier': 'mnb', 'vectorizer': 'V9', 'score': score})\n",
    "\n",
    "classifier = svm\n",
    "\n",
    "model, score, report = get_model(X,y,y_labels, y_labels, classifier, vec)\n",
    "return_features(vec, model)\n",
    "df = update_big_df(big_df,{ 'classifier': 'svm', 'vectorizer': 'V9', 'score': score})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>classifier</th>\n",
       "      <th>vectorizer</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>svm</td>\n",
       "      <td>V1</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>svm</td>\n",
       "      <td>V2</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.466960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>svm</td>\n",
       "      <td>V3</td>\n",
       "      <td>0.506608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>svm</td>\n",
       "      <td>V4</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>svm</td>\n",
       "      <td>V5</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.497797</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11</td>\n",
       "      <td>svm</td>\n",
       "      <td>V6</td>\n",
       "      <td>0.453744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V7</td>\n",
       "      <td>0.511013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>13</td>\n",
       "      <td>svm</td>\n",
       "      <td>V7</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V8</td>\n",
       "      <td>0.493392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15</td>\n",
       "      <td>svm</td>\n",
       "      <td>V8</td>\n",
       "      <td>0.475771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>mnb</td>\n",
       "      <td>V9</td>\n",
       "      <td>0.502203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>17</td>\n",
       "      <td>svm</td>\n",
       "      <td>V9</td>\n",
       "      <td>0.484581</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   classifier vectorizer     score\n",
       "0         mnb         V1  0.511013\n",
       "1         svm         V1  0.484581\n",
       "2         mnb         V2  0.511013\n",
       "3         svm         V2  0.475771\n",
       "4         mnb         V3  0.466960\n",
       "5         svm         V3  0.506608\n",
       "6         mnb         V4  0.497797\n",
       "7         svm         V4  0.493392\n",
       "8         mnb         V5  0.475771\n",
       "9         svm         V5  0.493392\n",
       "10        mnb         V6  0.497797\n",
       "11        svm         V6  0.453744\n",
       "12        mnb         V7  0.511013\n",
       "13        svm         V7  0.475771\n",
       "14        mnb         V8  0.493392\n",
       "15        svm         V8  0.475771\n",
       "16        mnb         V9  0.502203\n",
       "17        svm         V9  0.484581"
      ]
     },
     "execution_count": 222,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}