{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HW7: Comparing MNB & SVM with Kaggle Sentiment Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## OVERVIEW"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### VECTORIZERS USED:\n",
    "    CountVectorizer\n",
    "    TfidfVectorizer\n",
    "\n",
    "### MODELS USED:\n",
    "    Multinomial Naive Bayes (MNB)\n",
    "    Support Vector Machines (SVM)\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "#### VECTORIZATION PARAMS:\n",
    "    Binary\n",
    "    Stopwords\n",
    "    Unigrams, Bigrams\n",
    "    Min & Max df\n",
    "---\n",
    "\n",
    "#### TODO:\n",
    "    Stemming?\n",
    "    VADER + TextBlob"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FUNCTION & PACKAGE PARTY"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "## =======================================================\n",
    "## TOKENIZING\n",
    "## =======================================================\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
    "\n",
    "## =======================================================\n",
    "## VECTORIZING\n",
    "## =======================================================\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "## ----- VECTORIZERS\n",
    "## Shared settings: latin-1 decoding, sklearn's English stop-word list, and\n",
    "## min_df=5 (terms found in fewer than 5 documents are ignored).\n",
    "\n",
    "## Boolean unigrams: each cell records presence/absence rather than a count.\n",
    "unigram_bool_cv = CountVectorizer(\n",
    "    encoding='latin-1',\n",
    "    stop_words='english',\n",
    "    min_df=5,\n",
    "    binary=True,\n",
    ")\n",
    "## Same as above, but tokens must be purely alphabetic and at least 2 letters long.\n",
    "unigram_bool_cv_v2 = CountVectorizer(\n",
    "    encoding='latin-1',\n",
    "    stop_words='english',\n",
    "    min_df=5,\n",
    "    binary=True,\n",
    "    token_pattern=r'(?u)\\b[a-zA-Z]{2,}\\b',\n",
    ")\n",
    "\n",
    "## Raw term counts: unigrams only, then unigrams + bigrams.\n",
    "unigram_cv = CountVectorizer(\n",
    "    encoding='latin-1',\n",
    "    stop_words='english',\n",
    "    min_df=5,\n",
    "    binary=False,\n",
    ")\n",
    "bigram_cv = CountVectorizer(\n",
    "    encoding='latin-1',\n",
    "    stop_words='english',\n",
    "    min_df=5,\n",
    "    ngram_range=(1,2),\n",
    ")\n",
    "\n",
    "## TF-IDF weighting: unigrams only, then unigrams + bigrams.\n",
    "unigram_tv = TfidfVectorizer(\n",
    "    encoding='latin-1',\n",
    "    stop_words='english',\n",
    "    min_df=5,\n",
    "    use_idf=True,\n",
    ")\n",
    "bigram_tv = TfidfVectorizer(\n",
    "    encoding='latin-1',\n",
    "    stop_words='english',\n",
    "    min_df=5,\n",
    "    use_idf=True,\n",
    "    ngram_range=(1,2),\n",
    ")\n",
    "\n",
    "## =======================================================\n",
    "## MODELING\n",
    "## =======================================================\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.svm import LinearSVC\n",
    "from sklearn.naive_bayes import BernoulliNB, MultinomialNB\n",
    "\n",
    "## ----- CLASSIFIERS\n",
    "mnb = MultinomialNB()\n",
    "## LinearSVC shuffles the data with a pseudo-random generator in its dual solver;\n",
    "## pin the seed so Restart & Run All produces the same model every time.\n",
    "svm = LinearSVC(C=1, random_state=0)\n",
    "\n",
    "def get_test_train_vec(X, y, vectorizer, test_size=0.4, random_state=0):\n",
    "    \"\"\"Split the data, then vectorize both splits with a train-only vocabulary.\n",
    "\n",
    "    The vectorizer is fitted on the training documents only and then applied\n",
    "    to the test documents, so no test-set vocabulary leaks into training.\n",
    "\n",
    "    Args:\n",
    "        X: Raw documents to split and vectorize.\n",
    "        y: Labels aligned with ``X``.\n",
    "        vectorizer: Object exposing ``fit_transform`` and ``transform``;\n",
    "            it is (re)fitted in place.\n",
    "        test_size: Forwarded to ``train_test_split`` (default 0.4).\n",
    "        random_state: Forwarded to ``train_test_split`` (default 0).\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(X_train_vec, X_test_vec, y_train, y_test)``.\n",
    "    \"\"\"\n",
    "    X_train, X_test, y_train, y_test = train_test_split(\n",
    "        X, y, test_size=test_size, random_state=random_state\n",
    "    )\n",
    "    X_train_vec = vectorizer.fit_transform(X_train)\n",
    "    X_test_vec = vectorizer.transform(X_test)\n",
    "    return X_train_vec, X_test_vec, y_train, y_test\n",
    "\n",
    "def run_classifier(X_train_vec, X_test_vec, y_train, y_test, labels, target_names, classifier):\n",
    "    clf = classifier\n",
    "    clf.fit(X_train_vec,y_train)\n",
    "    print(clf.score(X_test_vec,y_test))\n",
    "    return clf\n",
    "    \n",
    "def get_model(X, y, labels, target_names, classifier, vec):\n",
    "    \"\"\"Split and vectorize ``X``/``y`` with ``vec``, then fit and score ``classifier``.\n",
    "\n",
    "    Returns:\n",
    "        Whatever ``run_classifier`` returns for the resulting splits.\n",
    "    \"\"\"\n",
    "    # (X_train_vec, X_test_vec, y_train, y_test), forwarded positionally.\n",
    "    splits = get_test_train_vec(X, y, vec)\n",
    "    return run_classifier(*splits, labels, target_names, classifier)\n",
    "    \n",
    "## =======================================================\n",
    "## VISUALIZING\n",
    "## =======================================================\n",
    "from tabulate import tabulate\n",
    "import pandas as pd\n",
    "\n",
    "def return_features(vec, model):\n",
    "    \"\"\"Print the 10 lowest- and 10 highest-weighted features for every class.\n",
    "\n",
    "    Args:\n",
    "        vec: Fitted vectorizer exposing ``get_feature_names_out()`` or the\n",
    "            older ``get_feature_names()``.\n",
    "        model: Fitted classifier exposing one row of feature weights per class\n",
    "            as ``coef_`` or, failing that, ``feature_log_prob_``.\n",
    "    \"\"\"\n",
    "    # get_feature_names() was removed in scikit-learn 1.2; prefer its replacement.\n",
    "    if hasattr(vec, 'get_feature_names_out'):\n",
    "        feature_names = list(vec.get_feature_names_out())\n",
    "    else:\n",
    "        feature_names = vec.get_feature_names()\n",
    "    # Naive Bayes models lost coef_ in scikit-learn 1.1; fall back to the\n",
    "    # per-class feature log-probabilities they still expose.\n",
    "    weights = model.coef_ if hasattr(model, 'coef_') else model.feature_log_prob_\n",
    "    for i, feature_probability in enumerate(weights):\n",
    "        print('============ Sentiment Score: ', i)\n",
    "        # Sort once, ascending by weight: the head holds the weakest features, the tail the strongest.\n",
    "        ranked = sorted(zip(feature_probability, feature_names))\n",
    "        lowest = pd.DataFrame(ranked[:10])\n",
    "        highest = pd.DataFrame(ranked[-10:])\n",
    "        table = pd.concat([lowest, highest], axis=1)\n",
    "        # Header pairs read across two columns each: \"Least Likely\" | \"Most Likely\".\n",
    "        print(tabulate(table, tablefmt=\"fancy_grid\", headers=[\"Least\",\"Likely\",\"Most\",\"Likely\"], floatfmt=\".2f\"))\n",
    "        "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### DATA GOES HERE:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pandas is already imported as `pd` in the setup cell above.\n",
    "train = pd.read_csv(\"kaggle-sentiment/train.tsv\", sep='\\t')\n",
    "\n",
    "# Phrases are the model inputs; the `Sentiment` column supplies the labels.\n",
    "X = train['Phrase'].values\n",
    "y = train['Sentiment'].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TASK 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MNB | Vectorizer 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.606401384083045\n",
      "============ Sentiment Score:  0\n",
      "╒════╤════════╤══════════╤═════════╤════════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely     │\n",
      "╞════╪════════╪══════════╪═════════╪════════════╡\n",
      "│  0 │ -10.48 │ 102      │   -5.95 │ time       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  1 │ -10.48 │ 10th     │   -5.94 │ minutes    │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  2 │ -10.48 │ 127      │   -5.93 │ characters │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  3 │ -10.48 │ 13th     │   -5.93 │ story      │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  4 │ -10.48 │ 14       │   -5.90 │ comedy     │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  5 │ -10.48 │ 16       │   -5.70 │ just       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  6 │ -10.48 │ 163      │   -5.20 │ like       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  7 │ -10.48 │ 168      │   -5.07 │ bad        │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  8 │ -10.48 │ 170      │   -4.85 │ film       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  9 │ -10.48 │ 1790     │   -4.32 │ movie      │\n",
      "╘════╧════════╧══════════╧═════════╧════════════╛\n",
      "============ Sentiment Score:  1\n",
      "╒════╤════════╤══════════╤═════════╤════════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely     │\n",
      "╞════╪════════╪══════════╪═════════╪════════════╡\n",
      "│  0 │ -11.33 │ 000      │   -5.74 │ characters │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  1 │ -11.33 │ 10th     │   -5.73 │ bad        │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  2 │ -11.33 │ 127      │   -5.66 │ rrb        │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  3 │ -11.33 │ 14       │   -5.64 │ little     │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  4 │ -11.33 │ 168      │   -5.48 │ story      │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  5 │ -11.33 │ 1790     │   -5.44 │ just       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  6 │ -11.33 │ 1915     │   -5.43 │ does       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  7 │ -11.33 │ 1920     │   -5.05 │ like       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  8 │ -11.33 │ 1933     │   -4.69 │ film       │\n",
      "├────┼────────┼──────────┼─────────┼────────────┤\n",
      "│  9 │ -11.33 │ 1937     │   -4.58 │ movie      │\n",
      "╘════╧════════╧══════════╧═════════╧════════════╛\n",
      "============ Sentiment Score:  2\n",
      "╒════╤════════╤═════════════╤═════════╤════════════╕\n",
      "│    │   Most │ Likely      │   Least │ Likely     │\n",
      "╞════╪════════╪═════════════╪═════════╪════════════╡\n",
      "│  0 │ -11.84 │ abroad      │   -5.95 │ movies     │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  1 │ -11.84 │ acclaim     │   -5.90 │ characters │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  2 │ -11.84 │ acumen      │   -5.79 │ time       │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  3 │ -11.84 │ adding      │   -5.79 │ life       │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  4 │ -11.84 │ admirers    │   -5.59 │ lrb        │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  5 │ -11.84 │ affirms     │   -5.49 │ story      │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  6 │ -11.84 │ aggravating │   -5.34 │ rrb        │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  7 │ -11.84 │ aimlessly   │   -5.30 │ like       │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  8 │ -11.84 │ amaze       │   -4.75 │ movie      │\n",
      "├────┼────────┼─────────────┼─────────┼────────────┤\n",
      "│  9 │ -11.84 │ ambiguities │   -4.68 │ film       │\n",
      "╘════╧════════╧═════════════╧═════════╧════════════╛\n",
      "============ Sentiment Score:  3\n",
      "╒════╤════════╤══════════╤═════════╤══════════╕\n",
      "│    │   Most │   Likely │   Least │ Likely   │\n",
      "╞════╪════════╪══════════╪═════════╪══════════╡\n",
      "│  0 │ -11.47 │      102 │   -5.77 │ lrb      │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  1 │ -11.47 │      104 │   -5.76 │ love     │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  2 │ -11.47 │      105 │   -5.68 │ rrb      │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  3 │ -11.47 │      110 │   -5.67 │ life     │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  4 │ -11.47 │      120 │   -5.57 │ like     │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  5 │ -11.47 │      127 │   -5.50 │ story    │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  6 │ -11.47 │      140 │   -5.49 │ funny    │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  7 │ -11.47 │      146 │   -5.10 │ good     │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  8 │ -11.47 │     1915 │   -4.80 │ movie    │\n",
      "├────┼────────┼──────────┼─────────┼──────────┤\n",
      "│  9 │ -11.47 │     1959 │   -4.49 │ film     │\n",
      "╘════╧════════╧══════════╧═════════╧══════════╛\n",
      "============ Sentiment Score:  4\n",
      "╒════╤════════╤══════════╤═════════╤══════════════╕\n",
      "│    │   Most │ Likely   │   Least │ Likely       │\n",
      "╞════╪════════╪══════════╪═════════╪══════════════╡\n",
      "│  0 │ -10.63 │ 000      │   -5.81 │ performance  │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  1 │ -10.63 │ 101      │   -5.77 │ comedy       │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  2 │ -10.63 │ 102      │   -5.73 │ great        │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  3 │ -10.63 │ 103      │   -5.69 │ story        │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  4 │ -10.63 │ 104      │   -5.64 │ performances │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  5 │ -10.63 │ 105      │   -5.47 │ good         │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  6 │ -10.63 │ 10th     │   -5.24 │ funny        │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  7 │ -10.63 │ 110      │   -5.15 │ best         │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  8 │ -10.63 │ 112      │   -4.78 │ movie        │\n",
      "├────┼────────┼──────────┼─────────┼──────────────┤\n",
      "│  9 │ -10.63 │ 12       │   -4.26 │ film         │\n",
      "╘════╧════════╧══════════╧═════════╧══════════════╛\n"
     ]
    }
   ],
   "source": [
    "# Boolean unigram counts + Multinomial Naive Bayes.\n",
    "vec, classifier = unigram_bool_cv, mnb\n",
    "\n",
    "# Fit on the training split (prints the test-split score), then list extreme features per class.\n",
    "model = get_model(\n",
    "    X, y,\n",
    "    labels=[0,1,2,3,4],\n",
    "    target_names=['0','1','2','3','4'],\n",
    "    classifier=classifier,\n",
    "    vec=vec,\n",
    ")\n",
    "return_features(vec, model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SVM | Vectorizer 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.6245033961296937\n",
      "============ Sentiment Score:  0\n",
      "╒════╤════════╤═══════════╤═════════╤════════════════╕\n",
      "│    │   Most │ Likely    │   Least │ Likely         │\n",
      "╞════╪════════╪═══════════╪═════════╪════════════════╡\n",
      "│  0 │  -1.81 │ hawke     │    1.63 │ cesspool       │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  1 │  -1.71 │ collar    │    1.66 │ pompous        │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  2 │  -1.69 │ giddy     │    1.69 │ stinks         │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  3 │  -1.59 │ swimfan   │    1.70 │ distasteful    │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  4 │  -1.57 │ blue      │    1.71 │ unwatchable    │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  5 │  -1.45 │ dogtown   │    1.72 │ disappointment │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  6 │  -1.41 │ victim    │    1.76 │ unbearable     │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  7 │  -1.41 │ joan      │    1.81 │ disgusting     │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  8 │  -1.41 │ won       │    1.81 │ stinker        │\n",
      "├────┼────────┼───────────┼─────────┼────────────────┤\n",
      "│  9 │  -1.40 │ innocence │    1.82 │ worthless      │\n",
      "╘════╧════════╧═══════════╧═════════╧════════════════╛\n",
      "============ Sentiment Score:  1\n",
      "╒════╤════════╤═════════════════╤═════════╤════════════╕\n",
      "│    │   Most │ Likely          │   Least │ Likely     │\n",
      "╞════╪════════╪═════════════════╪═════════╪════════════╡\n",
      "│  0 │  -2.22 │ hunk            │    1.67 │ activity   │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  1 │  -2.14 │ odor            │    1.67 │ clunkiness │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  2 │  -2.11 │ efficient       │    1.70 │ razzie     │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  3 │  -2.05 │ indescribably   │    1.70 │ squanders  │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  4 │  -2.05 │ norton          │    1.75 │ slimed     │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  5 │  -1.85 │ metropolis      │    1.77 │ muddy      │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  6 │  -1.79 │ unimaginatively │    1.79 │ padded     │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  7 │  -1.79 │ heels           │    1.81 │ charitable │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  8 │  -1.77 │ italicizes      │    1.96 │ outshined  │\n",
      "├────┼────────┼─────────────────┼─────────┼────────────┤\n",
      "│  9 │  -1.77 │ penetrating     │    2.04 │ flatfooted │\n",
      "╘════╧════════╧═════════════════╧═════════╧════════════╛\n",
      "============ Sentiment Score:  2\n",
      "╒════╤════════╤═══════════════╤═════════╤════════════╕\n",
      "│    │   Most │ Likely        │   Least │ Likely     │\n",
      "╞════╪════════╪═══════════════╪═════════╪════════════╡\n",
      "│  0 │  -2.80 │ flatfooted    │    1.83 │ handy      │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  1 │  -2.04 │ freshly       │    1.98 │ cunning    │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  2 │  -1.88 │ dimness       │    1.99 │ fashioning │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  3 │  -1.85 │ magnificent   │    2.02 │ nouvelle   │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  4 │  -1.82 │ insensitivity │    2.18 │ batch      │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  5 │  -1.81 │ elegantly     │    2.19 │ tidings    │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  6 │  -1.75 │ irresistibly  │    2.37 │ unseemly   │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  7 │  -1.72 │ clenching     │    2.49 │ pint       │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  8 │  -1.71 │ lagging       │    2.56 │ warren     │\n",
      "├────┼────────┼───────────────┼─────────┼────────────┤\n",
      "│  9 │  -1.70 │ strongest     │    2.57 │ spades     │\n",
      "╘════╧════════╧═══════════════╧═════════╧════════════╛\n",
      "============ Sentiment Score:  3\n",
      "╒════╤════════╤════════════╤═════════╤═══════════════╕\n",
      "│    │   Most │ Likely     │   Least │ Likely        │\n",
      "╞════╪════════╪════════════╪═════════╪═══════════════╡\n",
      "│  0 │  -2.32 │ mib        │    1.59 │ serb          │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  1 │  -2.13 │ facts      │    1.60 │ compels       │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  2 │  -2.08 │ strung     │    1.64 │ clenching     │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  3 │  -2.03 │ brutality  │    1.70 │ companionable │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  4 │  -1.95 │ ghost      │    1.70 │ pulpiness     │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  5 │  -1.95 │ wider      │    1.88 │ efficient     │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  6 │  -1.94 │ agenda     │    1.90 │ knotting      │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  7 │  -1.89 │ mud        │    1.98 │ ideally       │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  8 │  -1.78 │ mid        │    1.99 │ irresistibly  │\n",
      "├────┼────────┼────────────┼─────────┼───────────────┤\n",
      "│  9 │  -1.77 │ concession │    2.12 │ marveled      │\n",
      "╘════╧════════╧════════════╧═════════╧═══════════════╛\n",
      "============ Sentiment Score:  4\n",
      "╒════╤════════╤════════════════╤═════════╤═════════════╕\n",
      "│    │   Most │ Likely         │   Least │ Likely      │\n",
      "╞════╪════════╪════════════════╪═════════╪═════════════╡\n",
      "│  0 │  -2.32 │ sacrifices     │    1.57 │ stunning    │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  1 │  -1.86 │ maintained     │    1.58 │ flawless    │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  2 │  -1.77 │ argue          │    1.59 │ refreshes   │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  3 │  -1.66 │ failure        │    1.61 │ astonish    │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  4 │  -1.61 │ bore           │    1.63 │ phenomenal  │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  5 │  -1.55 │ nonchallenging │    1.65 │ masterful   │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  6 │  -1.53 │ clone          │    1.68 │ masterfully │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  7 │  -1.53 │ forcefully     │    1.91 │ glorious    │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  8 │  -1.52 │ lame           │    1.96 │ miraculous  │\n",
      "├────┼────────┼────────────────┼─────────┼─────────────┤\n",
      "│  9 │  -1.47 │ homage         │    2.02 │ perfection  │\n",
      "╘════╧════════╧════════════════╧═════════╧═════════════╛\n"
     ]
    }
   ],
   "source": [
    "# Boolean unigram counts + linear SVM.\n",
    "vec, classifier = unigram_bool_cv, svm\n",
    "\n",
    "# Fit on the training split (prints the test-split score), then list extreme features per class.\n",
    "model = get_model(\n",
    "    X, y,\n",
    "    labels=[0,1,2,3,4],\n",
    "    target_names=['0','1','2','3','4'],\n",
    "    classifier=classifier,\n",
    "    vec=vec,\n",
    ")\n",
    "return_features(vec, model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}