{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "train=p.read_csv(\"kaggle-sentiment/train.tsv\", delimiter='\\t')\n",
    "y=train['Sentiment'].values\n",
    "X=train['Phrase'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5529026288030471 | B? F | CV: 2 | Classifier: BernoulliNB\n",
      "0.5531524365695574 | B? F | CV: 3 | Classifier: BernoulliNB\n",
      "0.5592720169584305 | B? F | CV: 2 | Classifier: MultinomialNB\n",
      "0.5595474569680894 | B? F | CV: 3 | Classifier: MultinomialNB\n",
      "0.5596116298457374 | B? T | CV: 2 | Classifier: MultinomialNB\n",
      "0.5601369637205256 | B? T | CV: 3 | Classifier: MultinomialNB\n"
     ]
    }
   ],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.naive_bayes import BernoulliNB, MultinomialNB\n",
    "\n",
    "def runPipeline(classifier, boolean, cv):\n",
    "    nb_clf_pipe = Pipeline([('vect', CountVectorizer(encoding='latin-1', binary=boolean)),('nb', classifier)])\n",
    "    scores = cross_val_score(nb_clf_pipe, X, y, cv=cv)\n",
    "    avg=sum(scores)/len(scores)\n",
    "    pretty_line = \"{} | B? {} | CV: {} | Classifier: {}\"\n",
    "    print(pretty_line.format(avg, str(boolean)[0], cv, str(classifier).split('(')[0]))\n",
    "    \n",
    "runPipeline(BernoulliNB(), False, 2)\n",
    "runPipeline(BernoulliNB(), False, 3)\n",
    "runPipeline(MultinomialNB(), False, 2)\n",
    "runPipeline(MultinomialNB(), False, 3)\n",
    "runPipeline(MultinomialNB(), True, 2)\n",
    "runPipeline(MultinomialNB(), True, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 2, ..., 3, 2, 2])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('hw6_data_sentiment.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>PoN</th>\n",
       "      <th>tokens</th>\n",
       "      <th>num_tokens</th>\n",
       "      <th>sentences</th>\n",
       "      <th>num_sentences</th>\n",
       "      <th>no_sw</th>\n",
       "      <th>num_no_sw</th>\n",
       "      <th>topwords_unfil</th>\n",
       "      <th>topwords_fil</th>\n",
       "      <th>...</th>\n",
       "      <th>v_pos_fd</th>\n",
       "      <th>bow</th>\n",
       "      <th>bow_nosw</th>\n",
       "      <th>diy_cleaner</th>\n",
       "      <th>pruned</th>\n",
       "      <th>nltk_negs</th>\n",
       "      <th>unigram_feats</th>\n",
       "      <th>bigram_feats</th>\n",
       "      <th>bigram_feats_neg</th>\n",
       "      <th>no_shared_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>I went to XYZ restaurant last week and I was v...</td>\n",
       "      <td>N</td>\n",
       "      <td>['i', 'went', 'to', 'xyz', 'restaurant', 'last...</td>\n",
       "      <td>50</td>\n",
       "      <td>['I went to XYZ restaurant last week and I was...</td>\n",
       "      <td>3</td>\n",
       "      <td>['went', 'xyz', 'restaurant', 'last', 'week', ...</td>\n",
       "      <td>25</td>\n",
       "      <td>[('was', 4), ('to', 3), ('i', 2), ('and', 2), ...</td>\n",
       "      <td>[('went', 1), ('xyz', 1), ('restaurant', 1), (...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.186</td>\n",
       "      <td>Counter({'was': 4, 'to': 3, 'i': 2, 'and': 2, ...</td>\n",
       "      <td>Counter({'went': 1, 'xyz': 1, 'restaurant': 1,...</td>\n",
       "      <td>i went to xyz restaurant last week and i was v...</td>\n",
       "      <td>went restaurant last week very disappointed. f...</td>\n",
       "      <td>['i', 'went', 'to', 'xyz', 'restaurant', 'last...</td>\n",
       "      <td>['was', 'to', 'i', 'and', 'the', 'a_NEG', 'for...</td>\n",
       "      <td>['i_went', 'went_to', 'to_xyz', 'xyz_restauran...</td>\n",
       "      <td>['i_went', 'went_to', 'to_xyz', 'xyz_restauran...</td>\n",
       "      <td>['i', 'to', 'xyz', 'week', 'and', 'i', 'was', ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>In each of the diner dish there are at least o...</td>\n",
       "      <td>N</td>\n",
       "      <td>['in', 'each', 'of', 'the', 'diner', 'dish', '...</td>\n",
       "      <td>78</td>\n",
       "      <td>['In each of the diner dish there are at least...</td>\n",
       "      <td>4</td>\n",
       "      <td>['diner', 'dish', 'least', 'one', 'fly', 'wait...</td>\n",
       "      <td>31</td>\n",
       "      <td>[('the', 6), ('in', 4), ('to', 4), ('of', 3), ...</td>\n",
       "      <td>[('want', 3), ('dish', 2), ('diner', 1), ('lea...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.042</td>\n",
       "      <td>Counter({'the': 6, 'in': 4, 'to': 4, 'of': 3, ...</td>\n",
       "      <td>Counter({'want': 3, 'dish': 2, 'diner': 1, 'le...</td>\n",
       "      <td>in each of the diner dish there are at least o...</td>\n",
       "      <td>each diner dish there least waiting hour dish ...</td>\n",
       "      <td>['in', 'each', 'of', 'the', 'diner', 'dish', '...</td>\n",
       "      <td>['to_NEG', 'the', 'want_NEG', 'the_NEG', 'in',...</td>\n",
       "      <td>['in_each', 'each_of', 'of_the', 'the_diner', ...</td>\n",
       "      <td>['in_each', 'each_of', 'of_the', 'the_diner', ...</td>\n",
       "      <td>['in', 'of', 'the', 'diner', 'are', 'at', 'lea...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>This is the last place you would want to dine ...</td>\n",
       "      <td>N</td>\n",
       "      <td>['this', 'is', 'the', 'last', 'place', 'you', ...</td>\n",
       "      <td>151</td>\n",
       "      <td>['This is the last place you would want to din...</td>\n",
       "      <td>7</td>\n",
       "      <td>['last', 'place', 'would', 'want', 'dine', 'pr...</td>\n",
       "      <td>61</td>\n",
       "      <td>[('to', 10), ('the', 9), ('and', 7), ('we', 5)...</td>\n",
       "      <td>[('minutes', 3), ('place', 2), ('price', 2), (...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.171</td>\n",
       "      <td>Counter({'to': 10, 'the': 9, 'and': 7, 'we': 5...</td>\n",
       "      <td>Counter({'minutes': 3, 'place': 2, 'price': 2,...</td>\n",
       "      <td>this is the last place you would want to dine ...</td>\n",
       "      <td>this last place would want dine price that exp...</td>\n",
       "      <td>['this', 'is', 'the', 'last', 'place', 'you', ...</td>\n",
       "      <td>['to_NEG', 'the_NEG', 'and_NEG', 'we_NEG', 'ha...</td>\n",
       "      <td>['this_is', 'is_the', 'the_last', 'last_place'...</td>\n",
       "      <td>['this_is', 'is_the', 'the_last', 'last_place'...</td>\n",
       "      <td>['is', 'the', 'you', 'to', 'dine', 'at', 'the'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>I went to this restaurant where I had ordered ...</td>\n",
       "      <td>N</td>\n",
       "      <td>['i', 'went', 'to', 'this', 'restaurant', 'whe...</td>\n",
       "      <td>75</td>\n",
       "      <td>['I went to this restaurant where I had ordere...</td>\n",
       "      <td>6</td>\n",
       "      <td>['went', 'restaurant', 'ordered', 'complimenta...</td>\n",
       "      <td>33</td>\n",
       "      <td>[('i', 6), ('the', 6), ('to', 3), ('for', 3), ...</td>\n",
       "      <td>[('salad', 3), ('restaurant', 2), ('waiter', 2...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.162</td>\n",
       "      <td>Counter({'i': 6, 'the': 6, 'to': 3, 'for': 3, ...</td>\n",
       "      <td>Counter({'salad': 3, 'restaurant': 2, 'waiter'...</td>\n",
       "      <td>i went to this restaurant where i had ordered ...</td>\n",
       "      <td>went this restaurant where ordered complimenta...</td>\n",
       "      <td>['i', 'went', 'to', 'this', 'restaurant', 'whe...</td>\n",
       "      <td>['the', 'i', 'salad', 'had', 'for', 'waiter', ...</td>\n",
       "      <td>['i_went', 'went_to', 'to_this', 'this_restaur...</td>\n",
       "      <td>['i_went', 'went_to', 'to_this', 'this_restaur...</td>\n",
       "      <td>['i', 'to', 'i', 'had', 'for', 'the', 'complim...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>I went there with two friends at 6pm. Long que...</td>\n",
       "      <td>N</td>\n",
       "      <td>['i', 'went', 'there', 'with', 'two', 'friends...</td>\n",
       "      <td>73</td>\n",
       "      <td>['I went there with two friends at 6pm.', 'Lon...</td>\n",
       "      <td>10</td>\n",
       "      <td>['went', 'two', 'friends', 'long', 'queue', 'd...</td>\n",
       "      <td>38</td>\n",
       "      <td>[('there', 3), ('but', 3), ('it', 3), ('a', 3)...</td>\n",
       "      <td>[('two', 2), ('friends', 2), ('long', 2), ('di...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.353</td>\n",
       "      <td>Counter({'there': 3, 'but': 3, 'it': 3, 'a': 3...</td>\n",
       "      <td>Counter({'two': 2, 'friends': 2, 'long': 2, 'd...</td>\n",
       "      <td>i went there with two friends at 6pm. long que...</td>\n",
       "      <td>went there with friends 6pm. long queue there....</td>\n",
       "      <td>['i', 'went', 'there', 'with', 'two', 'friends...</td>\n",
       "      <td>['a_NEG', 'there', 'us_NEG', 'but_NEG', 'and_N...</td>\n",
       "      <td>['i_went', 'went_there', 'there_with', 'with_t...</td>\n",
       "      <td>['i_went', 'went_there', 'there_with', 'with_t...</td>\n",
       "      <td>['i', 'two', 'at', 'queue', 'was', 'but', 'it'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>83</td>\n",
       "      <td>This place was one of the best restaurant I ha...</td>\n",
       "      <td>P</td>\n",
       "      <td>['this', 'place', 'was', 'one', 'of', 'the', '...</td>\n",
       "      <td>70</td>\n",
       "      <td>['This place was one of the best restaurant I ...</td>\n",
       "      <td>6</td>\n",
       "      <td>['place', 'one', 'best', 'restaurant', 'price'...</td>\n",
       "      <td>32</td>\n",
       "      <td>[('the', 5), ('i', 3), ('and', 3), ('this', 2)...</td>\n",
       "      <td>[('best', 2), ('area', 2), ('place', 1), ('one...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.300</td>\n",
       "      <td>Counter({'the': 5, 'i': 3, 'and': 3, 'this': 2...</td>\n",
       "      <td>Counter({'best': 2, 'area': 2, 'place': 1, 'on...</td>\n",
       "      <td>this place was one of the best restaurant i ha...</td>\n",
       "      <td>this place best restaurant have been. price li...</td>\n",
       "      <td>['this', 'place', 'was', 'one', 'of', 'the', '...</td>\n",
       "      <td>['the', 'i', 'and', 'this', 'best', 'is', 'are...</td>\n",
       "      <td>['this_place', 'place_was', 'was_one', 'one_of...</td>\n",
       "      <td>['this_place', 'place_was', 'was_one', 'one_of...</td>\n",
       "      <td>['was', 'one', 'of', 'the', 'i', 'the', 'is', ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>84</td>\n",
       "      <td>The best experience I ever had happened in Lon...</td>\n",
       "      <td>P</td>\n",
       "      <td>['the', 'best', 'experience', 'i', 'ever', 'ha...</td>\n",
       "      <td>42</td>\n",
       "      <td>['The best experience I ever had happened in L...</td>\n",
       "      <td>3</td>\n",
       "      <td>['best', 'experience', 'ever', 'happened', 'lo...</td>\n",
       "      <td>21</td>\n",
       "      <td>[('the', 3), ('in', 3), ('food', 2), ('a', 2),...</td>\n",
       "      <td>[('food', 2), ('best', 1), ('experience', 1), ...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.283</td>\n",
       "      <td>Counter({'the': 3, 'in': 3, 'food': 2, 'a': 2,...</td>\n",
       "      <td>Counter({'food': 2, 'best': 1, 'experience': 1...</td>\n",
       "      <td>the best experience i ever had happened in lon...</td>\n",
       "      <td>best experience ever happened london britain. ...</td>\n",
       "      <td>['the', 'best', 'experience', 'i', 'ever', 'ha...</td>\n",
       "      <td>['in', 'the', 'best', 'experience', 'i', 'ever...</td>\n",
       "      <td>['the_best', 'best_experience', 'experience_i'...</td>\n",
       "      <td>['the_best', 'best_experience', 'experience_i'...</td>\n",
       "      <td>['the', 'i', 'had', 'happened', 'in', 'london'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>85</td>\n",
       "      <td>This Japanese restaurant is so popular recentl...</td>\n",
       "      <td>P</td>\n",
       "      <td>['this', 'japanese', 'restaurant', 'is', 'so',...</td>\n",
       "      <td>88</td>\n",
       "      <td>['This Japanese restaurant is so popular recen...</td>\n",
       "      <td>12</td>\n",
       "      <td>['japanese', 'restaurant', 'popular', 'recentl...</td>\n",
       "      <td>49</td>\n",
       "      <td>[('is', 4), ('the', 4), ('japanese', 2), ('a',...</td>\n",
       "      <td>[('japanese', 2), ('food', 2), ('right', 2), (...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.462</td>\n",
       "      <td>Counter({'is': 4, 'the': 4, 'japanese': 2, 'a'...</td>\n",
       "      <td>Counter({'japanese': 2, 'food': 2, 'right': 2,...</td>\n",
       "      <td>this japanese restaurant is so popular recentl...</td>\n",
       "      <td>this japanese restaurant popular recently that...</td>\n",
       "      <td>['this', 'japanese', 'restaurant', 'is', 'so',...</td>\n",
       "      <td>['the_NEG', 'is_NEG', 'japanese', 'a', 'and_NE...</td>\n",
       "      <td>['this_japanese', 'japanese_restaurant', 'rest...</td>\n",
       "      <td>['this_japanese', 'japanese_restaurant', 'rest...</td>\n",
       "      <td>['is', 'so', 'popular', 'recently', 'as', 'a',...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>86</td>\n",
       "      <td>Hibachi the grill is one of my favorite restau...</td>\n",
       "      <td>P</td>\n",
       "      <td>['hibachi', 'the', 'grill', 'is', 'one', 'of',...</td>\n",
       "      <td>65</td>\n",
       "      <td>['Hibachi the grill is one of my favorite rest...</td>\n",
       "      <td>5</td>\n",
       "      <td>['hibachi', 'grill', 'one', 'favorite', 'resta...</td>\n",
       "      <td>30</td>\n",
       "      <td>[('the', 8), ('is', 6), ('it', 3), ('hibachi',...</td>\n",
       "      <td>[('hibachi', 2), ('grill', 2), ('restaurants',...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.388</td>\n",
       "      <td>Counter({'the': 8, 'is': 6, 'it': 3, 'hibachi'...</td>\n",
       "      <td>Counter({'hibachi': 2, 'grill': 2, 'restaurant...</td>\n",
       "      <td>hibachi the grill is one of my favorite restau...</td>\n",
       "      <td>hibachi grill favorite restaurants. like drama...</td>\n",
       "      <td>['hibachi', 'the', 'grill', 'is', 'one', 'of',...</td>\n",
       "      <td>['the', 'is', 'it', 'hibachi', 'grill', 'of', ...</td>\n",
       "      <td>['hibachi_the', 'the_grill', 'grill_is', 'is_o...</td>\n",
       "      <td>['hibachi_the', 'the_grill', 'grill_is', 'is_o...</td>\n",
       "      <td>['hibachi', 'the', 'grill', 'is', 'one', 'of',...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>87</td>\n",
       "      <td>I went to this ultra-luxurious restaurant in D...</td>\n",
       "      <td>P</td>\n",
       "      <td>['i', 'went', 'to', 'this', 'restaurant', 'in'...</td>\n",
       "      <td>63</td>\n",
       "      <td>['I went to this ultra-luxurious restaurant in...</td>\n",
       "      <td>5</td>\n",
       "      <td>['went', 'restaurant', 'downtown', 'new', 'yor...</td>\n",
       "      <td>35</td>\n",
       "      <td>[('i', 4), ('this', 3), ('and', 3), ('restaura...</td>\n",
       "      <td>[('restaurant', 2), ('expensive', 2), ('went',...</td>\n",
       "      <td>...</td>\n",
       "      <td>0.223</td>\n",
       "      <td>Counter({'i': 4, 'this': 3, 'and': 3, 'restaur...</td>\n",
       "      <td>Counter({'restaurant': 2, 'expensive': 2, 'wen...</td>\n",
       "      <td>i went to this ultra-luxurious restaurant in d...</td>\n",
       "      <td>went this ultra-luxurious restaurant downtown ...</td>\n",
       "      <td>['i', 'went', 'to', 'this', 'restaurant', 'in'...</td>\n",
       "      <td>['i', 'this', 'and', 'restaurant', 'in', 'expe...</td>\n",
       "      <td>['i_went', 'went_to', 'to_this', 'this_restaur...</td>\n",
       "      <td>['i_went', 'went_to', 'to_this', 'this_restaur...</td>\n",
       "      <td>['i', 'to', 'in', 'downtown', 'new', 'york', '...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>88 rows × 40 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    0 PoN  \\\n",
       "0   I went to XYZ restaurant last week and I was v...   N   \n",
       "1   In each of the diner dish there are at least o...   N   \n",
       "2   This is the last place you would want to dine ...   N   \n",
       "3   I went to this restaurant where I had ordered ...   N   \n",
       "4   I went there with two friends at 6pm. Long que...   N   \n",
       "..                                                ...  ..   \n",
       "83  This place was one of the best restaurant I ha...   P   \n",
       "84  The best experience I ever had happened in Lon...   P   \n",
       "85  This Japanese restaurant is so popular recentl...   P   \n",
       "86  Hibachi the grill is one of my favorite restau...   P   \n",
       "87  I went to this ultra-luxurious restaurant in D...   P   \n",
       "\n",
       "                                               tokens  num_tokens  \\\n",
       "0   ['i', 'went', 'to', 'xyz', 'restaurant', 'last...          50   \n",
       "1   ['in', 'each', 'of', 'the', 'diner', 'dish', '...          78   \n",
       "2   ['this', 'is', 'the', 'last', 'place', 'you', ...         151   \n",
       "3   ['i', 'went', 'to', 'this', 'restaurant', 'whe...          75   \n",
       "4   ['i', 'went', 'there', 'with', 'two', 'friends...          73   \n",
       "..                                                ...         ...   \n",
       "83  ['this', 'place', 'was', 'one', 'of', 'the', '...          70   \n",
       "84  ['the', 'best', 'experience', 'i', 'ever', 'ha...          42   \n",
       "85  ['this', 'japanese', 'restaurant', 'is', 'so',...          88   \n",
       "86  ['hibachi', 'the', 'grill', 'is', 'one', 'of',...          65   \n",
       "87  ['i', 'went', 'to', 'this', 'restaurant', 'in'...          63   \n",
       "\n",
       "                                            sentences  num_sentences  \\\n",
       "0   ['I went to XYZ restaurant last week and I was...              3   \n",
       "1   ['In each of the diner dish there are at least...              4   \n",
       "2   ['This is the last place you would want to din...              7   \n",
       "3   ['I went to this restaurant where I had ordere...              6   \n",
       "4   ['I went there with two friends at 6pm.', 'Lon...             10   \n",
       "..                                                ...            ...   \n",
       "83  ['This place was one of the best restaurant I ...              6   \n",
       "84  ['The best experience I ever had happened in L...              3   \n",
       "85  ['This Japanese restaurant is so popular recen...             12   \n",
       "86  ['Hibachi the grill is one of my favorite rest...              5   \n",
       "87  ['I went to this ultra-luxurious restaurant in...              5   \n",
       "\n",
       "                                                no_sw  num_no_sw  \\\n",
       "0   ['went', 'xyz', 'restaurant', 'last', 'week', ...         25   \n",
       "1   ['diner', 'dish', 'least', 'one', 'fly', 'wait...         31   \n",
       "2   ['last', 'place', 'would', 'want', 'dine', 'pr...         61   \n",
       "3   ['went', 'restaurant', 'ordered', 'complimenta...         33   \n",
       "4   ['went', 'two', 'friends', 'long', 'queue', 'd...         38   \n",
       "..                                                ...        ...   \n",
       "83  ['place', 'one', 'best', 'restaurant', 'price'...         32   \n",
       "84  ['best', 'experience', 'ever', 'happened', 'lo...         21   \n",
       "85  ['japanese', 'restaurant', 'popular', 'recentl...         49   \n",
       "86  ['hibachi', 'grill', 'one', 'favorite', 'resta...         30   \n",
       "87  ['went', 'restaurant', 'downtown', 'new', 'yor...         35   \n",
       "\n",
       "                                       topwords_unfil  \\\n",
       "0   [('was', 4), ('to', 3), ('i', 2), ('and', 2), ...   \n",
       "1   [('the', 6), ('in', 4), ('to', 4), ('of', 3), ...   \n",
       "2   [('to', 10), ('the', 9), ('and', 7), ('we', 5)...   \n",
       "3   [('i', 6), ('the', 6), ('to', 3), ('for', 3), ...   \n",
       "4   [('there', 3), ('but', 3), ('it', 3), ('a', 3)...   \n",
       "..                                                ...   \n",
       "83  [('the', 5), ('i', 3), ('and', 3), ('this', 2)...   \n",
       "84  [('the', 3), ('in', 3), ('food', 2), ('a', 2),...   \n",
       "85  [('is', 4), ('the', 4), ('japanese', 2), ('a',...   \n",
       "86  [('the', 8), ('is', 6), ('it', 3), ('hibachi',...   \n",
       "87  [('i', 4), ('this', 3), ('and', 3), ('restaura...   \n",
       "\n",
       "                                         topwords_fil  ... v_pos_fd  \\\n",
       "0   [('went', 1), ('xyz', 1), ('restaurant', 1), (...  ...    0.186   \n",
       "1   [('want', 3), ('dish', 2), ('diner', 1), ('lea...  ...    0.042   \n",
       "2   [('minutes', 3), ('place', 2), ('price', 2), (...  ...    0.171   \n",
       "3   [('salad', 3), ('restaurant', 2), ('waiter', 2...  ...    0.162   \n",
       "4   [('two', 2), ('friends', 2), ('long', 2), ('di...  ...    0.353   \n",
       "..                                                ...  ...      ...   \n",
       "83  [('best', 2), ('area', 2), ('place', 1), ('one...  ...    0.300   \n",
       "84  [('food', 2), ('best', 1), ('experience', 1), ...  ...    0.283   \n",
       "85  [('japanese', 2), ('food', 2), ('right', 2), (...  ...    0.462   \n",
       "86  [('hibachi', 2), ('grill', 2), ('restaurants',...  ...    0.388   \n",
       "87  [('restaurant', 2), ('expensive', 2), ('went',...  ...    0.223   \n",
       "\n",
       "                                                  bow  \\\n",
       "0   Counter({'was': 4, 'to': 3, 'i': 2, 'and': 2, ...   \n",
       "1   Counter({'the': 6, 'in': 4, 'to': 4, 'of': 3, ...   \n",
       "2   Counter({'to': 10, 'the': 9, 'and': 7, 'we': 5...   \n",
       "3   Counter({'i': 6, 'the': 6, 'to': 3, 'for': 3, ...   \n",
       "4   Counter({'there': 3, 'but': 3, 'it': 3, 'a': 3...   \n",
       "..                                                ...   \n",
       "83  Counter({'the': 5, 'i': 3, 'and': 3, 'this': 2...   \n",
       "84  Counter({'the': 3, 'in': 3, 'food': 2, 'a': 2,...   \n",
       "85  Counter({'is': 4, 'the': 4, 'japanese': 2, 'a'...   \n",
       "86  Counter({'the': 8, 'is': 6, 'it': 3, 'hibachi'...   \n",
       "87  Counter({'i': 4, 'this': 3, 'and': 3, 'restaur...   \n",
       "\n",
       "                                             bow_nosw  \\\n",
       "0   Counter({'went': 1, 'xyz': 1, 'restaurant': 1,...   \n",
       "1   Counter({'want': 3, 'dish': 2, 'diner': 1, 'le...   \n",
       "2   Counter({'minutes': 3, 'place': 2, 'price': 2,...   \n",
       "3   Counter({'salad': 3, 'restaurant': 2, 'waiter'...   \n",
       "4   Counter({'two': 2, 'friends': 2, 'long': 2, 'd...   \n",
       "..                                                ...   \n",
       "83  Counter({'best': 2, 'area': 2, 'place': 1, 'on...   \n",
       "84  Counter({'food': 2, 'best': 1, 'experience': 1...   \n",
       "85  Counter({'japanese': 2, 'food': 2, 'right': 2,...   \n",
       "86  Counter({'hibachi': 2, 'grill': 2, 'restaurant...   \n",
       "87  Counter({'restaurant': 2, 'expensive': 2, 'wen...   \n",
       "\n",
       "                                          diy_cleaner  \\\n",
       "0   i went to xyz restaurant last week and i was v...   \n",
       "1   in each of the diner dish there are at least o...   \n",
       "2   this is the last place you would want to dine ...   \n",
       "3   i went to this restaurant where i had ordered ...   \n",
       "4   i went there with two friends at 6pm. long que...   \n",
       "..                                                ...   \n",
       "83  this place was one of the best restaurant i ha...   \n",
       "84  the best experience i ever had happened in lon...   \n",
       "85  this japanese restaurant is so popular recentl...   \n",
       "86  hibachi the grill is one of my favorite restau...   \n",
       "87  i went to this ultra-luxurious restaurant in d...   \n",
       "\n",
       "                                               pruned  \\\n",
       "0   went restaurant last week very disappointed. f...   \n",
       "1   each diner dish there least waiting hour dish ...   \n",
       "2   this last place would want dine price that exp...   \n",
       "3   went this restaurant where ordered complimenta...   \n",
       "4   went there with friends 6pm. long queue there....   \n",
       "..                                                ...   \n",
       "83  this place best restaurant have been. price li...   \n",
       "84  best experience ever happened london britain. ...   \n",
       "85  this japanese restaurant popular recently that...   \n",
       "86  hibachi grill favorite restaurants. like drama...   \n",
       "87  went this ultra-luxurious restaurant downtown ...   \n",
       "\n",
       "                                            nltk_negs  \\\n",
       "0   ['i', 'went', 'to', 'xyz', 'restaurant', 'last...   \n",
       "1   ['in', 'each', 'of', 'the', 'diner', 'dish', '...   \n",
       "2   ['this', 'is', 'the', 'last', 'place', 'you', ...   \n",
       "3   ['i', 'went', 'to', 'this', 'restaurant', 'whe...   \n",
       "4   ['i', 'went', 'there', 'with', 'two', 'friends...   \n",
       "..                                                ...   \n",
       "83  ['this', 'place', 'was', 'one', 'of', 'the', '...   \n",
       "84  ['the', 'best', 'experience', 'i', 'ever', 'ha...   \n",
       "85  ['this', 'japanese', 'restaurant', 'is', 'so',...   \n",
       "86  ['hibachi', 'the', 'grill', 'is', 'one', 'of',...   \n",
       "87  ['i', 'went', 'to', 'this', 'restaurant', 'in'...   \n",
       "\n",
       "                                        unigram_feats  \\\n",
       "0   ['was', 'to', 'i', 'and', 'the', 'a_NEG', 'for...   \n",
       "1   ['to_NEG', 'the', 'want_NEG', 'the_NEG', 'in',...   \n",
       "2   ['to_NEG', 'the_NEG', 'and_NEG', 'we_NEG', 'ha...   \n",
       "3   ['the', 'i', 'salad', 'had', 'for', 'waiter', ...   \n",
       "4   ['a_NEG', 'there', 'us_NEG', 'but_NEG', 'and_N...   \n",
       "..                                                ...   \n",
       "83  ['the', 'i', 'and', 'this', 'best', 'is', 'are...   \n",
       "84  ['in', 'the', 'best', 'experience', 'i', 'ever...   \n",
       "85  ['the_NEG', 'is_NEG', 'japanese', 'a', 'and_NE...   \n",
       "86  ['the', 'is', 'it', 'hibachi', 'grill', 'of', ...   \n",
       "87  ['i', 'this', 'and', 'restaurant', 'in', 'expe...   \n",
       "\n",
       "                                         bigram_feats  \\\n",
       "0   ['i_went', 'went_to', 'to_xyz', 'xyz_restauran...   \n",
       "1   ['in_each', 'each_of', 'of_the', 'the_diner', ...   \n",
       "2   ['this_is', 'is_the', 'the_last', 'last_place'...   \n",
       "3   ['i_went', 'went_to', 'to_this', 'this_restaur...   \n",
       "4   ['i_went', 'went_there', 'there_with', 'with_t...   \n",
       "..                                                ...   \n",
       "83  ['this_place', 'place_was', 'was_one', 'one_of...   \n",
       "84  ['the_best', 'best_experience', 'experience_i'...   \n",
       "85  ['this_japanese', 'japanese_restaurant', 'rest...   \n",
       "86  ['hibachi_the', 'the_grill', 'grill_is', 'is_o...   \n",
       "87  ['i_went', 'went_to', 'to_this', 'this_restaur...   \n",
       "\n",
       "                                     bigram_feats_neg  \\\n",
       "0   ['i_went', 'went_to', 'to_xyz', 'xyz_restauran...   \n",
       "1   ['in_each', 'each_of', 'of_the', 'the_diner', ...   \n",
       "2   ['this_is', 'is_the', 'the_last', 'last_place'...   \n",
       "3   ['i_went', 'went_to', 'to_this', 'this_restaur...   \n",
       "4   ['i_went', 'went_there', 'there_with', 'with_t...   \n",
       "..                                                ...   \n",
       "83  ['this_place', 'place_was', 'was_one', 'one_of...   \n",
       "84  ['the_best', 'best_experience', 'experience_i'...   \n",
       "85  ['this_japanese', 'japanese_restaurant', 'rest...   \n",
       "86  ['hibachi_the', 'the_grill', 'grill_is', 'is_o...   \n",
       "87  ['i_went', 'went_to', 'to_this', 'this_restaur...   \n",
       "\n",
       "                                      no_shared_words  \n",
       "0   ['i', 'to', 'xyz', 'week', 'and', 'i', 'was', ...  \n",
       "1   ['in', 'of', 'the', 'diner', 'are', 'at', 'lea...  \n",
       "2   ['is', 'the', 'you', 'to', 'dine', 'at', 'the'...  \n",
       "3   ['i', 'to', 'i', 'had', 'for', 'the', 'complim...  \n",
       "4   ['i', 'two', 'at', 'queue', 'was', 'but', 'it'...  \n",
       "..                                                ...  \n",
       "83  ['was', 'one', 'of', 'the', 'i', 'the', 'is', ...  \n",
       "84  ['the', 'i', 'had', 'happened', 'in', 'london'...  \n",
       "85  ['is', 'so', 'popular', 'recently', 'as', 'a',...  \n",
       "86  ['hibachi', 'the', 'grill', 'is', 'one', 'of',...  \n",
       "87  ['i', 'to', 'in', 'downtown', 'new', 'york', '...  \n",
       "\n",
       "[88 rows x 40 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}