{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SENTIMENT ANALYSIS (PANDAS STYLE!)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 1: Import ALL the things!\n",
    "#### Libraries and paths and files\n",
    "I'm sure there is a cleaner way to do this, plz lmk [via email](mailto:danielcaraway42@gmail.com)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "negative = os.listdir('NEG/')\n",
    "positive = os.listdir('POS/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [],
   "source": [
    "positive_alltext = []\n",
    "for file in positive:\n",
    "    f=open('POS/'+file)\n",
    "    content=f.read()\n",
    "    positive_alltext.append(content)\n",
    "    f.close()\n",
    "\n",
    "negative_alltext = []\n",
    "for file in negative:\n",
    "    f=open('NEG/'+file)\n",
    "    content=f.read()\n",
    "    negative_alltext.append(content)\n",
    "    f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 2: Turn that fresh text into a pandas DF and add a column to mark it as either positive or negative"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {},
   "outputs": [],
   "source": [
    "positive_df = pd.DataFrame(positive_alltext)\n",
    "negative_df = pd.DataFrame(negative_alltext)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [],
   "source": [
    "positive_df['PoN'] = 'P'\n",
    "negative_df['PoN'] = 'N'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 185,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the pos and neg dfs\n",
    "all_df = positive_df.append(negative_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>PoN</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>films adapted from comic books have had plenty...</td>\n",
       "      <td>P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>you've got mail works alot better than it dese...</td>\n",
       "      <td>P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>\" jaws \" is a rare film that grabs your atten...</td>\n",
       "      <td>P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>every now and then a movie comes along from a ...</td>\n",
       "      <td>P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>P</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>that's exactly how long the movie felt to me ....</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>\" quest for camelot \" is warner bros . ' firs...</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>so ask yourself what \" 8mm \" ( \" eight millime...</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>synopsis : a mentally unstable man undergoing ...</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>capsule : in 2176 on the planet mars police ta...</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   0 PoN\n",
       "0  films adapted from comic books have had plenty...   P\n",
       "1  you've got mail works alot better than it dese...   P\n",
       "2   \" jaws \" is a rare film that grabs your atten...   P\n",
       "3  every now and then a movie comes along from a ...   P\n",
       "4  moviemaking is a lot like being the general ma...   P\n",
       "0  that's exactly how long the movie felt to me ....   N\n",
       "1   \" quest for camelot \" is warner bros . ' firs...   N\n",
       "2  so ask yourself what \" 8mm \" ( \" eight millime...   N\n",
       "3  synopsis : a mentally unstable man undergoing ...   N\n",
       "4  capsule : in 2176 on the planet mars police ta...   N"
      ]
     },
     "execution_count": 186,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Our results!\n",
    "all_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 3: TOKENIZE (and clean)!!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [],
   "source": [
    "''' \n",
    "clean_tokens = [word.lower() for word in tokens if word.isalpha()]\n",
    "IN ENGLISH: for every word in this set of words lower case the word if it is \"is alpha\"\n",
    "\"isalpha()\" meaning \"not a number or punctuation\"\n",
    "'''\n",
    "\n",
    "from nltk.tokenize import word_tokenize\n",
    "def get_tokens(sentence):\n",
    "    tokens = word_tokenize(sentence)\n",
    "    clean_tokens = [word.lower() for word in tokens if word.isalpha()]\n",
    "    return clean_tokens\n",
    "\n",
    "all_df['tokenized'] = all_df.apply(lambda x: get_tokens(x[0]),axis=1)\n",
    "all_df['tokenized_count'] = all_df.apply(lambda x: len(x['tokenized']),axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 4: Remove Stopwords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.corpus import stopwords\n",
    "stop_words = set(stopwords.words(\"english\"))\n",
    "def remove_stopwords(sentence):\n",
    "    filtered_text = []\n",
    "    for word in sentence:\n",
    "        if word not in stop_words:\n",
    "            filtered_text.append(word)\n",
    "    return filtered_text\n",
    "all_df['no_stopwords'] = all_df.apply(lambda x: remove_stopwords(x['tokenized']),axis=1)\n",
    "all_df['no_stopwords_count'] = all_df.apply(lambda x: len(x['no_stopwords']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>PoN</th>\n",
       "      <th>tokenized</th>\n",
       "      <th>tokenized_count</th>\n",
       "      <th>no_stopwords</th>\n",
       "      <th>no_stopwords_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>films adapted from comic books have had plenty...</td>\n",
       "      <td>P</td>\n",
       "      <td>[films, adapted, from, comic, books, have, had...</td>\n",
       "      <td>673</td>\n",
       "      <td>[films, adapted, comic, books, plenty, success...</td>\n",
       "      <td>387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>you've got mail works alot better than it dese...</td>\n",
       "      <td>P</td>\n",
       "      <td>[you, got, mail, works, alot, better, than, it...</td>\n",
       "      <td>412</td>\n",
       "      <td>[got, mail, works, alot, better, deserves, ord...</td>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>\" jaws \" is a rare film that grabs your atten...</td>\n",
       "      <td>P</td>\n",
       "      <td>[jaws, is, a, rare, film, that, grabs, your, a...</td>\n",
       "      <td>993</td>\n",
       "      <td>[jaws, rare, film, grabs, attention, shows, si...</td>\n",
       "      <td>552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>every now and then a movie comes along from a ...</td>\n",
       "      <td>P</td>\n",
       "      <td>[every, now, and, then, a, movie, comes, along...</td>\n",
       "      <td>628</td>\n",
       "      <td>[every, movie, comes, along, suspect, studio, ...</td>\n",
       "      <td>326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>P</td>\n",
       "      <td>[moviemaking, is, a, lot, like, being, the, ge...</td>\n",
       "      <td>630</td>\n",
       "      <td>[moviemaking, lot, like, general, manager, nfl...</td>\n",
       "      <td>345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>that's exactly how long the movie felt to me ....</td>\n",
       "      <td>N</td>\n",
       "      <td>[that, exactly, how, long, the, movie, felt, t...</td>\n",
       "      <td>550</td>\n",
       "      <td>[exactly, long, movie, felt, even, nine, laugh...</td>\n",
       "      <td>308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>\" quest for camelot \" is warner bros . ' firs...</td>\n",
       "      <td>N</td>\n",
       "      <td>[quest, for, camelot, is, warner, bros, first,...</td>\n",
       "      <td>444</td>\n",
       "      <td>[quest, camelot, warner, bros, first, attempt,...</td>\n",
       "      <td>247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>so ask yourself what \" 8mm \" ( \" eight millime...</td>\n",
       "      <td>N</td>\n",
       "      <td>[so, ask, yourself, what, eight, millimeter, i...</td>\n",
       "      <td>527</td>\n",
       "      <td>[ask, eight, millimeter, really, wholesome, su...</td>\n",
       "      <td>283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>synopsis : a mentally unstable man undergoing ...</td>\n",
       "      <td>N</td>\n",
       "      <td>[synopsis, a, mentally, unstable, man, undergo...</td>\n",
       "      <td>706</td>\n",
       "      <td>[synopsis, mentally, unstable, man, undergoing...</td>\n",
       "      <td>371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>capsule : in 2176 on the planet mars police ta...</td>\n",
       "      <td>N</td>\n",
       "      <td>[capsule, in, on, the, planet, mars, police, t...</td>\n",
       "      <td>649</td>\n",
       "      <td>[capsule, planet, mars, police, taking, custod...</td>\n",
       "      <td>355</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   0 PoN  \\\n",
       "0  films adapted from comic books have had plenty...   P   \n",
       "1  you've got mail works alot better than it dese...   P   \n",
       "2   \" jaws \" is a rare film that grabs your atten...   P   \n",
       "3  every now and then a movie comes along from a ...   P   \n",
       "4  moviemaking is a lot like being the general ma...   P   \n",
       "0  that's exactly how long the movie felt to me ....   N   \n",
       "1   \" quest for camelot \" is warner bros . ' firs...   N   \n",
       "2  so ask yourself what \" 8mm \" ( \" eight millime...   N   \n",
       "3  synopsis : a mentally unstable man undergoing ...   N   \n",
       "4  capsule : in 2176 on the planet mars police ta...   N   \n",
       "\n",
       "                                           tokenized  tokenized_count  \\\n",
       "0  [films, adapted, from, comic, books, have, had...              673   \n",
       "1  [you, got, mail, works, alot, better, than, it...              412   \n",
       "2  [jaws, is, a, rare, film, that, grabs, your, a...              993   \n",
       "3  [every, now, and, then, a, movie, comes, along...              628   \n",
       "4  [moviemaking, is, a, lot, like, being, the, ge...              630   \n",
       "0  [that, exactly, how, long, the, movie, felt, t...              550   \n",
       "1  [quest, for, camelot, is, warner, bros, first,...              444   \n",
       "2  [so, ask, yourself, what, eight, millimeter, i...              527   \n",
       "3  [synopsis, a, mentally, unstable, man, undergo...              706   \n",
       "4  [capsule, in, on, the, planet, mars, police, t...              649   \n",
       "\n",
       "                                        no_stopwords  no_stopwords_count  \n",
       "0  [films, adapted, comic, books, plenty, success...                 387  \n",
       "1  [got, mail, works, alot, better, deserves, ord...                 203  \n",
       "2  [jaws, rare, film, grabs, attention, shows, si...                 552  \n",
       "3  [every, movie, comes, along, suspect, studio, ...                 326  \n",
       "4  [moviemaking, lot, like, general, manager, nfl...                 345  \n",
       "0  [exactly, long, movie, felt, even, nine, laugh...                 308  \n",
       "1  [quest, camelot, warner, bros, first, attempt,...                 247  \n",
       "2  [ask, eight, millimeter, really, wholesome, su...                 283  \n",
       "3  [synopsis, mentally, unstable, man, undergoing...                 371  \n",
       "4  [capsule, planet, mars, police, taking, custod...                 355  "
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 5: Create a Frequency Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.probability import FreqDist\n",
    "def get_most_common(tokens):\n",
    "    fdist = FreqDist(tokens)\n",
    "    return fdist.most_common(1)\n",
    "all_df['most_common_unfiltered_word'] = all_df.apply(lambda x: get_most_common(x['tokenized']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.probability import FreqDist\n",
    "def get_most_common(tokens):\n",
    "    fdist = FreqDist(tokens)\n",
    "    return fdist.most_common(5)\n",
    "all_df['most_common_filtered_word'] = all_df.apply(lambda x: get_most_common(x['no_stopwords']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>PoN</th>\n",
       "      <th>tokenized</th>\n",
       "      <th>tokenized_count</th>\n",
       "      <th>no_stopwords</th>\n",
       "      <th>no_stopwords_count</th>\n",
       "      <th>most_common_unfiltered_word</th>\n",
       "      <th>most_common_filtered_word</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>films adapted from comic books have had plenty...</td>\n",
       "      <td>P</td>\n",
       "      <td>[films, adapted, from, comic, books, have, had...</td>\n",
       "      <td>673</td>\n",
       "      <td>[films, adapted, comic, books, plenty, success...</td>\n",
       "      <td>387</td>\n",
       "      <td>[(the, 46)]</td>\n",
       "      <td>[(comic, 5), (hell, 5), (film, 5), (like, 4), ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>you've got mail works alot better than it dese...</td>\n",
       "      <td>P</td>\n",
       "      <td>[you, got, mail, works, alot, better, than, it...</td>\n",
       "      <td>412</td>\n",
       "      <td>[got, mail, works, alot, better, deserves, ord...</td>\n",
       "      <td>203</td>\n",
       "      <td>[(the, 33)]</td>\n",
       "      <td>[(two, 3), (shop, 3), (much, 3), (fox, 3), (go...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>\" jaws \" is a rare film that grabs your atten...</td>\n",
       "      <td>P</td>\n",
       "      <td>[jaws, is, a, rare, film, that, grabs, your, a...</td>\n",
       "      <td>993</td>\n",
       "      <td>[jaws, rare, film, grabs, attention, shows, si...</td>\n",
       "      <td>552</td>\n",
       "      <td>[(the, 63)]</td>\n",
       "      <td>[(shark, 16), (jaws, 8), (film, 7), (spielberg...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>every now and then a movie comes along from a ...</td>\n",
       "      <td>P</td>\n",
       "      <td>[every, now, and, then, a, movie, comes, along...</td>\n",
       "      <td>628</td>\n",
       "      <td>[every, movie, comes, along, suspect, studio, ...</td>\n",
       "      <td>326</td>\n",
       "      <td>[(the, 35)]</td>\n",
       "      <td>[(even, 6), (gets, 6), (film, 5), (school, 5),...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>P</td>\n",
       "      <td>[moviemaking, is, a, lot, like, being, the, ge...</td>\n",
       "      <td>630</td>\n",
       "      <td>[moviemaking, lot, like, general, manager, nfl...</td>\n",
       "      <td>345</td>\n",
       "      <td>[(the, 41)]</td>\n",
       "      <td>[(jackie, 10), (like, 9), (chan, 8), (got, 4),...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>that's exactly how long the movie felt to me ....</td>\n",
       "      <td>N</td>\n",
       "      <td>[that, exactly, how, long, the, movie, felt, t...</td>\n",
       "      <td>550</td>\n",
       "      <td>[exactly, long, movie, felt, even, nine, laugh...</td>\n",
       "      <td>308</td>\n",
       "      <td>[(the, 31)]</td>\n",
       "      <td>[(grant, 12), (movie, 7), (nine, 5), (hugh, 5)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>\" quest for camelot \" is warner bros . ' firs...</td>\n",
       "      <td>N</td>\n",
       "      <td>[quest, for, camelot, is, warner, bros, first,...</td>\n",
       "      <td>444</td>\n",
       "      <td>[quest, camelot, warner, bros, first, attempt,...</td>\n",
       "      <td>247</td>\n",
       "      <td>[(the, 21)]</td>\n",
       "      <td>[(quest, 5), (camelot, 4), (kayley, 4), (disne...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>so ask yourself what \" 8mm \" ( \" eight millime...</td>\n",
       "      <td>N</td>\n",
       "      <td>[so, ask, yourself, what, eight, millimeter, i...</td>\n",
       "      <td>527</td>\n",
       "      <td>[ask, eight, millimeter, really, wholesome, su...</td>\n",
       "      <td>283</td>\n",
       "      <td>[(of, 21)]</td>\n",
       "      <td>[(like, 4), (schumacher, 4), (film, 4), (welle...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>synopsis : a mentally unstable man undergoing ...</td>\n",
       "      <td>N</td>\n",
       "      <td>[synopsis, a, mentally, unstable, man, undergo...</td>\n",
       "      <td>706</td>\n",
       "      <td>[synopsis, mentally, unstable, man, undergoing...</td>\n",
       "      <td>371</td>\n",
       "      <td>[(the, 48)]</td>\n",
       "      <td>[(stalked, 12), (daryl, 7), (stalker, 6), (bro...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>capsule : in 2176 on the planet mars police ta...</td>\n",
       "      <td>N</td>\n",
       "      <td>[capsule, in, on, the, planet, mars, police, t...</td>\n",
       "      <td>649</td>\n",
       "      <td>[capsule, planet, mars, police, taking, custod...</td>\n",
       "      <td>355</td>\n",
       "      <td>[(the, 30)]</td>\n",
       "      <td>[(mars, 14), (ghosts, 10), (carpenter, 8), (fi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   0 PoN  \\\n",
       "0  films adapted from comic books have had plenty...   P   \n",
       "1  you've got mail works alot better than it dese...   P   \n",
       "2   \" jaws \" is a rare film that grabs your atten...   P   \n",
       "3  every now and then a movie comes along from a ...   P   \n",
       "4  moviemaking is a lot like being the general ma...   P   \n",
       "0  that's exactly how long the movie felt to me ....   N   \n",
       "1   \" quest for camelot \" is warner bros . ' firs...   N   \n",
       "2  so ask yourself what \" 8mm \" ( \" eight millime...   N   \n",
       "3  synopsis : a mentally unstable man undergoing ...   N   \n",
       "4  capsule : in 2176 on the planet mars police ta...   N   \n",
       "\n",
       "                                           tokenized  tokenized_count  \\\n",
       "0  [films, adapted, from, comic, books, have, had...              673   \n",
       "1  [you, got, mail, works, alot, better, than, it...              412   \n",
       "2  [jaws, is, a, rare, film, that, grabs, your, a...              993   \n",
       "3  [every, now, and, then, a, movie, comes, along...              628   \n",
       "4  [moviemaking, is, a, lot, like, being, the, ge...              630   \n",
       "0  [that, exactly, how, long, the, movie, felt, t...              550   \n",
       "1  [quest, for, camelot, is, warner, bros, first,...              444   \n",
       "2  [so, ask, yourself, what, eight, millimeter, i...              527   \n",
       "3  [synopsis, a, mentally, unstable, man, undergo...              706   \n",
       "4  [capsule, in, on, the, planet, mars, police, t...              649   \n",
       "\n",
       "                                        no_stopwords  no_stopwords_count  \\\n",
       "0  [films, adapted, comic, books, plenty, success...                 387   \n",
       "1  [got, mail, works, alot, better, deserves, ord...                 203   \n",
       "2  [jaws, rare, film, grabs, attention, shows, si...                 552   \n",
       "3  [every, movie, comes, along, suspect, studio, ...                 326   \n",
       "4  [moviemaking, lot, like, general, manager, nfl...                 345   \n",
       "0  [exactly, long, movie, felt, even, nine, laugh...                 308   \n",
       "1  [quest, camelot, warner, bros, first, attempt,...                 247   \n",
       "2  [ask, eight, millimeter, really, wholesome, su...                 283   \n",
       "3  [synopsis, mentally, unstable, man, undergoing...                 371   \n",
       "4  [capsule, planet, mars, police, taking, custod...                 355   \n",
       "\n",
       "  most_common_unfiltered_word  \\\n",
       "0                 [(the, 46)]   \n",
       "1                 [(the, 33)]   \n",
       "2                 [(the, 63)]   \n",
       "3                 [(the, 35)]   \n",
       "4                 [(the, 41)]   \n",
       "0                 [(the, 31)]   \n",
       "1                 [(the, 21)]   \n",
       "2                  [(of, 21)]   \n",
       "3                 [(the, 48)]   \n",
       "4                 [(the, 30)]   \n",
       "\n",
       "                           most_common_filtered_word  \n",
       "0  [(comic, 5), (hell, 5), (film, 5), (like, 4), ...  \n",
       "1  [(two, 3), (shop, 3), (much, 3), (fox, 3), (go...  \n",
       "2  [(shark, 16), (jaws, 8), (film, 7), (spielberg...  \n",
       "3  [(even, 6), (gets, 6), (film, 5), (school, 5),...  \n",
       "4  [(jackie, 10), (like, 9), (chan, 8), (got, 4),...  \n",
       "0  [(grant, 12), (movie, 7), (nine, 5), (hugh, 5)...  \n",
       "1  [(quest, 5), (camelot, 4), (kayley, 4), (disne...  \n",
       "2  [(like, 4), (schumacher, 4), (film, 4), (welle...  \n",
       "3  [(stalked, 12), (daryl, 7), (stalker, 6), (bro...  \n",
       "4  [(mars, 14), (ghosts, 10), (carpenter, 8), (fi...  "
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
