{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SENTIMENT ANALYSIS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "(via [these docs](http://www.nltk.org/howto/sentiment.html))  |  10-06-19"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 1: Import ALL the things"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.classify import NaiveBayesClassifier\n",
    "from nltk.corpus import subjectivity\n",
    "from nltk.sentiment import SentimentAnalyzer\n",
    "from nltk.sentiment.util import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 2: Borrow `subjective` and `objective` sentences from NLTK's `subjectivity` corpus for practice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of sentences to take from each category of the corpus.\n",
    "n_instances = 100\n",
    "\n",
    "# Pair every sentence (a list of tokens) with its category label.\n",
    "subj_docs = [\n",
    "    (tokens, 'subj')\n",
    "    for tokens in subjectivity.sents(categories='subj')[:n_instances]\n",
    "]\n",
    "obj_docs = [\n",
    "    (tokens, 'obj')\n",
    "    for tokens in subjectivity.sents(categories='obj')[:n_instances]\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 3: Create `test` and `train` for both `subj` and `obj`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 80/20 train/test split per category. The cut points are derived from the\n",
    "# list lengths (integer arithmetic, so no float rounding) instead of the\n",
    "# hard-coded 80/100, so the split stays valid when `n_instances` changes.\n",
    "# With 100 documents per category this yields exactly [:80] and [80:100].\n",
    "subj_split = len(subj_docs) * 4 // 5\n",
    "obj_split = len(obj_docs) * 4 // 5\n",
    "\n",
    "train_subj_docs = subj_docs[:subj_split]\n",
    "test_subj_docs = subj_docs[subj_split:]\n",
    "train_obj_docs = obj_docs[:obj_split]\n",
    "test_obj_docs = obj_docs[obj_split:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 4: Combine the `subj` and `obj` splits into a single `train` set and a single `test` set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Merge the per-category splits: subjective documents first, then objective.\n",
    "training_docs = [*train_subj_docs, *train_obj_docs]\n",
    "testing_docs = [*test_subj_docs, *test_obj_docs]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 5: Mark negation in the training docs with `mark_negation`, then collect all words with `SentimentAnalyzer`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentim_analyzer = SentimentAnalyzer()\n",
    "\n",
    "# Per the NLTK docs, `mark_negation` appends a `_NEG` suffix to the words that\n",
    "# fall between a negation and the next punctuation mark. `all_words` then\n",
    "# gathers the words of every negation-marked training document.\n",
    "all_words_neg = sentim_analyzer.all_words(\n",
    "    list(map(mark_negation, training_docs))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(['smart',\n",
       "   'and',\n",
       "   'alert',\n",
       "   ',',\n",
       "   'thirteen',\n",
       "   'conversations',\n",
       "   'about',\n",
       "   'one',\n",
       "   'thing',\n",
       "   'is',\n",
       "   'a',\n",
       "   'small',\n",
       "   'gem',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['color',\n",
       "   ',',\n",
       "   'musical',\n",
       "   'bounce',\n",
       "   'and',\n",
       "   'warm',\n",
       "   'seas',\n",
       "   'lapping',\n",
       "   'on',\n",
       "   'island',\n",
       "   'shores',\n",
       "   '.',\n",
       "   'and',\n",
       "   'just',\n",
       "   'enough',\n",
       "   'science',\n",
       "   'to',\n",
       "   'send',\n",
       "   'you',\n",
       "   'home',\n",
       "   'thinking',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['it',\n",
       "   'is',\n",
       "   'not',\n",
       "   'a',\n",
       "   'mass-market',\n",
       "   'entertainment',\n",
       "   'but',\n",
       "   'an',\n",
       "   'uncompromising',\n",
       "   'attempt',\n",
       "   'by',\n",
       "   'one',\n",
       "   'artist',\n",
       "   'to',\n",
       "   'think',\n",
       "   'about',\n",
       "   'another',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'light-hearted',\n",
       "   'french',\n",
       "   'film',\n",
       "   'about',\n",
       "   'the',\n",
       "   'spiritual',\n",
       "   'quest',\n",
       "   'of',\n",
       "   'a',\n",
       "   'fashion',\n",
       "   'model',\n",
       "   'seeking',\n",
       "   'peace',\n",
       "   'of',\n",
       "   'mind',\n",
       "   'while',\n",
       "   'in',\n",
       "   'a',\n",
       "   'love',\n",
       "   'affair',\n",
       "   'with',\n",
       "   'a',\n",
       "   'veterinarian',\n",
       "   'who',\n",
       "   'is',\n",
       "   'a',\n",
       "   'non-practicing',\n",
       "   'jew',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['my',\n",
       "   'wife',\n",
       "   'is',\n",
       "   'an',\n",
       "   'actress',\n",
       "   'has',\n",
       "   'its',\n",
       "   'moments',\n",
       "   'in',\n",
       "   'looking',\n",
       "   'at',\n",
       "   'the',\n",
       "   'comic',\n",
       "   'effects',\n",
       "   'of',\n",
       "   'jealousy',\n",
       "   '.',\n",
       "   'in',\n",
       "   'the',\n",
       "   'end',\n",
       "   ',',\n",
       "   'though',\n",
       "   ',',\n",
       "   'it',\n",
       "   'is',\n",
       "   'only',\n",
       "   'mildly',\n",
       "   'amusing',\n",
       "   'when',\n",
       "   'it',\n",
       "   'could',\n",
       "   'have',\n",
       "   'been',\n",
       "   'so',\n",
       "   'much',\n",
       "   'more',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['works',\n",
       "   'both',\n",
       "   'as',\n",
       "   'an',\n",
       "   'engaging',\n",
       "   'drama',\n",
       "   'and',\n",
       "   'an',\n",
       "   'incisive',\n",
       "   'look',\n",
       "   'at',\n",
       "   'the',\n",
       "   'difficulties',\n",
       "   'facing',\n",
       "   'native',\n",
       "   'americans',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['even',\n",
       "   'a',\n",
       "   'hardened',\n",
       "   'voyeur',\n",
       "   'would',\n",
       "   'require',\n",
       "   'the',\n",
       "   'patience',\n",
       "   'of',\n",
       "   'job',\n",
       "   'to',\n",
       "   'get',\n",
       "   'through',\n",
       "   'this',\n",
       "   'interminable',\n",
       "   ',',\n",
       "   'shapeless',\n",
       "   'documentary',\n",
       "   'about',\n",
       "   'the',\n",
       "   'swinging',\n",
       "   'subculture',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['when',\n",
       "   'perry',\n",
       "   'fists',\n",
       "   'a',\n",
       "   'bull',\n",
       "   'at',\n",
       "   'the',\n",
       "   'moore',\n",
       "   'farm',\n",
       "   ',',\n",
       "   \"it's\",\n",
       "   'only',\n",
       "   'a',\n",
       "   'matter',\n",
       "   'of',\n",
       "   'time',\n",
       "   'before',\n",
       "   'he',\n",
       "   'gets',\n",
       "   'the',\n",
       "   'upper',\n",
       "   'hand',\n",
       "   'in',\n",
       "   'matters',\n",
       "   'of',\n",
       "   'the',\n",
       "   'heart',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   'characters',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'are',\n",
       "   'paper-thin',\n",
       "   ',',\n",
       "   'and',\n",
       "   'their',\n",
       "   'personalities',\n",
       "   'undergo',\n",
       "   'radical',\n",
       "   'changes',\n",
       "   'when',\n",
       "   'it',\n",
       "   'suits',\n",
       "   'the',\n",
       "   'script',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   'script',\n",
       "   'is',\n",
       "   'a',\n",
       "   'tired',\n",
       "   'one',\n",
       "   ',',\n",
       "   'with',\n",
       "   'few',\n",
       "   'moments',\n",
       "   'of',\n",
       "   'joy',\n",
       "   'rising',\n",
       "   'above',\n",
       "   'the',\n",
       "   'stale',\n",
       "   'material',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   'bland',\n",
       "   'outweighs',\n",
       "   'the',\n",
       "   'nifty',\n",
       "   ',',\n",
       "   'and',\n",
       "   'cletis',\n",
       "   'tout',\n",
       "   'never',\n",
       "   'becomes',\n",
       "   'the',\n",
       "   'clever',\n",
       "   'crime',\n",
       "   'comedy',\n",
       "   'it',\n",
       "   'thinks',\n",
       "   'it',\n",
       "   'is',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['directed',\n",
       "   'by',\n",
       "   'david',\n",
       "   'twohy',\n",
       "   'with',\n",
       "   'the',\n",
       "   'same',\n",
       "   'great',\n",
       "   'eye',\n",
       "   'for',\n",
       "   'eerie',\n",
       "   'understatement',\n",
       "   'that',\n",
       "   'he',\n",
       "   'brought',\n",
       "   'to',\n",
       "   'pitch',\n",
       "   'black',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"it's\",\n",
       "   'a',\n",
       "   'very',\n",
       "   'tasteful',\n",
       "   'rock',\n",
       "   'and',\n",
       "   'roll',\n",
       "   'movie',\n",
       "   '.',\n",
       "   'you',\n",
       "   'could',\n",
       "   'put',\n",
       "   'it',\n",
       "   'on',\n",
       "   'a',\n",
       "   'coffee',\n",
       "   'table',\n",
       "   'anywhere',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['provides',\n",
       "   'the',\n",
       "   'kind',\n",
       "   'of',\n",
       "   \"'laugh\",\n",
       "   \"therapy'\",\n",
       "   'i',\n",
       "   'need',\n",
       "   'from',\n",
       "   'movie',\n",
       "   'comedies',\n",
       "   '--',\n",
       "   'offbeat',\n",
       "   'humor',\n",
       "   ',',\n",
       "   'amusing',\n",
       "   'characters',\n",
       "   ',',\n",
       "   'and',\n",
       "   'a',\n",
       "   'happy',\n",
       "   'ending',\n",
       "   '.',\n",
       "   'after',\n",
       "   'seeing',\n",
       "   \"'analyze\",\n",
       "   'that',\n",
       "   ',',\n",
       "   \"'\",\n",
       "   'i',\n",
       "   'feel',\n",
       "   'better',\n",
       "   'already',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['worth',\n",
       "   'a',\n",
       "   'look',\n",
       "   'by',\n",
       "   'those',\n",
       "   'on',\n",
       "   'both',\n",
       "   'sides',\n",
       "   'of',\n",
       "   'the',\n",
       "   'issues',\n",
       "   ',',\n",
       "   'if',\n",
       "   'only',\n",
       "   'for',\n",
       "   'the',\n",
       "   'perspective',\n",
       "   'it',\n",
       "   'offers',\n",
       "   ',',\n",
       "   'one',\n",
       "   'the',\n",
       "   'public',\n",
       "   'rarely',\n",
       "   'sees',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['watching',\n",
       "   'the',\n",
       "   'film',\n",
       "   'is',\n",
       "   'like',\n",
       "   'reading',\n",
       "   'a',\n",
       "   'times',\n",
       "   'portrait',\n",
       "   'of',\n",
       "   'grief',\n",
       "   'that',\n",
       "   'keeps',\n",
       "   'shifting',\n",
       "   'focus',\n",
       "   'to',\n",
       "   'the',\n",
       "   'journalist',\n",
       "   'who',\n",
       "   'wrote',\n",
       "   'it',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['despite',\n",
       "   'these',\n",
       "   'annoyances',\n",
       "   ',',\n",
       "   'the',\n",
       "   'capable',\n",
       "   'clayburgh',\n",
       "   'and',\n",
       "   'tambor',\n",
       "   'really',\n",
       "   'do',\n",
       "   'a',\n",
       "   'great',\n",
       "   'job',\n",
       "   'of',\n",
       "   'anchoring',\n",
       "   'the',\n",
       "   'characters',\n",
       "   'in',\n",
       "   'the',\n",
       "   'emotional',\n",
       "   'realities',\n",
       "   'of',\n",
       "   'middle',\n",
       "   'age',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"it's\",\n",
       "   'a',\n",
       "   'good',\n",
       "   'thing',\n",
       "   'that',\n",
       "   'woolly',\n",
       "   'mammoths',\n",
       "   'are',\n",
       "   'extinct',\n",
       "   ',',\n",
       "   'because',\n",
       "   'this',\n",
       "   'movie',\n",
       "   'will',\n",
       "   'have',\n",
       "   'every',\n",
       "   'kid',\n",
       "   'in',\n",
       "   'the',\n",
       "   'schoolyard',\n",
       "   'wishing',\n",
       "   'for',\n",
       "   'their',\n",
       "   'very',\n",
       "   'own',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['preposterous',\n",
       "   'and',\n",
       "   'tedious',\n",
       "   ',',\n",
       "   'sonny',\n",
       "   'is',\n",
       "   'spiked',\n",
       "   'with',\n",
       "   'unintentional',\n",
       "   'laughter',\n",
       "   'that',\n",
       "   ',',\n",
       "   'unfortunately',\n",
       "   ',',\n",
       "   'occurs',\n",
       "   'too',\n",
       "   'infrequently',\n",
       "   'to',\n",
       "   'make',\n",
       "   'the',\n",
       "   'film',\n",
       "   'even',\n",
       "   'a',\n",
       "   'guilty',\n",
       "   'pleasure',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['4ever',\n",
       "   'has',\n",
       "   'the',\n",
       "   'same',\n",
       "   'sledgehammer',\n",
       "   'appeal',\n",
       "   'as',\n",
       "   'pokemon',\n",
       "   'videos',\n",
       "   ',',\n",
       "   'but',\n",
       "   'it',\n",
       "   'breathes',\n",
       "   'more',\n",
       "   'on',\n",
       "   'the',\n",
       "   'big',\n",
       "   'screen',\n",
       "   'and',\n",
       "   'induces',\n",
       "   'headaches',\n",
       "   'more',\n",
       "   'slowly',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['si',\n",
       "   'el',\n",
       "   'siglo',\n",
       "   'xxi',\n",
       "   'necesita',\n",
       "   'de',\n",
       "   'hÃ©roes',\n",
       "   ',',\n",
       "   'el',\n",
       "   'hombre',\n",
       "   'araÃ±a',\n",
       "   'parece',\n",
       "   'haber',\n",
       "   'llegado',\n",
       "   'para',\n",
       "   'quedarse',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"it's\",\n",
       "   'hard',\n",
       "   'to',\n",
       "   'tell',\n",
       "   'with',\n",
       "   'all',\n",
       "   'the',\n",
       "   'crashing',\n",
       "   'and',\n",
       "   'banging',\n",
       "   'where',\n",
       "   'the',\n",
       "   'salesmanship',\n",
       "   'ends',\n",
       "   'and',\n",
       "   'the',\n",
       "   'movie',\n",
       "   'begins',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['it',\n",
       "   'desperately',\n",
       "   'wants',\n",
       "   'to',\n",
       "   'be',\n",
       "   'a',\n",
       "   'wacky',\n",
       "   ',',\n",
       "   'screwball',\n",
       "   'comedy',\n",
       "   ',',\n",
       "   'but',\n",
       "   'the',\n",
       "   'most',\n",
       "   'screwy',\n",
       "   'thing',\n",
       "   'here',\n",
       "   'is',\n",
       "   'how',\n",
       "   'so',\n",
       "   'many',\n",
       "   'talented',\n",
       "   'people',\n",
       "   'were',\n",
       "   'convinced',\n",
       "   'to',\n",
       "   'waste',\n",
       "   'their',\n",
       "   'time',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['writer/director',\n",
       "   'walter',\n",
       "   'hill',\n",
       "   'is',\n",
       "   'in',\n",
       "   'his',\n",
       "   'hypermasculine',\n",
       "   'element',\n",
       "   'here',\n",
       "   ',',\n",
       "   'once',\n",
       "   'again',\n",
       "   'able',\n",
       "   'to',\n",
       "   'inject',\n",
       "   'some',\n",
       "   'real',\n",
       "   'vitality',\n",
       "   'and',\n",
       "   'even',\n",
       "   'art',\n",
       "   'into',\n",
       "   'a',\n",
       "   'pulpy',\n",
       "   'concept',\n",
       "   'that',\n",
       "   ',',\n",
       "   'in',\n",
       "   'many',\n",
       "   'other',\n",
       "   'hands',\n",
       "   'would',\n",
       "   'be',\n",
       "   'completely',\n",
       "   'forgettable',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['hill',\n",
       "   'looks',\n",
       "   'to',\n",
       "   'be',\n",
       "   'going',\n",
       "   'through',\n",
       "   'the',\n",
       "   'motions',\n",
       "   ',',\n",
       "   'beginning',\n",
       "   'with',\n",
       "   'the',\n",
       "   'pale',\n",
       "   'script',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['extremely',\n",
       "   'well',\n",
       "   'cast',\n",
       "   ',',\n",
       "   'especially',\n",
       "   'in',\n",
       "   'the',\n",
       "   'large',\n",
       "   'number',\n",
       "   'of',\n",
       "   'supporting',\n",
       "   'roles',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'chilling',\n",
       "   'tale',\n",
       "   'of',\n",
       "   'one',\n",
       "   'of',\n",
       "   'the',\n",
       "   'great',\n",
       "   'crimes',\n",
       "   'of',\n",
       "   '20th',\n",
       "   'century',\n",
       "   'france',\n",
       "   ':',\n",
       "   'the',\n",
       "   'murder',\n",
       "   'of',\n",
       "   'two',\n",
       "   'rich',\n",
       "   'women',\n",
       "   'by',\n",
       "   'their',\n",
       "   'servants',\n",
       "   'in',\n",
       "   '1933',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'dark',\n",
       "   ',',\n",
       "   'quirky',\n",
       "   'road',\n",
       "   'movie',\n",
       "   'that',\n",
       "   'constantly',\n",
       "   'defies',\n",
       "   'expectation',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['impostor',\n",
       "   \"doesn't\",\n",
       "   'do',\n",
       "   'much',\n",
       "   'with',\n",
       "   'its',\n",
       "   'template',\n",
       "   ',',\n",
       "   'despite',\n",
       "   'a',\n",
       "   'remarkably',\n",
       "   'strong',\n",
       "   'cast',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['though',\n",
       "   'it',\n",
       "   'lacks',\n",
       "   'the',\n",
       "   'utter',\n",
       "   'authority',\n",
       "   'of',\n",
       "   'a',\n",
       "   'genre',\n",
       "   'gem',\n",
       "   ',',\n",
       "   \"there's\",\n",
       "   'a',\n",
       "   'certain',\n",
       "   'robustness',\n",
       "   'to',\n",
       "   'this',\n",
       "   'engaging',\n",
       "   'mix',\n",
       "   'of',\n",
       "   'love',\n",
       "   'and',\n",
       "   'bloodletting',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['if',\n",
       "   'you',\n",
       "   'can',\n",
       "   'keep',\n",
       "   'your',\n",
       "   'eyes',\n",
       "   'open',\n",
       "   'amid',\n",
       "   'all',\n",
       "   'the',\n",
       "   'blood',\n",
       "   'and',\n",
       "   'gore',\n",
       "   ',',\n",
       "   \"you'll\",\n",
       "   'see',\n",
       "   'del',\n",
       "   'toro',\n",
       "   'has',\n",
       "   'brought',\n",
       "   'unexpected',\n",
       "   'gravity',\n",
       "   'to',\n",
       "   'blade',\n",
       "   'ii',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"there's\",\n",
       "   'undeniable',\n",
       "   'enjoyment',\n",
       "   'to',\n",
       "   'be',\n",
       "   'had',\n",
       "   'from',\n",
       "   'films',\n",
       "   'crammed',\n",
       "   'with',\n",
       "   'movie',\n",
       "   'references',\n",
       "   ',',\n",
       "   'but',\n",
       "   'the',\n",
       "   'fun',\n",
       "   'wears',\n",
       "   'thin',\n",
       "   '--',\n",
       "   'then',\n",
       "   'out',\n",
       "   '--',\n",
       "   'when',\n",
       "   \"there's\",\n",
       "   'nothing',\n",
       "   'else',\n",
       "   'happening',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'work',\n",
       "   'that',\n",
       "   'lacks',\n",
       "   'both',\n",
       "   'a',\n",
       "   'purpose',\n",
       "   'and',\n",
       "   'a',\n",
       "   'strong',\n",
       "   'pulse',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['it',\n",
       "   'helps',\n",
       "   'that',\n",
       "   'lil',\n",
       "   'bow',\n",
       "   'wow',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'tones',\n",
       "   'down',\n",
       "   'his',\n",
       "   'pint-sized',\n",
       "   'gangsta',\n",
       "   'act',\n",
       "   'to',\n",
       "   'play',\n",
       "   'someone',\n",
       "   'who',\n",
       "   'resembles',\n",
       "   'a',\n",
       "   'real',\n",
       "   'kid',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'mimetic',\n",
       "   'approximation',\n",
       "   'of',\n",
       "   'better',\n",
       "   'films',\n",
       "   'like',\n",
       "   'contempt',\n",
       "   'and',\n",
       "   '8',\n",
       "   '1/2',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['eastwood',\n",
       "   'is',\n",
       "   'an',\n",
       "   'icon',\n",
       "   'of',\n",
       "   'moviemaking',\n",
       "   ',',\n",
       "   'one',\n",
       "   'of',\n",
       "   'the',\n",
       "   'best',\n",
       "   'actors',\n",
       "   ',',\n",
       "   'directors',\n",
       "   'and',\n",
       "   'producers',\n",
       "   'around',\n",
       "   ',',\n",
       "   'responsible',\n",
       "   'for',\n",
       "   'some',\n",
       "   'excellent',\n",
       "   'work',\n",
       "   '.',\n",
       "   'but',\n",
       "   'even',\n",
       "   'a',\n",
       "   'hero',\n",
       "   'can',\n",
       "   'stumble',\n",
       "   'sometimes',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['nair',\n",
       "   \"doesn't\",\n",
       "   'use',\n",
       "   '[monsoon',\n",
       "   'wedding]',\n",
       "   'to',\n",
       "   'lament',\n",
       "   'the',\n",
       "   'loss',\n",
       "   'of',\n",
       "   'culture',\n",
       "   '.',\n",
       "   'instead',\n",
       "   ',',\n",
       "   'she',\n",
       "   'sees',\n",
       "   'it',\n",
       "   'as',\n",
       "   'a',\n",
       "   'chance',\n",
       "   'to',\n",
       "   'revitalize',\n",
       "   'what',\n",
       "   'is',\n",
       "   'and',\n",
       "   'always',\n",
       "   'has',\n",
       "   'been',\n",
       "   'remarkable',\n",
       "   'about',\n",
       "   'clung-to',\n",
       "   'traditions',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['stuffed',\n",
       "   'to',\n",
       "   'the',\n",
       "   'brim',\n",
       "   'with',\n",
       "   'ideas',\n",
       "   ',',\n",
       "   'american',\n",
       "   'instigator',\n",
       "   'michael',\n",
       "   \"moore's\",\n",
       "   'film',\n",
       "   'is',\n",
       "   'a',\n",
       "   'rambling',\n",
       "   'examination',\n",
       "   'of',\n",
       "   'american',\n",
       "   'gun',\n",
       "   'culture',\n",
       "   'that',\n",
       "   'uses',\n",
       "   'his',\n",
       "   'usual',\n",
       "   'modus',\n",
       "   'operandi',\n",
       "   'of',\n",
       "   'crucifixion',\n",
       "   'through',\n",
       "   'juxtaposition',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'a',\n",
       "   'joke',\n",
       "   'at',\n",
       "   'once',\n",
       "   'flaky',\n",
       "   'and',\n",
       "   'resonant',\n",
       "   ',',\n",
       "   'lightweight',\n",
       "   'and',\n",
       "   'bizarrely',\n",
       "   'original',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"fontaine's\",\n",
       "   'direction',\n",
       "   ',',\n",
       "   'especially',\n",
       "   'her',\n",
       "   'agreeably',\n",
       "   'startling',\n",
       "   'use',\n",
       "   'of',\n",
       "   'close-ups',\n",
       "   'and',\n",
       "   'her',\n",
       "   'grace',\n",
       "   'with',\n",
       "   'a',\n",
       "   'moving',\n",
       "   'camera',\n",
       "   ',',\n",
       "   'creates',\n",
       "   'sheerly',\n",
       "   'cinematic',\n",
       "   'appeal',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['starts',\n",
       "   'slowly',\n",
       "   ',',\n",
       "   'but',\n",
       "   'adrien',\n",
       "   'brody',\n",
       "   'Â\\x96',\n",
       "   'in',\n",
       "   'the',\n",
       "   'title',\n",
       "   'role',\n",
       "   'Â\\x96',\n",
       "   'helps',\n",
       "   'make',\n",
       "   'the',\n",
       "   \"film's\",\n",
       "   'conclusion',\n",
       "   'powerful',\n",
       "   'and',\n",
       "   'satisfying',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'refreshing',\n",
       "   'change',\n",
       "   'from',\n",
       "   'the',\n",
       "   'usual',\n",
       "   'whoopee-cushion',\n",
       "   'effort',\n",
       "   'aimed',\n",
       "   'at',\n",
       "   'the',\n",
       "   'youth',\n",
       "   'market',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['you',\n",
       "   'really',\n",
       "   'have',\n",
       "   'to',\n",
       "   'salute',\n",
       "   'writer-director',\n",
       "   'haneke',\n",
       "   '(',\n",
       "   'he',\n",
       "   'adapted',\n",
       "   'elfriede',\n",
       "   \"jelinek's\",\n",
       "   'novel',\n",
       "   ')',\n",
       "   'for',\n",
       "   'making',\n",
       "   'a',\n",
       "   'film',\n",
       "   'that',\n",
       "   \"isn't\",\n",
       "   'nearly',\n",
       "   'as',\n",
       "   'graphic',\n",
       "   'but',\n",
       "   'much',\n",
       "   'more',\n",
       "   'powerful',\n",
       "   ',',\n",
       "   'brutally',\n",
       "   'shocking',\n",
       "   'and',\n",
       "   'difficult',\n",
       "   'to',\n",
       "   'watch',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['barry',\n",
       "   'convinces',\n",
       "   'us',\n",
       "   \"he's\",\n",
       "   'a',\n",
       "   'dangerous',\n",
       "   ',',\n",
       "   'secretly',\n",
       "   'unhinged',\n",
       "   'guy',\n",
       "   'who',\n",
       "   'could',\n",
       "   'easily',\n",
       "   'have',\n",
       "   'killed',\n",
       "   'a',\n",
       "   'president',\n",
       "   'because',\n",
       "   'it',\n",
       "   'made',\n",
       "   'him',\n",
       "   'feel',\n",
       "   'powerful',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'distant',\n",
       "   ',',\n",
       "   'even',\n",
       "   'sterile',\n",
       "   ',',\n",
       "   'yet',\n",
       "   'compulsively',\n",
       "   'watchable',\n",
       "   'look',\n",
       "   'at',\n",
       "   'the',\n",
       "   'sordid',\n",
       "   'life',\n",
       "   'of',\n",
       "   \"hogan's\",\n",
       "   'heroes',\n",
       "   'star',\n",
       "   'bob',\n",
       "   'crane',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"there's\",\n",
       "   'no',\n",
       "   'disguising',\n",
       "   'this',\n",
       "   'as',\n",
       "   'one',\n",
       "   'of',\n",
       "   'the',\n",
       "   'worst',\n",
       "   'films',\n",
       "   'of',\n",
       "   'the',\n",
       "   'summer',\n",
       "   '.',\n",
       "   'or',\n",
       "   'for',\n",
       "   'the',\n",
       "   'year',\n",
       "   ',',\n",
       "   'for',\n",
       "   'that',\n",
       "   'matter',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['director',\n",
       "   'dan',\n",
       "   'verete',\n",
       "   'uses',\n",
       "   'his',\n",
       "   'camera',\n",
       "   'as',\n",
       "   'the',\n",
       "   'metaphoric',\n",
       "   'needle',\n",
       "   ',',\n",
       "   'and',\n",
       "   'his',\n",
       "   'cast',\n",
       "   'in',\n",
       "   'each',\n",
       "   'segment',\n",
       "   'as',\n",
       "   'his',\n",
       "   'thread',\n",
       "   ',',\n",
       "   'to',\n",
       "   'form',\n",
       "   'a',\n",
       "   'sweeping',\n",
       "   'tapestry',\n",
       "   'of',\n",
       "   'mis-explanation',\n",
       "   'and',\n",
       "   'contention',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['waydowntown',\n",
       "   'just',\n",
       "   'like',\n",
       "   'most',\n",
       "   'large',\n",
       "   'cities',\n",
       "   ',',\n",
       "   \"isn't\",\n",
       "   'somewhere',\n",
       "   \"you'll\",\n",
       "   'want',\n",
       "   'to',\n",
       "   'spend',\n",
       "   'the',\n",
       "   'rest',\n",
       "   'of',\n",
       "   'your',\n",
       "   'life',\n",
       "   ',',\n",
       "   'but',\n",
       "   'it',\n",
       "   'sure',\n",
       "   'is',\n",
       "   'a',\n",
       "   'fun',\n",
       "   'place',\n",
       "   'to',\n",
       "   'visit',\n",
       "   'for',\n",
       "   'a',\n",
       "   'while',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   'acting',\n",
       "   'in',\n",
       "   'pauline',\n",
       "   'and',\n",
       "   'paulette',\n",
       "   'is',\n",
       "   'good',\n",
       "   'all',\n",
       "   'round',\n",
       "   ',',\n",
       "   'but',\n",
       "   'what',\n",
       "   'really',\n",
       "   'sets',\n",
       "   'the',\n",
       "   'film',\n",
       "   'apart',\n",
       "   'is',\n",
       "   \"debrauwer's\",\n",
       "   'refusal',\n",
       "   'to',\n",
       "   'push',\n",
       "   'the',\n",
       "   'easy',\n",
       "   'emotional',\n",
       "   'buttons',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   'only',\n",
       "   'young',\n",
       "   'people',\n",
       "   'who',\n",
       "   'possibly',\n",
       "   'will',\n",
       "   'enjoy',\n",
       "   'it',\n",
       "   'are',\n",
       "   'infants',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'who',\n",
       "   'might',\n",
       "   'be',\n",
       "   'distracted',\n",
       "   'by',\n",
       "   'the',\n",
       "   \"movie's\",\n",
       "   'quick',\n",
       "   'movements',\n",
       "   'and',\n",
       "   'sounds',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"there's\",\n",
       "   'lots',\n",
       "   'of',\n",
       "   'cool',\n",
       "   'stuff',\n",
       "   'packed',\n",
       "   'into',\n",
       "   \"espn's\",\n",
       "   'ultimate',\n",
       "   'x',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['it',\n",
       "   'gets',\n",
       "   'old',\n",
       "   'quickly',\n",
       "   '.',\n",
       "   'watch',\n",
       "   'barbershop',\n",
       "   'again',\n",
       "   'if',\n",
       "   \"you're\",\n",
       "   'in',\n",
       "   'need',\n",
       "   'of',\n",
       "   'a',\n",
       "   'cube',\n",
       "   'fix--this',\n",
       "   \"isn't\",\n",
       "   'worth',\n",
       "   'sitting',\n",
       "   'through',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['harland',\n",
       "   'williams',\n",
       "   'is',\n",
       "   'so',\n",
       "   'funny',\n",
       "   'in',\n",
       "   'drag',\n",
       "   'he',\n",
       "   'should',\n",
       "   'consider',\n",
       "   'permanent',\n",
       "   'sex-reassignment',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   \"film's\",\n",
       "   'images',\n",
       "   'give',\n",
       "   'a',\n",
       "   'backbone',\n",
       "   'to',\n",
       "   'the',\n",
       "   'company',\n",
       "   'and',\n",
       "   'provide',\n",
       "   'an',\n",
       "   'emotional',\n",
       "   'edge',\n",
       "   'to',\n",
       "   'its',\n",
       "   'ultimate',\n",
       "   'demise',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"it's\",\n",
       "   'the',\n",
       "   'kind',\n",
       "   'of',\n",
       "   'film',\n",
       "   'where',\n",
       "   'the',\n",
       "   'villain',\n",
       "   'even',\n",
       "   'gives',\n",
       "   'an',\n",
       "   'evil',\n",
       "   'look',\n",
       "   'for',\n",
       "   'his',\n",
       "   'passport',\n",
       "   'photo',\n",
       "   '.',\n",
       "   'how',\n",
       "   'can',\n",
       "   'you',\n",
       "   'resist',\n",
       "   'that',\n",
       "   '?'],\n",
       "  'subj'),\n",
       " (['plotless',\n",
       "   'collection',\n",
       "   'of',\n",
       "   'moronic',\n",
       "   'stunts',\n",
       "   'is',\n",
       "   'by',\n",
       "   'far',\n",
       "   'the',\n",
       "   'worst',\n",
       "   'movie',\n",
       "   'of',\n",
       "   'the',\n",
       "   'year',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'broad',\n",
       "   ',',\n",
       "   'melodramatic',\n",
       "   'estrogen',\n",
       "   'opera',\n",
       "   \"that's\",\n",
       "   'pretty',\n",
       "   'toxic',\n",
       "   'in',\n",
       "   'its',\n",
       "   'own',\n",
       "   'right',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['just',\n",
       "   'a',\n",
       "   'kiss',\n",
       "   'wants',\n",
       "   'desperately',\n",
       "   'to',\n",
       "   'come',\n",
       "   'off',\n",
       "   'as',\n",
       "   'a',\n",
       "   'fanciful',\n",
       "   'film',\n",
       "   'about',\n",
       "   'the',\n",
       "   'typical',\n",
       "   'problems',\n",
       "   'of',\n",
       "   'average',\n",
       "   'people',\n",
       "   '.',\n",
       "   'but',\n",
       "   'it',\n",
       "   'is',\n",
       "   'set',\n",
       "   'in',\n",
       "   'a',\n",
       "   'world',\n",
       "   'that',\n",
       "   'is',\n",
       "   'very',\n",
       "   ',',\n",
       "   'very',\n",
       "   'far',\n",
       "   'from',\n",
       "   'the',\n",
       "   'one',\n",
       "   'most',\n",
       "   'of',\n",
       "   'us',\n",
       "   'inhabit',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['this',\n",
       "   'is',\n",
       "   'a',\n",
       "   'movie',\n",
       "   'where',\n",
       "   'the',\n",
       "   'most',\n",
       "   'notable',\n",
       "   'observation',\n",
       "   'is',\n",
       "   'how',\n",
       "   'long',\n",
       "   \"you've\",\n",
       "   'been',\n",
       "   'sitting',\n",
       "   'still',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['with',\n",
       "   'a',\n",
       "   'romantic',\n",
       "   'comedy',\n",
       "   'plotline',\n",
       "   'straight',\n",
       "   'from',\n",
       "   'the',\n",
       "   'ages',\n",
       "   ',',\n",
       "   'this',\n",
       "   'cinderella',\n",
       "   'story',\n",
       "   \"doesn't\",\n",
       "   'have',\n",
       "   'a',\n",
       "   'single',\n",
       "   'surprise',\n",
       "   'up',\n",
       "   'its',\n",
       "   'sleeve',\n",
       "   '.',\n",
       "   'but',\n",
       "   'it',\n",
       "   'does',\n",
       "   'somehow',\n",
       "   'manage',\n",
       "   'to',\n",
       "   'get',\n",
       "   'you',\n",
       "   'under',\n",
       "   'its',\n",
       "   'spell',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'charming',\n",
       "   'trifle',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'a',\n",
       "   'welcome',\n",
       "   'return',\n",
       "   'to',\n",
       "   'jocular',\n",
       "   'form',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"it's\",\n",
       "   'not',\n",
       "   'difficult',\n",
       "   'to',\n",
       "   'spot',\n",
       "   'the',\n",
       "   'culprit',\n",
       "   'early-on',\n",
       "   'in',\n",
       "   'this',\n",
       "   'predictable',\n",
       "   'thriller',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['without',\n",
       "   'the',\n",
       "   'dark',\n",
       "   'spookiness',\n",
       "   'of',\n",
       "   'crystal',\n",
       "   'lake',\n",
       "   'camp',\n",
       "   ',',\n",
       "   'the',\n",
       "   'horror',\n",
       "   'concept',\n",
       "   'completely',\n",
       "   'loses',\n",
       "   'its',\n",
       "   'creepy',\n",
       "   'menace',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"there's\",\n",
       "   'suspension',\n",
       "   'of',\n",
       "   'disbelief',\n",
       "   'and',\n",
       "   'then',\n",
       "   \"there's\",\n",
       "   'bad',\n",
       "   'screenwriting',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'this',\n",
       "   'film',\n",
       "   'packs',\n",
       "   'a',\n",
       "   'wallop',\n",
       "   'of',\n",
       "   'the',\n",
       "   'latter',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['little',\n",
       "   'more',\n",
       "   'than',\n",
       "   'a',\n",
       "   'stylish',\n",
       "   'exercise',\n",
       "   'in',\n",
       "   'revisionism',\n",
       "   'whose',\n",
       "   'point',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'is',\n",
       "   'no',\n",
       "   'doubt',\n",
       "   'true',\n",
       "   ',',\n",
       "   'but',\n",
       "   'serves',\n",
       "   'as',\n",
       "   'a',\n",
       "   'rather',\n",
       "   'thin',\n",
       "   'moral',\n",
       "   'to',\n",
       "   'such',\n",
       "   'a',\n",
       "   'knowing',\n",
       "   'fable',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['anyone',\n",
       "   'who',\n",
       "   'can',\n",
       "   'count',\n",
       "   'to',\n",
       "   'five',\n",
       "   '(',\n",
       "   'the',\n",
       "   \"film's\",\n",
       "   'target',\n",
       "   'market',\n",
       "   '?',\n",
       "   ')',\n",
       "   'can',\n",
       "   'see',\n",
       "   'where',\n",
       "   'this',\n",
       "   'dumbed-down',\n",
       "   'concoction',\n",
       "   'is',\n",
       "   'going',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['every',\n",
       "   'defiantly',\n",
       "   'over-the-top',\n",
       "   'action',\n",
       "   'scene',\n",
       "   '--',\n",
       "   'from',\n",
       "   'high-stakes',\n",
       "   'car',\n",
       "   'chases',\n",
       "   'to',\n",
       "   'fearsome',\n",
       "   'drug',\n",
       "   'busts',\n",
       "   '--',\n",
       "   'seizes',\n",
       "   'your',\n",
       "   'adrenal',\n",
       "   'gland',\n",
       "   'and',\n",
       "   'milks',\n",
       "   'it',\n",
       "   'like',\n",
       "   'an',\n",
       "   'epileptic',\n",
       "   'farmer',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['kosminsky',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.',\n",
       "   'puts',\n",
       "   'enough',\n",
       "   'salt',\n",
       "   'into',\n",
       "   'the',\n",
       "   'wounds',\n",
       "   'of',\n",
       "   'the',\n",
       "   'tortured',\n",
       "   'and',\n",
       "   'self-conscious',\n",
       "   'material',\n",
       "   'to',\n",
       "   'make',\n",
       "   'it',\n",
       "   'sting',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'sobering',\n",
       "   'and',\n",
       "   'powerful',\n",
       "   'documentary',\n",
       "   'about',\n",
       "   'the',\n",
       "   'most',\n",
       "   'severe',\n",
       "   'kind',\n",
       "   'of',\n",
       "   'personal',\n",
       "   'loss',\n",
       "   ':',\n",
       "   'rejection',\n",
       "   'by',\n",
       "   \"one's\",\n",
       "   'mother',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['if',\n",
       "   'the',\n",
       "   'story',\n",
       "   'lacks',\n",
       "   'bite',\n",
       "   ',',\n",
       "   'the',\n",
       "   'performances',\n",
       "   'are',\n",
       "   'never',\n",
       "   'less',\n",
       "   'than',\n",
       "   'affectionate',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['a',\n",
       "   'deftly',\n",
       "   'entertaining',\n",
       "   'film',\n",
       "   ',',\n",
       "   'smartly',\n",
       "   'played',\n",
       "   'and',\n",
       "   'smartly',\n",
       "   'directed',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['ice',\n",
       "   'age',\n",
       "   'is',\n",
       "   'the',\n",
       "   'first',\n",
       "   'computer-generated',\n",
       "   'feature',\n",
       "   'cartoon',\n",
       "   'to',\n",
       "   'feel',\n",
       "   'like',\n",
       "   'other',\n",
       "   'movies',\n",
       "   ',',\n",
       "   'and',\n",
       "   'that',\n",
       "   'makes',\n",
       "   'for',\n",
       "   'some',\n",
       "   'glacial',\n",
       "   'pacing',\n",
       "   'early',\n",
       "   'on',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['i',\n",
       "   'like',\n",
       "   'my',\n",
       "   'christmas',\n",
       "   'movies',\n",
       "   'with',\n",
       "   'more',\n",
       "   'elves',\n",
       "   'and',\n",
       "   'snow',\n",
       "   'and',\n",
       "   'less',\n",
       "   'pimps',\n",
       "   'and',\n",
       "   \"ho's\",\n",
       "   '.'],\n",
       "  'subj'),\n",
       " ([\"ferrara's\",\n",
       "   'strongest',\n",
       "   'and',\n",
       "   'most',\n",
       "   'touching',\n",
       "   'movie',\n",
       "   'of',\n",
       "   'recent',\n",
       "   'years',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['skip',\n",
       "   'work',\n",
       "   'to',\n",
       "   'see',\n",
       "   'it',\n",
       "   'at',\n",
       "   'the',\n",
       "   'first',\n",
       "   'opportunity',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['both',\n",
       "   'the',\n",
       "   'film',\n",
       "   'and',\n",
       "   \"nachtwey's\",\n",
       "   'photos',\n",
       "   'hammer',\n",
       "   'home',\n",
       "   'the',\n",
       "   'grim',\n",
       "   'reality',\n",
       "   'of',\n",
       "   'the',\n",
       "   \"world's\",\n",
       "   'gutters',\n",
       "   'and',\n",
       "   'battlefields',\n",
       "   ',',\n",
       "   'and',\n",
       "   'will',\n",
       "   'make',\n",
       "   'you',\n",
       "   'question',\n",
       "   'what',\n",
       "   \"'news'\",\n",
       "   'really',\n",
       "   'is',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['so',\n",
       "   'relentlessly',\n",
       "   'wholesome',\n",
       "   'it',\n",
       "   'made',\n",
       "   'me',\n",
       "   'want',\n",
       "   'to',\n",
       "   'swipe',\n",
       "   'something',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['shyamalan',\n",
       "   'offers',\n",
       "   'copious',\n",
       "   'hints',\n",
       "   'along',\n",
       "   'the',\n",
       "   'way',\n",
       "   '--',\n",
       "   'myriad',\n",
       "   'signs',\n",
       "   ',',\n",
       "   'if',\n",
       "   'you',\n",
       "   'will',\n",
       "   '--',\n",
       "   'that',\n",
       "   'beneath',\n",
       "   'the',\n",
       "   'familiar',\n",
       "   ',',\n",
       "   'funny',\n",
       "   'surface',\n",
       "   'is',\n",
       "   'a',\n",
       "   'far',\n",
       "   'bigger',\n",
       "   ',',\n",
       "   'far',\n",
       "   'more',\n",
       "   'meaningful',\n",
       "   'story',\n",
       "   'than',\n",
       "   'one',\n",
       "   'in',\n",
       "   'which',\n",
       "   'little',\n",
       "   'green',\n",
       "   'men',\n",
       "   'come',\n",
       "   'to',\n",
       "   'earth',\n",
       "   'for',\n",
       "   'harvesting',\n",
       "   'purposes',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['that',\n",
       "   \"'alabama'\",\n",
       "   'manages',\n",
       "   'to',\n",
       "   'be',\n",
       "   'pleasant',\n",
       "   'in',\n",
       "   'spite',\n",
       "   'of',\n",
       "   'its',\n",
       "   'predictability',\n",
       "   'and',\n",
       "   'occasional',\n",
       "   'slowness',\n",
       "   'is',\n",
       "   'due',\n",
       "   'primarily',\n",
       "   'to',\n",
       "   'the',\n",
       "   'perkiness',\n",
       "   'of',\n",
       "   'witherspoon',\n",
       "   '(',\n",
       "   'who',\n",
       "   'is',\n",
       "   'always',\n",
       "   'a',\n",
       "   'joy',\n",
       "   'to',\n",
       "   'watch',\n",
       "   ',',\n",
       "   'even',\n",
       "   'when',\n",
       "   'her',\n",
       "   'material',\n",
       "   'is',\n",
       "   'not',\n",
       "   'first-rate',\n",
       "   ')',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['against',\n",
       "   'all',\n",
       "   'odds',\n",
       "   'in',\n",
       "   'heaven',\n",
       "   'and',\n",
       "   'hell',\n",
       "   ',',\n",
       "   'it',\n",
       "   'creeped',\n",
       "   'me',\n",
       "   'out',\n",
       "   'just',\n",
       "   'fine',\n",
       "   '.'],\n",
       "  'subj'),\n",
       " (['the',\n",
       "   'movie',\n",
       "   'begins',\n",
       "   'in',\n",
       "   'the',\n",
       "   'past',\n",
       "   'where',\n",
       "   'a',\n",
       "   'young',\n",
       "   'boy',\n",
       "   'named',\n",
       "   'sam',\n",
       "   'attempts',\n",
       "   'to',\n",
       "   'save',\n",
       "   'celebi',\n",
       "   'from',\n",
       "   'a',\n",
       "   'hunter',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['emerging',\n",
       "   'from',\n",
       "   'the',\n",
       "   'human',\n",
       "   'psyche',\n",
       "   'and',\n",
       "   'showing',\n",
       "   'characteristics',\n",
       "   'of',\n",
       "   'abstract',\n",
       "   'expressionism',\n",
       "   ',',\n",
       "   'minimalism',\n",
       "   'and',\n",
       "   'russian',\n",
       "   'constructivism',\n",
       "   ',',\n",
       "   'graffiti',\n",
       "   'removal',\n",
       "   'has',\n",
       "   'secured',\n",
       "   'its',\n",
       "   'place',\n",
       "   'in',\n",
       "   'the',\n",
       "   'history',\n",
       "   'of',\n",
       "   'modern',\n",
       "   'art',\n",
       "   'while',\n",
       "   'being',\n",
       "   'created',\n",
       "   'by',\n",
       "   'artists',\n",
       "   'who',\n",
       "   'are',\n",
       "   'unconscious',\n",
       "   'of',\n",
       "   'their',\n",
       "   'artistic',\n",
       "   'achievements',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['spurning',\n",
       "   'her',\n",
       "   \"mother's\",\n",
       "   'insistence',\n",
       "   'that',\n",
       "   'she',\n",
       "   'get',\n",
       "   'on',\n",
       "   'with',\n",
       "   'her',\n",
       "   'life',\n",
       "   ',',\n",
       "   'mary',\n",
       "   'is',\n",
       "   'thrown',\n",
       "   'out',\n",
       "   'of',\n",
       "   'the',\n",
       "   'house',\n",
       "   ',',\n",
       "   'rejected',\n",
       "   'by',\n",
       "   'joe',\n",
       "   ',',\n",
       "   'and',\n",
       "   'expelled',\n",
       "   'from',\n",
       "   'school',\n",
       "   'as',\n",
       "   'she',\n",
       "   'grows',\n",
       "   'larger',\n",
       "   'with',\n",
       "   'child',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['amitabh',\n",
       "   \"can't\",\n",
       "   'believe',\n",
       "   'the',\n",
       "   'board',\n",
       "   'of',\n",
       "   'directors',\n",
       "   'and',\n",
       "   'his',\n",
       "   'mind',\n",
       "   'is',\n",
       "   'filled',\n",
       "   'with',\n",
       "   'revenge',\n",
       "   'and',\n",
       "   'what',\n",
       "   'better',\n",
       "   'revenge',\n",
       "   'than',\n",
       "   'robbing',\n",
       "   'the',\n",
       "   'bank',\n",
       "   'himself',\n",
       "   ',',\n",
       "   'ironic',\n",
       "   'as',\n",
       "   'it',\n",
       "   'may',\n",
       "   'sound',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['she',\n",
       "   ',',\n",
       "   'among',\n",
       "   'others',\n",
       "   'excentricities',\n",
       "   ',',\n",
       "   'talks',\n",
       "   'to',\n",
       "   'a',\n",
       "   'small',\n",
       "   'rock',\n",
       "   ',',\n",
       "   'gertrude',\n",
       "   ',',\n",
       "   'like',\n",
       "   'if',\n",
       "   'she',\n",
       "   'was',\n",
       "   'alive',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['this',\n",
       "   'gives',\n",
       "   'the',\n",
       "   'girls',\n",
       "   'a',\n",
       "   'fair',\n",
       "   'chance',\n",
       "   'of',\n",
       "   'pulling',\n",
       "   'the',\n",
       "   'wool',\n",
       "   'over',\n",
       "   'their',\n",
       "   'eyes',\n",
       "   'using',\n",
       "   'their',\n",
       "   'sexiness',\n",
       "   'to',\n",
       "   'poach',\n",
       "   'any',\n",
       "   'last',\n",
       "   'vestige',\n",
       "   'of',\n",
       "   'common',\n",
       "   'sense',\n",
       "   'the',\n",
       "   'dons',\n",
       "   'might',\n",
       "   'have',\n",
       "   'had',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['styled',\n",
       "   'after',\n",
       "   \"vh1's\",\n",
       "   '\"',\n",
       "   'behind',\n",
       "   'the',\n",
       "   'music',\n",
       "   ',',\n",
       "   '\"',\n",
       "   'this',\n",
       "   'mockumentary',\n",
       "   'profiles',\n",
       "   'the',\n",
       "   'rise',\n",
       "   'and',\n",
       "   'fall',\n",
       "   'of',\n",
       "   'an',\n",
       "   'internet',\n",
       "   'startup',\n",
       "   ',',\n",
       "   'called',\n",
       "   'icevan',\n",
       "   '.',\n",
       "   'com',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['being',\n",
       "   'blue',\n",
       "   'is',\n",
       "   'not',\n",
       "   'his',\n",
       "   'only',\n",
       "   'predicament',\n",
       "   ';',\n",
       "   'he',\n",
       "   'also',\n",
       "   'lacks',\n",
       "   'the',\n",
       "   'ability',\n",
       "   'to',\n",
       "   'outwardly',\n",
       "   'express',\n",
       "   'his',\n",
       "   'emotions',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   \"killer's\",\n",
       "   'clues',\n",
       "   'are',\n",
       "   'a',\n",
       "   'perversion',\n",
       "   'of',\n",
       "   'biblical',\n",
       "   'punishments',\n",
       "   'for',\n",
       "   'sins',\n",
       "   ':',\n",
       "   'stoning',\n",
       "   ',',\n",
       "   'burning',\n",
       "   ',',\n",
       "   'decapitation',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['david',\n",
       "   'is',\n",
       "   'a',\n",
       "   'painter',\n",
       "   'with',\n",
       "   \"painter's\",\n",
       "   'block',\n",
       "   'who',\n",
       "   'takes',\n",
       "   'a',\n",
       "   'job',\n",
       "   'as',\n",
       "   'a',\n",
       "   'waiter',\n",
       "   'to',\n",
       "   'get',\n",
       "   'some',\n",
       "   'inspiration',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['women', 'craved', 'him', 'and', 'men', 'wanted', 'to', 'be', 'him', '.'],\n",
       "  'obj'),\n",
       " (['set',\n",
       "   'on',\n",
       "   'an',\n",
       "   'island',\n",
       "   'off',\n",
       "   'the',\n",
       "   'coast',\n",
       "   'of',\n",
       "   'florida',\n",
       "   ',',\n",
       "   'a',\n",
       "   'techno',\n",
       "   'rave',\n",
       "   'party',\n",
       "   'attracts',\n",
       "   'a',\n",
       "   'diverse',\n",
       "   'group',\n",
       "   'of',\n",
       "   'college',\n",
       "   'coeds',\n",
       "   'and',\n",
       "   'a',\n",
       "   'coast',\n",
       "   'guard',\n",
       "   'officer',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['lesson',\n",
       "   'to',\n",
       "   'be',\n",
       "   'learned',\n",
       "   ':',\n",
       "   'never',\n",
       "   ',',\n",
       "   'never',\n",
       "   'mess',\n",
       "   'with',\n",
       "   '\"',\n",
       "   'the',\n",
       "   'gay',\n",
       "   'mafia',\n",
       "   '!',\n",
       "   '\"'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'theme',\n",
       "   'of',\n",
       "   'the',\n",
       "   'film',\n",
       "   'simultaneously',\n",
       "   'addresses',\n",
       "   'the',\n",
       "   'similarities',\n",
       "   'between',\n",
       "   'the',\n",
       "   'two',\n",
       "   'factions',\n",
       "   'of',\n",
       "   'law',\n",
       "   'and',\n",
       "   'crime',\n",
       "   'while',\n",
       "   'revealing',\n",
       "   'the',\n",
       "   'similarities',\n",
       "   'between',\n",
       "   'the',\n",
       "   'two',\n",
       "   'brothers',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " ([\"they're\",\n",
       "   'jewish',\n",
       "   ',',\n",
       "   \"they're\",\n",
       "   'grandmothers',\n",
       "   ',',\n",
       "   'and',\n",
       "   \"they're\",\n",
       "   'lesbians',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['but',\n",
       "   \"he's\",\n",
       "   'neglecting',\n",
       "   'his',\n",
       "   'work',\n",
       "   ',',\n",
       "   'carping',\n",
       "   'at',\n",
       "   'his',\n",
       "   'mom',\n",
       "   ',',\n",
       "   'and',\n",
       "   'behaving',\n",
       "   'badly',\n",
       "   'toward',\n",
       "   'his',\n",
       "   'loyal',\n",
       "   'friend',\n",
       "   'bobbi',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['with',\n",
       "   'all',\n",
       "   'this',\n",
       "   'going',\n",
       "   'on',\n",
       "   ',',\n",
       "   \"gerry's\",\n",
       "   'estranged',\n",
       "   'wife',\n",
       "   'margaret',\n",
       "   'is',\n",
       "   'worried',\n",
       "   'for',\n",
       "   'her',\n",
       "   \"daughter's\",\n",
       "   'safety',\n",
       "   'and',\n",
       "   'finds',\n",
       "   'herself',\n",
       "   'another',\n",
       "   'target',\n",
       "   'in',\n",
       "   'the',\n",
       "   'race',\n",
       "   'to',\n",
       "   'find',\n",
       "   'the',\n",
       "   'code',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['valento',\n",
       "   'feels',\n",
       "   'the',\n",
       "   'heat',\n",
       "   'and',\n",
       "   'turns',\n",
       "   'the',\n",
       "   'table',\n",
       "   ':',\n",
       "   'he',\n",
       "   'makes',\n",
       "   'the',\n",
       "   'dupe',\n",
       "   'into',\n",
       "   'one',\n",
       "   'of',\n",
       "   'his',\n",
       "   'own',\n",
       "   'and',\n",
       "   'rubs',\n",
       "   'the',\n",
       "   \"da's\",\n",
       "   'nose',\n",
       "   'in',\n",
       "   'it',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['saigon',\n",
       "   ',',\n",
       "   '1952',\n",
       "   ',',\n",
       "   'a',\n",
       "   'beautiful',\n",
       "   ',',\n",
       "   'exotic',\n",
       "   ',',\n",
       "   'and',\n",
       "   'mysterious',\n",
       "   'city',\n",
       "   'caught',\n",
       "   'in',\n",
       "   'the',\n",
       "   'grips',\n",
       "   'of',\n",
       "   'the',\n",
       "   'vietnamese',\n",
       "   'war',\n",
       "   'of',\n",
       "   'liberation',\n",
       "   'from',\n",
       "   'the',\n",
       "   'french',\n",
       "   'colonial',\n",
       "   'powers',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['deep',\n",
       "   'in',\n",
       "   'the',\n",
       "   'northwest',\n",
       "   ',',\n",
       "   'there',\n",
       "   'is',\n",
       "   'a',\n",
       "   'lone',\n",
       "   'ranch',\n",
       "   'tucked',\n",
       "   'away',\n",
       "   'so',\n",
       "   'purposefully',\n",
       "   ',',\n",
       "   'the',\n",
       "   'only',\n",
       "   'way',\n",
       "   'to',\n",
       "   'find',\n",
       "   'it',\n",
       "   'is',\n",
       "   'by',\n",
       "   'not',\n",
       "   'looking',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['as',\n",
       "   'a',\n",
       "   'young',\n",
       "   'teenager',\n",
       "   ',',\n",
       "   'he',\n",
       "   'finds',\n",
       "   'out',\n",
       "   'who',\n",
       "   'his',\n",
       "   'father',\n",
       "   'is',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['in',\n",
       "   'life',\n",
       "   ',',\n",
       "   \"there's\",\n",
       "   'silver',\n",
       "   ',',\n",
       "   'and',\n",
       "   \"there's\",\n",
       "   'lead',\n",
       "   ',',\n",
       "   'says',\n",
       "   'rikki',\n",
       "   'ortega',\n",
       "   ',',\n",
       "   'as',\n",
       "   'he',\n",
       "   'moves',\n",
       "   'to',\n",
       "   'be',\n",
       "   'king',\n",
       "   'of',\n",
       "   'the',\n",
       "   'street',\n",
       "   'in',\n",
       "   '\"',\n",
       "   '&#193',\n",
       "   ';',\n",
       "   'nglio',\n",
       "   ',',\n",
       "   '\"',\n",
       "   'l',\n",
       "   '.',\n",
       "   'a',\n",
       "   '.',\n",
       "   \"'s\",\n",
       "   'east',\n",
       "   'side',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['all',\n",
       "   'these',\n",
       "   'games',\n",
       "   'of',\n",
       "   'chasing',\n",
       "   ',',\n",
       "   'rejecting',\n",
       "   'and',\n",
       "   'seducing',\n",
       "   'are',\n",
       "   'played',\n",
       "   'out',\n",
       "   'in',\n",
       "   'an',\n",
       "   'economically',\n",
       "   'and',\n",
       "   'spiritually',\n",
       "   'depressed',\n",
       "   'hong',\n",
       "   'kong',\n",
       "   ',',\n",
       "   'without',\n",
       "   'much',\n",
       "   'gusto',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['television',\n",
       "   'made',\n",
       "   'him',\n",
       "   'famous',\n",
       "   ',',\n",
       "   'but',\n",
       "   'his',\n",
       "   'biggest',\n",
       "   'hits',\n",
       "   'happened',\n",
       "   'off',\n",
       "   'screen',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['jordan',\n",
       "   'is',\n",
       "   'a',\n",
       "   'mom',\n",
       "   'who',\n",
       "   'is',\n",
       "   'on',\n",
       "   'a',\n",
       "   'life',\n",
       "   'long',\n",
       "   'search',\n",
       "   'for',\n",
       "   'true',\n",
       "   'faith',\n",
       "   'as',\n",
       "   'she',\n",
       "   'tries',\n",
       "   'to',\n",
       "   'protect',\n",
       "   'her',\n",
       "   'only',\n",
       "   'child',\n",
       "   'from',\n",
       "   'what',\n",
       "   'she',\n",
       "   'believes',\n",
       "   'is',\n",
       "   'injustice',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " ([\"'bloody\",\n",
       "   \"magic'\",\n",
       "   'is',\n",
       "   'the',\n",
       "   'story',\n",
       "   'of',\n",
       "   'zack',\n",
       "   ',',\n",
       "   'an',\n",
       "   'eleven',\n",
       "   'year',\n",
       "   'old',\n",
       "   'school',\n",
       "   'boy',\n",
       "   ',',\n",
       "   \"who's\",\n",
       "   'family',\n",
       "   'is',\n",
       "   'visited',\n",
       "   'by',\n",
       "   'three',\n",
       "   'debt',\n",
       "   'collectors',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['however',\n",
       "   ',',\n",
       "   'jane',\n",
       "   ',',\n",
       "   \"wendy's\",\n",
       "   '12-year-old',\n",
       "   'daughter',\n",
       "   ',',\n",
       "   'sees',\n",
       "   'it',\n",
       "   'all',\n",
       "   'as',\n",
       "   'make',\n",
       "   'believe',\n",
       "   'and',\n",
       "   'refuses',\n",
       "   'to',\n",
       "   'believe',\n",
       "   'in',\n",
       "   'the',\n",
       "   'tales',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['called',\n",
       "   '\"',\n",
       "   'an',\n",
       "   'elegant',\n",
       "   'documentary',\n",
       "   '\"',\n",
       "   'by',\n",
       "   'sundance',\n",
       "   'and',\n",
       "   '\"',\n",
       "   'eloquent',\n",
       "   'and',\n",
       "   'deeply',\n",
       "   'moving',\n",
       "   '\"',\n",
       "   'by',\n",
       "   'the',\n",
       "   'la',\n",
       "   'times',\n",
       "   ',',\n",
       "   'toyo',\n",
       "   'miyatake',\n",
       "   ':',\n",
       "   'infinite',\n",
       "   'shades',\n",
       "   'of',\n",
       "   'gray',\n",
       "   'is',\n",
       "   'a',\n",
       "   'penetrating',\n",
       "   'portrait',\n",
       "   'of',\n",
       "   'this',\n",
       "   \"photographer's\",\n",
       "   'search',\n",
       "   'for',\n",
       "   'truth',\n",
       "   'and',\n",
       "   'beauty',\n",
       "   'in',\n",
       "   'a',\n",
       "   'world',\n",
       "   'of',\n",
       "   'impermanence',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['straight',\n",
       "   'up',\n",
       "   ':',\n",
       "   'helicopters',\n",
       "   'in',\n",
       "   'action',\n",
       "   'will',\n",
       "   'take',\n",
       "   'audiences',\n",
       "   'on',\n",
       "   'a',\n",
       "   'series',\n",
       "   'of',\n",
       "   'aerial',\n",
       "   'adventures',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['a',\n",
       "   'lapp',\n",
       "   'woman',\n",
       "   'anni',\n",
       "   'gives',\n",
       "   'a',\n",
       "   'shelter',\n",
       "   'to',\n",
       "   'both',\n",
       "   'of',\n",
       "   'them',\n",
       "   'at',\n",
       "   'her',\n",
       "   'farm',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['it',\n",
       "   'also',\n",
       "   'touches',\n",
       "   'on',\n",
       "   'the',\n",
       "   'encroachment',\n",
       "   'of',\n",
       "   'christianity',\n",
       "   'brought',\n",
       "   'by',\n",
       "   'the',\n",
       "   'missionaries',\n",
       "   ',',\n",
       "   'which',\n",
       "   'is',\n",
       "   'at',\n",
       "   'odds',\n",
       "   'with',\n",
       "   \"mepe's\",\n",
       "   'tribal',\n",
       "   'and',\n",
       "   'traditional',\n",
       "   'roots',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['a',\n",
       "   'set',\n",
       "   'of',\n",
       "   'grisly',\n",
       "   'murders',\n",
       "   'brings',\n",
       "   'fbi',\n",
       "   'agent',\n",
       "   'will',\n",
       "   'graham',\n",
       "   '(',\n",
       "   'norton',\n",
       "   ')',\n",
       "   'out',\n",
       "   'of',\n",
       "   'retirement',\n",
       "   'and',\n",
       "   'puts',\n",
       "   'him',\n",
       "   'in',\n",
       "   'search',\n",
       "   'of',\n",
       "   'an',\n",
       "   'atrocious',\n",
       "   'killer',\n",
       "   '(',\n",
       "   'fiennes',\n",
       "   ')',\n",
       "   \"who's\",\n",
       "   'driven',\n",
       "   'by',\n",
       "   'the',\n",
       "   'image',\n",
       "   'of',\n",
       "   'a',\n",
       "   'painting',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['soon',\n",
       "   ',',\n",
       "   'the',\n",
       "   'team',\n",
       "   'begins',\n",
       "   'to',\n",
       "   'suspect',\n",
       "   'that',\n",
       "   \"knowles'\",\n",
       "   'main',\n",
       "   'objective',\n",
       "   'is',\n",
       "   'actually',\n",
       "   'to',\n",
       "   'recover',\n",
       "   'the',\n",
       "   'prototype',\n",
       "   'of',\n",
       "   'a',\n",
       "   'dna',\n",
       "   'testing',\n",
       "   'machine',\n",
       "   'called',\n",
       "   'the',\n",
       "   'huxley',\n",
       "   'project',\n",
       "   ',',\n",
       "   'which',\n",
       "   'his',\n",
       "   'company',\n",
       "   'has',\n",
       "   'spent',\n",
       "   'years',\n",
       "   'and',\n",
       "   'millions',\n",
       "   'of',\n",
       "   'dollars',\n",
       "   'developing',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['his',\n",
       "   'mother',\n",
       "   'persuades',\n",
       "   'a',\n",
       "   'renowned',\n",
       "   'entomologist',\n",
       "   'to',\n",
       "   'take',\n",
       "   'them',\n",
       "   'on',\n",
       "   'a',\n",
       "   'trip',\n",
       "   'to',\n",
       "   'the',\n",
       "   'jungle',\n",
       "   'to',\n",
       "   'search',\n",
       "   'for',\n",
       "   'the',\n",
       "   'butterfly',\n",
       "   ',',\n",
       "   'leading',\n",
       "   'to',\n",
       "   'an',\n",
       "   'adventure',\n",
       "   'that',\n",
       "   'will',\n",
       "   'transform',\n",
       "   'their',\n",
       "   'lives',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['with',\n",
       "   'a',\n",
       "   'rare',\n",
       "   'gift',\n",
       "   'for',\n",
       "   'melding',\n",
       "   'subjectivity',\n",
       "   'with',\n",
       "   'biographical',\n",
       "   'facts',\n",
       "   ',',\n",
       "   'm&#225',\n",
       "   ';',\n",
       "   'rton',\n",
       "   'brings',\n",
       "   'sabina',\n",
       "   'spielrein',\n",
       "   'back',\n",
       "   'to',\n",
       "   'life',\n",
       "   ',',\n",
       "   'body',\n",
       "   'and',\n",
       "   'soul',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['seeking',\n",
       "   'a',\n",
       "   'mental',\n",
       "   'escape',\n",
       "   ',',\n",
       "   'simone',\n",
       "   'begins',\n",
       "   'to',\n",
       "   'tune',\n",
       "   'into',\n",
       "   \"what's\",\n",
       "   'happening',\n",
       "   'with',\n",
       "   'the',\n",
       "   'other',\n",
       "   'couples',\n",
       "   'around',\n",
       "   'her',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'beatle',\n",
       "   'fan',\n",
       "   'is',\n",
       "   'a',\n",
       "   'drama',\n",
       "   'about',\n",
       "   'albert',\n",
       "   ',',\n",
       "   'a',\n",
       "   'psychotic',\n",
       "   'prisoner',\n",
       "   'who',\n",
       "   'is',\n",
       "   'a',\n",
       "   'devoted',\n",
       "   'fan',\n",
       "   'of',\n",
       "   'john',\n",
       "   'lennon',\n",
       "   'and',\n",
       "   'the',\n",
       "   'beatles',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['then',\n",
       "   ',',\n",
       "   'in',\n",
       "   '1974',\n",
       "   ',',\n",
       "   'something',\n",
       "   'incredible',\n",
       "   'happened',\n",
       "   '-',\n",
       "   'they',\n",
       "   'fell',\n",
       "   'in',\n",
       "   'love',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['on',\n",
       "   'her',\n",
       "   'deathbed',\n",
       "   ',',\n",
       "   'candice',\n",
       "   'klein',\n",
       "   'accidentally',\n",
       "   'asks',\n",
       "   'the',\n",
       "   'question',\n",
       "   ',',\n",
       "   '\"',\n",
       "   'what',\n",
       "   'did',\n",
       "   'i',\n",
       "   'ever',\n",
       "   'do',\n",
       "   'to',\n",
       "   'deserve',\n",
       "   'this',\n",
       "   '?',\n",
       "   '\"'],\n",
       "  'obj'),\n",
       " (['shot',\n",
       "   'as',\n",
       "   'a',\n",
       "   '\"',\n",
       "   'behind-the-scenes',\n",
       "   '\"',\n",
       "   'look',\n",
       "   'at',\n",
       "   'how',\n",
       "   'a',\n",
       "   'fictional',\n",
       "   'kung-fu',\n",
       "   'movie',\n",
       "   'is',\n",
       "   'made',\n",
       "   ',',\n",
       "   'the',\n",
       "   'film',\n",
       "   'is',\n",
       "   'basically',\n",
       "   'a',\n",
       "   'movie',\n",
       "   'about',\n",
       "   'the',\n",
       "   'making',\n",
       "   'of',\n",
       "   'a',\n",
       "   'movie',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['before',\n",
       "   'the',\n",
       "   'investigation',\n",
       "   'ends',\n",
       "   ',',\n",
       "   \"we've\",\n",
       "   'met',\n",
       "   'boyfriends',\n",
       "   ',',\n",
       "   'a',\n",
       "   'drug',\n",
       "   'dealer',\n",
       "   ',',\n",
       "   \"alicia's\",\n",
       "   'mom',\n",
       "   ',',\n",
       "   \"hadley's\",\n",
       "   'dad',\n",
       "   ',',\n",
       "   'nurses',\n",
       "   ',',\n",
       "   'doctors',\n",
       "   ',',\n",
       "   'and',\n",
       "   'an',\n",
       "   'orderly',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['but',\n",
       "   'what',\n",
       "   'exactly',\n",
       "   'is',\n",
       "   'good',\n",
       "   '&#38',\n",
       "   ';',\n",
       "   'what',\n",
       "   'exactly',\n",
       "   'is',\n",
       "   'evil',\n",
       "   '?'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'movie',\n",
       "   'takes',\n",
       "   'place',\n",
       "   'in',\n",
       "   'mexico',\n",
       "   ',',\n",
       "   '2002',\n",
       "   '(',\n",
       "   'based',\n",
       "   'on',\n",
       "   'a',\n",
       "   'story',\n",
       "   'from',\n",
       "   'the',\n",
       "   \"1800's\",\n",
       "   ')',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " ([\"rainone's\",\n",
       "   'love',\n",
       "   'affair',\n",
       "   'with',\n",
       "   'singing',\n",
       "   'sensation',\n",
       "   'kelly',\n",
       "   'mcguire',\n",
       "   'whom',\n",
       "   'he',\n",
       "   'discovered',\n",
       "   'and',\n",
       "   'his',\n",
       "   'near',\n",
       "   'demise',\n",
       "   'by',\n",
       "   'the',\n",
       "   'hands',\n",
       "   'of',\n",
       "   'his',\n",
       "   'own',\n",
       "   'prot&#233',\n",
       "   ';',\n",
       "   'g&#233',\n",
       "   ';',\n",
       "   'vincent',\n",
       "   'riccola',\n",
       "   'is',\n",
       "   'the',\n",
       "   'juice',\n",
       "   'that',\n",
       "   'fuels',\n",
       "   'this',\n",
       "   'roller',\n",
       "   'coaster',\n",
       "   'ride',\n",
       "   'through',\n",
       "   'two',\n",
       "   'debauchery-filled',\n",
       "   'decades',\n",
       "   'of',\n",
       "   'greed',\n",
       "   ',',\n",
       "   'sex',\n",
       "   ',',\n",
       "   'drugs',\n",
       "   'and',\n",
       "   'rock',\n",
       "   'and',\n",
       "   'roll',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['trapped',\n",
       "   'in',\n",
       "   'a',\n",
       "   'lovers',\n",
       "   'triangle',\n",
       "   'and',\n",
       "   'ruthless',\n",
       "   'game',\n",
       "   'of',\n",
       "   'lust',\n",
       "   ',',\n",
       "   'greed',\n",
       "   ',',\n",
       "   'and',\n",
       "   'betrayal',\n",
       "   'we',\n",
       "   'follow',\n",
       "   'one',\n",
       "   \"woman's\",\n",
       "   'hypnotic',\n",
       "   'journey',\n",
       "   'to',\n",
       "   'discover',\n",
       "   'her',\n",
       "   'true',\n",
       "   'self',\n",
       "   '.',\n",
       "   '.',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['decent-but-dull',\n",
       "   'dek',\n",
       "   'loves',\n",
       "   'shirley',\n",
       "   ',',\n",
       "   'so',\n",
       "   'much',\n",
       "   'so',\n",
       "   'that',\n",
       "   'he',\n",
       "   'humiliates',\n",
       "   'her',\n",
       "   'by',\n",
       "   'proposing',\n",
       "   'without',\n",
       "   'warning',\n",
       "   'on',\n",
       "   'national',\n",
       "   'television',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['since',\n",
       "   'all',\n",
       "   'her',\n",
       "   'architects',\n",
       "   'are',\n",
       "   'either',\n",
       "   'busy',\n",
       "   'otherwise',\n",
       "   'or',\n",
       "   'too',\n",
       "   'conservative',\n",
       "   'in',\n",
       "   'style',\n",
       "   ',',\n",
       "   'this',\n",
       "   'ambivalent',\n",
       "   'honour',\n",
       "   'falls',\n",
       "   'to',\n",
       "   'numerobis',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " ([\"there's\",\n",
       "   'a',\n",
       "   'story',\n",
       "   'that',\n",
       "   'goes',\n",
       "   'around',\n",
       "   'the',\n",
       "   'town',\n",
       "   'of',\n",
       "   'darkness',\n",
       "   'falls',\n",
       "   'about',\n",
       "   'her',\n",
       "   ',',\n",
       "   'and',\n",
       "   \"she's\",\n",
       "   'called',\n",
       "   'the',\n",
       "   'tooth',\n",
       "   'fairy',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['a',\n",
       "   'strange',\n",
       "   'film',\n",
       "   'employing',\n",
       "   'old',\n",
       "   'home',\n",
       "   'movies',\n",
       "   'and',\n",
       "   'newly',\n",
       "   'shot',\n",
       "   'footage',\n",
       "   'in',\n",
       "   'an',\n",
       "   'effort',\n",
       "   'to',\n",
       "   'expose',\n",
       "   'one',\n",
       "   'hungarian',\n",
       "   'family',\n",
       "   'and',\n",
       "   'their',\n",
       "   'mutiple',\n",
       "   'problems',\n",
       "   'from',\n",
       "   'the',\n",
       "   '1940s',\n",
       "   'to',\n",
       "   'current',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['matsumoto',\n",
       "   'and',\n",
       "   'sawako',\n",
       "   'were',\n",
       "   'a',\n",
       "   'happy',\n",
       "   'couple',\n",
       "   ',',\n",
       "   'but',\n",
       "   'meddling',\n",
       "   'parents',\n",
       "   'and',\n",
       "   'chase',\n",
       "   'for',\n",
       "   'success',\n",
       "   'push',\n",
       "   'the',\n",
       "   'boy',\n",
       "   'to',\n",
       "   'a',\n",
       "   'tragic',\n",
       "   'choice',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['elvis',\n",
       "   'teams',\n",
       "   'up',\n",
       "   'with',\n",
       "   'jack',\n",
       "   '(',\n",
       "   'ossie',\n",
       "   'davis',\n",
       "   ')',\n",
       "   ',',\n",
       "   'a',\n",
       "   'fellow',\n",
       "   'nursing',\n",
       "   'home',\n",
       "   'resident',\n",
       "   'who',\n",
       "   'thinks',\n",
       "   'that',\n",
       "   'he',\n",
       "   'is',\n",
       "   'actually',\n",
       "   'president',\n",
       "   'john',\n",
       "   'f',\n",
       "   '.',\n",
       "   'kennedy',\n",
       "   ',',\n",
       "   'and',\n",
       "   'the',\n",
       "   'two',\n",
       "   'valiant',\n",
       "   'old',\n",
       "   'codgers',\n",
       "   'sally',\n",
       "   'forth',\n",
       "   'to',\n",
       "   'battle',\n",
       "   'an',\n",
       "   'evil',\n",
       "   'egyptian',\n",
       "   'entity',\n",
       "   'who',\n",
       "   'has',\n",
       "   'chosen',\n",
       "   'their',\n",
       "   'long-term',\n",
       "   'care',\n",
       "   'facility',\n",
       "   'as',\n",
       "   'his',\n",
       "   'happy',\n",
       "   'hunting',\n",
       "   'grounds',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['everywhere',\n",
       "   'he',\n",
       "   'goes',\n",
       "   'he',\n",
       "   'is',\n",
       "   'plagued',\n",
       "   'by',\n",
       "   'cats',\n",
       "   'and',\n",
       "   'when',\n",
       "   'by',\n",
       "   'chance',\n",
       "   'he',\n",
       "   'meets',\n",
       "   'carol',\n",
       "   'on',\n",
       "   'a',\n",
       "   'lonely',\n",
       "   'highway',\n",
       "   'they',\n",
       "   'must',\n",
       "   'begin',\n",
       "   'a',\n",
       "   'journey',\n",
       "   ',',\n",
       "   'avoiding',\n",
       "   'the',\n",
       "   'mysterious',\n",
       "   'private',\n",
       "   'detective',\n",
       "   'mr',\n",
       "   'barlow',\n",
       "   'and',\n",
       "   'the',\n",
       "   'terrifying',\n",
       "   'inhuman',\n",
       "   'creature',\n",
       "   'jack',\n",
       "   ',',\n",
       "   'to',\n",
       "   'uncover',\n",
       "   'the',\n",
       "   'dark',\n",
       "   'truth',\n",
       "   'to',\n",
       "   \"charlie's\",\n",
       "   'life',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['used',\n",
       "   'to',\n",
       "   'living',\n",
       "   'in',\n",
       "   'poverty',\n",
       "   ',',\n",
       "   'it',\n",
       "   'seemed',\n",
       "   'impossible',\n",
       "   'for',\n",
       "   'cass',\n",
       "   'and',\n",
       "   'cary',\n",
       "   'to',\n",
       "   'have',\n",
       "   'a',\n",
       "   'comfortable',\n",
       "   'and',\n",
       "   'bountiful',\n",
       "   'life',\n",
       "   'until',\n",
       "   'doqa',\n",
       "   'gracia',\n",
       "   'comes',\n",
       "   'to',\n",
       "   'bring',\n",
       "   'them',\n",
       "   'into',\n",
       "   'her',\n",
       "   'home',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'story',\n",
       "   'of',\n",
       "   'a',\n",
       "   'normal',\n",
       "   'family',\n",
       "   'in',\n",
       "   'which',\n",
       "   'come',\n",
       "   'out',\n",
       "   'the',\n",
       "   'dreams',\n",
       "   'of',\n",
       "   'those',\n",
       "   'who',\n",
       "   'have',\n",
       "   'lost',\n",
       "   'their',\n",
       "   'possibilities',\n",
       "   'and',\n",
       "   'of',\n",
       "   'those',\n",
       "   'who',\n",
       "   'want',\n",
       "   'to',\n",
       "   'realize',\n",
       "   'them',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['with',\n",
       "   'grit',\n",
       "   'and',\n",
       "   'determination',\n",
       "   'molly',\n",
       "   'guides',\n",
       "   'the',\n",
       "   'girls',\n",
       "   'on',\n",
       "   'an',\n",
       "   'epic',\n",
       "   'journey',\n",
       "   ',',\n",
       "   'one',\n",
       "   'step',\n",
       "   'ahead',\n",
       "   'of',\n",
       "   'the',\n",
       "   'authorities',\n",
       "   ',',\n",
       "   'over',\n",
       "   '1',\n",
       "   ',',\n",
       "   '500',\n",
       "   'miles',\n",
       "   'of',\n",
       "   \"australia's\",\n",
       "   'outback',\n",
       "   'in',\n",
       "   'search',\n",
       "   'of',\n",
       "   'the',\n",
       "   'rabbit-proof',\n",
       "   'fence',\n",
       "   'that',\n",
       "   'bisects',\n",
       "   'the',\n",
       "   'continent',\n",
       "   'and',\n",
       "   'will',\n",
       "   'lead',\n",
       "   'them',\n",
       "   'home',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['they',\n",
       "   'nevertherless',\n",
       "   'feel',\n",
       "   'responsible',\n",
       "   'to',\n",
       "   'protect',\n",
       "   'the',\n",
       "   'flag',\n",
       "   'until',\n",
       "   \"monday's\",\n",
       "   'ceremony',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'doctor',\n",
       "   'realizes',\n",
       "   \"it''s\",\n",
       "   'a',\n",
       "   'love',\n",
       "   'virus',\n",
       "   'so',\n",
       "   'he',\n",
       "   'advises',\n",
       "   'him',\n",
       "   'to',\n",
       "   'woo',\n",
       "   'the',\n",
       "   'girl',\n",
       "   'somehow',\n",
       "   ',',\n",
       "   'not',\n",
       "   'realizing',\n",
       "   'that',\n",
       "   'munnabhai',\n",
       "   'has',\n",
       "   'fallen',\n",
       "   'for',\n",
       "   'none',\n",
       "   'other',\n",
       "   'than',\n",
       "   'his',\n",
       "   'own',\n",
       "   'younger',\n",
       "   'sister',\n",
       "   'komal',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['however',\n",
       "   ',',\n",
       "   'he',\n",
       "   'can',\n",
       "   'only',\n",
       "   'inhabit',\n",
       "   'the',\n",
       "   'body',\n",
       "   'of',\n",
       "   'a',\n",
       "   'child',\n",
       "   'for',\n",
       "   'a',\n",
       "   'short',\n",
       "   'time',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['years',\n",
       "   'later',\n",
       "   ',',\n",
       "   'on',\n",
       "   'a',\n",
       "   'hunting',\n",
       "   'trip',\n",
       "   'in',\n",
       "   'the',\n",
       "   'maine',\n",
       "   'woods',\n",
       "   ',',\n",
       "   'they',\n",
       "   'are',\n",
       "   'overtaken',\n",
       "   'by',\n",
       "   'a',\n",
       "   'blizzard',\n",
       "   ',',\n",
       "   'a',\n",
       "   'vicious',\n",
       "   'storm',\n",
       "   'in',\n",
       "   'which',\n",
       "   'something',\n",
       "   'much',\n",
       "   'more',\n",
       "   'ominous',\n",
       "   'moves',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['consequently',\n",
       "   ',',\n",
       "   'what',\n",
       "   'begins',\n",
       "   'as',\n",
       "   'an',\n",
       "   'enthusiastic',\n",
       "   'road',\n",
       "   'trip',\n",
       "   'is',\n",
       "   'soon',\n",
       "   'plagued',\n",
       "   'with',\n",
       "   'mysterious',\n",
       "   'roadside',\n",
       "   'obstacles',\n",
       "   'that',\n",
       "   'threaten',\n",
       "   'to',\n",
       "   'prevent',\n",
       "   'the',\n",
       "   'boys',\n",
       "   'from',\n",
       "   'ever',\n",
       "   'making',\n",
       "   'it',\n",
       "   'to',\n",
       "   'the',\n",
       "   'competition',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['when',\n",
       "   'she',\n",
       "   'gets',\n",
       "   'into',\n",
       "   'trouble',\n",
       "   'with',\n",
       "   'the',\n",
       "   'police',\n",
       "   'simon',\n",
       "   'represses',\n",
       "   'his',\n",
       "   'death',\n",
       "   'wish',\n",
       "   'and',\n",
       "   'decides',\n",
       "   'to',\n",
       "   'help',\n",
       "   'her',\n",
       "   'out',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['chon',\n",
       "   'then',\n",
       "   'travels',\n",
       "   'to',\n",
       "   'new',\n",
       "   'york',\n",
       "   'for',\n",
       "   'roy',\n",
       "   \"o'bannon\",\n",
       "   '(',\n",
       "   'owen',\n",
       "   'wilson',\n",
       "   ')',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['not',\n",
       "   'only',\n",
       "   'must',\n",
       "   'they',\n",
       "   'overcome',\n",
       "   'an',\n",
       "   'enemy',\n",
       "   'adept',\n",
       "   'at',\n",
       "   'technological',\n",
       "   'witchery',\n",
       "   ',',\n",
       "   'they',\n",
       "   'must',\n",
       "   'overcome',\n",
       "   'the',\n",
       "   'curse',\n",
       "   'that',\n",
       "   'marks',\n",
       "   'their',\n",
       "   'destiny',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['their',\n",
       "   'life',\n",
       "   'becomes',\n",
       "   'less',\n",
       "   'ordinary',\n",
       "   'when',\n",
       "   'they',\n",
       "   'encounter',\n",
       "   'herb',\n",
       "   ',',\n",
       "   'a',\n",
       "   'mischievous',\n",
       "   'and',\n",
       "   'malevolent',\n",
       "   'geek',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['they', 'call', 'themselves', 'd', '.', 'e', '.', 'b', '.', 's', '.'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'story',\n",
       "   'starts',\n",
       "   'with',\n",
       "   'hakimi',\n",
       "   ',',\n",
       "   'a',\n",
       "   'freelance',\n",
       "   'scriptwriter',\n",
       "   'who',\n",
       "   'is',\n",
       "   'on',\n",
       "   'his',\n",
       "   'way',\n",
       "   'to',\n",
       "   'send',\n",
       "   'his',\n",
       "   '7-year-old',\n",
       "   'daughter',\n",
       "   ',',\n",
       "   'imelda',\n",
       "   ',',\n",
       "   'to',\n",
       "   'his',\n",
       "   \"ex-wife's\",\n",
       "   'house',\n",
       "   'on',\n",
       "   'one',\n",
       "   'stormy',\n",
       "   'night',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['they',\n",
       "   'follow',\n",
       "   'leads',\n",
       "   ',',\n",
       "   'informants',\n",
       "   'turn',\n",
       "   'up',\n",
       "   'dead',\n",
       "   ',',\n",
       "   \"nick's\",\n",
       "   'wife',\n",
       "   'is',\n",
       "   'unhappy',\n",
       "   \"he's\",\n",
       "   'back',\n",
       "   'on',\n",
       "   'the',\n",
       "   'street',\n",
       "   ',',\n",
       "   \"henry's\",\n",
       "   'protective',\n",
       "   'of',\n",
       "   'the',\n",
       "   'dead',\n",
       "   \"cop's\",\n",
       "   'wife',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['the',\n",
       "   'second',\n",
       "   'part',\n",
       "   'of',\n",
       "   'aki',\n",
       "   'kaurism&#228',\n",
       "   ';',\n",
       "   \"ki's\",\n",
       "   '\"',\n",
       "   'finland',\n",
       "   '\"',\n",
       "   'trilogy',\n",
       "   ',',\n",
       "   'the',\n",
       "   'film',\n",
       "   'follows',\n",
       "   'a',\n",
       "   'man',\n",
       "   'who',\n",
       "   'arrives',\n",
       "   'in',\n",
       "   'helsinki',\n",
       "   'and',\n",
       "   'gets',\n",
       "   'beaten',\n",
       "   'up',\n",
       "   'so',\n",
       "   'severely',\n",
       "   'he',\n",
       "   'develops',\n",
       "   'amnesia',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['soon',\n",
       "   'after',\n",
       "   'the',\n",
       "   'accident',\n",
       "   ',',\n",
       "   'the',\n",
       "   'survivors',\n",
       "   'of',\n",
       "   'the',\n",
       "   'accident',\n",
       "   'start',\n",
       "   'dropping',\n",
       "   'like',\n",
       "   'flies',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['edgar',\n",
       "   'becomes',\n",
       "   'intent',\n",
       "   'on',\n",
       "   'laying',\n",
       "   'down',\n",
       "   'some',\n",
       "   'new',\n",
       "   'rules',\n",
       "   'and',\n",
       "   'turning',\n",
       "   'his',\n",
       "   'coddled',\n",
       "   'son',\n",
       "   'into',\n",
       "   'someone',\n",
       "   'who',\n",
       "   'can',\n",
       "   'take',\n",
       "   'on',\n",
       "   'the',\n",
       "   'family',\n",
       "   'farm',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['bound',\n",
       "   'by',\n",
       "   'a',\n",
       "   'long',\n",
       "   'red',\n",
       "   'cord',\n",
       "   ',',\n",
       "   'a',\n",
       "   'young',\n",
       "   'couple',\n",
       "   'wanders',\n",
       "   'in',\n",
       "   'search',\n",
       "   'of',\n",
       "   'something',\n",
       "   'they',\n",
       "   'have',\n",
       "   'forgotten',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['sudden',\n",
       "   'fame',\n",
       "   'does',\n",
       "   'not',\n",
       "   'seem',\n",
       "   'to',\n",
       "   'solve',\n",
       "   'everything',\n",
       "   ',',\n",
       "   'however',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['with',\n",
       "   'no',\n",
       "   'option',\n",
       "   ',',\n",
       "   'joe',\n",
       "   'and',\n",
       "   'katsuragi',\n",
       "   'must',\n",
       "   'use',\n",
       "   'their',\n",
       "   'martial',\n",
       "   'arts',\n",
       "   'skills',\n",
       "   'to',\n",
       "   'fight',\n",
       "   'in',\n",
       "   'the',\n",
       "   'muscle',\n",
       "   'dome',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['drawing',\n",
       "   'from',\n",
       "   'his',\n",
       "   'time',\n",
       "   'with',\n",
       "   'the',\n",
       "   'kids',\n",
       "   ',',\n",
       "   'he',\n",
       "   'writes',\n",
       "   'a',\n",
       "   'story',\n",
       "   'about',\n",
       "   'children',\n",
       "   'who',\n",
       "   \"don't\",\n",
       "   'want',\n",
       "   'to',\n",
       "   'grow',\n",
       "   'up',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " ([\"she's\",\n",
       "   'an',\n",
       "   'artist',\n",
       "   ',',\n",
       "   'but',\n",
       "   \"hasn't\",\n",
       "   'picked',\n",
       "   'up',\n",
       "   'a',\n",
       "   'brush',\n",
       "   'in',\n",
       "   'a',\n",
       "   'year',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['when',\n",
       "   'his',\n",
       "   'daughter',\n",
       "   'is',\n",
       "   'kidnapped',\n",
       "   'and',\n",
       "   'held',\n",
       "   'in',\n",
       "   'exchange',\n",
       "   'for',\n",
       "   'priceless',\n",
       "   'diamonds',\n",
       "   ',',\n",
       "   'the',\n",
       "   'leader',\n",
       "   'of',\n",
       "   'a',\n",
       "   'crew',\n",
       "   'of',\n",
       "   'highly',\n",
       "   'skilled',\n",
       "   'urban',\n",
       "   'thieves',\n",
       "   '(',\n",
       "   'dmx',\n",
       "   ')',\n",
       "   'forges',\n",
       "   'an',\n",
       "   'unlikely',\n",
       "   'alliance',\n",
       "   'with',\n",
       "   'a',\n",
       "   'taiwanese',\n",
       "   'intelligence',\n",
       "   'officer',\n",
       "   '(',\n",
       "   'jet',\n",
       "   'li',\n",
       "   ')',\n",
       "   'to',\n",
       "   'rescue',\n",
       "   'her',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['\"',\n",
       "   'garmento',\n",
       "   '\"',\n",
       "   'tells',\n",
       "   'the',\n",
       "   'other',\n",
       "   'side',\n",
       "   'of',\n",
       "   'the',\n",
       "   'story',\n",
       "   ',',\n",
       "   'with',\n",
       "   'a',\n",
       "   'dark',\n",
       "   'and',\n",
       "   'satirical',\n",
       "   'look',\n",
       "   'at',\n",
       "   'new',\n",
       "   \"york's\",\n",
       "   'wholesale',\n",
       "   'garment',\n",
       "   'industry',\n",
       "   ',',\n",
       "   'where',\n",
       "   'shady',\n",
       "   'deals',\n",
       "   'are',\n",
       "   'made',\n",
       "   'for',\n",
       "   'a',\n",
       "   'buck',\n",
       "   'and',\n",
       "   'ruthlessness',\n",
       "   'is',\n",
       "   'a',\n",
       "   'prerequisite',\n",
       "   'for',\n",
       "   'career',\n",
       "   'success',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['rudy',\n",
       "   'yellowshirt',\n",
       "   'is',\n",
       "   'an',\n",
       "   'investigator',\n",
       "   'with',\n",
       "   'the',\n",
       "   'police',\n",
       "   'department',\n",
       "   'and',\n",
       "   'witnesses',\n",
       "   'firsthand',\n",
       "   'the',\n",
       "   'painful',\n",
       "   'legacy',\n",
       "   'of',\n",
       "   'indian',\n",
       "   'existence',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['journeying',\n",
       "   'from',\n",
       "   'the',\n",
       "   'vietnam',\n",
       "   'war',\n",
       "   'to',\n",
       "   'pulaski',\n",
       "   ',',\n",
       "   'tennessee',\n",
       "   'and',\n",
       "   'back',\n",
       "   'to',\n",
       "   'vietnam',\n",
       "   ',',\n",
       "   'daughter',\n",
       "   'from',\n",
       "   'danang',\n",
       "   'tensely',\n",
       "   'unfolds',\n",
       "   'as',\n",
       "   'cultural',\n",
       "   'differences',\n",
       "   'and',\n",
       "   'the',\n",
       "   'years',\n",
       "   'of',\n",
       "   'separation',\n",
       "   'take',\n",
       "   'their',\n",
       "   'toll',\n",
       "   'in',\n",
       "   'a',\n",
       "   'riveting',\n",
       "   'film',\n",
       "   'about',\n",
       "   'longing',\n",
       "   'and',\n",
       "   'the',\n",
       "   'personal',\n",
       "   'legacy',\n",
       "   'of',\n",
       "   'war',\n",
       "   '.'],\n",
       "  'obj'),\n",
       " (['each',\n",
       "   'weekend',\n",
       "   'they',\n",
       "   'come',\n",
       "   'back',\n",
       "   'with',\n",
       "   'nothing',\n",
       "   'but',\n",
       "   'a',\n",
       "   'hangover',\n",
       "   '.'],\n",
       "  'obj')]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['.',\n",
       " 'and',\n",
       " 'just',\n",
       " 'enough',\n",
       " 'science',\n",
       " 'to',\n",
       " 'send',\n",
       " 'you',\n",
       " 'home',\n",
       " 'thinking',\n",
       " '.',\n",
       " 'it',\n",
       " 'is',\n",
       " 'not',\n",
       " 'a_NEG',\n",
       " 'mass-market_NEG',\n",
       " 'entertainment_NEG',\n",
       " 'but_NEG',\n",
       " 'an_NEG',\n",
       " 'uncompromising_NEG']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_words_neg[25:45]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
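    "#### Note how this sentiment analyzer marks every token after a negation word (here, `not`) with the `_NEG` suffix\n",
    "NLTK applies the suffix up to the next clause-ending punctuation mark. Negation marking is one of many signals we can use to determine sentiment."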
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 6: Use `unigram_word_feats` to get unigram features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "83"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the unigram feature vocabulary from the negation-marked word list,\n",
    "# filtered by the `min_freq` frequency threshold, then report its size.\n",
    "unigram_feats = sentim_analyzer.unigram_word_feats(\n",
    "    all_words_neg,\n",
    "    min_freq=4,\n",
    ")\n",
    "len(unigram_feats)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 7: Use `add_feat_extractor` to get a feature-value representation of our data\n",
    "#### Apply to both `training_docs` and `testing_docs` to build `training_set` and `test_set`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop any earlier registration first so that re-running this cell (or STEP 6\n",
    "# with a different threshold) never leaves stale or duplicate extractor\n",
    "# entries on the analyzer. On a fresh kernel the pop is a no-op.\n",
    "sentim_analyzer.feat_extractors.pop(extract_unigram_feats, None)\n",
    "sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[({'contains(.)': True, 'contains(the)': False, 'contains(,)': True, 'contains(a)': True, 'contains(and)': True, 'contains(of)': False, 'contains(to)': False, 'contains(is)': True, 'contains(in)': False, 'contains(with)': False, 'contains(it)': False, 'contains(that)': False, 'contains(his)': False, 'contains(on)': False, 'contains(for)': False, 'contains(an)': False, 'contains(who)': False, 'contains(by)': False, 'contains(he)': False, 'contains(from)': False, 'contains(her)': False, 'contains(\")': False, 'contains(film)': False, 'contains(as)': False, 'contains(this)': False, 'contains(movie)': False, 'contains(their)': False, 'contains(but)': False, 'contains(one)': True, 'contains(at)': False, 'contains(about)': True, 'contains(the_NEG)': False, 'contains(a_NEG)': False, 'contains(to_NEG)': False, 'contains(are)': False, \"contains(there's)\": False, 'contains(()': False, 'contains(story)': False, 'contains(when)': False, 'contains(so)': False, 'contains(be)': False, 'contains(,_NEG)': False, 'contains())': False, 'contains(they)': False, 'contains(you)': False, 'contains(not)': False, 'contains(have)': False, 'contains(like)': False, 'contains(will)': False, 'contains(all)': False, 'contains(into)': False, 'contains(out)': False, 'contains(she)': False, 'contains(what)': False, 'contains(life)': False, 'contains(has)': False, 'contains(its)': False, 'contains(only)': False, 'contains(more)': False, 'contains(even)': False, 'contains(--)': False, 'contains(:)': False, 'contains(can)': False, 'contains(;)': False, 'contains(home)': False, 'contains(look)': False, \"contains(it's)\": False, 'contains(if)': False, 'contains(where)': False, 'contains(most)': False, 'contains(him)': False, 'contains(search)': False, 'contains(but_NEG)': False, 'contains(love)': False, 'contains(both)': False, 'contains(make)': False, 'contains(begins)': False, 'contains(some)': False, 'contains(two)': False, 'contains(of_NEG)': False, 'contains(made)': False, 
'contains(which)': False, 'contains(them)': False}, 'subj')]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert every (tokens, label) training document into a (feature-dict, label) pair\n",
    "training_set = sentim_analyzer.apply_features(training_docs)\n",
    "\n",
    "# Show only the first instance; each feature dict has one entry per unigram feature\n",
    "training_set[:1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[({'contains(.)': True, 'contains(the)': True, 'contains(,)': False, 'contains(a)': True, 'contains(and)': False, 'contains(of)': True, 'contains(to)': False, 'contains(is)': False, 'contains(in)': False, 'contains(with)': True, 'contains(it)': False, 'contains(that)': False, 'contains(his)': False, 'contains(on)': False, 'contains(for)': True, 'contains(an)': False, 'contains(who)': False, 'contains(by)': False, 'contains(he)': False, 'contains(from)': False, 'contains(her)': False, 'contains(\")': False, 'contains(film)': False, 'contains(as)': False, 'contains(this)': False, 'contains(movie)': False, 'contains(their)': False, 'contains(but)': False, 'contains(one)': False, 'contains(at)': False, 'contains(about)': False, 'contains(the_NEG)': False, 'contains(a_NEG)': False, 'contains(to_NEG)': False, 'contains(are)': False, \"contains(there's)\": False, 'contains(()': False, 'contains(story)': False, 'contains(when)': False, 'contains(so)': False, 'contains(be)': False, 'contains(,_NEG)': False, 'contains())': False, 'contains(they)': False, 'contains(you)': False, 'contains(not)': False, 'contains(have)': False, 'contains(like)': False, 'contains(will)': False, 'contains(all)': False, 'contains(into)': False, 'contains(out)': False, 'contains(she)': False, 'contains(what)': False, 'contains(life)': False, 'contains(has)': False, 'contains(its)': False, 'contains(only)': False, 'contains(more)': False, 'contains(even)': False, 'contains(--)': False, 'contains(:)': False, 'contains(can)': False, 'contains(;)': False, 'contains(home)': False, 'contains(look)': False, \"contains(it's)\": False, 'contains(if)': False, 'contains(where)': False, 'contains(most)': False, 'contains(him)': False, 'contains(search)': False, 'contains(but_NEG)': False, 'contains(love)': False, 'contains(both)': False, 'contains(make)': False, 'contains(begins)': False, 'contains(some)': False, 'contains(two)': False, 'contains(of_NEG)': False, 'contains(made)': False, 
'contains(which)': False, 'contains(them)': False}, 'subj')]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same featurization as the training data, applied to the held-out documents\n",
    "test_set = sentim_analyzer.apply_features(testing_docs)\n",
    "\n",
    "# Show only the first instance as a sanity check\n",
    "test_set[:1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### STEP 8: FINAL STEP!! We use Naive Bayes to create a trainer and FINALLY classify our data!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training classifier\n"
     ]
    }
   ],
   "source": [
    "# `trainer` is the training function itself (note: no call parentheses);\n",
    "# it is handed to the analyzer together with the featurized training data.\n",
    "trainer = NaiveBayesClassifier.train\n",
    "classifier = sentim_analyzer.train(trainer, training_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating NaiveBayesClassifier results...\n",
      "Accuracy: 0.8\n",
      "F-measure [obj]: 0.8\n",
      "F-measure [subj]: 0.8\n",
      "Precision [obj]: 0.8\n",
      "Precision [subj]: 0.8\n",
      "Recall [obj]: 0.8\n",
      "Recall [subj]: 0.8\n"
     ]
    }
   ],
   "source": [
    "# Evaluate on the held-out set, then print one 'metric: score' line per entry,\n",
    "# ordered alphabetically by metric name.\n",
    "evaluation = sentim_analyzer.evaluate(test_set)\n",
    "for metric in sorted(evaluation):\n",
    "    print('{0}: {1}'.format(metric, evaluation[metric]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
