{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HW2: VECTORIZATION (Pandas style!)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## STEP 1: Import ALL the things\n", "### Import libraries " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "##########################################\n", "# NOTE: I'm toying with the idea of requiring the library just above \n", "# when I use it so it makes more sense in context\n", "##########################################\n", "# import os\n", "# import pandas as pd\n", "# from nltk.tokenize import word_tokenize, sent_tokenize\n", "# from nltk.sentiment import SentimentAnalyzer\n", "# from nltk.sentiment.util import *\n", "# from nltk.probability import FreqDist\n", "# from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", "# sid = SentimentIntensityAnalyzer()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import data from files" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "def get_data_from_files(path):\n", "    \"\"\"Return the text of every entry in directory `path` as a list of strings.\n", "\n", "    Entries are read in the order reported by os.listdir(path), one string\n", "    per file. `path` may be given with or without a trailing separator.\n", "    \"\"\"\n", "    results = []\n", "    for file in os.listdir(path):\n", "        # os.path.join tolerates a missing trailing slash, unlike path+file;\n", "        # the with-block closes the handle even if read() raises.\n", "        with open(os.path.join(path, file)) as f:\n", "            results.append(f.read())\n", "    return results\n", "\n", "neg = get_data_from_files('../neg_cornell/')\n", "pos = get_data_from_files('../pos_cornell/')\n", "\n", "# neg = get_data_from_files('../neg_hw4/')\n", "# pos = get_data_from_files('../pos_hw4/')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## STEP 2: Prep Data\n", "### STEP 2a: Turn that fresh text into a pandas DF" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "neg_df = pd.DataFrame(neg)\n", "pos_df = pd.DataFrame(pos)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### STEP 2b: Label it" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], 
"source": [ "pos_df['PoN'] = 'P'\n", "neg_df['PoN'] = 'N'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### STEP 2c: Combine the dfs" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames\n", "# and keeps each frame's original row index (0..n-1 for neg, then 0..m-1 for pos).\n", "all_df = pd.concat([neg_df, pos_df])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "PoN | \n", "
---|---|---|
0 | \n", "bad . bad . \\nbad . \\nthat one word seems to p... | \n", "N | \n", "
1 | \n", "isn't it the ultimate sign of a movie's cinema... | \n", "N | \n", "
2 | \n", "\" gordy \" is not a movie , it is a 90-minute-... | \n", "N | \n", "
3 | \n", "disconnect the phone line . \\ndon't accept the... | \n", "N | \n", "
4 | \n", "when robert forster found himself famous again... | \n", "N | \n", "
... | \n", "... | \n", "... | \n", "
995 | \n", "one of the funniest carry on movies and the th... | \n", "P | \n", "
996 | \n", "i remember making a pact , right after `patch ... | \n", "P | \n", "
997 | \n", "barely scrapping by playing at a nyc piano bar... | \n", "P | \n", "
998 | \n", "if the current trends of hollywood filmmaking ... | \n", "P | \n", "
999 | \n", "capsule : the director of cure brings a weird ... | \n", "P | \n", "
2000 rows × 2 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "
---|---|---|---|---|---|---|
0 | \n", "bad . bad . \\nbad . \\nthat one word seems to p... | \n", "N | \n", "[bad ., bad ., bad ., that one word seems to p... | \n", "67 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "1071 | \n", "
1 | \n", "isn't it the ultimate sign of a movie's cinema... | \n", "N | \n", "[isn't it the ultimate sign of a movie's cinem... | \n", "32 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "553 | \n", "
2 | \n", "\" gordy \" is not a movie , it is a 90-minute-... | \n", "N | \n", "[ \" gordy \" is not a movie , it is a 90-minute... | \n", "23 | \n", "[gordy, is, not, a, movie, it, is, a, sesame, ... | \n", "478 | \n", "
3 | \n", "disconnect the phone line . \\ndon't accept the... | \n", "N | \n", "[disconnect the phone line ., don't accept the... | \n", "37 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "604 | \n", "
4 | \n", "when robert forster found himself famous again... | \n", "N | \n", "[when robert forster found himself famous agai... | \n", "29 | \n", "[when, robert, forster, found, himself, famous... | \n", "386 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "one of the funniest carry on movies and the th... | \n", "P | \n", "[one of the funniest carry on movies and the t... | \n", "25 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "434 | \n", "
996 | \n", "i remember making a pact , right after `patch ... | \n", "P | \n", "[i remember making a pact , right after `patch... | \n", "40 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "652 | \n", "
997 | \n", "barely scrapping by playing at a nyc piano bar... | \n", "P | \n", "[barely scrapping by playing at a nyc piano ba... | \n", "23 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "345 | \n", "
998 | \n", "if the current trends of hollywood filmmaking ... | \n", "P | \n", "[if the current trends of hollywood filmmaking... | \n", "34 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "730 | \n", "
999 | \n", "capsule : the director of cure brings a weird ... | \n", "P | \n", "[capsule : the director of cure brings a weird... | \n", "45 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "641 | \n", "
2000 rows × 6 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "bad . bad . \\nbad . \\nthat one word seems to p... | \n", "N | \n", "[bad ., bad ., bad ., that one word seems to p... | \n", "67 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "1071 | \n", "[bad, bad, bad, one, word, seems, pretty, much... | \n", "515 | \n", "
1 | \n", "isn't it the ultimate sign of a movie's cinema... | \n", "N | \n", "[isn't it the ultimate sign of a movie's cinem... | \n", "32 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "553 | \n", "[ultimate, sign, movie, cinematic, ineptitude,... | \n", "297 | \n", "
2 | \n", "\" gordy \" is not a movie , it is a 90-minute-... | \n", "N | \n", "[ \" gordy \" is not a movie , it is a 90-minute... | \n", "23 | \n", "[gordy, is, not, a, movie, it, is, a, sesame, ... | \n", "478 | \n", "[gordy, movie, sesame, street, skit, bad, one,... | \n", "239 | \n", "
3 | \n", "disconnect the phone line . \\ndon't accept the... | \n", "N | \n", "[disconnect the phone line ., don't accept the... | \n", "37 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "604 | \n", "[disconnect, phone, line, accept, charges, any... | \n", "323 | \n", "
4 | \n", "when robert forster found himself famous again... | \n", "N | \n", "[when robert forster found himself famous agai... | \n", "29 | \n", "[when, robert, forster, found, himself, famous... | \n", "386 | \n", "[robert, forster, found, famous, appearing, ja... | \n", "185 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "one of the funniest carry on movies and the th... | \n", "P | \n", "[one of the funniest carry on movies and the t... | \n", "25 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "434 | \n", "[one, funniest, carry, movies, third, medical,... | \n", "241 | \n", "
996 | \n", "i remember making a pact , right after `patch ... | \n", "P | \n", "[i remember making a pact , right after `patch... | \n", "40 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "652 | \n", "[remember, making, pact, right, patch, adams, ... | \n", "361 | \n", "
997 | \n", "barely scrapping by playing at a nyc piano bar... | \n", "P | \n", "[barely scrapping by playing at a nyc piano ba... | \n", "23 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "345 | \n", "[barely, scrapping, playing, nyc, piano, bar, ... | \n", "177 | \n", "
998 | \n", "if the current trends of hollywood filmmaking ... | \n", "P | \n", "[if the current trends of hollywood filmmaking... | \n", "34 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "730 | \n", "[current, trends, hollywood, filmmaking, conti... | \n", "428 | \n", "
999 | \n", "capsule : the director of cure brings a weird ... | \n", "P | \n", "[capsule : the director of cure brings a weird... | \n", "45 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "641 | \n", "[capsule, director, cure, brings, weird, compl... | \n", "340 | \n", "
2000 rows × 8 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "topwords_unfil | \n", "topwords_fil | \n", "freq_dist | \n", "freq_dist_unfil | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "bad . bad . \\nbad . \\nthat one word seems to p... | \n", "N | \n", "[bad ., bad ., bad ., that one word seems to p... | \n", "67 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "1071 | \n", "[bad, bad, bad, one, word, seems, pretty, much... | \n", "515 | \n", "[(the, 60), (a, 35), (to, 34), (of, 24), (this... | \n", "[(movie, 17), (bad, 8), (one, 7), (meyer, 6), ... | \n", "{'bad': 8, 'one': 7, 'word': 1, 'seems': 1, 'p... | \n", "{'bad': 8, 'that': 19, 'one': 7, 'word': 1, 's... | \n", "
1 | \n", "isn't it the ultimate sign of a movie's cinema... | \n", "N | \n", "[isn't it the ultimate sign of a movie's cinem... | \n", "32 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "553 | \n", "[ultimate, sign, movie, cinematic, ineptitude,... | \n", "297 | \n", "[(the, 28), (a, 18), (of, 16), (to, 14), (i, 1... | \n", "[(movie, 7), (one, 6), (first, 5), (much, 4), ... | \n", "{'ultimate': 1, 'sign': 1, 'movie': 7, 'cinema... | \n", "{'is': 11, 'it': 11, 'the': 28, 'ultimate': 1,... | \n", "
2 | \n", "\" gordy \" is not a movie , it is a 90-minute-... | \n", "N | \n", "[ \" gordy \" is not a movie , it is a 90-minute... | \n", "23 | \n", "[gordy, is, not, a, movie, it, is, a, sesame, ... | \n", "478 | \n", "[gordy, movie, sesame, street, skit, bad, one,... | \n", "239 | \n", "[(the, 25), (and, 21), (to, 18), (is, 17), (a,... | \n", "[(gordy, 8), (movie, 5), (one, 4), (stupid, 4)... | \n", "{'gordy': 8, 'movie': 5, 'sesame': 1, 'street'... | \n", "{'gordy': 8, 'is': 17, 'not': 3, 'a': 17, 'mov... | \n", "
3 | \n", "disconnect the phone line . \\ndon't accept the... | \n", "N | \n", "[disconnect the phone line ., don't accept the... | \n", "37 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "604 | \n", "[disconnect, phone, line, accept, charges, any... | \n", "323 | \n", "[(the, 41), (of, 17), (a, 17), (to, 16), (and,... | \n", "[(hanging, 9), (sisters, 5), (ryan, 4), (time,... | \n", "{'disconnect': 1, 'phone': 2, 'line': 1, 'acce... | \n", "{'disconnect': 1, 'the': 41, 'phone': 2, 'line... | \n", "
4 | \n", "when robert forster found himself famous again... | \n", "N | \n", "[when robert forster found himself famous agai... | \n", "29 | \n", "[when, robert, forster, found, himself, famous... | \n", "386 | \n", "[robert, forster, found, famous, appearing, ja... | \n", "185 | \n", "[(the, 21), (it, 11), (i, 10), (to, 10), (of, ... | \n", "[(film, 5), (movie, 5), (american, 4), (perfek... | \n", "{'robert': 2, 'forster': 3, 'found': 1, 'famou... | \n", "{'when': 2, 'robert': 2, 'forster': 3, 'found'... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "one of the funniest carry on movies and the th... | \n", "P | \n", "[one of the funniest carry on movies and the t... | \n", "25 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "434 | \n", "[one, funniest, carry, movies, third, medical,... | \n", "241 | \n", "[(the, 26), (and, 21), (of, 11), (a, 10), (is,... | \n", "[(nookey, 9), (hawtrey, 5), (carry, 4), (dr, 4... | \n", "{'one': 1, 'funniest': 1, 'carry': 4, 'movies'... | \n", "{'one': 1, 'of': 11, 'the': 26, 'funniest': 1,... | \n", "
996 | \n", "i remember making a pact , right after `patch ... | \n", "P | \n", "[i remember making a pact , right after `patch... | \n", "40 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "652 | \n", "[remember, making, pact, right, patch, adams, ... | \n", "361 | \n", "[(the, 44), (of, 29), (and, 19), (a, 15), (it,... | \n", "[(music, 8), (heart, 7), (craven, 6), (movie, ... | \n", "{'remember': 1, 'making': 1, 'pact': 1, 'right... | \n", "{'i': 1, 'remember': 1, 'making': 1, 'a': 15, ... | \n", "
997 | \n", "barely scrapping by playing at a nyc piano bar... | \n", "P | \n", "[barely scrapping by playing at a nyc piano ba... | \n", "23 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "345 | \n", "[barely, scrapping, playing, nyc, piano, bar, ... | \n", "177 | \n", "[(a, 23), (is, 16), (the, 13), (and, 10), (of,... | \n", "[(like, 4), (hutton, 3), (old, 3), (high, 2), ... | \n", "{'barely': 1, 'scrapping': 1, 'playing': 1, 'n... | \n", "{'barely': 1, 'scrapping': 1, 'by': 2, 'playin... | \n", "
998 | \n", "if the current trends of hollywood filmmaking ... | \n", "P | \n", "[if the current trends of hollywood filmmaking... | \n", "34 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "730 | \n", "[current, trends, hollywood, filmmaking, conti... | \n", "428 | \n", "[(the, 49), (of, 31), (and, 19), (in, 18), (to... | \n", "[(one, 7), (like, 5), (l, 5), (hollywood, 4), ... | \n", "{'current': 1, 'trends': 1, 'hollywood': 4, 'f... | \n", "{'if': 1, 'the': 49, 'current': 1, 'trends': 1... | \n", "
999 | \n", "capsule : the director of cure brings a weird ... | \n", "P | \n", "[capsule : the director of cure brings a weird... | \n", "45 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "641 | \n", "[capsule, director, cure, brings, weird, compl... | \n", "340 | \n", "[(the, 33), (to, 28), (and, 21), (a, 18), (of,... | \n", "[(computer, 11), (kurosawa, 8), (one, 5), (see... | \n", "{'capsule': 1, 'director': 1, 'cure': 3, 'brin... | \n", "{'capsule': 1, 'the': 33, 'director': 1, 'of':... | \n", "
2000 rows × 12 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "topwords_unfil | \n", "topwords_fil | \n", "... | \n", "v_pos_fd | \n", "v_freq_words_unfil | \n", "vader_fd_all_unfil | \n", "v_compound_fd_uf | \n", "v_neg_fd_uf | \n", "v_neu_fd_uf | \n", "v_pos_fd_uf | \n", "nltk_negs | \n", "unigram_feats | \n", "nltk_all | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "bad . bad . \\nbad . \\nthat one word seems to p... | \n", "N | \n", "[bad ., bad ., bad ., that one word seems to p... | \n", "67 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "1071 | \n", "[bad, bad, bad, one, word, seems, pretty, much... | \n", "515 | \n", "[(the, 60), (a, 35), (to, 34), (of, 24), (this... | \n", "[(movie, 17), (bad, 8), (one, 7), (meyer, 6), ... | \n", "... | \n", "0.219 | \n", "the a to of this that i in is movie it and you... | \n", "{'neg': 0.046, 'neu': 0.954, 'pos': 0.0, 'comp... | \n", "-0.3071 | \n", "0.046 | \n", "0.954 | \n", "0.000 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "[the_NEG, to_NEG, a_NEG, of_NEG, this_NEG, i_N... | \n", "0 | \n", "
1 | \n", "isn't it the ultimate sign of a movie's cinema... | \n", "N | \n", "[isn't it the ultimate sign of a movie's cinem... | \n", "32 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "553 | \n", "[ultimate, sign, movie, cinematic, ineptitude,... | \n", "297 | \n", "[(the, 28), (a, 18), (of, 16), (to, 14), (i, 1... | \n", "[(movie, 7), (one, 6), (first, 5), (much, 4), ... | \n", "... | \n", "0.173 | \n", "the a of to i is it and movie this in some one... | \n", "{'neg': 0.1, 'neu': 0.9, 'pos': 0.0, 'compound... | \n", "-0.6262 | \n", "0.100 | \n", "0.900 | \n", "0.000 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "[the_NEG, a_NEG, of_NEG, i_NEG, to_NEG, is_NEG... | \n", "0 | \n", "
2 | \n", "\" gordy \" is not a movie , it is a 90-minute-... | \n", "N | \n", "[ \" gordy \" is not a movie , it is a 90-minute... | \n", "23 | \n", "[gordy, is, not, a, movie, it, is, a, sesame, ... | \n", "478 | \n", "[gordy, movie, sesame, street, skit, bad, one,... | \n", "239 | \n", "[(the, 25), (and, 21), (to, 18), (is, 17), (a,... | \n", "[(gordy, 8), (movie, 5), (one, 4), (stupid, 4)... | \n", "... | \n", "0.103 | \n", "the and to is a it of this gordy that but on m... | \n", "{'neg': 0.231, 'neu': 0.769, 'pos': 0.0, 'comp... | \n", "-0.9413 | \n", "0.231 | \n", "0.769 | \n", "0.000 | \n", "[gordy, is, not, a_NEG, movie_NEG, it_NEG, is_... | \n", "[the_NEG, and_NEG, to_NEG, a_NEG, is_NEG, it_N... | \n", "0 | \n", "
3 | \n", "disconnect the phone line . \\ndon't accept the... | \n", "N | \n", "[disconnect the phone line ., don't accept the... | \n", "37 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "604 | \n", "[disconnect, phone, line, accept, charges, any... | \n", "323 | \n", "[(the, 41), (of, 17), (a, 17), (to, 16), (and,... | \n", "[(hanging, 9), (sisters, 5), (ryan, 4), (time,... | \n", "... | \n", "0.248 | \n", "the of a to and is up hanging in as for an tha... | \n", "{'neg': 0.0, 'neu': 0.869, 'pos': 0.131, 'comp... | \n", "0.7876 | \n", "0.000 | \n", "0.869 | \n", "0.131 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "[the, the_NEG, a_NEG, is_NEG, and, of_NEG, to,... | \n", "0 | \n", "
4 | \n", "when robert forster found himself famous again... | \n", "N | \n", "[when robert forster found himself famous agai... | \n", "29 | \n", "[when, robert, forster, found, himself, famous... | \n", "386 | \n", "[robert, forster, found, famous, appearing, ja... | \n", "185 | \n", "[(the, 21), (it, 11), (i, 10), (to, 10), (of, ... | \n", "[(film, 5), (movie, 5), (american, 4), (perfek... | \n", "... | \n", "0.000 | \n", "the it i to of and a was is you for film this ... | \n", "{'neg': 0.056, 'neu': 0.944, 'pos': 0.0, 'comp... | \n", "-0.4215 | \n", "0.056 | \n", "0.944 | \n", "0.000 | \n", "[when, robert, forster, found, himself, famous... | \n", "[the_NEG, it_NEG, of_NEG, and_NEG, i_NEG, to_N... | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "one of the funniest carry on movies and the th... | \n", "P | \n", "[one of the funniest carry on movies and the t... | \n", "25 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "434 | \n", "[one, funniest, carry, movies, third, medical,... | \n", "241 | \n", "[(the, 26), (and, 21), (of, 11), (a, 10), (is,... | \n", "[(nookey, 9), (hawtrey, 5), (carry, 4), (dr, 4... | \n", "... | \n", "0.266 | \n", "the and of a is to on nookey as in who from hi... | \n", "{'neg': 0.041, 'neu': 0.862, 'pos': 0.097, 'co... | \n", "0.4576 | \n", "0.041 | \n", "0.862 | \n", "0.097 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "[the, and, the_NEG, to, nookey, and_NEG, of, a... | \n", "0 | \n", "
996 | \n", "i remember making a pact , right after `patch ... | \n", "P | \n", "[i remember making a pact , right after `patch... | \n", "40 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "652 | \n", "[remember, making, pact, right, patch, adams, ... | \n", "361 | \n", "[(the, 44), (of, 29), (and, 19), (a, 15), (it,... | \n", "[(music, 8), (heart, 7), (craven, 6), (movie, ... | \n", "... | \n", "0.236 | \n", "the of and a it to is with in but her music he... | \n", "{'neg': 0.0, 'neu': 0.866, 'pos': 0.134, 'comp... | \n", "0.8047 | \n", "0.000 | \n", "0.866 | \n", "0.134 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "[the_NEG, of_NEG, and_NEG, it_NEG, a_NEG, is_N... | \n", "0 | \n", "
997 | \n", "barely scrapping by playing at a nyc piano bar... | \n", "P | \n", "[barely scrapping by playing at a nyc piano ba... | \n", "23 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "345 | \n", "[barely, scrapping, playing, nyc, piano, bar, ... | \n", "177 | \n", "[(a, 23), (is, 16), (the, 13), (and, 10), (of,... | \n", "[(like, 4), (hutton, 3), (old, 3), (high, 2), ... | \n", "... | \n", "0.196 | \n", "a is the and of with his for in to like she it... | \n", "{'neg': 0.056, 'neu': 0.783, 'pos': 0.162, 'co... | \n", "0.7273 | \n", "0.056 | \n", "0.783 | \n", "0.162 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "[a_NEG, is_NEG, a, the, with_NEG, the_NEG, for... | \n", "0 | \n", "
998 | \n", "if the current trends of hollywood filmmaking ... | \n", "P | \n", "[if the current trends of hollywood filmmaking... | \n", "34 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "730 | \n", "[current, trends, hollywood, filmmaking, conti... | \n", "428 | \n", "[(the, 49), (of, 31), (and, 19), (in, 18), (to... | \n", "[(one, 7), (like, 5), (l, 5), (hollywood, 4), ... | \n", "... | \n", "0.166 | \n", "the of and in to that a is his by one as for l... | \n", "{'neg': 0.0, 'neu': 0.859, 'pos': 0.141, 'comp... | \n", "0.7506 | \n", "0.000 | \n", "0.859 | \n", "0.141 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "[the, the_NEG, of_NEG, of, and_NEG, to, in_NEG... | \n", "0 | \n", "
999 | \n", "capsule : the director of cure brings a weird ... | \n", "P | \n", "[capsule : the director of cure brings a weird... | \n", "45 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "641 | \n", "[capsule, director, cure, brings, weird, compl... | \n", "340 | \n", "[(the, 33), (to, 28), (and, 21), (a, 18), (of,... | \n", "[(computer, 11), (kurosawa, 8), (one, 5), (see... | \n", "... | \n", "0.136 | \n", "the to and a of is his computer are with on no... | \n", "{'neg': 0.082, 'neu': 0.828, 'pos': 0.09, 'com... | \n", "0.3497 | \n", "0.082 | \n", "0.828 | \n", "0.090 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "[the_NEG, to_NEG, and_NEG, a_NEG, of_NEG, is_N... | \n", "0 | \n", "
2000 rows × 40 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "topwords_unfil | \n", "topwords_fil | \n", "... | \n", "v_freq_words_unfil | \n", "vader_fd_all_unfil | \n", "v_compound_fd_uf | \n", "v_neg_fd_uf | \n", "v_neu_fd_uf | \n", "v_pos_fd_uf | \n", "nltk_negs | \n", "unigram_feats | \n", "nltk_all | \n", "bow_nosw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "bad . bad . \\nbad . \\nthat one word seems to p... | \n", "N | \n", "[bad ., bad ., bad ., that one word seems to p... | \n", "67 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "1071 | \n", "[bad, bad, bad, one, word, seems, pretty, much... | \n", "515 | \n", "[(the, 60), (a, 35), (to, 34), (of, 24), (this... | \n", "[(movie, 17), (bad, 8), (one, 7), (meyer, 6), ... | \n", "... | \n", "the a to of this that i in is movie it and you... | \n", "{'neg': 0.046, 'neu': 0.954, 'pos': 0.0, 'comp... | \n", "-0.3071 | \n", "0.046 | \n", "0.954 | \n", "0.000 | \n", "[bad, bad, bad, that, one, word, seems, to, pr... | \n", "[the_NEG, to_NEG, a_NEG, of_NEG, this_NEG, i_N... | \n", "0 | \n", "{'bad': 8, '.': 62, 'that': 19, 'one': 7, 'wor... | \n", "
1 | \n", "isn't it the ultimate sign of a movie's cinema... | \n", "N | \n", "[isn't it the ultimate sign of a movie's cinem... | \n", "32 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "553 | \n", "[ultimate, sign, movie, cinematic, ineptitude,... | \n", "297 | \n", "[(the, 28), (a, 18), (of, 16), (to, 14), (i, 1... | \n", "[(movie, 7), (one, 6), (first, 5), (much, 4), ... | \n", "... | \n", "the a of to i is it and movie this in some one... | \n", "{'neg': 0.1, 'neu': 0.9, 'pos': 0.0, 'compound... | \n", "-0.6262 | \n", "0.100 | \n", "0.900 | \n", "0.000 | \n", "[is, it, the, ultimate, sign, of, a, movie, ci... | \n", "[the_NEG, a_NEG, of_NEG, i_NEG, to_NEG, is_NEG... | \n", "0 | \n", "{'isn't': 2, 'it': 9, 'the': 28, 'ultimate': 1... | \n", "
2 | \n", "\" gordy \" is not a movie , it is a 90-minute-... | \n", "N | \n", "[ \" gordy \" is not a movie , it is a 90-minute... | \n", "23 | \n", "[gordy, is, not, a, movie, it, is, a, sesame, ... | \n", "478 | \n", "[gordy, movie, sesame, street, skit, bad, one,... | \n", "239 | \n", "[(the, 25), (and, 21), (to, 18), (is, 17), (a,... | \n", "[(gordy, 8), (movie, 5), (one, 4), (stupid, 4)... | \n", "... | \n", "the and to is a it of this gordy that but on m... | \n", "{'neg': 0.231, 'neu': 0.769, 'pos': 0.0, 'comp... | \n", "-0.9413 | \n", "0.231 | \n", "0.769 | \n", "0.000 | \n", "[gordy, is, not, a_NEG, movie_NEG, it_NEG, is_... | \n", "[the_NEG, and_NEG, to_NEG, a_NEG, is_NEG, it_N... | \n", "0 | \n", "{'\"': 12, 'gordy': 8, 'is': 16, 'not': 3, 'a':... | \n", "
3 | \n", "disconnect the phone line . \\ndon't accept the... | \n", "N | \n", "[disconnect the phone line ., don't accept the... | \n", "37 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "604 | \n", "[disconnect, phone, line, accept, charges, any... | \n", "323 | \n", "[(the, 41), (of, 17), (a, 17), (to, 16), (and,... | \n", "[(hanging, 9), (sisters, 5), (ryan, 4), (time,... | \n", "... | \n", "the of a to and is up hanging in as for an tha... | \n", "{'neg': 0.0, 'neu': 0.869, 'pos': 0.131, 'comp... | \n", "0.7876 | \n", "0.000 | \n", "0.869 | \n", "0.131 | \n", "[disconnect, the, phone, line, do, accept, the... | \n", "[the, the_NEG, a_NEG, is_NEG, and, of_NEG, to,... | \n", "0 | \n", "{'disconnect': 1, 'the': 41, 'phone': 2, 'line... | \n", "
4 | \n", "when robert forster found himself famous again... | \n", "N | \n", "[when robert forster found himself famous agai... | \n", "29 | \n", "[when, robert, forster, found, himself, famous... | \n", "386 | \n", "[robert, forster, found, famous, appearing, ja... | \n", "185 | \n", "[(the, 21), (it, 11), (i, 10), (to, 10), (of, ... | \n", "[(film, 5), (movie, 5), (american, 4), (perfek... | \n", "... | \n", "the it i to of and a was is you for film this ... | \n", "{'neg': 0.056, 'neu': 0.944, 'pos': 0.0, 'comp... | \n", "-0.4215 | \n", "0.056 | \n", "0.944 | \n", "0.000 | \n", "[when, robert, forster, found, himself, famous... | \n", "[the_NEG, it_NEG, of_NEG, and_NEG, i_NEG, to_N... | \n", "0 | \n", "{'when': 2, 'robert': 2, 'forster': 3, 'found'... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "one of the funniest carry on movies and the th... | \n", "P | \n", "[one of the funniest carry on movies and the t... | \n", "25 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "434 | \n", "[one, funniest, carry, movies, third, medical,... | \n", "241 | \n", "[(the, 26), (and, 21), (of, 11), (a, 10), (is,... | \n", "[(nookey, 9), (hawtrey, 5), (carry, 4), (dr, 4... | \n", "... | \n", "the and of a is to on nookey as in who from hi... | \n", "{'neg': 0.041, 'neu': 0.862, 'pos': 0.097, 'co... | \n", "0.4576 | \n", "0.041 | \n", "0.862 | \n", "0.097 | \n", "[one, of, the, funniest, carry, on, movies, an... | \n", "[the, and, the_NEG, to, nookey, and_NEG, of, a... | \n", "0 | \n", "{'one': 1, 'of': 11, 'the': 26, 'funniest': 1,... | \n", "
996 | \n", "i remember making a pact , right after `patch ... | \n", "P | \n", "[i remember making a pact , right after `patch... | \n", "40 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "652 | \n", "[remember, making, pact, right, patch, adams, ... | \n", "361 | \n", "[(the, 44), (of, 29), (and, 19), (a, 15), (it,... | \n", "[(music, 8), (heart, 7), (craven, 6), (movie, ... | \n", "... | \n", "the of and a it to is with in but her music he... | \n", "{'neg': 0.0, 'neu': 0.866, 'pos': 0.134, 'comp... | \n", "0.8047 | \n", "0.000 | \n", "0.866 | \n", "0.134 | \n", "[i, remember, making, a, pact, right, after, p... | \n", "[the_NEG, of_NEG, and_NEG, it_NEG, a_NEG, is_N... | \n", "0 | \n", "{'i': 1, 'remember': 1, 'making': 1, 'a': 15, ... | \n", "
997 | \n", "barely scrapping by playing at a nyc piano bar... | \n", "P | \n", "[barely scrapping by playing at a nyc piano ba... | \n", "23 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "345 | \n", "[barely, scrapping, playing, nyc, piano, bar, ... | \n", "177 | \n", "[(a, 23), (is, 16), (the, 13), (and, 10), (of,... | \n", "[(like, 4), (hutton, 3), (old, 3), (high, 2), ... | \n", "... | \n", "a is the and of with his for in to like she it... | \n", "{'neg': 0.056, 'neu': 0.783, 'pos': 0.162, 'co... | \n", "0.7273 | \n", "0.056 | \n", "0.783 | \n", "0.162 | \n", "[barely, scrapping, by, playing, at, a, nyc, p... | \n", "[a_NEG, is_NEG, a, the, with_NEG, the_NEG, for... | \n", "0 | \n", "{'barely': 1, 'scrapping': 1, 'by': 2, 'playin... | \n", "
998 | \n", "if the current trends of hollywood filmmaking ... | \n", "P | \n", "[if the current trends of hollywood filmmaking... | \n", "34 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "730 | \n", "[current, trends, hollywood, filmmaking, conti... | \n", "428 | \n", "[(the, 49), (of, 31), (and, 19), (in, 18), (to... | \n", "[(one, 7), (like, 5), (l, 5), (hollywood, 4), ... | \n", "... | \n", "the of and in to that a is his by one as for l... | \n", "{'neg': 0.0, 'neu': 0.859, 'pos': 0.141, 'comp... | \n", "0.7506 | \n", "0.000 | \n", "0.859 | \n", "0.141 | \n", "[if, the, current, trends, of, hollywood, film... | \n", "[the, the_NEG, of_NEG, of, and_NEG, to, in_NEG... | \n", "0 | \n", "{'if': 1, 'the': 49, 'current': 1, 'trends': 1... | \n", "
999 | \n", "capsule : the director of cure brings a weird ... | \n", "P | \n", "[capsule : the director of cure brings a weird... | \n", "45 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "641 | \n", "[capsule, director, cure, brings, weird, compl... | \n", "340 | \n", "[(the, 33), (to, 28), (and, 21), (a, 18), (of,... | \n", "[(computer, 11), (kurosawa, 8), (one, 5), (see... | \n", "... | \n", "the to and a of is his computer are with on no... | \n", "{'neg': 0.082, 'neu': 0.828, 'pos': 0.09, 'com... | \n", "0.3497 | \n", "0.082 | \n", "0.828 | \n", "0.090 | \n", "[capsule, the, director, of, cure, brings, a, ... | \n", "[the_NEG, to_NEG, and_NEG, a_NEG, of_NEG, is_N... | \n", "0 | \n", "{'capsule': 1, ':': 1, 'the': 33, 'director': ... | \n", "
2000 rows × 41 columns
\n", "