{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HW4 [Deception] "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## STEP 1: GET THAT DATA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "\n",
    "def get_data(file, path):\n",
    "    \"\"\"Return the full text of the file named `file` inside directory `path`.\"\"\"\n",
    "    # `with` closes the handle even if read() raises; os.path.join also\n",
    "    # works when `path` has no trailing separator.\n",
    "    with open(os.path.join(path, file)) as f:\n",
    "        return f.read()\n",
    "\n",
    "\n",
    "def get_data_from_files(path):\n",
    "    \"\"\"Return the contents of every regular file in `path`, one string per file.\n",
    "\n",
    "    File names are sorted because os.listdir returns them in arbitrary\n",
    "    order, which would make row order differ between machines and runs.\n",
    "    Sub-directories are skipped because they cannot be opened as files.\n",
    "    \"\"\"\n",
    "    return [\n",
    "        get_data(file, path)\n",
    "        for file in sorted(os.listdir(path))\n",
    "        if os.path.isfile(os.path.join(path, file))\n",
    "    ]\n",
    "\n",
    "\n",
    "# Alternative input directories kept for reference:\n",
    "# pos = get_data_from_files('../pos_cornell//')\n",
    "# neg = get_data_from_files('../neg_cornell/')\n",
    "\n",
    "pos = get_data_from_files('../hw4_lie_false/')\n",
    "neg = get_data_from_files('../hw4_lie_true/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | 0 | \n", "PoN | \n", "
---|---|---|
0 | \n", "? | \n", "N | \n", "
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "
3 | \n", "? | \n", "N | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "
---|---|---|---|---|---|---|
0 | \n", "? | \n", "N | \n", "[?] | \n", "1 | \n", "[] | \n", "0 | \n", "
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "
---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "
---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "[the, worst, restaur, that, i, have, ever, eat... | \n", "[worst, restaur, ever, eaten, undoubtedli, pla... | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "[i, have, been, to, a, asian, restaur, in, new... | \n", "[asian, restaur, new, york, citi, menu, writte... | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "lemmed | \n", "lemmed_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "[the, worst, restaur, that, i, have, ever, eat... | \n", "[worst, restaur, ever, eaten, undoubtedli, pla... | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "[i, have, been, to, a, asian, restaur, in, new... | \n", "[asian, restaur, new, york, citi, menu, writte... | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "lemmed | \n", "lemmed_no_sw | \n", "pos | \n", "pos_no_sw | \n", "pos_dict | \n", "pos_dict_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "[(twin, NN), (trees, NNS), (cicero, VBP), (ny,... | \n", "[(twin, NN), (trees, NNS), (cicero, VBP), (ny,... | \n", "{'NN': 11, 'NNS': 3, 'VBP': 3, 'JJ': 9, 'CC': ... | \n", "{'NN': 7, 'NNS': 5, 'VBP': 3, 'JJ': 10, 'RB': ... | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "[the, worst, restaur, that, i, have, ever, eat... | \n", "[worst, restaur, ever, eaten, undoubtedli, pla... | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "[(the, DT), (worst, JJS), (restaurant, NN), (t... | \n", "[(worst, RBS), (restaurant, NN), (ever, RB), (... | \n", "{'DT': 14, 'JJS': 1, 'NN': 29, 'IN': 8, 'VBP':... | \n", "{'RBS': 1, 'NN': 24, 'RB': 5, 'JJ': 9, 'VBN': ... | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "[i, have, been, to, a, asian, restaur, in, new... | \n", "[asian, restaur, new, york, citi, menu, writte... | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "[(i, NNS), (have, VBP), (been, VBN), (to, TO),... | \n", "[(asian, JJ), (restaurant, NN), (new, JJ), (yo... | \n", "{'NNS': 2, 'VBP': 2, 'VBN': 5, 'TO': 1, 'DT': ... | \n", "{'JJ': 5, 'NN': 11, 'VBN': 2, 'VBP': 1, 'NNS':... | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "lemmed | \n", "lemmed_no_sw | \n", "pos | \n", "pos_no_sw | \n", "pos_dict | \n", "pos_dict_no_sw | \n", "bow | \n", "bow_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "[(twin, NN), (trees, NNS), (cicero, VBP), (ny,... | \n", "[(twin, NN), (trees, NNS), (cicero, VBP), (ny,... | \n", "{'NN': 11, 'NNS': 3, 'VBP': 3, 'JJ': 9, 'CC': ... | \n", "{'NN': 7, 'NNS': 5, 'VBP': 3, 'JJ': 10, 'RB': ... | \n", "{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ... | \n", "{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ... | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "[the, worst, restaur, that, i, have, ever, eat... | \n", "[worst, restaur, ever, eaten, undoubtedli, pla... | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "[(the, DT), (worst, JJS), (restaurant, NN), (t... | \n", "[(worst, RBS), (restaurant, NN), (ever, RB), (... | \n", "{'DT': 14, 'JJS': 1, 'NN': 29, 'IN': 8, 'VBP':... | \n", "{'RBS': 1, 'NN': 24, 'RB': 5, 'JJ': 9, 'VBN': ... | \n", "{'the': 6, 'worst': 1, 'restaurant': 1, 'that'... | \n", "{'worst': 1, 'restaurant': 1, 'ever': 1, 'eate... | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "[i, have, been, to, a, asian, restaur, in, new... | \n", "[asian, restaur, new, york, citi, menu, writte... | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "[(i, NNS), (have, VBP), (been, VBN), (to, TO),... | \n", "[(asian, JJ), (restaurant, NN), (new, JJ), (yo... | \n", "{'NNS': 2, 'VBP': 2, 'VBN': 5, 'TO': 1, 'DT': ... | \n", "{'JJ': 5, 'NN': 11, 'VBN': 2, 'VBP': 1, 'NNS':... | \n", "{'i': 3, 'have': 1, 'been': 1, 'to': 1, 'a': 3... | \n", "{'asian': 1, 'restaurant': 1, 'new': 1, 'york'... | \n", "
\n", " | NN | \n", "NNS | \n", "VBP | \n", "JJ | \n", "CC | \n", "VBZ | \n", "DT | \n", "RB | \n", "VB | \n", "TO | \n", "... | \n", "VBG | \n", "EX | \n", "JJR | \n", "PDT | \n", "RP | \n", "WP | \n", "CD | \n", "RBR | \n", "MD | \n", "RBS | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "11.0 | \n", "3.0 | \n", "3.0 | \n", "9.0 | \n", "3.0 | \n", "2.0 | \n", "4.0 | \n", "4.0 | \n", "4.0 | \n", "3.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
N | \n", "29.0 | \n", "1.0 | \n", "1.0 | \n", "7.0 | \n", "5.0 | \n", "1.0 | \n", "14.0 | \n", "8.0 | \n", "4.0 | \n", "4.0 | \n", "... | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
N | \n", "13.0 | \n", "2.0 | \n", "2.0 | \n", "5.0 | \n", "1.0 | \n", "2.0 | \n", "5.0 | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 rows × 28 columns
\n", "\n", " | NN | \n", "NNS | \n", "VBP | \n", "JJ | \n", "CC | \n", "VBZ | \n", "DT | \n", "RB | \n", "VB | \n", "TO | \n", "... | \n", "VBG | \n", "EX | \n", "JJR | \n", "PDT | \n", "RP | \n", "WP | \n", "CD | \n", "RBR | \n", "MD | \n", "RBS | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "11 | \n", "3 | \n", "3 | \n", "9 | \n", "3 | \n", "2 | \n", "4 | \n", "4 | \n", "4 | \n", "3 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "29 | \n", "1 | \n", "1 | \n", "7 | \n", "5 | \n", "1 | \n", "14 | \n", "8 | \n", "4 | \n", "4 | \n", "... | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "13 | \n", "2 | \n", "2 | \n", "5 | \n", "1 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 rows × 28 columns
\n", "\n", " | NN | \n", "NNS | \n", "VBP | \n", "JJ | \n", "CC | \n", "VBZ | \n", "DT | \n", "RB | \n", "VB | \n", "TO | \n", "... | \n", "EX | \n", "JJR | \n", "PDT | \n", "RP | \n", "WP | \n", "CD | \n", "RBR | \n", "MD | \n", "RBS | \n", "total | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "11 | \n", "3 | \n", "3 | \n", "9 | \n", "3 | \n", "2 | \n", "4 | \n", "4 | \n", "4 | \n", "3 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "53 | \n", "
N | \n", "29 | \n", "1 | \n", "1 | \n", "7 | \n", "5 | \n", "1 | \n", "14 | \n", "8 | \n", "4 | \n", "4 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "105 | \n", "
N | \n", "13 | \n", "2 | \n", "2 | \n", "5 | \n", "1 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "45 | \n", "
3 rows × 29 columns
\n", "\n", " | NN | \n", "NNS | \n", "VBP | \n", "JJ | \n", "CC | \n", "VBZ | \n", "DT | \n", "RB | \n", "VB | \n", "TO | \n", "... | \n", "VBG | \n", "EX | \n", "JJR | \n", "PDT | \n", "RP | \n", "WP | \n", "CD | \n", "RBR | \n", "MD | \n", "RBS | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "0.207547 | \n", "0.056604 | \n", "0.056604 | \n", "0.169811 | \n", "0.056604 | \n", "0.037736 | \n", "0.075472 | \n", "0.075472 | \n", "0.075472 | \n", "0.056604 | \n", "... | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
N | \n", "0.276190 | \n", "0.009524 | \n", "0.009524 | \n", "0.066667 | \n", "0.047619 | \n", "0.009524 | \n", "0.133333 | \n", "0.076190 | \n", "0.038095 | \n", "0.038095 | \n", "... | \n", "0.009524 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
N | \n", "0.288889 | \n", "0.044444 | \n", "0.044444 | \n", "0.111111 | \n", "0.022222 | \n", "0.044444 | \n", "0.111111 | \n", "0.000000 | \n", "0.000000 | \n", "0.022222 | \n", "... | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 rows × 28 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "... | \n", "pos | \n", "pos_no_sw | \n", "pos_dict | \n", "pos_dict_no_sw | \n", "bow | \n", "bow_no_sw | \n", "pos_sent | \n", "pos_sent_str | \n", "pos_no_sw_sent | \n", "pos_sent_bi | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "... | \n", "[(twin, NN), (trees, NNS), (cicero, VBP), (ny,... | \n", "[(twin, NN), (trees, NNS), (cicero, VBP), (ny,... | \n", "{'NN': 11, 'NNS': 3, 'VBP': 3, 'JJ': 9, 'CC': ... | \n", "{'NN': 7, 'NNS': 5, 'VBP': 3, 'JJ': 10, 'RB': ... | \n", "{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ... | \n", "{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ... | \n", "[NN, NNS, VBP, JJ, JJ, NN, NN, CC, JJ, NN, JJ,... | \n", "[NN NNS VBP JJ JJ NN NN CC JJ NN JJ VBZ DT NN ... | \n", "[NN, NNS, VBP, JJ, JJ, NN, NN, JJ, NN, JJ, NNS... | \n", "[(NN, NNS), (NNS, VBP), (VBP, JJ), (JJ, JJ), (... | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "[the, worst, restaur, that, i, have, ever, eat... | \n", "[worst, restaur, ever, eaten, undoubtedli, pla... | \n", "... | \n", "[(the, DT), (worst, JJS), (restaurant, NN), (t... | \n", "[(worst, RBS), (restaurant, NN), (ever, RB), (... | \n", "{'DT': 14, 'JJS': 1, 'NN': 29, 'IN': 8, 'VBP':... | \n", "{'RBS': 1, 'NN': 24, 'RB': 5, 'JJ': 9, 'VBN': ... | \n", "{'the': 6, 'worst': 1, 'restaurant': 1, 'that'... | \n", "{'worst': 1, 'restaurant': 1, 'ever': 1, 'eate... | \n", "[DT, JJS, NN, IN, NN, VBP, RB, VBN, IN, VBZ, R... | \n", "[DT JJS NN IN NN VBP RB VBN IN VBZ RB DT NN VB... | \n", "[RBS, NN, RB, RB, JJ, NN, VBN, NN, NN, VBD, NN... | \n", "[(DT, JJS), (JJS, NN), (NN, IN), (IN, NN), (NN... | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "[i, have, been, to, a, asian, restaur, in, new... | \n", "[asian, restaur, new, york, citi, menu, writte... | \n", "... | \n", "[(i, NNS), (have, VBP), (been, VBN), (to, TO),... | \n", "[(asian, JJ), (restaurant, NN), (new, JJ), (yo... | \n", "{'NNS': 2, 'VBP': 2, 'VBN': 5, 'TO': 1, 'DT': ... | \n", "{'JJ': 5, 'NN': 11, 'VBN': 2, 'VBP': 1, 'NNS':... | \n", "{'i': 3, 'have': 1, 'been': 1, 'to': 1, 'a': 3... | \n", "{'asian': 1, 'restaurant': 1, 'new': 1, 'york'... | \n", "[NNS, VBP, VBN, TO, DT, JJ, NN, IN, JJ, NN, NN... | \n", "[NNS VBP VBN TO DT JJ NN IN JJ NN NN DT NN VBZ... | \n", "[JJ, NN, JJ, NN, NN, NN, VBN, JJ, JJ, VBP, JJ,... | \n", "[(NNS, VBP), (VBP, VBN), (VBN, TO), (TO, DT), ... | \n", "
5 | \n", "The best restaurant I have gone to is when I w... | \n", "N | \n", "[The best restaurant I have gone to is when I ... | \n", "6 | \n", "[the, best, restaurant, i, have, gone, to, is,... | \n", "71 | \n", "[best, restaurant, gone, went, applebee, frien... | \n", "30 | \n", "[the, best, restaur, i, have, gone, to, is, wh... | \n", "[best, restaur, gone, went, applebe, friend, s... | \n", "... | \n", "[(the, DT), (best, JJS), (restaurant, NN), (i,... | \n", "[(best, RBS), (restaurant, NN), (gone, VBN), (... | \n", "{'DT': 6, 'JJS': 1, 'NN': 10, 'VBP': 2, 'VBN':... | \n", "{'RBS': 1, 'NN': 7, 'VBN': 1, 'VBD': 2, 'JJ': ... | \n", "{'the': 5, 'best': 1, 'restaurant': 2, 'i': 4,... | \n", "{'best': 1, 'restaurant': 2, 'gone': 1, 'went'... | \n", "[DT, JJS, NN, NN, VBP, VBN, TO, VBZ, WRB, JJ, ... | \n", "[DT JJS NN NN VBP VBN TO VBZ WRB JJ VBD TO VB ... | \n", "[RBS, NN, VBN, VBD, JJ, NNS, NN, RB, NN, JJ, V... | \n", "[(DT, JJS), (JJS, NN), (NN, NN), (NN, VBP), (V... | \n", "
4 rows × 22 columns
\n", "\n", " | (NN, NNS) | \n", "(NNS, VBP) | \n", "(VBP, JJ) | \n", "(JJ, JJ) | \n", "(JJ, NN) | \n", "(NN, NN) | \n", "(NN, CC) | \n", "(CC, JJ) | \n", "(NN, JJ) | \n", "(JJ, VBZ) | \n", "... | \n", "(WDT, MD) | \n", "(WRB, MD) | \n", "(MD, DT) | \n", "(NNS, JJR) | \n", "(JJR, EX) | \n", "(VBP, MD) | \n", "(JJS, WRB) | \n", "(CD, RB) | \n", "(JJS, VBG) | \n", "(RP, TO) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "1 | \n", "1 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "1 | \n", "2 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "4 | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "2 | \n", "4 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "4 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 rows × 389 columns
\n", "\n", " | (NN, NNS) | \n", "(NNS, VBP) | \n", "(VBP, JJ) | \n", "(JJ, JJ) | \n", "(JJ, NN) | \n", "(NN, NN) | \n", "(NN, CC) | \n", "(CC, JJ) | \n", "(NN, JJ) | \n", "(JJ, VBZ) | \n", "... | \n", "(WDT, MD) | \n", "(WRB, MD) | \n", "(MD, DT) | \n", "(NNS, JJR) | \n", "(JJR, EX) | \n", "(VBP, MD) | \n", "(JJS, WRB) | \n", "(CD, RB) | \n", "(JJS, VBG) | \n", "(RP, TO) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "0.019231 | \n", "0.019231 | \n", "0.038462 | \n", "0.019231 | \n", "0.038462 | \n", "0.038462 | \n", "0.019231 | \n", "0.038462 | \n", "0.019231 | \n", "0.019231 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
N | \n", "0.009615 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.019231 | \n", "0.038462 | \n", "0.028846 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.022727 | \n", "0.000000 | \n", "0.000000 | \n", "0.045455 | \n", "0.090909 | \n", "0.000000 | \n", "0.022727 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.057143 | \n", "0.014286 | \n", "0.000000 | \n", "0.014286 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.028571 | \n", "0.000000 | \n", "0.028571 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
P | \n", "0.000000 | \n", "0.023810 | \n", "0.023810 | \n", "0.000000 | \n", "0.071429 | \n", "0.023810 | \n", "0.023810 | \n", "0.000000 | \n", "0.023810 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.02381 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.043478 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.030612 | \n", "0.020408 | \n", "0.020408 | \n", "0.010204 | \n", "0.010204 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.010204 | \n", "0.000000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.006494 | \n", "0.000000 | \n", "0.012987 | \n", "0.038961 | \n", "0.071429 | \n", "0.025974 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.00000 | \n", "0.000000 | \n", "0.006494 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.023810 | \n", "0.000000 | \n", "0.119048 | \n", "0.047619 | \n", "0.023810 | \n", "0.023810 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.02381 | \n", "0.000000 | \n", "0.000000 | \n", "0.02381 | \n", "
90 rows × 389 columns
\n", "\n", " | word | \n", "count | \n", "
---|---|---|
11 | \n", "(DT, NN) | \n", "245 | \n", "
38 | \n", "(JJ, NN) | \n", "151 | \n", "
0 | \n", "(NN, NN) | \n", "125 | \n", "
44 | \n", "(NN, IN) | \n", "114 | \n", "
1 | \n", "(NN, VBD) | \n", "112 | \n", "
43 | \n", "(IN, DT) | \n", "111 | \n", "
5 | \n", "(NN, CC) | \n", "77 | \n", "
19 | \n", "(TO, VB) | \n", "76 | \n", "
37 | \n", "(DT, JJ) | \n", "62 | \n", "
26 | \n", "(RB, JJ) | \n", "61 | \n", "
105 | \n", "(PRP, VBD) | \n", "60 | \n", "
29 | \n", "(IN, NN) | \n", "57 | \n", "
2 | \n", "(VBD, DT) | \n", "55 | \n", "
25 | \n", "(VBD, RB) | \n", "45 | \n", "
75 | \n", "(PRP$, NN) | \n", "44 | \n", "
12 | \n", "(NN, VBZ) | \n", "42 | \n", "
83 | \n", "(NN, RB) | \n", "38 | \n", "
86 | \n", "(IN, JJ) | \n", "37 | \n", "
39 | \n", "(CC, DT) | \n", "36 | \n", "
65 | \n", "(JJ, CC) | \n", "34 | \n", "
\n", " | word | \n", "count | \n", "
---|---|---|
11 | \n", "(DT, NN) | \n", "230 | \n", "
4 | \n", "(JJ, NN) | \n", "145 | \n", "
24 | \n", "(NN, IN) | \n", "122 | \n", "
49 | \n", "(NN, VBD) | \n", "106 | \n", "
5 | \n", "(NN, NN) | \n", "102 | \n", "
32 | \n", "(IN, DT) | \n", "96 | \n", "
33 | \n", "(DT, JJ) | \n", "76 | \n", "
44 | \n", "(IN, NN) | \n", "65 | \n", "
20 | \n", "(TO, VB) | \n", "59 | \n", "
12 | \n", "(NN, VBZ) | \n", "56 | \n", "
14 | \n", "(RB, JJ) | \n", "53 | \n", "
59 | \n", "(PRP, VBD) | \n", "48 | \n", "
6 | \n", "(NN, CC) | \n", "48 | \n", "
60 | \n", "(VBD, RB) | \n", "47 | \n", "
79 | \n", "(NN, DT) | \n", "41 | \n", "
52 | \n", "(VBD, DT) | \n", "37 | \n", "
28 | \n", "(IN, PRP) | \n", "36 | \n", "
61 | \n", "(JJ, CC) | \n", "35 | \n", "
74 | \n", "(VBD, JJ) | \n", "34 | \n", "
57 | \n", "(PRP$, NN) | \n", "33 | \n", "
\n", " | NN_NNS_VBP | \n", "NNS_VBP_JJ | \n", "VBP_JJ_JJ | \n", "JJ_JJ_NN | \n", "JJ_NN_NN | \n", "NN_NN_CC | \n", "NN_CC_JJ | \n", "CC_JJ_NN | \n", "JJ_NN_JJ | \n", "NN_JJ_VBZ | \n", "... | \n", "RB_NN_NN | \n", "VBD_RP_PRP | \n", "PRP_TO_JJ | \n", "TO_JJ_JJ | \n", "CC_VB_NN | \n", "JJS_WRB_NN | \n", "NN_VBP_RP | \n", "VBP_RP_TO | \n", "RP_TO_VB | \n", "VBD_PRP_CC | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 rows × 1683 columns
\n", "" ], "text/plain": [ " NN_NNS_VBP NNS_VBP_JJ VBP_JJ_JJ JJ_JJ_NN JJ_NN_NN NN_NN_CC \\\n", "PoN \n", "N 1 1 1 1 1 1 \n", "N 0 0 0 0 2 1 \n", "N 0 0 0 0 1 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "\n", " NN_CC_JJ CC_JJ_NN JJ_NN_JJ NN_JJ_VBZ ... RB_NN_NN VBD_RP_PRP \\\n", "PoN ... \n", "N 1 1 1 1 ... 0 0 \n", "N 0 0 0 0 ... 0 0 \n", "N 0 0 0 0 ... 0 0 \n", "N 0 0 0 0 ... 0 0 \n", "N 0 0 0 0 ... 0 0 \n", "\n", " PRP_TO_JJ TO_JJ_JJ CC_VB_NN JJS_WRB_NN NN_VBP_RP VBP_RP_TO \\\n", "PoN \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "\n", " RP_TO_VB VBD_PRP_CC \n", "PoN \n", "N 0 0 \n", "N 0 0 \n", "N 0 0 \n", "N 0 0 \n", "N 0 0 \n", "\n", "[5 rows x 1683 columns]" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df = pd.DataFrame(all_df['trigrams_feats_bow'].tolist(), all_df['PoN'])\n", "new_df = new_df.fillna(0).astype(int)\n", "new_df[:5]\n" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.5185185185185185\n" ] } ], "source": [ "get_NB(new_df, new_df.index)" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.5185185185185185\n" ] }, { "data": { "text/html": [ "\n", " | NN_NNS_VBP | \n", "NNS_VBP_JJ | \n", "VBP_JJ_JJ | \n", "JJ_JJ_NN | \n", "JJ_NN_NN | \n", "NN_NN_CC | \n", "NN_CC_JJ | \n", "CC_JJ_NN | \n", "JJ_NN_JJ | \n", "NN_JJ_VBZ | \n", "... | \n", "RB_NN_NN | \n", "VBD_RP_PRP | \n", "PRP_TO_JJ | \n", "TO_JJ_JJ | \n", "CC_VB_NN | \n", "JJS_WRB_NN | \n", "NN_VBP_RP | \n", "VBP_RP_TO | \n", "RP_TO_VB | \n", "VBD_PRP_CC | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "0.019231 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.018868 | \n", "0.009434 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.023256 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
P | \n", "0.000000 | \n", "0.024390 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.024390 | \n", "0.000000 | \n", "0.000000 | \n", "0.024390 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.012346 | \n", "0.030864 | \n", "0.012346 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.006173 | \n", "0.006173 | \n", "0.006173 | \n", "0.006173 | \n", "0.006173 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "0.00000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.048780 | \n", "0.000000 | \n", "0.024390 | \n", "0.024390 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.02439 | \n", "0.02439 | \n", "0.02439 | \n", "0.02439 | \n", "0.02439 | \n", "
90 rows × 1683 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "... | \n", "bow_no_sw | \n", "pos_sent | \n", "pos_sent_str | \n", "pos_no_sw_sent | \n", "pos_sent_bi | \n", "bow_pos | \n", "trigrams | \n", "trigrams_pos | \n", "trigrams_feats | \n", "trigrams_feats_bow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "Twin Trees Cicero NY HUGE salad bar and high q... | \n", "N | \n", "[Twin Trees Cicero NY HUGE salad bar and high ... | \n", "4 | \n", "[twin, trees, cicero, ny, huge, salad, bar, an... | \n", "53 | \n", "[twin, trees, cicero, ny, huge, salad, bar, hi... | \n", "32 | \n", "[twin, tree, cicero, ny, huge, salad, bar, and... | \n", "[twin, tree, cicero, ny, huge, salad, bar, hig... | \n", "... | \n", "{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ... | \n", "[NN, NNS, VBP, JJ, JJ, NN, NN, CC, JJ, NN, JJ,... | \n", "[NN NNS VBP JJ JJ NN NN CC JJ NN JJ VBZ DT NN ... | \n", "[NN, NNS, VBP, JJ, JJ, NN, NN, JJ, NN, JJ, NNS... | \n", "[(NN, NNS), (NNS, VBP), (VBP, JJ), (JJ, JJ), (... | \n", "{('NN', 'NNS'): 1, ('NNS', 'VBP'): 1, ('VBP', ... | \n", "[(twin, trees, cicero), (trees, cicero, ny), (... | \n", "[(NN, NNS, VBP), (NNS, VBP, JJ), (VBP, JJ, JJ)... | \n", "[NN_NNS_VBP, NNS_VBP_JJ, VBP_JJ_JJ, JJ_JJ_NN, ... | \n", "{'NN_NNS_VBP': 1, 'NNS_VBP_JJ': 1, 'VBP_JJ_JJ'... | \n", "
2 | \n", "The worst restaurant that I have ever eaten in... | \n", "N | \n", "[The worst restaurant that I have ever eaten i... | \n", "5 | \n", "[the, worst, restaurant, that, i, have, ever, ... | \n", "105 | \n", "[worst, restaurant, ever, eaten, undoubtedly, ... | \n", "49 | \n", "[the, worst, restaur, that, i, have, ever, eat... | \n", "[worst, restaur, ever, eaten, undoubtedli, pla... | \n", "... | \n", "{'worst': 1, 'restaurant': 1, 'ever': 1, 'eate... | \n", "[DT, JJS, NN, IN, NN, VBP, RB, VBN, IN, VBZ, R... | \n", "[DT JJS NN IN NN VBP RB VBN IN VBZ RB DT NN VB... | \n", "[RBS, NN, RB, RB, JJ, NN, VBN, NN, NN, VBD, NN... | \n", "[(DT, JJS), (JJS, NN), (NN, IN), (IN, NN), (NN... | \n", "{('DT', 'JJS'): 1, ('JJS', 'NN'): 1, ('NN', 'I... | \n", "[(the, worst, restaurant), (worst, restaurant,... | \n", "[(DT, JJS, NN), (JJS, NN, IN), (NN, IN, NN), (... | \n", "[DT_JJS_NN, JJS_NN_IN, NN_IN_NN, IN_NN_VBP, NN... | \n", "{'DT_JJS_NN': 1, 'JJS_NN_IN': 1, 'NN_IN_NN': 2... | \n", "
4 | \n", "I have been to a Asian restaurant in New York ... | \n", "N | \n", "[I have been to a Asian restaurant in New York... | \n", "4 | \n", "[i, have, been, to, a, asian, restaurant, in, ... | \n", "45 | \n", "[asian, restaurant, new, york, city, menu, wri... | \n", "23 | \n", "[i, have, been, to, a, asian, restaur, in, new... | \n", "[asian, restaur, new, york, citi, menu, writte... | \n", "... | \n", "{'asian': 1, 'restaurant': 1, 'new': 1, 'york'... | \n", "[NNS, VBP, VBN, TO, DT, JJ, NN, IN, JJ, NN, NN... | \n", "[NNS VBP VBN TO DT JJ NN IN JJ NN NN DT NN VBZ... | \n", "[JJ, NN, JJ, NN, NN, NN, VBN, JJ, JJ, VBP, JJ,... | \n", "[(NNS, VBP), (VBP, VBN), (VBN, TO), (TO, DT), ... | \n", "{('NNS', 'VBP'): 1, ('VBP', 'VBN'): 1, ('VBN',... | \n", "[(i, have, been), (have, been, to), (been, to,... | \n", "[(NNS, VBP, VBN), (VBP, VBN, TO), (VBN, TO, DT... | \n", "[NNS_VBP_VBN, VBP_VBN_TO, VBN_TO_DT, TO_DT_JJ,... | \n", "{'NNS_VBP_VBN': 1, 'VBP_VBN_TO': 1, 'VBN_TO_DT... | \n", "
5 | \n", "The best restaurant I have gone to is when I w... | \n", "N | \n", "[The best restaurant I have gone to is when I ... | \n", "6 | \n", "[the, best, restaurant, i, have, gone, to, is,... | \n", "71 | \n", "[best, restaurant, gone, went, applebee, frien... | \n", "30 | \n", "[the, best, restaur, i, have, gone, to, is, wh... | \n", "[best, restaur, gone, went, applebe, friend, s... | \n", "... | \n", "{'best': 1, 'restaurant': 2, 'gone': 1, 'went'... | \n", "[DT, JJS, NN, NN, VBP, VBN, TO, VBZ, WRB, JJ, ... | \n", "[DT JJS NN NN VBP VBN TO VBZ WRB JJ VBD TO VB ... | \n", "[RBS, NN, VBN, VBD, JJ, NNS, NN, RB, NN, JJ, V... | \n", "[(DT, JJS), (JJS, NN), (NN, NN), (NN, VBP), (V... | \n", "{('DT', 'JJS'): 1, ('JJS', 'NN'): 1, ('NN', 'N... | \n", "[(the, best, restaurant), (best, restaurant, i... | \n", "[(DT, JJS, NN), (JJS, NN, NN), (NN, NN, VBP), ... | \n", "[DT_JJS_NN, JJS_NN_NN, NN_NN_VBP, NN_VBP_VBN, ... | \n", "{'DT_JJS_NN': 1, 'JJS_NN_NN': 1, 'NN_NN_VBP': ... | \n", "
6 | \n", "The restaurant looked pretty good the people a... | \n", "N | \n", "[The restaurant looked pretty good the people ... | \n", "3 | \n", "[the, restaurant, looked, pretty, good, the, p... | \n", "36 | \n", "[restaurant, looked, pretty, good, people, aro... | \n", "19 | \n", "[the, restaur, look, pretti, good, the, peopl,... | \n", "[restaur, look, pretti, good, peopl, around, a... | \n", "... | \n", "{'restaurant': 1, 'looked': 1, 'pretty': 1, 'g... | \n", "[DT, NN, VBD, RB, JJ, DT, NNS, IN, PRP, DT, NN... | \n", "[DT NN VBD RB JJ DT NNS IN PRP DT NN CC VBD RB... | \n", "[NN, VBD, RB, JJ, NNS, IN, NN, VBD, RB, NN, JJ... | \n", "[(DT, NN), (NN, VBD), (VBD, RB), (RB, JJ), (JJ... | \n", "{('DT', 'NN'): 5, ('NN', 'VBD'): 3, ('VBD', 'R... | \n", "[(the, restaurant, looked), (restaurant, looke... | \n", "[(DT, NN, VBD), (NN, VBD, RB), (VBD, RB, JJ), ... | \n", "[DT_NN_VBD, NN_VBD_RB, VBD_RB_JJ, RB_JJ_DT, JJ... | \n", "{'DT_NN_VBD': 3, 'NN_VBD_RB': 1, 'VBD_RB_JJ': ... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
87 | \n", "Mikes Pizza High Point NY Service was very slo... | \n", "P | \n", "[Mikes Pizza High Point NY Service was very sl... | \n", "4 | \n", "[mikes, pizza, high, point, ny, service, was, ... | \n", "43 | \n", "[mikes, pizza, high, point, ny, service, slow,... | \n", "26 | \n", "[mike, pizza, high, point, ny, servic, wa, ver... | \n", "[mike, pizza, high, point, ny, servic, slow, q... | \n", "... | \n", "{'mikes': 1, 'pizza': 2, 'high': 1, 'point': 1... | \n", "[NNS, VBP, JJ, NN, JJ, NN, VBD, RB, JJ, CC, DT... | \n", "[NNS VBP JJ NN JJ NN VBD RB JJ CC DT NN VBD JJ... | \n", "[NNS, VBP, JJ, NN, JJ, NN, JJ, NN, NN, MD, VB,... | \n", "[(NNS, VBP), (VBP, JJ), (JJ, NN), (NN, JJ), (J... | \n", "{('NNS', 'VBP'): 1, ('VBP', 'JJ'): 1, ('JJ', '... | \n", "[(mikes, pizza, high), (pizza, high, point), (... | \n", "[(NNS, VBP, JJ), (VBP, JJ, NN), (JJ, NN, JJ), ... | \n", "[NNS_VBP_JJ, VBP_JJ_NN, JJ_NN_JJ, NN_JJ_NN, JJ... | \n", "{'NNS_VBP_JJ': 1, 'VBP_JJ_NN': 1, 'JJ_NN_JJ': ... | \n", "
88 | \n", "After I went shopping with some of my friend w... | \n", "P | \n", "[After I went shopping with some of my friend ... | \n", "2 | \n", "[after, i, went, shopping, with, some, of, my,... | \n", "24 | \n", "[went, shopping, friend, went, dodo, restauran... | \n", "11 | \n", "[after, i, went, shop, with, some, of, my, fri... | \n", "[went, shop, friend, went, dodo, restaur, dinn... | \n", "... | \n", "{'went': 2, 'shopping': 1, 'friend': 1, 'dodo'... | \n", "[IN, JJ, VBD, VBG, IN, DT, IN, PRP$, NN, PRP, ... | \n", "[IN JJ VBD VBG IN DT IN PRP$ NN PRP VBD TO VB ... | \n", "[VBD, VBG, NN, VBD, JJ, NN, NN, VBD, RB, CD, NNS] | \n", "[(IN, JJ), (JJ, VBD), (VBD, VBG), (VBG, IN), (... | \n", "{('IN', 'JJ'): 1, ('JJ', 'VBD'): 1, ('VBD', 'V... | \n", "[(after, i, went), (i, went, shopping), (went,... | \n", "[(IN, JJ, VBD), (JJ, VBD, VBG), (VBD, VBG, IN)... | \n", "[IN_JJ_VBD, JJ_VBD_VBG, VBD_VBG_IN, VBG_IN_DT,... | \n", "{'IN_JJ_VBD': 1, 'JJ_VBD_VBG': 1, 'VBD_VBG_IN'... | \n", "
89 | \n", "I entered the restaurant and a waitress came b... | \n", "P | \n", "[I entered the restaurant and a waitress came ... | \n", "5 | \n", "[i, entered, the, restaurant, and, a, waitress... | \n", "99 | \n", "[entered, restaurant, waitress, came, blanking... | \n", "49 | \n", "[i, enter, the, restaur, and, a, waitress, cam... | \n", "[enter, restaur, waitress, came, blank, look, ... | \n", "... | \n", "{'entered': 1, 'restaurant': 1, 'waitress': 2,... | \n", "[NN, VBD, DT, NN, CC, DT, NN, VBD, IN, IN, DT,... | \n", "[NN VBD DT NN CC DT NN VBD IN IN DT NN VBG CC ... | \n", "[VBN, NN, NN, VBD, VBG, VBG, JJ, NN, NN, VBD, ... | \n", "[(NN, VBD), (VBD, DT), (DT, NN), (NN, CC), (CC... | \n", "{('NN', 'VBD'): 5, ('VBD', 'DT'): 4, ('DT', 'N... | \n", "[(i, entered, the), (entered, the, restaurant)... | \n", "[(NN, VBD, DT), (VBD, DT, NN), (DT, NN, CC), (... | \n", "[NN_VBD_DT, VBD_DT_NN, DT_NN_CC, NN_CC_DT, CC_... | \n", "{'NN_VBD_DT': 1, 'VBD_DT_NN': 3, 'DT_NN_CC': 1... | \n", "
90 | \n", "Carlos Plate Shack was the worst dining experi... | \n", "P | \n", "[Carlos Plate Shack was the worst dining exper... | \n", "9 | \n", "[carlos, plate, shack, was, the, worst, dining... | \n", "155 | \n", "[carlos, plate, shack, worst, dining, experien... | \n", "88 | \n", "[carlo, plate, shack, wa, the, worst, dine, ex... | \n", "[carlo, plate, shack, worst, dine, experi, lif... | \n", "... | \n", "{'carlos': 1, 'plate': 6, 'shack': 1, 'worst':... | \n", "[NN, NN, NN, VBD, DT, JJS, VBG, NN, IN, PRP$, ... | \n", "[NN NN NN VBD DT JJS VBG NN IN PRP$ NN IN PRP$... | \n", "[NN, NN, NN, JJS, VBG, NN, NN, IN, JJ, NN, NN,... | \n", "[(NN, NN), (NN, NN), (NN, VBD), (VBD, DT), (DT... | \n", "{('NN', 'NN'): 11, ('NN', 'VBD'): 6, ('VBD', '... | \n", "[(carlos, plate, shack), (plate, shack, was), ... | \n", "[(NN, NN, NN), (NN, NN, VBD), (NN, VBD, DT), (... | \n", "[NN_NN_NN, NN_NN_VBD, NN_VBD_DT, VBD_DT_JJS, D... | \n", "{'NN_NN_NN': 2, 'NN_NN_VBD': 3, 'NN_VBD_DT': 2... | \n", "
91 | \n", "Olive Oil Garden was very disappointing. I exp... | \n", "P | \n", "[Olive Oil Garden was very disappointing., I e... | \n", "5 | \n", "[olive, oil, garden, was, very, disappointing,... | \n", "43 | \n", "[olive, oil, garden, disappointing, expect, go... | \n", "23 | \n", "[oliv, oil, garden, wa, veri, disappoint, i, e... | \n", "[oliv, oil, garden, disappoint, expect, good, ... | \n", "... | \n", "{'olive': 2, 'oil': 2, 'garden': 2, 'disappoin... | \n", "[JJ, NN, NN, VBD, RB, JJ, NN, VBP, JJ, NN, CC,... | \n", "[JJ NN NN VBD RB JJ NN VBP JJ NN CC JJ NN IN J... | \n", "[JJ, NN, NN, NN, VBP, JJ, NN, JJ, NN, JJS, VB,... | \n", "[(JJ, NN), (NN, NN), (NN, VBD), (VBD, RB), (RB... | \n", "{('JJ', 'NN'): 5, ('NN', 'NN'): 2, ('NN', 'VBD... | \n", "[(olive, oil, garden), (oil, garden, was), (ga... | \n", "[(JJ, NN, NN), (NN, NN, VBD), (NN, VBD, RB), (... | \n", "[JJ_NN_NN, NN_NN_VBD, NN_VBD_RB, VBD_RB_JJ, RB... | \n", "{'JJ_NN_NN': 2, 'NN_NN_VBD': 1, 'NN_VBD_RB': 1... | \n", "
90 rows × 27 columns
\n", "