{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HW4 [Deception] PART 2 -- Check with Myle Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## STEP 1: GET THAT DATA" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "\n", "def get_data(file, path):\n", "    \"\"\"Return the entire contents of `file` found in directory `path`.\n", "\n", "    The location is built with os.path.join, so `path` works with or\n", "    without a trailing separator.\n", "    \"\"\"\n", "    # The context manager closes the handle even if read() raises.\n", "    with open(os.path.join(path, file)) as f:\n", "        return f.read()\n", "\n", "\n", "def get_data_from_files(path):\n", "    \"\"\"Return a list holding the contents of each regular file in `path`.\n", "\n", "    Names are visited in sorted order because os.listdir returns them in\n", "    arbitrary order; sub-directories are skipped rather than passed to open().\n", "    \"\"\"\n", "    return [\n", "        get_data(file, path)\n", "        for file in sorted(os.listdir(path))\n", "        if os.path.isfile(os.path.join(path, file))\n", "    ]\n", "\n", "\n", "# Alternative corpora (disabled):\n", "# pos = get_data_from_files('../pos_cornell//')\n", "# neg = get_data_from_files('../neg_cornell/')\n", "\n", "# pos = get_data_from_files('../hw4_lie_false/')\n", "# neg = get_data_from_files('../hw4_lie_true/')\n", "\n", "# NOTE(review): the original note here read 'TRUE IS NEG!!!!'. As written,\n", "# deceptive reviews load into neg/neg2 and truthful ones into pos/pos2 --\n", "# confirm that this is the intended label polarity.\n", "neg = get_data_from_files('../myle_pos_deceptive/')\n", "pos = get_data_from_files('../myle_pos_truthful/')\n", "neg2 = get_data_from_files('../myle_neg_deceptive/')\n", "pos2 = get_data_from_files('../myle_neg_truthful/')" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "PoN | \n", "
---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "
3 | \n", "This is an absolutely exquisite hotel, at a gr... | \n", "N | \n", "
4 | \n", "I recently traveled up to Chicago for business... | \n", "N | \n", "
... | \n", "... | \n", "... | \n", "
310 | \n", "It's not a bad hotel. It's just so...disappoin... | \n", "P | \n", "
311 | \n", "My wife and I brought our daughter downtown fo... | \n", "P | \n", "
312 | \n", "Excellent Hotel ! Rooms and service were great... | \n", "P | \n", "
313 | \n", "Had a week long stay at the Hilton on south Mi... | \n", "P | \n", "
314 | \n", "We stayed at the James hotel for a 40th birthd... | \n", "P | \n", "
315 rows × 2 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "
---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "
---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "[i, travel, to, chicago, with, my, husband, fo... | \n", "[travel, chicago, husband, romant, weekend, aw... | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "[i, stay, in, the, sofitel, chicago, water, to... | \n", "[stay, sofitel, chicago, water, tower, hotel, ... | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "[thi, hotel, wa, gorgeou, i, realli, enjoy, my... | \n", "[hotel, gorgeou, realli, enjoy, stay, definit,... | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "lemmed | \n", "lemmed_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "[i, travel, to, chicago, with, my, husband, fo... | \n", "[travel, chicago, husband, romant, weekend, aw... | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "[i, stay, in, the, sofitel, chicago, water, to... | \n", "[stay, sofitel, chicago, water, tower, hotel, ... | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "[thi, hotel, wa, gorgeou, i, realli, enjoy, my... | \n", "[hotel, gorgeou, realli, enjoy, stay, definit,... | \n", "[this, hotel, wa, gorgeous, i, really, enjoyed... | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "lemmed | \n", "lemmed_no_sw | \n", "pos | \n", "pos_no_sw | \n", "pos_dict | \n", "pos_dict_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "[i, travel, to, chicago, with, my, husband, fo... | \n", "[travel, chicago, husband, romant, weekend, aw... | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "[(i, NN), (traveled, VBD), (to, TO), (chicago,... | \n", "[(traveled, VBN), (chicago, JJ), (husband, NN)... | \n", "{'NN': 18, 'VBD': 6, 'TO': 1, 'VB': 3, 'IN': 6... | \n", "{'VBN': 1, 'JJ': 6, 'NN': 16, 'RB': 4, 'MD': 2... | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "[i, stay, in, the, sofitel, chicago, water, to... | \n", "[stay, sofitel, chicago, water, tower, hotel, ... | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "[(i, JJ), (stayed, VBD), (in, IN), (the, DT), ... | \n", "[(stayed, JJ), (sofitel, NN), (chicago, NN), (... | \n", "{'JJ': 19, 'VBD': 6, 'IN': 16, 'DT': 14, 'NN':... | \n", "{'JJ': 15, 'NN': 29, 'CD': 1, 'NNS': 11, 'RB':... | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "[thi, hotel, wa, gorgeou, i, realli, enjoy, my... | \n", "[hotel, gorgeou, realli, enjoy, stay, definit,... | \n", "[this, hotel, wa, gorgeous, i, really, enjoyed... | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "[(this, DT), (hotel, NN), (was, VBD), (gorgeou... | \n", "[(hotel, NN), (gorgeous, JJ), (really, RB), (e... | \n", "{'DT': 9, 'NN': 15, 'VBD': 6, 'JJ': 10, 'RB': ... | \n", "{'NN': 15, 'JJ': 9, 'RB': 6, 'VBN': 1, 'VBG': ... | \n", "
\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "lemmed | \n", "lemmed_no_sw | \n", "pos | \n", "pos_no_sw | \n", "pos_dict | \n", "pos_dict_no_sw | \n", "bow | \n", "bow_no_sw | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "[i, travel, to, chicago, with, my, husband, fo... | \n", "[travel, chicago, husband, romant, weekend, aw... | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "[(i, NN), (traveled, VBD), (to, TO), (chicago,... | \n", "[(traveled, VBN), (chicago, JJ), (husband, NN)... | \n", "{'NN': 18, 'VBD': 6, 'TO': 1, 'VB': 3, 'IN': 6... | \n", "{'VBN': 1, 'JJ': 6, 'NN': 16, 'RB': 4, 'MD': 2... | \n", "{'i': 1, 'traveled': 1, 'to': 1, 'chicago': 2,... | \n", "{'traveled': 1, 'chicago': 2, 'husband': 1, 'r... | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "[i, stay, in, the, sofitel, chicago, water, to... | \n", "[stay, sofitel, chicago, water, tower, hotel, ... | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "[(i, JJ), (stayed, VBD), (in, IN), (the, DT), ... | \n", "[(stayed, JJ), (sofitel, NN), (chicago, NN), (... | \n", "{'JJ': 19, 'VBD': 6, 'IN': 16, 'DT': 14, 'NN':... | \n", "{'JJ': 15, 'NN': 29, 'CD': 1, 'NNS': 11, 'RB':... | \n", "{'i': 3, 'stayed': 1, 'in': 1, 'the': 9, 'sofi... | \n", "{'stayed': 1, 'sofitel': 1, 'chicago': 1, 'wat... | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "[thi, hotel, wa, gorgeou, i, realli, enjoy, my... | \n", "[hotel, gorgeou, realli, enjoy, stay, definit,... | \n", "[this, hotel, wa, gorgeous, i, really, enjoyed... | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "[(this, DT), (hotel, NN), (was, VBD), (gorgeou... | \n", "[(hotel, NN), (gorgeous, JJ), (really, RB), (e... | \n", "{'DT': 9, 'NN': 15, 'VBD': 6, 'JJ': 10, 'RB': ... | \n", "{'NN': 15, 'JJ': 9, 'RB': 6, 'VBN': 1, 'VBG': ... | \n", "{'this': 2, 'hotel': 2, 'was': 6, 'gorgeous': ... | \n", "{'hotel': 2, 'gorgeous': 1, 'really': 1, 'enjo... | \n", "
\n", " | NN | \n", "VBD | \n", "TO | \n", "VB | \n", "IN | \n", "PRP$ | \n", "DT | \n", "JJ | \n", "RB | \n", "MD | \n", "... | \n", "WDT | \n", "PDT | \n", "JJR | \n", "WP | \n", "JJS | \n", "EX | \n", "RBS | \n", "NNP | \n", "UH | \n", "FW | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "18 | \n", "6.0 | \n", "1.0 | \n", "3.0 | \n", "6 | \n", "3.0 | \n", "7.0 | \n", "5 | \n", "4.0 | \n", "2.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
N | \n", "28 | \n", "6.0 | \n", "2.0 | \n", "3.0 | \n", "16 | \n", "2.0 | \n", "14.0 | \n", "19 | \n", "4.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
N | \n", "15 | \n", "6.0 | \n", "1.0 | \n", "2.0 | \n", "3 | \n", "1.0 | \n", "9.0 | \n", "10 | \n", "8.0 | \n", "2.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 rows × 31 columns
\n", "\n", " | NN | \n", "VBD | \n", "TO | \n", "VB | \n", "IN | \n", "PRP$ | \n", "DT | \n", "JJ | \n", "RB | \n", "MD | \n", "... | \n", "WDT | \n", "PDT | \n", "JJR | \n", "WP | \n", "JJS | \n", "EX | \n", "RBS | \n", "NNP | \n", "UH | \n", "FW | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "18 | \n", "6 | \n", "1 | \n", "3 | \n", "6 | \n", "3 | \n", "7 | \n", "5 | \n", "4 | \n", "2 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "28 | \n", "6 | \n", "2 | \n", "3 | \n", "16 | \n", "2 | \n", "14 | \n", "19 | \n", "4 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "15 | \n", "6 | \n", "1 | \n", "2 | \n", "3 | \n", "1 | \n", "9 | \n", "10 | \n", "8 | \n", "2 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
3 rows × 31 columns
\n", "\n", " | NN | \n", "VBD | \n", "TO | \n", "VB | \n", "IN | \n", "PRP$ | \n", "DT | \n", "JJ | \n", "RB | \n", "MD | \n", "... | \n", "PDT | \n", "JJR | \n", "WP | \n", "JJS | \n", "EX | \n", "RBS | \n", "NNP | \n", "UH | \n", "FW | \n", "total | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "18 | \n", "6 | \n", "1 | \n", "3 | \n", "6 | \n", "3 | \n", "7 | \n", "5 | \n", "4 | \n", "2 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "68 | \n", "
N | \n", "28 | \n", "6 | \n", "2 | \n", "3 | \n", "16 | \n", "2 | \n", "14 | \n", "19 | \n", "4 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "129 | \n", "
N | \n", "15 | \n", "6 | \n", "1 | \n", "2 | \n", "3 | \n", "1 | \n", "9 | \n", "10 | \n", "8 | \n", "2 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "69 | \n", "
3 rows × 32 columns
\n", "\n", " | NN | \n", "VBD | \n", "TO | \n", "VB | \n", "IN | \n", "PRP$ | \n", "DT | \n", "JJ | \n", "RB | \n", "MD | \n", "... | \n", "WDT | \n", "PDT | \n", "JJR | \n", "WP | \n", "JJS | \n", "EX | \n", "RBS | \n", "NNP | \n", "UH | \n", "FW | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "0.264706 | \n", "0.088235 | \n", "0.014706 | \n", "0.044118 | \n", "0.088235 | \n", "0.044118 | \n", "0.102941 | \n", "0.073529 | \n", "0.058824 | \n", "0.029412 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
N | \n", "0.217054 | \n", "0.046512 | \n", "0.015504 | \n", "0.023256 | \n", "0.124031 | \n", "0.015504 | \n", "0.108527 | \n", "0.147287 | \n", "0.031008 | \n", "0.007752 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
N | \n", "0.217391 | \n", "0.086957 | \n", "0.014493 | \n", "0.028986 | \n", "0.043478 | \n", "0.014493 | \n", "0.130435 | \n", "0.144928 | \n", "0.115942 | \n", "0.028986 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 rows × 31 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "... | \n", "bow_no_sw | \n", "pos_sent | \n", "pos_sent_str | \n", "pos_no_sw_sent | \n", "pos_sent_bi | \n", "bow_pos | \n", "trigrams | \n", "trigrams_pos | \n", "trigrams_feats | \n", "trigrams_feats_bow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "[i, travel, to, chicago, with, my, husband, fo... | \n", "[travel, chicago, husband, romant, weekend, aw... | \n", "... | \n", "{'traveled': 1, 'chicago': 2, 'husband': 1, 'r... | \n", "[NN, VBD, TO, VB, IN, PRP$, NN, IN, DT, JJ, NN... | \n", "[NN VBD TO VB IN PRP$ NN IN DT JJ NN RB PRP$ N... | \n", "[VBN, JJ, NN, JJ, NN, RB, JJ, JJ, NN, NN, NN, ... | \n", "[(NN, VBD), (VBD, TO), (TO, VB), (VB, IN), (IN... | \n", "{('NN', 'VBD'): 4, ('VBD', 'TO'): 1, ('TO', 'V... | \n", "[(i, traveled, to), (traveled, to, chicago), (... | \n", "[(NN, VBD, TO), (VBD, TO, VB), (TO, VB, IN), (... | \n", "[NN_VBD_TO, VBD_TO_VB, TO_VB_IN, VB_IN_PRP, IN... | \n", "{'NN_VBD_TO': 1, 'VBD_TO_VB': 1, 'TO_VB_IN': 1... | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "[i, stay, in, the, sofitel, chicago, water, to... | \n", "[stay, sofitel, chicago, water, tower, hotel, ... | \n", "... | \n", "{'stayed': 1, 'sofitel': 1, 'chicago': 1, 'wat... | \n", "[JJ, VBD, IN, DT, NN, NN, NN, NN, NN, IN, PRP$... | \n", "[JJ VBD IN DT NN NN NN NN NN IN PRP$ NN CC CD ... | \n", "[JJ, NN, NN, NN, NN, NN, NN, CD, NNS, JJ, NN, ... | \n", "[(JJ, VBD), (VBD, IN), (IN, DT), (DT, NN), (NN... | \n", "{('JJ', 'VBD'): 1, ('VBD', 'IN'): 1, ('IN', 'D... | \n", "[(i, stayed, in), (stayed, in, the), (in, the,... | \n", "[(JJ, VBD, IN), (VBD, IN, DT), (IN, DT, NN), (... | \n", "[JJ_VBD_IN, VBD_IN_DT, IN_DT_NN, DT_NN_NN, NN_... | \n", "{'JJ_VBD_IN': 1, 'VBD_IN_DT': 1, 'IN_DT_NN': 2... | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "[thi, hotel, wa, gorgeou, i, realli, enjoy, my... | \n", "[hotel, gorgeou, realli, enjoy, stay, definit,... | \n", "... | \n", "{'hotel': 2, 'gorgeous': 1, 'really': 1, 'enjo... | \n", "[DT, NN, VBD, JJ, JJ, RB, VBN, PRP$, NN, RB, C... | \n", "[DT NN VBD JJ JJ RB VBN PRP$ NN RB CC MD RB VB... | \n", "[NN, JJ, RB, VBN, NN, RB, VBG, JJ, NN, JJ, NN,... | \n", "[(DT, NN), (NN, VBD), (VBD, JJ), (JJ, JJ), (JJ... | \n", "{('DT', 'NN'): 8, ('NN', 'VBD'): 6, ('VBD', 'J... | \n", "[(this, hotel, was), (hotel, was, gorgeous), (... | \n", "[(DT, NN, VBD), (NN, VBD, JJ), (VBD, JJ, JJ), ... | \n", "[DT_NN_VBD, NN_VBD_JJ, VBD_JJ_JJ, JJ_JJ_RB, JJ... | \n", "{'DT_NN_VBD': 4, 'NN_VBD_JJ': 4, 'VBD_JJ_JJ': ... | \n", "
3 | \n", "This is an absolutely exquisite hotel, at a gr... | \n", "N | \n", "[This is an absolutely exquisite hotel, at a g... | \n", "6 | \n", "[this, is, an, absolutely, exquisite, hotel, a... | \n", "110 | \n", "[absolutely, exquisite, hotel, great, location... | \n", "52 | \n", "[thi, is, an, absolut, exquisit, hotel, at, a,... | \n", "[absolut, exquisit, hotel, great, locat, boast... | \n", "... | \n", "{'absolutely': 1, 'exquisite': 1, 'hotel': 3, ... | \n", "[DT, VBZ, DT, RB, JJ, NN, IN, DT, JJ, NN, CC, ... | \n", "[DT VBZ DT RB JJ NN IN DT JJ NN CC NN NN NNS N... | \n", "[RB, JJ, NN, JJ, NN, VBG, NN, NNS, JJ, NN, RB,... | \n", "[(DT, VBZ), (VBZ, DT), (DT, RB), (RB, JJ), (JJ... | \n", "{('DT', 'VBZ'): 1, ('VBZ', 'DT'): 2, ('DT', 'R... | \n", "[(this, is, an), (is, an, absolutely), (an, ab... | \n", "[(DT, VBZ, DT), (VBZ, DT, RB), (DT, RB, JJ), (... | \n", "[DT_VBZ_DT, VBZ_DT_RB, DT_RB_JJ, RB_JJ_NN, JJ_... | \n", "{'DT_VBZ_DT': 1, 'VBZ_DT_RB': 1, 'DT_RB_JJ': 1... | \n", "
4 rows × 27 columns
\n", "\n", " | (NN, VBD) | \n", "(VBD, TO) | \n", "(TO, VB) | \n", "(VB, IN) | \n", "(IN, PRP$) | \n", "(PRP$, NN) | \n", "(NN, IN) | \n", "(IN, DT) | \n", "(DT, JJ) | \n", "(JJ, NN) | \n", "... | \n", "(RBS, PRP) | \n", "(PRP, JJR) | \n", "(JJ, RBS) | \n", "(RBS, RB) | \n", "(PRP$, TO) | \n", "(WRB, VBD) | \n", "(CC, JJS) | \n", "(MD, DT) | \n", "(VBN, MD) | \n", "(PRP, RBR) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "4 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "3 | \n", "3 | \n", "2 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "2 | \n", "6 | \n", "4 | \n", "4 | \n", "7 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "6 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "2 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "3 | \n", "1 | \n", "2 | \n", "0 | \n", "2 | \n", "2 | \n", "5 | \n", "4 | \n", "3 | \n", "6 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "12 | \n", "0 | \n", "8 | \n", "4 | \n", "3 | \n", "5 | \n", "6 | \n", "10 | \n", "4 | \n", "8 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 rows × 553 columns
\n", "\n", " | (NN, VBD) | \n", "(VBD, TO) | \n", "(TO, VB) | \n", "(VB, IN) | \n", "(IN, PRP$) | \n", "(PRP$, NN) | \n", "(NN, IN) | \n", "(IN, DT) | \n", "(DT, JJ) | \n", "(JJ, NN) | \n", "... | \n", "(RBS, PRP) | \n", "(PRP, JJR) | \n", "(JJ, RBS) | \n", "(RBS, RB) | \n", "(PRP$, TO) | \n", "(WRB, VBD) | \n", "(CC, JJS) | \n", "(MD, DT) | \n", "(VBN, MD) | \n", "(PRP, RBR) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "0.059701 | \n", "0.014925 | \n", "0.014925 | \n", "0.014925 | \n", "0.014925 | \n", "0.044776 | \n", "0.044776 | \n", "0.029851 | \n", "0.014925 | \n", "0.014925 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
N | \n", "0.023438 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.015625 | \n", "0.015625 | \n", "0.046875 | \n", "0.031250 | \n", "0.031250 | \n", "0.054688 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
N | \n", "0.088235 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.014706 | \n", "0.000000 | \n", "0.029412 | \n", "0.014706 | \n", "0.014706 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
N | \n", "0.027523 | \n", "0.009174 | \n", "0.018349 | \n", "0.000000 | \n", "0.018349 | \n", "0.018349 | \n", "0.045872 | \n", "0.036697 | \n", "0.027523 | \n", "0.055046 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
N | \n", "0.046875 | \n", "0.000000 | \n", "0.031250 | \n", "0.015625 | \n", "0.011719 | \n", "0.019531 | \n", "0.023438 | \n", "0.039062 | \n", "0.015625 | \n", "0.031250 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
P | \n", "0.065574 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.032787 | \n", "0.065574 | \n", "0.016393 | \n", "0.000000 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
P | \n", "0.013793 | \n", "0.000000 | \n", "0.006897 | \n", "0.000000 | \n", "0.013793 | \n", "0.006897 | \n", "0.075862 | \n", "0.062069 | \n", "0.041379 | \n", "0.048276 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.007634 | \n", "0.000000 | \n", "0.007634 | \n", "0.007634 | \n", "0.061069 | \n", "0.061069 | \n", "0.022901 | \n", "0.053435 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.007634 | \n", "
P | \n", "0.032258 | \n", "0.000000 | \n", "0.012903 | \n", "0.000000 | \n", "0.006452 | \n", "0.006452 | \n", "0.064516 | \n", "0.051613 | \n", "0.032258 | \n", "0.025806 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
P | \n", "0.029412 | \n", "0.000000 | \n", "0.029412 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.029412 | \n", "0.000000 | \n", "0.029412 | \n", "0.058824 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.000000 | \n", "
320 rows × 553 columns
\n", "\n", " | word | \n", "count | \n", "
---|---|---|
1 | \n", "(DT, NN) | \n", "1719 | \n", "
14 | \n", "(IN, DT) | \n", "1051 | \n", "
3 | \n", "(NN, IN) | \n", "916 | \n", "
2 | \n", "(NN, NN) | \n", "897 | \n", "
8 | \n", "(JJ, NN) | \n", "859 | \n", "
6 | \n", "(NN, VBD) | \n", "676 | \n", "
7 | \n", "(DT, JJ) | \n", "540 | \n", "
30 | \n", "(PRP, VBD) | \n", "458 | \n", "
21 | \n", "(NN, CC) | \n", "451 | \n", "
9 | \n", "(IN, NN) | \n", "398 | \n", "
25 | \n", "(TO, VB) | \n", "374 | \n", "
0 | \n", "(VBD, DT) | \n", "332 | \n", "
32 | \n", "(RB, JJ) | \n", "308 | \n", "
147 | \n", "(VBD, RB) | \n", "281 | \n", "
5 | \n", "(PRP$, NN) | \n", "251 | \n", "
17 | \n", "(VBD, JJ) | \n", "250 | \n", "
13 | \n", "(NNS, IN) | \n", "233 | \n", "
158 | \n", "(NN, RB) | \n", "229 | \n", "
36 | \n", "(IN, PRP) | \n", "229 | \n", "
38 | \n", "(NN, DT) | \n", "214 | \n", "
\n", " | word | \n", "count | \n", "
---|---|---|
12 | \n", "(DT, NN) | \n", "1519 | \n", "
7 | \n", "(IN, DT) | \n", "1045 | \n", "
6 | \n", "(NN, IN) | \n", "877 | \n", "
13 | \n", "(NN, NN) | \n", "836 | \n", "
9 | \n", "(JJ, NN) | \n", "782 | \n", "
0 | \n", "(NN, VBD) | \n", "743 | \n", "
8 | \n", "(DT, JJ) | \n", "529 | \n", "
27 | \n", "(PRP, VBD) | \n", "438 | \n", "
2 | \n", "(TO, VB) | \n", "435 | \n", "
49 | \n", "(NN, CC) | \n", "423 | \n", "
61 | \n", "(IN, NN) | \n", "393 | \n", "
5 | \n", "(PRP$, NN) | \n", "377 | \n", "
28 | \n", "(VBD, RB) | \n", "337 | \n", "
57 | \n", "(VBD, DT) | \n", "292 | \n", "
90 | \n", "(RB, JJ) | \n", "284 | \n", "
10 | \n", "(NN, RB) | \n", "241 | \n", "
41 | \n", "(DT, NNS) | \n", "240 | \n", "
4 | \n", "(IN, PRP$) | \n", "226 | \n", "
122 | \n", "(IN, PRP) | \n", "213 | \n", "
43 | \n", "(VBD, IN) | \n", "209 | \n", "
\n", " | NN_VBD_TO | \n", "VBD_TO_VB | \n", "TO_VB_IN | \n", "VB_IN_PRP | \n", "IN_PRP_$ | \n", "PRP_$_NN | \n", "$_NN_IN | \n", "NN_IN_DT | \n", "IN_DT_JJ | \n", "DT_JJ_NN | \n", "... | \n", "VBP_VBN_PDT | \n", "WP_VBD_NNS | \n", "VBZ_VBD_NN | \n", "DT_WRB_PRP | \n", "VBD_PRP_RBR | \n", "PRP_RBR_IN | \n", "JJ_CC_CD | \n", "VBN_VBN_RP | \n", "NN_PRP_VBN | \n", "PRP_VBN_RB | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "1 | \n", "3 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "2 | \n", "0 | \n", "2 | \n", "1 | \n", "2 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "2 | \n", "3 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
N | \n", "0 | \n", "0 | \n", "3 | \n", "0 | \n", "3 | \n", "5 | \n", "0 | \n", "2 | \n", "2 | \n", "4 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
5 rows × 3696 columns
\n", "" ], "text/plain": [ " NN_VBD_TO VBD_TO_VB TO_VB_IN VB_IN_PRP IN_PRP_$ PRP_$_NN $_NN_IN \\\n", "PoN \n", "N 1 1 1 1 1 3 2 \n", "N 0 0 0 0 2 2 0 \n", "N 0 0 0 0 0 1 0 \n", "N 0 0 0 0 2 2 1 \n", "N 0 0 3 0 3 5 0 \n", "\n", " NN_IN_DT IN_DT_JJ DT_JJ_NN ... VBP_VBN_PDT WP_VBD_NNS VBZ_VBD_NN \\\n", "PoN ... \n", "N 2 1 1 ... 0 0 0 \n", "N 2 1 2 ... 0 0 0 \n", "N 0 1 1 ... 0 0 0 \n", "N 1 2 3 ... 0 0 0 \n", "N 2 2 4 ... 0 0 0 \n", "\n", " DT_WRB_PRP VBD_PRP_RBR PRP_RBR_IN JJ_CC_CD VBN_VBN_RP NN_PRP_VBN \\\n", "PoN \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "N 0 0 0 0 0 0 \n", "\n", " PRP_VBN_RB \n", "PoN \n", "N 0 \n", "N 0 \n", "N 0 \n", "N 0 \n", "N 0 \n", "\n", "[5 rows x 3696 columns]" ] }, "execution_count": 221, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df = pd.DataFrame(all_df['trigrams_feats_bow'].tolist(), all_df['PoN'])\n", "new_df = new_df.fillna(0).astype(int)\n", "new_df[:5]\n" ] }, { "cell_type": "code", "execution_count": 222, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.5520833333333334\n", "Accuracy: 0.6145833333333334\n", "Accuracy: 0.5520833333333334\n", "Accuracy: 0.5729166666666666\n", "Accuracy: 0.5104166666666666\n" ] } ], "source": [ "get_NB(new_df, new_df.index)" ] }, { "cell_type": "code", "execution_count": 223, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.53125\n", "Accuracy: 0.6041666666666666\n", "Accuracy: 0.4791666666666667\n", "Accuracy: 0.5833333333333334\n", "Accuracy: 0.5208333333333334\n" ] }, { "data": { "text/html": [ "\n", " | NN_VBD_TO | \n", "VBD_TO_VB | \n", "TO_VB_IN | \n", "VB_IN_PRP | \n", "IN_PRP_$ | \n", "PRP_$_NN | \n", "$_NN_IN | \n", "NN_IN_DT | \n", "IN_DT_JJ | \n", "DT_JJ_NN | \n", "... 
| \n", "VBP_VBN_PDT | \n", "WP_VBD_NNS | \n", "VBZ_VBD_NN | \n", "DT_WRB_PRP | \n", "VBD_PRP_RBR | \n", "PRP_RBR_IN | \n", "JJ_CC_CD | \n", "VBN_VBN_RP | \n", "NN_PRP_VBN | \n", "PRP_VBN_RB | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PoN | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
N | \n", "0.014493 | \n", "0.014493 | \n", "0.014493 | \n", "0.014493 | \n", "0.014493 | \n", "0.043478 | \n", "0.028986 | \n", "0.028986 | \n", "0.014493 | \n", "0.014493 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.015504 | \n", "0.015504 | \n", "0.000000 | \n", "0.015504 | \n", "0.007752 | \n", "0.015504 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.014706 | \n", "0.000000 | \n", "0.000000 | \n", "0.014706 | \n", "0.014706 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.017857 | \n", "0.017857 | \n", "0.008929 | \n", "0.008929 | \n", "0.017857 | \n", "0.026786 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
N | \n", "0.000000 | \n", "0.000000 | \n", "0.011494 | \n", "0.000000 | \n", "0.011494 | \n", "0.019157 | \n", "0.000000 | \n", "0.007663 | \n", "0.007663 | \n", "0.015326 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.016667 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.013514 | \n", "0.006757 | \n", "0.000000 | \n", "0.027027 | \n", "0.027027 | \n", "0.027027 | \n", "... | \n", "0.006757 | \n", "0.006757 | \n", "0.006757 | \n", "0.006757 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.007634 | \n", "0.007634 | \n", "0.000000 | \n", "0.030534 | \n", "0.007634 | \n", "0.015267 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.007634 | \n", "0.007634 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.006452 | \n", "0.006452 | \n", "0.000000 | \n", "0.025806 | \n", "0.019355 | \n", "0.019355 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.006452 | \n", "0.006452 | \n", "0.006452 | \n", "0.006452 | \n", "
P | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.030303 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
320 rows × 3696 columns
\n", "\n", " | 0 | \n", "PoN | \n", "sentences | \n", "num_sentences | \n", "tokens | \n", "num_tokens | \n", "no_sw | \n", "num_no_sw | \n", "stemmed | \n", "stemmed_no_sw | \n", "... | \n", "bow_no_sw | \n", "pos_sent | \n", "pos_sent_str | \n", "pos_no_sw_sent | \n", "pos_sent_bi | \n", "bow_pos | \n", "trigrams | \n", "trigrams_pos | \n", "trigrams_feats | \n", "trigrams_feats_bow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "I traveled to Chicago with my husband for a ro... | \n", "N | \n", "[I traveled to Chicago with my husband for a r... | \n", "6 | \n", "[i, traveled, to, chicago, with, my, husband, ... | \n", "68 | \n", "[traveled, chicago, husband, romantic, weekend... | \n", "40 | \n", "[i, travel, to, chicago, with, my, husband, fo... | \n", "[travel, chicago, husband, romant, weekend, aw... | \n", "... | \n", "{'traveled': 1, 'chicago': 2, 'husband': 1, 'r... | \n", "[NN, VBD, TO, VB, IN, PRP$, NN, IN, DT, JJ, NN... | \n", "[NN VBD TO VB IN PRP$ NN IN DT JJ NN RB PRP$ N... | \n", "[VBN, JJ, NN, JJ, NN, RB, JJ, JJ, NN, NN, NN, ... | \n", "[(NN, VBD), (VBD, TO), (TO, VB), (VB, IN), (IN... | \n", "{('NN', 'VBD'): 4, ('VBD', 'TO'): 1, ('TO', 'V... | \n", "[(i, traveled, to), (traveled, to, chicago), (... | \n", "[(NN, VBD, TO), (VBD, TO, VB), (TO, VB, IN), (... | \n", "[NN_VBD_TO, VBD_TO_VB, TO_VB_IN, VB_IN_PRP, IN... | \n", "{'NN_VBD_TO': 1, 'VBD_TO_VB': 1, 'TO_VB_IN': 1... | \n", "
1 | \n", "I stayed in the Sofitel Chicago Water Tower ho... | \n", "N | \n", "[I stayed in the Sofitel Chicago Water Tower h... | \n", "6 | \n", "[i, stayed, in, the, sofitel, chicago, water, ... | \n", "129 | \n", "[stayed, sofitel, chicago, water, tower, hotel... | \n", "71 | \n", "[i, stay, in, the, sofitel, chicago, water, to... | \n", "[stay, sofitel, chicago, water, tower, hotel, ... | \n", "... | \n", "{'stayed': 1, 'sofitel': 1, 'chicago': 1, 'wat... | \n", "[JJ, VBD, IN, DT, NN, NN, NN, NN, NN, IN, PRP$... | \n", "[JJ VBD IN DT NN NN NN NN NN IN PRP$ NN CC CD ... | \n", "[JJ, NN, NN, NN, NN, NN, NN, CD, NNS, JJ, NN, ... | \n", "[(JJ, VBD), (VBD, IN), (IN, DT), (DT, NN), (NN... | \n", "{('JJ', 'VBD'): 1, ('VBD', 'IN'): 1, ('IN', 'D... | \n", "[(i, stayed, in), (stayed, in, the), (in, the,... | \n", "[(JJ, VBD, IN), (VBD, IN, DT), (IN, DT, NN), (... | \n", "[JJ_VBD_IN, VBD_IN_DT, IN_DT_NN, DT_NN_NN, NN_... | \n", "{'JJ_VBD_IN': 1, 'VBD_IN_DT': 1, 'IN_DT_NN': 2... | \n", "
2 | \n", "This hotel was gorgeous! I really enjoyed my s... | \n", "N | \n", "[This hotel was gorgeous!, I really enjoyed my... | \n", "7 | \n", "[this, hotel, was, gorgeous, i, really, enjoye... | \n", "69 | \n", "[hotel, gorgeous, really, enjoyed, stay, defin... | \n", "36 | \n", "[thi, hotel, wa, gorgeou, i, realli, enjoy, my... | \n", "[hotel, gorgeou, realli, enjoy, stay, definit,... | \n", "... | \n", "{'hotel': 2, 'gorgeous': 1, 'really': 1, 'enjo... | \n", "[DT, NN, VBD, JJ, JJ, RB, VBN, PRP$, NN, RB, C... | \n", "[DT NN VBD JJ JJ RB VBN PRP$ NN RB CC MD RB VB... | \n", "[NN, JJ, RB, VBN, NN, RB, VBG, JJ, NN, JJ, NN,... | \n", "[(DT, NN), (NN, VBD), (VBD, JJ), (JJ, JJ), (JJ... | \n", "{('DT', 'NN'): 8, ('NN', 'VBD'): 6, ('VBD', 'J... | \n", "[(this, hotel, was), (hotel, was, gorgeous), (... | \n", "[(DT, NN, VBD), (NN, VBD, JJ), (VBD, JJ, JJ), ... | \n", "[DT_NN_VBD, NN_VBD_JJ, VBD_JJ_JJ, JJ_JJ_RB, JJ... | \n", "{'DT_NN_VBD': 4, 'NN_VBD_JJ': 4, 'VBD_JJ_JJ': ... | \n", "
3 | \n", "This is an absolutely exquisite hotel, at a gr... | \n", "N | \n", "[This is an absolutely exquisite hotel, at a g... | \n", "6 | \n", "[this, is, an, absolutely, exquisite, hotel, a... | \n", "110 | \n", "[absolutely, exquisite, hotel, great, location... | \n", "52 | \n", "[thi, is, an, absolut, exquisit, hotel, at, a,... | \n", "[absolut, exquisit, hotel, great, locat, boast... | \n", "... | \n", "{'absolutely': 1, 'exquisite': 1, 'hotel': 3, ... | \n", "[DT, VBZ, DT, RB, JJ, NN, IN, DT, JJ, NN, CC, ... | \n", "[DT VBZ DT RB JJ NN IN DT JJ NN CC NN NN NNS N... | \n", "[RB, JJ, NN, JJ, NN, VBG, NN, NNS, JJ, NN, RB,... | \n", "[(DT, VBZ), (VBZ, DT), (DT, RB), (RB, JJ), (JJ... | \n", "{('DT', 'VBZ'): 1, ('VBZ', 'DT'): 2, ('DT', 'R... | \n", "[(this, is, an), (is, an, absolutely), (an, ab... | \n", "[(DT, VBZ, DT), (VBZ, DT, RB), (DT, RB, JJ), (... | \n", "[DT_VBZ_DT, VBZ_DT_RB, DT_RB_JJ, RB_JJ_NN, JJ_... | \n", "{'DT_VBZ_DT': 1, 'VBZ_DT_RB': 1, 'DT_RB_JJ': 1... | \n", "
4 | \n", "I recently traveled up to Chicago for business... | \n", "N | \n", "[I recently traveled up to Chicago for busines... | \n", "13 | \n", "[i, recently, traveled, up, to, chicago, for, ... | \n", "257 | \n", "[recently, traveled, chicago, business, terrif... | \n", "116 | \n", "[i, recent, travel, up, to, chicago, for, busi... | \n", "[recent, travel, chicago, busi, terrif, day, n... | \n", "... | \n", "{'recently': 1, 'traveled': 1, 'chicago': 4, '... | \n", "[NN, RB, VBD, RP, TO, VB, IN, NN, CC, VBD, DT,... | \n", "[NN RB VBD RP TO VB IN NN CC VBD DT JJ NN NN N... | \n", "[RB, VBN, NN, NN, NN, NN, NN, VB, RB, JJ, NN, ... | \n", "[(NN, RB), (RB, VBD), (VBD, RP), (RP, TO), (TO... | \n", "{('NN', 'RB'): 3, ('RB', 'VBD'): 3, ('VBD', 'R... | \n", "[(i, recently, traveled), (recently, traveled,... | \n", "[(NN, RB, VBD), (RB, VBD, RP), (VBD, RP, TO), ... | \n", "[NN_RB_VBD, RB_VBD_RP, VBD_RP_TO, RP_TO_VB, TO... | \n", "{'NN_RB_VBD': 1, 'RB_VBD_RP': 2, 'VBD_RP_TO': ... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
315 | \n", "This hotel was not worth it. From the moment w... | \n", "P | \n", "[This hotel was not worth it., From the moment... | \n", "6 | \n", "[this, hotel, was, not, worth, it, from, the, ... | \n", "62 | \n", "[hotel, worth, moment, walked, hotel, lobby, c... | \n", "27 | \n", "[thi, hotel, wa, not, worth, it, from, the, mo... | \n", "[hotel, worth, moment, walk, hotel, lobbi, che... | \n", "... | \n", "{'hotel': 2, 'worth': 1, 'moment': 1, 'walked'... | \n", "[DT, NN, VBD, RB, JJ, PRP, IN, DT, NN, PRP, VB... | \n", "[DT NN VBD RB JJ PRP IN DT NN PRP VBD IN DT NN... | \n", "[NN, JJ, NN, VBD, NN, NN, NN, NN, VBP, JJ, NNS... | \n", "[(DT, NN), (NN, VBD), (VBD, RB), (RB, JJ), (JJ... | \n", "{('DT', 'NN'): 7, ('NN', 'VBD'): 4, ('VBD', 'R... | \n", "[(this, hotel, was), (hotel, was, not), (was, ... | \n", "[(DT, NN, VBD), (NN, VBD, RB), (VBD, RB, JJ), ... | \n", "[DT_NN_VBD, NN_VBD_RB, VBD_RB_JJ, RB_JJ_PRP, J... | \n", "{'DT_NN_VBD': 3, 'NN_VBD_RB': 1, 'VBD_RB_JJ': ... | \n", "
316 | \n", "I stayed at the hotel during the Dave Matthews... | \n", "P | \n", "[I stayed at the hotel during the Dave Matthew... | \n", "9 | \n", "[i, stayed, at, the, hotel, during, the, dave,... | \n", "146 | \n", "[stayed, hotel, dave, matthews, caravan, tour,... | \n", "76 | \n", "[i, stay, at, the, hotel, dure, the, dave, mat... | \n", "[stay, hotel, dave, matthew, caravan, tour, wo... | \n", "... | \n", "{'stayed': 1, 'hotel': 3, 'dave': 1, 'matthews... | \n", "[JJ, VBD, IN, DT, NN, IN, DT, NN, NNS, VBP, JJ... | \n", "[JJ VBD IN DT NN IN DT NN NNS VBP JJ CC MD VB ... | \n", "[JJ, NN, VBP, NNS, VB, NNS, MD, VB, NN, NN, RB... | \n", "[(JJ, VBD), (VBD, IN), (IN, DT), (DT, NN), (NN... | \n", "{('JJ', 'VBD'): 1, ('VBD', 'IN'): 1, ('IN', 'D... | \n", "[(i, stayed, at), (stayed, at, the), (at, the,... | \n", "[(JJ, VBD, IN), (VBD, IN, DT), (IN, DT, NN), (... | \n", "[JJ_VBD_IN, VBD_IN_DT, IN_DT_NN, DT_NN_IN, NN_... | \n", "{'JJ_VBD_IN': 1, 'VBD_IN_DT': 1, 'IN_DT_NN': 3... | \n", "
317 | \n", "We had a reservation for 3 rooms with 5 adults... | \n", "P | \n", "[We had a reservation for 3 rooms with 5 adult... | \n", "9 | \n", "[we, had, a, reservation, for, rooms, with, ad... | \n", "132 | \n", "[reservation, rooms, adults, kids, got, rooms,... | \n", "58 | \n", "[we, had, a, reserv, for, room, with, adult, a... | \n", "[reserv, room, adult, kid, got, room, arriv, c... | \n", "... | \n", "{'reservation': 1, 'rooms': 3, 'adults': 1, 'k... | \n", "[PRP, VBD, DT, NN, IN, NNS, IN, NNS, CC, NNS, ... | \n", "[PRP VBD DT NN IN NNS IN NNS CC NNS VBD RB NNS... | \n", "[NN, NNS, NNS, NNS, VBD, NNS, JJ, NN, NNS, JJ,... | \n", "[(PRP, VBD), (VBD, DT), (DT, NN), (NN, IN), (I... | \n", "{('PRP', 'VBD'): 8, ('VBD', 'DT'): 1, ('DT', '... | \n", "[(we, had, a), (had, a, reservation), (a, rese... | \n", "[(PRP, VBD, DT), (VBD, DT, NN), (DT, NN, IN), ... | \n", "[PRP_VBD_DT, VBD_DT_NN, DT_NN_IN, NN_IN_NNS, I... | \n", "{'PRP_VBD_DT': 1, 'VBD_DT_NN': 1, 'DT_NN_IN': ... | \n", "
318 | \n", "I am staying here now and actually am compelle... | \n", "P | \n", "[I am staying here now and actually am compell... | \n", "6 | \n", "[i, am, staying, here, now, and, actually, am,... | \n", "156 | \n", "[staying, actually, compelled, write, review, ... | \n", "72 | \n", "[i, am, stay, here, now, and, actual, am, comp... | \n", "[stay, actual, compel, write, review, fall, as... | \n", "... | \n", "{'staying': 1, 'actually': 1, 'compelled': 1, ... | \n", "[NN, VBP, VBG, RB, RB, CC, RB, VBP, VBN, TO, V... | \n", "[NN VBP VBG RB RB CC RB VBP VBN TO VB DT NN IN... | \n", "[VBG, RB, VBN, JJ, NN, NN, JJ, NN, JJ, NN, NN,... | \n", "[(NN, VBP), (VBP, VBG), (VBG, RB), (RB, RB), (... | \n", "{('NN', 'VBP'): 1, ('VBP', 'VBG'): 1, ('VBG', ... | \n", "[(i, am, staying), (am, staying, here), (stayi... | \n", "[(NN, VBP, VBG), (VBP, VBG, RB), (VBG, RB, RB)... | \n", "[NN_VBP_VBG, VBP_VBG_RB, VBG_RB_RB, RB_RB_CC, ... | \n", "{'NN_VBP_VBG': 1, 'VBP_VBG_RB': 1, 'VBG_RB_RB'... | \n", "
319 | \n", "We enjoyed the Hotel Monaco. Great location fo... | \n", "P | \n", "[We enjoyed the Hotel Monaco., Great location ... | \n", "4 | \n", "[we, enjoyed, the, hotel, monaco, great, locat... | \n", "35 | \n", "[enjoyed, hotel, monaco, great, location, walk... | \n", "19 | \n", "[we, enjoy, the, hotel, monaco, great, locat, ... | \n", "[enjoy, hotel, monaco, great, locat, walk, bea... | \n", "... | \n", "{'enjoyed': 2, 'hotel': 1, 'monaco': 1, 'great... | \n", "[PRP, VBD, DT, NN, VBZ, JJ, NN, IN, NN, CC, NN... | \n", "[PRP VBD DT NN VBZ JJ NN IN NN CC NN NNS DT NN... | \n", "[JJ, NN, NN, JJ, NN, VBG, JJ, NNS, NN, RB, RB,... | \n", "[(PRP, VBD), (VBD, DT), (DT, NN), (NN, VBZ), (... | \n", "{('PRP', 'VBD'): 1, ('VBD', 'DT'): 2, ('DT', '... | \n", "[(we, enjoyed, the), (enjoyed, the, hotel), (t... | \n", "[(PRP, VBD, DT), (VBD, DT, NN), (DT, NN, VBZ),... | \n", "[PRP_VBD_DT, VBD_DT_NN, DT_NN_VBZ, NN_VBZ_JJ, ... | \n", "{'PRP_VBD_DT': 1, 'VBD_DT_NN': 1, 'DT_NN_VBZ':... | \n", "
320 rows × 27 columns
\n", "