HW4 [Deception]¶

STEP 1: GET THAT DATA¶

import os
def get_data(file, path):
    """Return the entire text content of one file.

    Args:
        file: Name of the file inside ``path``.
        path: Directory that contains the file. A trailing path separator
            is accepted but no longer required.

    Returns:
        The content of the file as a single string.
    """
    # os.path.join copes with ``path`` given with or without a trailing
    # slash, and the context manager closes the handle even when read()
    # raises.
    with open(os.path.join(path, file)) as handle:
        return handle.read()
    
def get_data_from_files(path):
    """Read every entry that ``os.listdir`` reports for ``path``.

    Args:
        path: Directory to read; passed unchanged to ``get_data``.

    Returns:
        A list with one string per directory entry, in the order returned
        by ``os.listdir``.
    """
    contents = []
    for name in os.listdir(path):
        contents.append(get_data(name, path))
    return contents

# pos = get_data_from_files('../pos_cornell//')
# neg = get_data_from_files('../neg_cornell/')

# pos = get_data_from_files('../hw4_lie_false/')
# neg = get_data_from_files('../hw4_lie_true/')

# Load both halves of the corpus: `pos` receives the texts from
# hw4_lie_false and `neg` the texts from hw4_lie_true.
pos, neg = [
    get_data_from_files(folder)
    for folder in ('../hw4_lie_false/', '../hw4_lie_true/')
]

import pandas as pd
# Wrap each list of raw texts in a one-column frame (column label 0) and
# tag every row with its class label.
neg_df = pd.DataFrame(neg)
pos_df = pd.DataFrame(pos)
pos_df['PoN'] = 'P'
neg_df['PoN'] = 'N'
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat stacks the frames in the same order (negatives first), and
# ignore_index=True renumbers the rows 0..n-1 exactly as the former
# reset_index(drop=True, inplace=True) did.
all_df = pd.concat([neg_df, pos_df], ignore_index=True)
all_df[:5]

STEP 2: TOKENIZE¶

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

-- 2a by sentence¶

def get_sentence_tokens(review):
    """Split one review string into sentences using ``sent_tokenize``."""
    sentences = sent_tokenize(review)
    return sentences
    
# Column 0 holds the raw review text: split it into sentences, then record
# how many sentences each review contains.
all_df['sentences'] = all_df[0].apply(get_sentence_tokens)
all_df['num_sentences'] = all_df['sentences'].apply(len)

-- 2b by word¶

def get_tokens(sentence):
    """Tokenize text into lowercase, purely alphabetic words.

    Args:
        sentence: Text to split with ``word_tokenize``.

    Returns:
        A list of lower-cased tokens; any token for which ``str.isalpha``
        is false (punctuation, numbers, mixed tokens) is dropped.
    """
    clean_tokens = []
    for token in word_tokenize(sentence):
        if token.isalpha():
            clean_tokens.append(token.lower())
    return clean_tokens

# Word-level tokens of the raw text in column 0, plus the token count.
all_df['tokens'] = all_df[0].apply(get_tokens)
all_df['num_tokens'] = all_df['tokens'].apply(len)

all_df[:3]

-- 2c Remove if tokens < 1¶

# Discard every review that produced no alphabetic token at all.
empty_rows = all_df.index[all_df.num_tokens < 1]
all_df = all_df.drop(index=empty_rows)
all_df[:3]

STEP 3: EXPERIMENT¶

Experiment with: stopwords, stemming, lemmatization, etc.¶

-- 3a remove english stopwords¶

from nltk.corpus import stopwords
# Held in a set so that each membership test below is a hash lookup.
stop_words = set(stopwords.words("english"))
def remove_stopwords(sentence):
    """Return the tokens of ``sentence`` that are not in ``stop_words``.

    Args:
        sentence: Iterable of tokens (a token list, not a raw string).

    Returns:
        A new list with the remaining tokens; order and duplicates are
        preserved.
    """
    return [word for word in sentence if word not in stop_words]
# Stopword-free version of every token list, plus its length.
all_df['no_sw'] = all_df['tokens'].apply(remove_stopwords)
all_df['num_no_sw'] = all_df['no_sw'].apply(len)

all_df[:3]

-- 3b get stems for both tokens and no_sw¶

from nltk.stem import PorterStemmer
def get_stems(sentence):
    """Stem every token of ``sentence`` with a fresh ``PorterStemmer``.

    Args:
        sentence: Iterable of tokens.

    Returns:
        A list holding the stem of each token, in input order.
    """
    stem = PorterStemmer().stem
    return list(map(stem, sentence))
    
# Stems for the full token lists and for the stopword-free lists.
all_df['stemmed'] = all_df['tokens'].apply(get_stems)
all_df['stemmed_no_sw'] = all_df['no_sw'].apply(get_stems)

all_df[:3]

-- 3c get lemmas for both tokens and no_sw¶

from nltk.stem.wordnet import WordNetLemmatizer
def get_lemmas(sentence):
    """Lemmatize every token of ``sentence`` with ``WordNetLemmatizer``.

    Args:
        sentence: Iterable of tokens.

    Returns:
        A list holding the lemma of each token, in input order. The
        lemmatizer is called without a part-of-speech argument.
    """
    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for token in sentence:
        lemmas.append(lemmatizer.lemmatize(token))
    return lemmas
    
# Lemmas for the full token lists and for the stopword-free lists.
all_df['lemmed'] = all_df['tokens'].apply(get_lemmas)
all_df['lemmed_no_sw'] = all_df['no_sw'].apply(get_lemmas)

all_df[:3]

# The visible source never imports `nltk` itself; the name was presumably
# only reachable as a side effect of `from nltk.sentiment.util import *`.
# Import it explicitly so this step does not depend on that.
import nltk

# Tag every token with its part of speech, with and without stopwords.
# Each cell becomes a list of (token, tag) tuples.
# NOTE(review): the tagger receives lower-cased tokens with punctuation
# already removed, which may hurt tagging quality -- confirm this is the
# intended input.
all_df['pos'] = all_df['tokens'].apply(nltk.pos_tag)
all_df['pos_no_sw'] = all_df['no_sw'].apply(nltk.pos_tag)

def get_pos_dict(pos_tuple):
    """Count how often each part-of-speech tag occurs.

    Args:
        pos_tuple: Iterable of ``(token, tag)`` pairs; only element 1 of
            each pair is read.

    Returns:
        A plain dict mapping each tag to its number of occurrences, with
        keys in order of first appearance.
    """
    # Local import: the file-level Counter import appears only further
    # down, after the first call to this function.
    from collections import Counter

    # Counter replaces the hand-rolled "if key in dict.keys()" tally;
    # converting back to dict keeps the original return type.
    return dict(Counter(pair[1] for pair in pos_tuple))

# Per-review tag frequency dictionaries, with and without stopwords.
all_df['pos_dict'] = all_df['pos'].apply(get_pos_dict)
all_df['pos_dict_no_sw'] = all_df['pos_no_sw'].apply(get_pos_dict)
all_df[:3]

# def get_bow_from_tokens(df, column):
#     all_column_data = ' '.join(df[column].tolist())
#     all_column_fd = Counter(all_column_data.split())
#     return all_column_fd

# # bow = get_bow_from_column(all_df, 'diy_cleaner')
# # bow =
from collections import Counter
# Per-review bag of words: a Counter of token frequencies, with and
# without stopwords.
all_df['bow'] = all_df['tokens'].apply(Counter)
all_df['bow_no_sw'] = all_df['no_sw'].apply(Counter)
all_df[:3]

# Split the reviews by class label.
all_df_n = all_df[all_df['PoN'] == 'N']
all_df_p = all_df[all_df['PoN'] == 'P']

# Iterating a Counter yields every distinct word only once, so flattening
# the per-review Counters directly counted "number of reviews containing
# the word". Counter.elements() repeats each word as many times as it
# occurred, so the totals below are real occurrence counts (as in the
# commented-out get_bow_from_tokens above).
big_bow = [word for bow in all_df['bow'] for word in bow.elements()]
big_bow_n = [word for bow in all_df_n['bow'] for word in bow.elements()]
big_bow_p = [word for bow in all_df_p['bow'] for word in bow.elements()]

# One row per word with its total count, for all reviews and per class.
df = (
    pd.DataFrame.from_dict(Counter(big_bow), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_n = (
    pd.DataFrame.from_dict(Counter(big_bow_n), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_p = (
    pd.DataFrame.from_dict(Counter(big_bow_p), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

import seaborn as sns
import matplotlib.pyplot as plt 
def bar_plot(df, title):
    """Draw a bar chart of ``count`` per ``word`` and return pyplot.

    Args:
        df: DataFrame with a ``word`` column (x axis) and a ``count``
            column (bar height).
        title: Text used as the chart title.

    Returns:
        The ``matplotlib.pyplot`` module, unchanged from before so that
        existing callers which print the return value keep working.
    """
    # Set the plotting context before anything is drawn. It was previously
    # set after the chart had been built, so it could only influence the
    # next chart, not this one.
    sns.set_context("talk")
    sns.barplot(y="count", x="word", data=df, palette="husl")
    plt.title(title)
    plt.xlabel("Word")
    plt.ylabel("Count")
    # Labels are rotated so that long entries do not overlap.
    plt.xticks(rotation=90)
    return plt

# Chart the 20 rows of df with the largest count; print() emits the repr
# of the pyplot module that bar_plot returns.
print(bar_plot(df.sort_values(by="count", ascending=False).head(20), "Top 20 Items (ALL) Prior to Cleaning"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# Same chart restricted to the 'N' rows.
print(bar_plot(df_n.sort_values(by="count", ascending=False).head(20), "Top 20 Items (TRUE) Prior to Cleaning"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# Same chart restricted to the 'P' rows.
print(bar_plot(df_p.sort_values(by="count", ascending=False).head(20), "Top 20 Items (FALSE) Prior to Cleaning"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# Split the reviews by class label.
all_df_n = all_df[all_df['PoN'] == 'N']
all_df_p = all_df[all_df['PoN'] == 'P']

# Iterating a Counter yields every distinct word only once, which would
# count reviews rather than occurrences. Counter.elements() repeats each
# word by its frequency, so the totals are real occurrence counts.
big_bow = [word for bow in all_df['bow_no_sw'] for word in bow.elements()]
big_bow_n = [word for bow in all_df_n['bow_no_sw'] for word in bow.elements()]
big_bow_p = [word for bow in all_df_p['bow_no_sw'] for word in bow.elements()]

# One row per non-stopword with its total count, overall and per class.
df = (
    pd.DataFrame.from_dict(Counter(big_bow), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_n = (
    pd.DataFrame.from_dict(Counter(big_bow_n), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_p = (
    pd.DataFrame.from_dict(Counter(big_bow_p), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

# df was rebuilt from the stopword-free bags of words just above, so the
# title now says so (it previously repeated "Prior to Cleaning").
print(bar_plot(df.sort_values(by=["count"], ascending=False)[:20], "Top 20 Items (ALL) Stopwords Removed"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# 20 most frequent entries of df_n (the 'N' rows).
print(bar_plot(df_n.sort_values(by="count", ascending=False).head(20), "Top 20 Items (TRUE) Stopwords Removed"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# 20 most frequent entries of df_p (the 'P' rows).
print(bar_plot(df_p.sort_values(by="count", ascending=False).head(20), "Top 20 Items (FALSE) Stopwords Removed"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# Split the reviews by class label.
all_df_n = all_df[all_df['PoN'] == 'N']
all_df_p = all_df[all_df['PoN'] == 'P']

# Iterating the per-review 'pos_dict' dictionaries yields each tag once
# per review, which counts reviews rather than tag occurrences. Reading
# the tags from the (token, tag) pairs in 'pos' counts every occurrence.
big_bow = [pair[1] for tagged in all_df['pos'] for pair in tagged]
big_bow_n = [pair[1] for tagged in all_df_n['pos'] for pair in tagged]
big_bow_p = [pair[1] for tagged in all_df_p['pos'] for pair in tagged]

# One row per tag with its total count, overall and per class. The column
# is still called 'word' because bar_plot expects that name.
df = (
    pd.DataFrame.from_dict(Counter(big_bow), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_n = (
    pd.DataFrame.from_dict(Counter(big_bow_n), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_p = (
    pd.DataFrame.from_dict(Counter(big_bow_p), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

# df holds part-of-speech tags at this point, so the title says "POS"
# like the two per-class charts that follow (it previously said "Items").
print(bar_plot(df.sort_values(by=["count"], ascending=False)[:10], "Top 10 POS (ALL) Prior to Cleaning"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# 10 most frequent tags among the 'N' rows.
print(bar_plot(df_n.sort_values(by="count", ascending=False).head(10), "Top 10 POS (TRUE) Prior to Cleaning"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# 10 most frequent tags among the 'P' rows.
print(bar_plot(df_p.sort_values(by="count", ascending=False).head(10), "Top 10 POS (FALSE) Prior to Cleaning"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

STEP 4: TEST EXPERIMENTS!!¶

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

def get_NB(small_df, labels, test_size=0.3, random_state=109):
    """Train and score a Gaussian Naive Bayes classifier on a hold-out split.

    Args:
        small_df: DataFrame of numeric features, one row per sample; its
            ``.values`` array is what gets split and fitted.
        labels: Class label for each row of ``small_df``.
        test_size: Fraction of the samples held out for testing. Defaults
            to the previously hard-coded 0.3.
        random_state: Seed for the split so that runs are repeatable.
            Defaults to the previously hard-coded 109.

    Returns:
        The accuracy on the held-out samples. It is still printed as
        before; returning it lets callers compare experiments in code.
    """
    from sklearn import metrics

    x_train, x_test, y_train, y_test = train_test_split(
        small_df.values, labels, test_size=test_size, random_state=random_state
    )

    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    y_pred = gnb.predict(x_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    return accuracy

TEST 1: Parts of speech frequency distribution¶

# One row per review and one column per POS tag, indexed by class label.
# Note that this rebinds the name pos_df used earlier in the file.
pos_df = pd.DataFrame(all_df['pos_dict'].tolist(), index=all_df['PoN'])
pos_df[:3]

# A tag that never occurs in a review comes out as NaN; treat it as 0 and
# go back to integer counts.
pos_df = pos_df.fillna(0)
pos_df = pos_df.astype(int)
pos_df[:3]

get_NB(pos_df, pos_df.index)

Accuracy: 0.5925925925925926

TEST 1b: Normalized parts of speech frequency distribution¶

# Convert each review's tag counts into proportions by dividing every row
# by its own total. The totals are passed as a bare array so the division
# is positional and does not align on the repeated 'N'/'P' index labels.
pos_df_norm = pos_df.div(pos_df.sum(axis=1).values, axis=0)
pos_df_norm[:3]
pos_df_norm[1:]
# Scratch copy with an explicit row total, for checking the counts by eye.
test = pos_df.copy()
test['total'] = test.sum(axis=1)
test[:3]

pos_df_norm[:3]

get_NB(pos_df_norm, pos_df.index)

Accuracy: 0.5925925925925926

# small_df
# Keep only three tag columns as features: PRP, PRP$ and NN.
small_df = pos_df_norm.filter(items=['PRP', 'PRP$', 'NN'])
get_NB(small_df, pos_df.index)

Accuracy: 0.4444444444444444

# Raw PRP tag totals: all reviews, 'N' rows, 'P' rows, then each class's
# share of the overall total.
pos_df_n = pos_df.loc[pos_df.index == 'N']
pos_df_p = pos_df.loc[pos_df.index == 'P']
print(pos_df['PRP'].sum(), pos_df_n['PRP'].sum(), pos_df_p['PRP'].sum(), sep='\n')
print(pos_df_n['PRP'].sum() / pos_df['PRP'].sum())
print(pos_df_p['PRP'].sum() / pos_df['PRP'].sum())

337
160
177
0.47477744807121663
0.5252225519287834

# Raw PRP$ tag totals: all reviews, 'N' rows, 'P' rows.
pos_df_n = pos_df.loc[pos_df.index == 'N']
pos_df_p = pos_df.loc[pos_df.index == 'P']
print(pos_df['PRP$'].sum(), pos_df_n['PRP$'].sum(), pos_df_p['PRP$'].sum(), sep='\n')

138
65
73

# Sums of the normalized PRP proportions: all reviews, 'N' rows, 'P' rows.
# pos_df_n / pos_df_p now refer to slices of the normalized table.
pos_df_n = pos_df_norm.loc[pos_df_norm.index == 'N']
pos_df_p = pos_df_norm.loc[pos_df_norm.index == 'P']
print(pos_df_norm['PRP'].sum(), pos_df_n['PRP'].sum(), pos_df_p['PRP'].sum(), sep='\n')

4.256356712105416
2.060598739935355
2.19575797217006

# Mean normalized PRP proportion: all reviews, 'N' rows, 'P' rows.
pos_df_n = pos_df_norm.loc[pos_df_norm.index == 'N']
pos_df_p = pos_df_norm.loc[pos_df_norm.index == 'P']
print(pos_df_norm['PRP'].mean(), pos_df_n['PRP'].mean(), pos_df_p['PRP'].mean(), sep='\n')

0.04729285235672684
0.04683178954398534
0.047733868960218695

# Mean normalized PRP$ proportion: all reviews, 'N' rows, 'P' rows.
pos_df_n = pos_df_norm.loc[pos_df_norm.index == 'N']
pos_df_p = pos_df_norm.loc[pos_df_norm.index == 'P']
print(pos_df_norm['PRP$'].mean(), pos_df_n['PRP$'].mean(), pos_df_p['PRP$'].mean(), sep='\n')

0.0177106769174579
0.017530735194787515
0.0178827950869687

# Display the tagged tokens: one list of (token, tag) tuples per review.
all_df['pos']

1     [(twin, NN), (trees, NNS), (cicero, VBP), (ny,...
2     [(the, DT), (worst, JJS), (restaurant, NN), (t...
4     [(i, NNS), (have, VBP), (been, VBN), (to, TO),...
5     [(the, DT), (best, JJS), (restaurant, NN), (i,...
6     [(the, DT), (restaurant, NN), (looked, VBD), (...
                            ...                        
87    [(mikes, NNS), (pizza, VBP), (high, JJ), (poin...
88    [(after, IN), (i, JJ), (went, VBD), (shopping,...
89    [(i, NN), (entered, VBD), (the, DT), (restaura...
90    [(carlos, NN), (plate, NN), (shack, NN), (was,...
91    [(olive, JJ), (oil, NN), (garden, NN), (was, V...
Name: pos, Length: 90, dtype: object

# Keep only the tag of every (token, tag) pair, giving each review's tag
# sequence; 'pos_sent_str' wraps the space-joined sequence in a
# one-element list.
all_df['pos_sent'] = all_df['pos'].apply(lambda tagged: [pair[1] for pair in tagged])
all_df['pos_sent_str'] = all_df['pos_sent'].apply(lambda tags: [' '.join(tags)])
all_df['pos_no_sw_sent'] = all_df['pos_no_sw'].apply(lambda tagged: [pair[1] for pair in tagged])

# Check the cell type for the row labelled 1: each 'pos_sent_str' entry is
# a list wrapping the joined tag string, not a bare string.
type(all_df['pos_sent_str'][1])

list

# POS bigrams: split each joined tag string on spaces and pair every tag
# with its successor.
all_df['pos_sent_bi'] = all_df['pos_sent_str'].apply(
    lambda joined: [
        bigram
        for tag_string in joined
        for bigram in zip(tag_string.split(" ")[:-1], tag_string.split(" ")[1:])
    ]
)
# bigrams = [b for l in text for b in zip(l.split(" ")[:-1], l.split(" ")[1:])]

# all_df['pos_sent_tri'] = all_df.apply(lambda x: [b for l in x['pos_sent_str'] for b in zip(l.split(" ")[:-1], l.split(" ")[1:])], axis=1)

# Display the first four rows to inspect the newly added columns.
all_df[:4]

# Pull out the joined tag string of the row labelled 1 as a sample. This
# rebinds the scratch name `test` used earlier.
test = all_df['pos_sent_str'][1]
test

['NN NNS VBP JJ JJ NN NN CC JJ NN JJ VBZ DT NN VBZ RB JJ RB CC JJ VB JJ TO VB DT NN TO VB NN IN RB RB IN PRP VBP VBN IN DT JJ NNS DT NN VBP JJ IN PRP$ NNS CC NN TO VB NN NN']

# Scratch check of the bigram construction on hard-coded sample strings.
text = ["this is a sentence", "so is this one"]
test2 = ["NN NNS VBP JJ JJ NN NN CC JJ NN JJ VBZ DT NN VBZ RB JJ RB CC JJ VB JJ TO VB DT NN TO VB NN IN RB RB IN", "PRP VBP VBN IN DT JJ NNS DT NN VBP JJ IN PRP$ NNS CC NN TO VB NN NN"]
test1 = ['NN NNS VBP JJ JJ NN NN CC JJ NN JJ VBZ DT NN VBZ RB JJ RB CC JJ VB JJ TO VB DT NN TO VB NN IN RB RB IN PRP VBP VBN IN DT JJ NNS DT NN VBP JJ IN PRP$ NNS CC NN TO VB NN NN']
# Pair each tag with the one that follows it; zip stops at the shorter
# sequence, so the last tag never starts a pair.
bigrams = [
    pair
    for tags in (line.split(" ") for line in test1)
    for pair in zip(tags, tags[1:])
]
print(bigrams)

[('NN', 'NNS'), ('NNS', 'VBP'), ('VBP', 'JJ'), ('JJ', 'JJ'), ('JJ', 'NN'), ('NN', 'NN'), ('NN', 'CC'), ('CC', 'JJ'), ('JJ', 'NN'), ('NN', 'JJ'), ('JJ', 'VBZ'), ('VBZ', 'DT'), ('DT', 'NN'), ('NN', 'VBZ'), ('VBZ', 'RB'), ('RB', 'JJ'), ('JJ', 'RB'), ('RB', 'CC'), ('CC', 'JJ'), ('JJ', 'VB'), ('VB', 'JJ'), ('JJ', 'TO'), ('TO', 'VB'), ('VB', 'DT'), ('DT', 'NN'), ('NN', 'TO'), ('TO', 'VB'), ('VB', 'NN'), ('NN', 'IN'), ('IN', 'RB'), ('RB', 'RB'), ('RB', 'IN'), ('IN', 'PRP'), ('PRP', 'VBP'), ('VBP', 'VBN'), ('VBN', 'IN'), ('IN', 'DT'), ('DT', 'JJ'), ('JJ', 'NNS'), ('NNS', 'DT'), ('DT', 'NN'), ('NN', 'VBP'), ('VBP', 'JJ'), ('JJ', 'IN'), ('IN', 'PRP$'), ('PRP$', 'NNS'), ('NNS', 'CC'), ('CC', 'NN'), ('NN', 'TO'), ('TO', 'VB'), ('VB', 'NN'), ('NN', 'NN')]

# all_bigrams = [bigram for bigram in all_df.pos_sent_bi.tolist()]
# flat_list = [item for sublist in l for item in sublist]
# Flatten the per-review bigram lists into one long list for the whole
# corpus and one for each class.
all_df_n = all_df.loc[all_df['PoN'] == 'N']
all_df_p = all_df.loc[all_df['PoN'] == 'P']
all_bigrams = [pair for review_pairs in all_df['pos_sent_bi'] for pair in review_pairs]
all_bigrams_n = [pair for review_pairs in all_df_n['pos_sent_bi'] for pair in review_pairs]
all_bigrams_p = [pair for review_pairs in all_df_p['pos_sent_bi'] for pair in review_pairs]
all_bigrams[:5]

[('NN', 'NNS'), ('NNS', 'VBP'), ('VBP', 'JJ'), ('JJ', 'JJ'), ('JJ', 'NN')]

# Bigram frequencies for the whole corpus and for each class.
count, count_n, count_p = (
    Counter(all_bigrams),
    Counter(all_bigrams_n),
    Counter(all_bigrams_p),
)

count.most_common(5)

import numpy as np

# all_df['bow_v3'] = all_df.apply(lambda x: Counter(casual_tokenize(x['pruned'])), axis=1)
# new_df = pd.DataFrame(all_df['bow_v3'].tolist(), all_df['PoN'])

# most_common_pos = [word[0] for word in big_bow_p.most_common(100)]
# print("Unique values in array1 that are not in array2:")
# The ten most frequent POS bigrams of each class.
most_common_n = [word[0] for word in count_n.most_common(10)]
most_common_p = [word[0] for word in count_p.most_common(10)]

# np.setdiff1d flattens its inputs, so the lists of (tag, tag) tuples were
# compared tag by tag and the result was a single tag instead of bigrams.
# A set difference keeps each bigram intact; sorting keeps the output
# ordered, as setdiff1d's result was.
neg_notpos = sorted(set(most_common_n) - set(most_common_p))
neg_notpos

# all_bigrams_n

array(['VBZ'], dtype='<U3')

# Rebuild the frequency tables, this time for POS bigrams.
all_df_n = all_df.loc[all_df['PoN'] == 'N']
all_df_p = all_df.loc[all_df['PoN'] == 'P']

big_bow = [pair for review_pairs in all_df['pos_sent_bi'] for pair in review_pairs]
big_bow_n = [pair for review_pairs in all_df_n['pos_sent_bi'] for pair in review_pairs]
big_bow_p = [pair for review_pairs in all_df_p['pos_sent_bi'] for pair in review_pairs]

# One row per bigram with its count; the column stays named 'word'
# because bar_plot expects that name.
df = (
    pd.DataFrame.from_dict(Counter(big_bow), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_n = (
    pd.DataFrame.from_dict(Counter(big_bow_n), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

df_p = (
    pd.DataFrame.from_dict(Counter(big_bow_p), orient='index')
    .reset_index()
    .rename(columns={'index': 'word', 0: 'count'})
)

# The "(ALL)" chart must use df, the table built from every review; it
# previously plotted df_p, duplicating the "(FALSE)" chart further down.
print(bar_plot(df.sort_values(by=["count"], ascending=False)[:10], "Top 10 POS Bigrams (ALL)"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# 10 most frequent POS bigrams among the 'N' rows.
print(bar_plot(df_n.sort_values(by="count", ascending=False).head(10), "Top 10 POS Bigrams (TRUE)"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# 10 most frequent POS bigrams among the 'P' rows.
print(bar_plot(df_p.sort_values(by="count", ascending=False).head(10), "Top 10 POS Bigrams (FALSE)"))

<module 'matplotlib.pyplot' from '/usr/local/lib/python3.7/site-packages/matplotlib/pyplot.py'>

# Per-review frequency table of POS bigrams.
all_df['bow_pos'] = all_df['pos_sent_bi'].apply(Counter)

# One row per review, one column per bigram, indexed by class label;
# bigrams absent from a review become 0.
new_df = pd.DataFrame(all_df['bow_pos'].tolist(), index=all_df['PoN'])
new_df = new_df.fillna(0)
new_df = new_df.astype(int)
new_df[:5]

get_NB(new_df, new_df.index)

Accuracy: 0.4444444444444444

# Normalize each review's bigram counts by the row total. The totals are
# passed as a bare array so the division is positional and does not align
# on the repeated 'N'/'P' index labels.
bi_df_norm = new_df.div(new_df.sum(axis=1).values, axis=0)
bi_df_norm

get_NB(bi_df_norm, bi_df_norm.index)
bi_df_norm

Accuracy: 0.48148148148148145

# Display the 20 rows with the largest count for the 'P' table ...
df_p.sort_values(by=["count"], ascending=False)[:20]

# ... and for the 'N' table.
df_n.sort_values(by=["count"], ascending=False)[:20]

from nltk import word_tokenize 
from nltk.util import ngrams

text = ['cant railway station', 'citadel hotel', 'police stn']
def get_ngram(line, num):
    """Tokenize ``line`` and return its n-grams of length ``num``.

    Args:
        line: Text to split with NLTK's ``word_tokenize``.
        num: Number of consecutive tokens per n-gram.

    Returns:
        A list of ``num``-tuples of tokens, in order of appearance.
    """
    # Use the directly imported word_tokenize. The visible source never
    # imports `nltk` itself, so `nltk.word_tokenize` only resolved if a
    # wildcard import happened to expose that name.
    token = word_tokenize(line)
    return list(ngrams(token, num))

# all_df['trigrams'] = all_df.apply(lambda x: get_ngram(x[0],3), axis=1)
# Build the trigrams straight from the existing token and tag lists.
# Joining them into a string and running the word tokenizer again was
# redundant for the tokens and harmful for the tags: NLTK's tokenizer
# treats "$" as punctuation, so tags such as PRP$ were split into two
# pseudo-tags and leaked into the trigram features.
all_df['trigrams'] = all_df['tokens'].apply(lambda tokens: list(ngrams(tokens, 3)))
all_df['trigrams_pos'] = all_df['pos_sent'].apply(lambda tags: list(ngrams(tags, 3)))

# ' '.join(all_df['tokens'][1])
                                  
# counter = all_df['trigrams_pos']

# Turn every POS trigram tuple into a single underscore-joined feature name.
all_df['trigrams_feats'] = all_df['trigrams_pos'].apply(
    lambda grams: ['_'.join(gram) for gram in grams]
)

def flatten_column(df, column):
    """Concatenate the list stored in each cell of ``df[column]``.

    Args:
        df: DataFrame whose ``column`` holds one iterable per row.
        column: Label of the column to flatten.

    Returns:
        A single flat list with the items of every row, in row order.
    """
    flattened = []
    for row in df[column].tolist():
        flattened.extend(row)
    return flattened

# Trigram-feature frequencies for the whole corpus and for each class.
flat_trigrams = Counter(flatten_column(all_df, 'trigrams_feats'))
flat_trigrams_n = Counter(flatten_column(all_df.loc[all_df['PoN'] == 'N'], 'trigrams_feats'))
flat_trigrams_p = Counter(flatten_column(all_df.loc[all_df['PoN'] == 'P'], 'trigrams_feats'))

# Ten most frequent trigram features of each class, then the ones that
# are in the 'N' top ten but not in the 'P' top ten.
most_common_n = [feature for feature, _ in flat_trigrams_n.most_common(10)]
most_common_p = [feature for feature, _ in flat_trigrams_p.most_common(10)]

neg_notpos = np.setdiff1d(most_common_n, most_common_p)
neg_notpos

array(['JJ_NN_IN', 'NN_IN_DT', 'NN_IN_NN'], dtype='<U9')

# Per-review frequency table of trigram features.
all_df['trigrams_feats_bow'] = all_df['trigrams_feats'].apply(Counter)

# One row per review, one column per trigram feature, indexed by class
# label; features absent from a review become 0.
new_df = pd.DataFrame(all_df['trigrams_feats_bow'].tolist(), index=all_df['PoN'])
new_df = new_df.fillna(0)
new_df = new_df.astype(int)
new_df[:5]

get_NB(new_df, new_df.index)

Accuracy: 0.5185185185185185

# Normalize each review's trigram counts by the row total. The totals are
# passed as a bare array so the division is positional and does not align
# on the repeated 'N'/'P' index labels.
tri_df_norm = new_df.div(new_df.sum(axis=1).values, axis=0)
tri_df_norm

get_NB(tri_df_norm, tri_df_norm.index)
tri_df_norm

Accuracy: 0.5185185185185185

# Display the complete frame with every derived column.
all_df

	0	PoN
0	?	N
1	Twin Trees Cicero NY HUGE salad bar and high q...	N
2	The worst restaurant that I have ever eaten in...	N
3	?	N
4	I have been to a Asian restaurant in New York ...	N

	0	PoN	sentences	num_sentences	tokens	num_tokens
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45

	0	PoN	sentences	num_sentences	tokens	num_tokens	no_sw	num_no_sw
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53	[twin, trees, cicero, ny, huge, salad, bar, hi...	32
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105	[worst, restaurant, ever, eaten, undoubtedly, ...	49
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45	[asian, restaurant, new, york, city, menu, wri...	23

	0	PoN	sentences	num_sentences	tokens	num_tokens	no_sw	num_no_sw	stemmed	stemmed_no_sw
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53	[twin, trees, cicero, ny, huge, salad, bar, hi...	32	[twin, tree, cicero, ny, huge, salad, bar, and...	[twin, tree, cicero, ny, huge, salad, bar, hig...
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105	[worst, restaurant, ever, eaten, undoubtedly, ...	49	[the, worst, restaur, that, i, have, ever, eat...	[worst, restaur, ever, eaten, undoubtedli, pla...
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45	[asian, restaurant, new, york, city, menu, wri...	23	[i, have, been, to, a, asian, restaur, in, new...	[asian, restaur, new, york, citi, menu, writte...

	0	PoN	sentences	num_sentences	tokens	num_tokens	no_sw	num_no_sw	stemmed	stemmed_no_sw	lemmed	lemmed_no_sw
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53	[twin, trees, cicero, ny, huge, salad, bar, hi...	32	[twin, tree, cicero, ny, huge, salad, bar, and...	[twin, tree, cicero, ny, huge, salad, bar, hig...	[twin, tree, cicero, ny, huge, salad, bar, and...	[twin, tree, cicero, ny, huge, salad, bar, hig...
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105	[worst, restaurant, ever, eaten, undoubtedly, ...	49	[the, worst, restaur, that, i, have, ever, eat...	[worst, restaur, ever, eaten, undoubtedli, pla...	[the, worst, restaurant, that, i, have, ever, ...	[worst, restaurant, ever, eaten, undoubtedly, ...
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45	[asian, restaurant, new, york, city, menu, wri...	23	[i, have, been, to, a, asian, restaur, in, new...	[asian, restaur, new, york, citi, menu, writte...	[i, have, been, to, a, asian, restaurant, in, ...	[asian, restaurant, new, york, city, menu, wri...

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	VBG	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS
PoN
N	11.0	3.0	3.0	9.0	3.0	2.0	4.0	4.0	4.0	3.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
N	29.0	1.0	1.0	7.0	5.0	1.0	14.0	8.0	4.0	4.0	...	1.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
N	13.0	2.0	2.0	5.0	1.0	2.0	5.0	NaN	NaN	1.0	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	VBG	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS
PoN
N	11	3	3	9	3	2	4	4	4	3	...	0	0	0	0	0	0	0	0	0	0
N	29	1	1	7	5	1	14	8	4	4	...	1	0	0	0	0	0	0	0	0	0
N	13	2	2	5	1	2	5	0	0	1	...	0	0	0	0	0	0	0	0	0	0

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS	total
PoN
N	11	3	3	9	3	2	4	4	4	3	...	0	0	0	0	0	0	0	0	0	53
N	29	1	1	7	5	1	14	8	4	4	...	0	0	0	0	0	0	0	0	0	105
N	13	2	2	5	1	2	5	0	0	1	...	0	0	0	0	0	0	0	0	0	45

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	VBG	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS
PoN
N	0.207547	0.056604	0.056604	0.169811	0.056604	0.037736	0.075472	0.075472	0.075472	0.056604	...	0.000000	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
N	0.276190	0.009524	0.009524	0.066667	0.047619	0.009524	0.133333	0.076190	0.038095	0.038095	...	0.009524	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
N	0.288889	0.044444	0.044444	0.111111	0.022222	0.044444	0.111111	0.000000	0.000000	0.022222	...	0.000000	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

	(NN, NNS)	(NNS, VBP)	(VBP, JJ)	(JJ, JJ)	(JJ, NN)	(NN, NN)	(NN, CC)	(CC, JJ)	(NN, JJ)	(JJ, VBZ)	...	(WDT, MD)	(WRB, MD)	(MD, DT)	(NNS, JJR)	(JJR, EX)	(VBP, MD)	(JJS, WRB)	(CD, RB)	(JJS, VBG)	(RP, TO)
PoN
N	1	1	2	1	2	2	1	2	1	1	...	0	0	0	0	0	0	0	0	0	0
N	1	0	0	0	2	4	3	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	1	0	0	2	4	0	1	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	4	1	0	1	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	(NN, NNS)	(NNS, VBP)	(VBP, JJ)	(JJ, JJ)	(JJ, NN)	(NN, NN)	(NN, CC)	(CC, JJ)	(NN, JJ)	(JJ, VBZ)	...	(WDT, MD)	(WRB, MD)	(MD, DT)	(NNS, JJR)	(JJR, EX)	(VBP, MD)	(JJS, WRB)	(CD, RB)	(JJS, VBG)	(RP, TO)
PoN
N	0.019231	0.019231	0.038462	0.019231	0.038462	0.038462	0.019231	0.038462	0.019231	0.019231	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.000000	0.00000
N	0.009615	0.000000	0.000000	0.000000	0.019231	0.038462	0.028846	0.000000	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.000000	0.00000
N	0.000000	0.022727	0.000000	0.000000	0.045455	0.090909	0.000000	0.022727	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.000000	0.00000
N	0.000000	0.000000	0.000000	0.000000	0.057143	0.014286	0.000000	0.014286	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.000000	0.00000
N	0.000000	0.000000	0.000000	0.000000	0.028571	0.000000	0.028571	0.000000	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.000000	0.00000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
P	0.000000	0.023810	0.023810	0.000000	0.071429	0.023810	0.023810	0.000000	0.023810	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.02381	0.000000	0.000000	0.00000
P	0.000000	0.000000	0.000000	0.000000	0.000000	0.043478	0.000000	0.000000	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.000000	0.00000
P	0.000000	0.000000	0.000000	0.000000	0.030612	0.020408	0.020408	0.010204	0.010204	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.010204	0.000000	0.00000
P	0.000000	0.006494	0.000000	0.012987	0.038961	0.071429	0.025974	0.000000	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.00000	0.000000	0.006494	0.00000
P	0.000000	0.000000	0.023810	0.000000	0.119048	0.047619	0.023810	0.023810	0.000000	0.000000	...	0.0	0.0	0.0	0.0	0.0	0.0	0.02381	0.000000	0.000000	0.02381

	word	count
11	(DT, NN)	245
38	(JJ, NN)	151
0	(NN, NN)	125
44	(NN, IN)	114
1	(NN, VBD)	112
43	(IN, DT)	111
5	(NN, CC)	77
19	(TO, VB)	76
37	(DT, JJ)	62
26	(RB, JJ)	61
105	(PRP, VBD)	60
29	(IN, NN)	57
2	(VBD, DT)	55
25	(VBD, RB)	45
75	(PRP$, NN)	44
12	(NN, VBZ)	42
83	(NN, RB)	38
86	(IN, JJ)	37
39	(CC, DT)	36
65	(JJ, CC)	34

	NN_NNS_VBP	NNS_VBP_JJ	VBP_JJ_JJ	JJ_JJ_NN	JJ_NN_NN	NN_NN_CC	NN_CC_JJ	CC_JJ_NN	JJ_NN_JJ	NN_JJ_VBZ	...	RB_NN_NN	VBD_RP_PRP	PRP_TO_JJ	TO_JJ_JJ	CC_VB_NN	JJS_WRB_NN	NN_VBP_RP	VBP_RP_TO	RP_TO_VB	VBD_PRP_CC
PoN
N	1	1	1	1	1	1	1	1	1	1	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	2	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	0	PoN	sentences	num_sentences	tokens	num_tokens	no_sw	num_no_sw	stemmed	stemmed_no_sw	...	bow_no_sw	pos_sent	pos_sent_str	pos_no_sw_sent	pos_sent_bi	bow_pos	trigrams	trigrams_pos	trigrams_feats	trigrams_feats_bow
1	Twin Trees Cicero NY HUGE salad bar and high q...	N	[Twin Trees Cicero NY HUGE salad bar and high ...	4	[twin, trees, cicero, ny, huge, salad, bar, an...	53	[twin, trees, cicero, ny, huge, salad, bar, hi...	32	[twin, tree, cicero, ny, huge, salad, bar, and...	[twin, tree, cicero, ny, huge, salad, bar, hig...	...	{'twin': 1, 'trees': 1, 'cicero': 1, 'ny': 1, ...	[NN, NNS, VBP, JJ, JJ, NN, NN, CC, JJ, NN, JJ,...	[NN NNS VBP JJ JJ NN NN CC JJ NN JJ VBZ DT NN ...	[NN, NNS, VBP, JJ, JJ, NN, NN, JJ, NN, JJ, NNS...	[(NN, NNS), (NNS, VBP), (VBP, JJ), (JJ, JJ), (...	{('NN', 'NNS'): 1, ('NNS', 'VBP'): 1, ('VBP', ...	[(twin, trees, cicero), (trees, cicero, ny), (...	[(NN, NNS, VBP), (NNS, VBP, JJ), (VBP, JJ, JJ)...	[NN_NNS_VBP, NNS_VBP_JJ, VBP_JJ_JJ, JJ_JJ_NN, ...	{'NN_NNS_VBP': 1, 'NNS_VBP_JJ': 1, 'VBP_JJ_JJ'...
2	The worst restaurant that I have ever eaten in...	N	[The worst restaurant that I have ever eaten i...	5	[the, worst, restaurant, that, i, have, ever, ...	105	[worst, restaurant, ever, eaten, undoubtedly, ...	49	[the, worst, restaur, that, i, have, ever, eat...	[worst, restaur, ever, eaten, undoubtedli, pla...	...	{'worst': 1, 'restaurant': 1, 'ever': 1, 'eate...	[DT, JJS, NN, IN, NN, VBP, RB, VBN, IN, VBZ, R...	[DT JJS NN IN NN VBP RB VBN IN VBZ RB DT NN VB...	[RBS, NN, RB, RB, JJ, NN, VBN, NN, NN, VBD, NN...	[(DT, JJS), (JJS, NN), (NN, IN), (IN, NN), (NN...	{('DT', 'JJS'): 1, ('JJS', 'NN'): 1, ('NN', 'I...	[(the, worst, restaurant), (worst, restaurant,...	[(DT, JJS, NN), (JJS, NN, IN), (NN, IN, NN), (...	[DT_JJS_NN, JJS_NN_IN, NN_IN_NN, IN_NN_VBP, NN...	{'DT_JJS_NN': 1, 'JJS_NN_IN': 1, 'NN_IN_NN': 2...
4	I have been to a Asian restaurant in New York ...	N	[I have been to a Asian restaurant in New York...	4	[i, have, been, to, a, asian, restaurant, in, ...	45	[asian, restaurant, new, york, city, menu, wri...	23	[i, have, been, to, a, asian, restaur, in, new...	[asian, restaur, new, york, citi, menu, writte...	...	{'asian': 1, 'restaurant': 1, 'new': 1, 'york'...	[NNS, VBP, VBN, TO, DT, JJ, NN, IN, JJ, NN, NN...	[NNS VBP VBN TO DT JJ NN IN JJ NN NN DT NN VBZ...	[JJ, NN, JJ, NN, NN, NN, VBN, JJ, JJ, VBP, JJ,...	[(NNS, VBP), (VBP, VBN), (VBN, TO), (TO, DT), ...	{('NNS', 'VBP'): 1, ('VBP', 'VBN'): 1, ('VBN',...	[(i, have, been), (have, been, to), (been, to,...	[(NNS, VBP, VBN), (VBP, VBN, TO), (VBN, TO, DT...	[NNS_VBP_VBN, VBP_VBN_TO, VBN_TO_DT, TO_DT_JJ,...	{'NNS_VBP_VBN': 1, 'VBP_VBN_TO': 1, 'VBN_TO_DT...
5	The best restaurant I have gone to is when I w...	N	[The best restaurant I have gone to is when I ...	6	[the, best, restaurant, i, have, gone, to, is,...	71	[best, restaurant, gone, went, applebee, frien...	30	[the, best, restaur, i, have, gone, to, is, wh...	[best, restaur, gone, went, applebe, friend, s...	...	{'best': 1, 'restaurant': 2, 'gone': 1, 'went'...	[DT, JJS, NN, NN, VBP, VBN, TO, VBZ, WRB, JJ, ...	[DT JJS NN NN VBP VBN TO VBZ WRB JJ VBD TO VB ...	[RBS, NN, VBN, VBD, JJ, NNS, NN, RB, NN, JJ, V...	[(DT, JJS), (JJS, NN), (NN, NN), (NN, VBP), (V...	{('DT', 'JJS'): 1, ('JJS', 'NN'): 1, ('NN', 'N...	[(the, best, restaurant), (best, restaurant, i...	[(DT, JJS, NN), (JJS, NN, NN), (NN, NN, VBP), ...	[DT_JJS_NN, JJS_NN_NN, NN_NN_VBP, NN_VBP_VBN, ...	{'DT_JJS_NN': 1, 'JJS_NN_NN': 1, 'NN_NN_VBP': ...
6	The restaurant looked pretty good the people a...	N	[The restaurant looked pretty good the people ...	3	[the, restaurant, looked, pretty, good, the, p...	36	[restaurant, looked, pretty, good, people, aro...	19	[the, restaur, look, pretti, good, the, peopl,...	[restaur, look, pretti, good, peopl, around, a...	...	{'restaurant': 1, 'looked': 1, 'pretty': 1, 'g...	[DT, NN, VBD, RB, JJ, DT, NNS, IN, PRP, DT, NN...	[DT NN VBD RB JJ DT NNS IN PRP DT NN CC VBD RB...	[NN, VBD, RB, JJ, NNS, IN, NN, VBD, RB, NN, JJ...	[(DT, NN), (NN, VBD), (VBD, RB), (RB, JJ), (JJ...	{('DT', 'NN'): 5, ('NN', 'VBD'): 3, ('VBD', 'R...	[(the, restaurant, looked), (restaurant, looke...	[(DT, NN, VBD), (NN, VBD, RB), (VBD, RB, JJ), ...	[DT_NN_VBD, NN_VBD_RB, VBD_RB_JJ, RB_JJ_DT, JJ...	{'DT_NN_VBD': 3, 'NN_VBD_RB': 1, 'VBD_RB_JJ': ...
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
87	Mikes Pizza High Point NY Service was very slo...	P	[Mikes Pizza High Point NY Service was very sl...	4	[mikes, pizza, high, point, ny, service, was, ...	43	[mikes, pizza, high, point, ny, service, slow,...	26	[mike, pizza, high, point, ny, servic, wa, ver...	[mike, pizza, high, point, ny, servic, slow, q...	...	{'mikes': 1, 'pizza': 2, 'high': 1, 'point': 1...	[NNS, VBP, JJ, NN, JJ, NN, VBD, RB, JJ, CC, DT...	[NNS VBP JJ NN JJ NN VBD RB JJ CC DT NN VBD JJ...	[NNS, VBP, JJ, NN, JJ, NN, JJ, NN, NN, MD, VB,...	[(NNS, VBP), (VBP, JJ), (JJ, NN), (NN, JJ), (J...	{('NNS', 'VBP'): 1, ('VBP', 'JJ'): 1, ('JJ', '...	[(mikes, pizza, high), (pizza, high, point), (...	[(NNS, VBP, JJ), (VBP, JJ, NN), (JJ, NN, JJ), ...	[NNS_VBP_JJ, VBP_JJ_NN, JJ_NN_JJ, NN_JJ_NN, JJ...	{'NNS_VBP_JJ': 1, 'VBP_JJ_NN': 1, 'JJ_NN_JJ': ...
88	After I went shopping with some of my friend w...	P	[After I went shopping with some of my friend ...	2	[after, i, went, shopping, with, some, of, my,...	24	[went, shopping, friend, went, dodo, restauran...	11	[after, i, went, shop, with, some, of, my, fri...	[went, shop, friend, went, dodo, restaur, dinn...	...	{'went': 2, 'shopping': 1, 'friend': 1, 'dodo'...	[IN, JJ, VBD, VBG, IN, DT, IN, PRP$, NN, PRP, ...	[IN JJ VBD VBG IN DT IN PRP$ NN PRP VBD TO VB ...	[VBD, VBG, NN, VBD, JJ, NN, NN, VBD, RB, CD, NNS]	[(IN, JJ), (JJ, VBD), (VBD, VBG), (VBG, IN), (...	{('IN', 'JJ'): 1, ('JJ', 'VBD'): 1, ('VBD', 'V...	[(after, i, went), (i, went, shopping), (went,...	[(IN, JJ, VBD), (JJ, VBD, VBG), (VBD, VBG, IN)...	[IN_JJ_VBD, JJ_VBD_VBG, VBD_VBG_IN, VBG_IN_DT,...	{'IN_JJ_VBD': 1, 'JJ_VBD_VBG': 1, 'VBD_VBG_IN'...
89	I entered the restaurant and a waitress came b...	P	[I entered the restaurant and a waitress came ...	5	[i, entered, the, restaurant, and, a, waitress...	99	[entered, restaurant, waitress, came, blanking...	49	[i, enter, the, restaur, and, a, waitress, cam...	[enter, restaur, waitress, came, blank, look, ...	...	{'entered': 1, 'restaurant': 1, 'waitress': 2,...	[NN, VBD, DT, NN, CC, DT, NN, VBD, IN, IN, DT,...	[NN VBD DT NN CC DT NN VBD IN IN DT NN VBG CC ...	[VBN, NN, NN, VBD, VBG, VBG, JJ, NN, NN, VBD, ...	[(NN, VBD), (VBD, DT), (DT, NN), (NN, CC), (CC...	{('NN', 'VBD'): 5, ('VBD', 'DT'): 4, ('DT', 'N...	[(i, entered, the), (entered, the, restaurant)...	[(NN, VBD, DT), (VBD, DT, NN), (DT, NN, CC), (...	[NN_VBD_DT, VBD_DT_NN, DT_NN_CC, NN_CC_DT, CC_...	{'NN_VBD_DT': 1, 'VBD_DT_NN': 3, 'DT_NN_CC': 1...
90	Carlos Plate Shack was the worst dining experi...	P	[Carlos Plate Shack was the worst dining exper...	9	[carlos, plate, shack, was, the, worst, dining...	155	[carlos, plate, shack, worst, dining, experien...	88	[carlo, plate, shack, wa, the, worst, dine, ex...	[carlo, plate, shack, worst, dine, experi, lif...	...	{'carlos': 1, 'plate': 6, 'shack': 1, 'worst':...	[NN, NN, NN, VBD, DT, JJS, VBG, NN, IN, PRP$, ...	[NN NN NN VBD DT JJS VBG NN IN PRP$ NN IN PRP$...	[NN, NN, NN, JJS, VBG, NN, NN, IN, JJ, NN, NN,...	[(NN, NN), (NN, NN), (NN, VBD), (VBD, DT), (DT...	{('NN', 'NN'): 11, ('NN', 'VBD'): 6, ('VBD', '...	[(carlos, plate, shack), (plate, shack, was), ...	[(NN, NN, NN), (NN, NN, VBD), (NN, VBD, DT), (...	[NN_NN_NN, NN_NN_VBD, NN_VBD_DT, VBD_DT_JJS, D...	{'NN_NN_NN': 2, 'NN_NN_VBD': 3, 'NN_VBD_DT': 2...
91	Olive Oil Garden was very disappointing. I exp...	P	[Olive Oil Garden was very disappointing., I e...	5	[olive, oil, garden, was, very, disappointing,...	43	[olive, oil, garden, disappointing, expect, go...	23	[oliv, oil, garden, wa, veri, disappoint, i, e...	[oliv, oil, garden, disappoint, expect, good, ...	...	{'olive': 2, 'oil': 2, 'garden': 2, 'disappoin...	[JJ, NN, NN, VBD, RB, JJ, NN, VBP, JJ, NN, CC,...	[JJ NN NN VBD RB JJ NN VBP JJ NN CC JJ NN IN J...	[JJ, NN, NN, NN, VBP, JJ, NN, JJ, NN, JJS, VB,...	[(JJ, NN), (NN, NN), (NN, VBD), (VBD, RB), (RB...	{('JJ', 'NN'): 5, ('NN', 'NN'): 2, ('NN', 'VBD...	[(olive, oil, garden), (oil, garden, was), (ga...	[(JJ, NN, NN), (NN, NN, VBD), (NN, VBD, RB), (...	[JJ_NN_NN, NN_NN_VBD, NN_VBD_RB, VBD_RB_JJ, RB...	{'JJ_NN_NN': 2, 'NN_NN_VBD': 1, 'NN_VBD_RB': 1...

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	VBG	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS
PoN
N	11	3	3	9	3	2	4	4	4	3	...	0	0	0	0	0	0	0	0	0	0
N	29	1	1	7	5	1	14	8	4	4	...	1	0	0	0	0	0	0	0	0	0
N	13	2	2	5	1	2	5	0	0	1	...	0	0	0	0	0	0	0	0	0	0

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS	total
PoN
N	11	3	3	9	3	2	4	4	4	3	...	0	0	0	0	0	0	0	0	0	53
N	29	1	1	7	5	1	14	8	4	4	...	0	0	0	0	0	0	0	0	0	105
N	13	2	2	5	1	2	5	0	0	1	...	0	0	0	0	0	0	0	0	0	45

	(NN, NNS)	(NNS, VBP)	(VBP, JJ)	(JJ, JJ)	(JJ, NN)	(NN, NN)	(NN, CC)	(CC, JJ)	(NN, JJ)	(JJ, VBZ)	...	(WDT, MD)	(WRB, MD)	(MD, DT)	(NNS, JJR)	(JJR, EX)	(VBP, MD)	(JJS, WRB)	(CD, RB)	(JJS, VBG)	(RP, TO)
PoN
N	1	1	2	1	2	2	1	2	1	1	...	0	0	0	0	0	0	0	0	0	0
N	1	0	0	0	2	4	3	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	1	0	0	2	4	0	1	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	4	1	0	1	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	NN_NNS_VBP	NNS_VBP_JJ	VBP_JJ_JJ	JJ_JJ_NN	JJ_NN_NN	NN_NN_CC	NN_CC_JJ	CC_JJ_NN	JJ_NN_JJ	NN_JJ_VBZ	...	RB_NN_NN	VBD_RP_PRP	PRP_TO_JJ	TO_JJ_JJ	CC_VB_NN	JJS_WRB_NN	NN_VBP_RP	VBP_RP_TO	RP_TO_VB	VBD_PRP_CC
PoN
N	1	1	1	1	1	1	1	1	1	1	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	2	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	VBG	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS
PoN
N	11	3	3	9	3	2	4	4	4	3	...	0	0	0	0	0	0	0	0	0	0
N	29	1	1	7	5	1	14	8	4	4	...	1	0	0	0	0	0	0	0	0	0
N	13	2	2	5	1	2	5	0	0	1	...	0	0	0	0	0	0	0	0	0	0

	NN	NNS	VBP	JJ	CC	VBZ	DT	RB	VB	TO	...	EX	JJR	PDT	RP	WP	CD	RBR	MD	RBS	total
PoN
N	11	3	3	9	3	2	4	4	4	3	...	0	0	0	0	0	0	0	0	0	53
N	29	1	1	7	5	1	14	8	4	4	...	0	0	0	0	0	0	0	0	0	105
N	13	2	2	5	1	2	5	0	0	1	...	0	0	0	0	0	0	0	0	0	45

	(NN, NNS)	(NNS, VBP)	(VBP, JJ)	(JJ, JJ)	(JJ, NN)	(NN, NN)	(NN, CC)	(CC, JJ)	(NN, JJ)	(JJ, VBZ)	...	(WDT, MD)	(WRB, MD)	(MD, DT)	(NNS, JJR)	(JJR, EX)	(VBP, MD)	(JJS, WRB)	(CD, RB)	(JJS, VBG)	(RP, TO)
PoN
N	1	1	2	1	2	2	1	2	1	1	...	0	0	0	0	0	0	0	0	0	0
N	1	0	0	0	2	4	3	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	1	0	0	2	4	0	1	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	4	1	0	1	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	1	0	1	0	0	0	...	0	0	0	0	0	0	0	0	0	0

	NN_NNS_VBP	NNS_VBP_JJ	VBP_JJ_JJ	JJ_JJ_NN	JJ_NN_NN	NN_NN_CC	NN_CC_JJ	CC_JJ_NN	JJ_NN_JJ	NN_JJ_VBZ	...	RB_NN_NN	VBD_RP_PRP	PRP_TO_JJ	TO_JJ_JJ	CC_VB_NN	JJS_WRB_NN	NN_VBP_RP	VBP_RP_TO	RP_TO_VB	VBD_PRP_CC
PoN
N	1	1	1	1	1	1	1	1	1	1	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	2	1	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	1	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
N	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0