In [2]:
# Earlier version of this cell: load a Harry Potter text, strip digits, and
# split it into chapters on the 'CHAPTER ' marker (kept for reference; see
# get_summary_by_chapters below).

# file = open('HP1.txt').readlines()

# all_text = ""
# for line in file:
#     all_text += line

# all_text = all_text.replace("\n", " ")
# all_text = all_text.replace("\'", "")

# import re
# all_text = re.sub(r'[0-9]', '', all_text)
# chapters = all_text.split('CHAPTER ')
# ch1 = chapters[1]

# Read the week-5 PDF extract into a single string.
with open('../WK5/wk5_pdf.txt') as f:
    all_text = f.read()

all_text = all_text.replace("\n", " ")  # join wrapped lines into one running text
all_text = all_text.replace("'", "")    # strip straight apostrophes (curly ones survive)

import re  # only needed if the chapter-splitting lines below are re-enabled
# all_text = re.sub(r'[0-9]', '', all_text)
# chapters = all_text.split('CHAPTER ')
ch1 = all_text  # this paper has no chapters, so treat the whole text as one
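
If the punkt tokenizer model or the stopword corpus is not installed yet, the nltk calls below raise a LookupError. A one-time download cell (an assumption about the environment, not part of the original run) fixes that:

In [ ]:
import nltk
# One-time setup: fetch the sentence/word tokenizer model and the stopword list.
nltk.download('punkt')
nltk.download('stopwords')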
In [3]:
import nltk

# Split the running text into sentences.
sentence_list = nltk.sent_tokenize(ch1)
sentence_list[:10]
Out[3]:
['Abstract Human linguistic annotation is crucial for many natural language processing tasks but can be expensive and time-consuming.',
 'We explore the use of Amazon’s Mechanical Turk system, a significantly cheaper and faster method for collecting annotations from a broad base of paid non-expert contributors over the Web.',
 'We investigate five tasks: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.',
 'For all five, we show high agreement between Mechanical Turk non-expert annotations and existing gold standard labels provided by expert labelers.',
 'For the task of affect recognition, we also show that using non-expert labels for training machine learning algorithms can be as effective as using gold standard annotations from experts.',
 'We propose a technique for bias correction that significantly improves annotation quality on two tasks.',
 'We conclude that many large labeling tasks can be effectively designed and carried out in this method at a fraction of the usual expense.',
 '1 Introduction Large scale annotation projects such as TreeBank (Marcus et al., 1993), PropBank (Palmer et al., 2005), TimeBank (Pustejovsky et al., 2003), FrameNet (Baker et al., 1998), SemCor (Miller et al., 1993), and others play an important role in natural language processing research, encouraging the development of novel ideas, tasks, and algorithms.',
 'The construction of these datasets, however, is extremely expensive in both annotator-hours and financial cost.',
 'Since the performance of many natural language processing tasks is limited by the amount and quality of data available to them (Banko and Brill, 2001), one promising alternative for some tasks is the collection of non-expert annotations.']
In [4]:
stopwords = nltk.corpus.stopwords.words('english')

# Count each non-stopword token. Two quirks are kept as-is so the outputs
# below still match: tokens are counted case-sensitively (so "Human" and
# "human" are separate keys), and punctuation such as '.' is not a stopword,
# so it gets counted too.
word_frequencies = {}
for word in nltk.word_tokenize(ch1):
    if word not in stopwords:
        if word not in word_frequencies:
            word_frequencies[word] = 1
        else:
            word_frequencies[word] += 1
In [5]:
# Normalize every count to [0, 1] by dividing by the most frequent word's count.
max_frequency = max(word_frequencies.values())
for word in word_frequencies:
    word_frequencies[word] = word_frequencies[word] / max_frequency
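
For reference, collections.Counter expresses the same count-then-normalize step more compactly. This is an equivalent sketch, not the cell the outputs below were produced with:

In [ ]:
from collections import Counter

# Equivalent to In [4] and In [5]: count non-stopword tokens, then scale by the max.
counts = Counter(w for w in nltk.word_tokenize(ch1) if w not in stopwords)
top = counts.most_common(1)[0][1]
weighted = {w: c / top for w, c in counts.items()}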
In [6]:
# All tokens of the first sentence, before any filtering:
for sent in sentence_list[:1]:
    for word in nltk.word_tokenize(sent.lower()):
        print(word)
abstract
human
linguistic
annotation
is
crucial
for
many
natural
language
processing
tasks
but
can
be
expensive
and
time-consuming
.
In [7]:
# The same sentence, keeping only tokens found in word_frequencies. Lowercasing
# here means words that only ever appear capitalized ("Abstract", "Human") fail
# the lookup, while '.' passes because punctuation was never filtered out.
for sent in sentence_list[:1]:
    for word in nltk.word_tokenize(sent.lower()):
        if word in word_frequencies:
            print(word)
linguistic
annotation
crucial
many
natural
language
processing
tasks
expensive
time-consuming
.
In [8]:
# Score each sentence shorter than 30 words by summing the weighted
# frequencies of its (lowercased) tokens.
sentence_scores = {}
for sent in sentence_list:
    if len(sent.split(' ')) >= 30:
        continue  # skip long sentences entirely
    for word in nltk.word_tokenize(sent.lower()):
        if word in word_frequencies:
            if sent not in sentence_scores:
                sentence_scores[sent] = word_frequencies[word]
            else:
                sentence_scores[sent] += word_frequencies[word]
In [9]:
# Rank sentences from highest to lowest score and inspect the top ten.
sorted_sentences = sorted(sentence_scores.items(), key=lambda kv: kv[1], reverse=True)
sorted_sentences[:10]
Out[9]:
[('Dolores Labs Blog, “AMT is fast, cheap, and good for machine learning data,” Brendan O’Connor, Sept. 9, 2008.',
  7.409090909090909),
 ('4 Annotation Tasks We analyze the quality of non-expert annotations on five tasks: affect recognition, word similarity, recognizing textual entailment, temporal event recognition, and word sense disambiguation.',
  6.7272727272727275),
 ('We investigate five tasks: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.',
  5.827272727272727),
 ('We did this by averaging the labels of each possible subset of n non-expert annotations, for value of n in {1, 2,... , 10}.',
  5.745454545454545),
 ('The tasks are: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.',
  5.736363636363635),
 ('For each task, we used AMT to annotate data and measured the quality of the annotations by comparing them with the gold standard (expert) labels on the same data.',
  3.9),
 ('We then do the same for individual non-expert annotations, averaging Pearson correlation across all sets of the five expert labelers (“NE vs. E”).',
  3.827272727272727),
 ('In Table 2 we give the minimum k for each emotion, and the averaged ITA for that meta-labeler consisting of k non-experts (marked “k-NE”).',
  3.2727272727272725),
 ('For the task of affect recognition, we also show that using non-expert labels for training machine learning algorithms can be as effective as using gold standard annotations from experts.',
  3.1545454545454548),
 ('Finally, after each HIT has been annotated, the Requester has the option of approving the work and optionally giving a bonus to individual workers.',
  3.0363636363636375)]
In [10]:
# Take the ten best-scoring sentences as the summary. Joining on '' runs the
# sentences together ("2008.4 Annotation Tasks" below); ' '.join(summary)
# would keep them readable.
summary = [sent[0] for sent in sorted_sentences[:10]]
''.join(summary)
Out[10]:
'Dolores Labs Blog, “AMT is fast, cheap, and good for machine learning data,” Brendan O’Connor, Sept. 9, 2008.4 Annotation Tasks We analyze the quality of non-expert annotations on five tasks: affect recognition, word similarity, recognizing textual entailment, temporal event recognition, and word sense disambiguation.We investigate five tasks: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.We did this by averaging the labels of each possible subset of n non-expert annotations, for value of n in {1, 2,... , 10}.The tasks are: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.For each task, we used AMT to annotate data and measured the quality of the annotations by comparing them with the gold standard (expert) labels on the same data.We then do the same for individual non-expert annotations, averaging Pearson correlation across all sets of the five expert labelers (“NE vs. E”).In Table 2 we give the minimum k for each emotion, and the averaged ITA for that meta-labeler consisting of k non-experts (marked “k-NE”).For the task of affect recognition, we also show that using non-expert labels for training machine learning algorithms can be as effective as using gold standard annotations from experts.Finally, after each HIT has been annotated, the Requester has the option of approving the work and optionally giving a bonus to individual workers.'
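
When only the top few sentences are needed, heapq.nlargest avoids sorting the whole score table. A small sketch of the same selection (tie order aside, it yields the same ten sentences):

In [ ]:
import heapq

# Select the ten highest-scoring sentences without a full sort.
top_ten = heapq.nlargest(10, sentence_scores, key=sentence_scores.get)
' '.join(top_ten)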
In [11]:
# The same scores in original document order (dicts preserve insertion order).
list(sentence_scores.items())[:10]
Out[11]:
[('Abstract Human linguistic annotation is crucial for many natural language processing tasks but can be expensive and time-consuming.',
  1.3636363636363638),
 ('We explore the use of Amazon’s Mechanical Turk system, a significantly cheaper and faster method for collecting annotations from a broad base of paid non-expert contributors over the Web.',
  2.563636363636363),
 ('We investigate five tasks: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.',
  5.827272727272727),
 ('For all five, we show high agreement between Mechanical Turk non-expert annotations and existing gold standard labels provided by expert labelers.',
  3.018181818181818),
 ('For the task of affect recognition, we also show that using non-expert labels for training machine learning algorithms can be as effective as using gold standard annotations from experts.',
  3.1545454545454548),
 ('We propose a technique for bias correction that significantly improves annotation quality on two tasks.',
  1.3454545454545457),
 ('We conclude that many large labeling tasks can be effectively designed and carried out in this method at a fraction of the usual expense.',
  1.2454545454545456),
 ('The construction of these datasets, however, is extremely expensive in both annotator-hours and financial cost.',
  2.9181818181818184),
 ('In this work we explore the use of Amazon Mechanical Turk1 (AMT) to determine whether nonexpert labelers can provide reliable natural language annotations.',
  2.1545454545454543),
 ('The tasks are: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.',
  5.736363636363635)]
In [12]:
# This is a filter, not a sort: keep sentences scoring above 5,
# in their original document order.
newly_sorted = [sent[0] for sent in sentence_scores.items() if sent[1] > 5]
In [13]:
newly_sorted  # bare expression: only a cell's last expression is displayed
''.join(newly_sorted)
Out[13]:
'We investigate five tasks: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.The tasks are: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.Dolores Labs Blog, “AMT is fast, cheap, and good for machine learning data,” Brendan O’Connor, Sept. 9, 2008.4 Annotation Tasks We analyze the quality of non-expert annotations on five tasks: affect recognition, word similarity, recognizing textual entailment, temporal event recognition, and word sense disambiguation.We did this by averaging the labels of each possible subset of n non-expert annotations, for value of n in {1, 2,... , 10}.'
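
The cutoff of 5 is hand-picked for this document. A data-driven alternative, sketched here under the assumption that the score distribution is roughly unimodal (not part of the original run), keeps sentences more than one standard deviation above the mean score:

In [ ]:
import statistics

# Hypothetical automatic cutoff: mean score plus one standard deviation.
scores = list(sentence_scores.values())
cutoff = statistics.mean(scores) + statistics.stdev(scores)
auto_summary = [sent for sent, score in sentence_scores.items() if score > cutoff]
' '.join(auto_summary)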
In [14]:
import nltk

def get_sentence_list(many_sentences):
    """Split the full text into sentences."""
    return nltk.sent_tokenize(many_sentences)

def get_word_frequencies(many_sentences):
    """Count every non-stopword token (case-sensitive, punctuation included)."""
    stopwords = nltk.corpus.stopwords.words('english')
    word_frequencies = {}
    for word in nltk.word_tokenize(many_sentences):
        if word not in stopwords:
            if word not in word_frequencies:
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1
    return word_frequencies

def get_weighted_frequencies(word_frequencies):
    """Normalize counts to [0, 1] by the most frequent word's count (in place)."""
    max_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / max_frequency
    return word_frequencies

def get_sentence_scores(sentence_list, word_frequencies):
    """Sum weighted token frequencies for each sentence under 30 words."""
    sentence_scores = {}
    for sent in sentence_list:
        if len(sent.split(' ')) >= 30:
            continue
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies:
                if sent not in sentence_scores:
                    sentence_scores[sent] = word_frequencies[word]
                else:
                    sentence_scores[sent] += word_frequencies[word]
    return sentence_scores

def get_summary(many_sentences):
    """Print every sentence scoring above 5, in document order."""
    sentence_list = get_sentence_list(many_sentences)
    word_frequencies = get_word_frequencies(many_sentences)
    weighted_word_frequencies = get_weighted_frequencies(word_frequencies)
    sentence_scores = get_sentence_scores(sentence_list, weighted_word_frequencies)

    summary = [sent[0] for sent in sentence_scores.items() if sent[1] > 5]
    print(''.join(summary))

def get_summary_by_chapters(chapters):
    """Run get_summary on each chapter with a banner line."""
    for ch, chapter in enumerate(chapters):
        print('****** CHAPTER ' + str(ch) + ' ******')
        get_summary(chapter)

# get_summary_by_chapters(chapters)

get_summary(all_text)
We investigate five tasks: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.The tasks are: affect recognition, word similarity, recognizing textual entailment, event temporal ordering, and word sense disambiguation.Dolores Labs Blog, “AMT is fast, cheap, and good for machine learning data,” Brendan O’Connor, Sept. 9, 2008.4 Annotation Tasks We analyze the quality of non-expert annotations on five tasks: affect recognition, word similarity, recognizing textual entailment, temporal event recognition, and word sense disambiguation.We did this by averaging the labels of each possible subset of n non-expert annotations, for value of n in {1, 2,... , 10}.
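
A variant worth considering, sketched on top of the helper functions above and not part of the original run: return the summary instead of printing it, and pick a fixed number of sentences but emit them in document order so the result reads more like the source.

In [ ]:
import heapq

def summarize(text, top_n=10):
    """Return the top_n highest-scoring sentences, joined in document order."""
    sentences = get_sentence_list(text)
    weighted = get_weighted_frequencies(get_word_frequencies(text))
    scores = get_sentence_scores(sentences, weighted)
    best = set(heapq.nlargest(top_n, scores, key=scores.get))
    return ' '.join(s for s in sentences if s in best)

# summarize(all_text, top_n=5)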