In [4]:
file = open('HP1.txt').readlines()

all_text = ""
for line in file:
    all_text += line

all_text = all_text.replace("\n", " ")
all_text = all_text.replace("\'", "")

import re
all_text = re.sub(r'[0-9]', '', all_text)
chapters = all_text.split('CHAPTER ')
ch1 = chapters[1]
In [30]:
import nltk
sentence_list = nltk.sent_tokenize(ch1)
sentence_list[:10]
Out[30]:
['ONE THE BOY WHO LIVED Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much.',
 'They were the last people youd expect to be involved in anything strange or mysterious, because they just didnt hold with such nonsense.',
 'Mr. Dursley was the director of a firm called Grunnings, which made drills.',
 'He was a big, beefy man with hardly any neck, although he did have a very large mustache.',
 'Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors.',
 'The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.',
 'The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it.',
 'They didnt think they could bear it if anyone found out about the Potters.',
 'Mrs. Potter was Mrs. Dursleys sister, but they hadnt met for several years; in fact, Mrs. Dursley pretended she didnt have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be.',
 'The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street.']
In [8]:
stopwords = nltk.corpus.stopwords.words('english')
word_frequencies = {}
for word in nltk.word_tokenize(ch1):
    if word not in stopwords:
        if word not in word_frequencies.keys():
            word_frequencies[word] = 1
        else:
            word_frequencies[word] += 1
In [9]:
max_frequency = max(word_frequencies.values())
for word in word_frequencies.keys():
    word_frequencies[word] = (word_frequencies[word]/max_frequency)
In [10]:
for sent in sentence_list[:1]:
    for word in nltk.word_tokenize(sent.lower()):
        print(word)
one
the
boy
who
lived
mr.
and
mrs.
dursley
,
of
number
four
,
privet
drive
,
were
proud
to
say
that
they
were
perfectly
normal
,
thank
you
very
much
.
In [11]:
for sent in sentence_list[:1]:
    for word in nltk.word_tokenize(sent.lower()):
        if word in word_frequencies.keys():
            print(word)
one
boy
lived
,
number
four
,
drive
,
proud
say
perfectly
normal
,
thank
much
.
In [12]:
sentence_scores = {}
for sent in sentence_list:
    for word in nltk.word_tokenize(sent.lower()):
        if word in word_frequencies.keys():
            if len(sent.split(' ')) < 30:
                if sent not in sentence_scores.keys():
                    sentence_scores[sent] = word_frequencies[word]
                else:
                    sentence_scores[sent] += word_frequencies[word]
In [13]:
sorted_sentences = sorted(sentence_scores.items(), key=lambda kv: kv[1], reverse=True)
sorted_sentences[:10]
Out[13]:
[('Professor McGonagall opened her mouth, changed her mind, swallowed, and then said, "Yes -- yes, youre right, of course.',
  7.73448275862069),
 ('"Well, Ted," said the weatherman, "I dont know about that, but its not only the owls that have been acting oddly today.',
  6.279310344827586),
 ('"I shall see you soon, I expect, Professor McGonagall," said Dumbledore, nodding to her.',
  5.844827586206897),
 ('"Borrowed it, Professor Dumbledore, sit," said the giant, climbing carefully off the motorcycle as he spoke.',
  5.8310344827586205),
 ('"The Potters, thats right, thats what I heard yes, their son, Harry" Mr. Dursley stopped dead.',
  5.810344827586206),
 ('Theyre a kind of Muggle sweet Im rather fond of" "No, thank you," said Professor McGonagall coldly, as though she didnt think this was the moment for lemon drops.',
  5.344827586206897),
 ('She threw a sharp, sideways glance at Dumbledore here, as though hoping he was going to tell her something, but he didnt, so she went on.',
  5.1931034482758625),
 ('ONE THE BOY WHO LIVED Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much.',
  5.144827586206896),
 ('He was tall, thin, and very old, judging by the silver of his hair and beard, which were both long enough to tuck into his belt.',
  5.055172413793103),
 ('"Oh yes, everyones celebrating, all right," she said impatiently.',
  4.858620689655172)]
In [14]:
summary = [sent[0] for sent in sorted_sentences[:10]]
''.join(summary)
Out[14]:
'Professor McGonagall opened her mouth, changed her mind, swallowed, and then said, "Yes -- yes, youre right, of course."Well, Ted," said the weatherman, "I dont know about that, but its not only the owls that have been acting oddly today."I shall see you soon, I expect, Professor McGonagall," said Dumbledore, nodding to her."Borrowed it, Professor Dumbledore, sit," said the giant, climbing carefully off the motorcycle as he spoke."The Potters, thats right, thats what I heard yes, their son, Harry" Mr. Dursley stopped dead.Theyre a kind of Muggle sweet Im rather fond of" "No, thank you," said Professor McGonagall coldly, as though she didnt think this was the moment for lemon drops.She threw a sharp, sideways glance at Dumbledore here, as though hoping he was going to tell her something, but he didnt, so she went on.ONE THE BOY WHO LIVED Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much.He was tall, thin, and very old, judging by the silver of his hair and beard, which were both long enough to tuck into his belt."Oh yes, everyones celebrating, all right," she said impatiently.'
In [29]:
list(sentence_scores.items())[:10]
Out[29]:
[('ONE THE BOY WHO LIVED Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much.',
  5.144827586206896),
 ('They were the last people youd expect to be involved in anything strange or mysterious, because they just didnt hold with such nonsense.',
  2.2103448275862068),
 ('Mr. Dursley was the director of a firm called Grunnings, which made drills.',
  2.0344827586206895),
 ('He was a big, beefy man with hardly any neck, although he did have a very large mustache.',
  3.0517241379310343),
 ('The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.',
  1.0620689655172415),
 ('The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it.',
  3.048275862068965),
 ('They didnt think they could bear it if anyone found out about the Potters.',
  1.1517241379310346),
 ('The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street.',
  1.1241379310344828),
 ('The Dursleys knew that the Potters had a small son, too, but they had never even seen him.',
  3.106896551724138),
 ('This boy was another good reason for keeping the Potters away; they didnt want Dudley mixing with a child like that.',
  1.213793103448276)]
In [23]:
newly_sorted = [sent[0] for sent in sentence_scores.items() if sent[1] > 5]
In [24]:
newly_sorted
''.join(newly_sorted)
Out[24]:
'ONE THE BOY WHO LIVED Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much."The Potters, thats right, thats what I heard yes, their son, Harry" Mr. Dursley stopped dead."Well, Ted," said the weatherman, "I dont know about that, but its not only the owls that have been acting oddly today.He was tall, thin, and very old, judging by the silver of his hair and beard, which were both long enough to tuck into his belt.She threw a sharp, sideways glance at Dumbledore here, as though hoping he was going to tell her something, but he didnt, so she went on.Theyre a kind of Muggle sweet Im rather fond of" "No, thank you," said Professor McGonagall coldly, as though she didnt think this was the moment for lemon drops.Professor McGonagall opened her mouth, changed her mind, swallowed, and then said, "Yes -- yes, youre right, of course."Borrowed it, Professor Dumbledore, sit," said the giant, climbing carefully off the motorcycle as he spoke."I shall see you soon, I expect, Professor McGonagall," said Dumbledore, nodding to her.'
In [ ]: