Python web crawling example

This tutorial is written based on chapters 11-13 from the book "Python for Everyone" https://www.py4e.com

Step 1: import all necessary packages

In [1]:
import re
import urllib
from bs4 import BeautifulSoup
import pprint
import pandas as pd

Step 2: download a sample webpage. You can save the html page onto your computer and use text editor to view its content

In [2]:
url = "http://www.metrolyrics.com/you-belong-with-me-lyrics-taylor-swift.html"
html = urllib.request.urlopen(url).read()

Step 3: use BeautifulSoup to parse the webpage and extract the lyrics content. The division that includes the lyrics starts from the html tag "lyrics-body-text"

In [3]:
soup = BeautifulSoup(html, 'html.parser')
print(soup.title.string)

text = soup.body.find_all(id='lyrics-body-text')
text = text[0].text
print(text)
Taylor Swift - You Belong With Me Lyrics | MetroLyrics


You're on the phone with your girlfriend—she's upset
She's going off about something that you said
'Cause she doesn't get your humor like I do.I'm in the room, it's a typical Tuesday night.
I'm listening to the kind of music she doesn't like.
And she'll never know your story like I doBut she wears short skirts
I wear t-shirt
She's cheer captain
And I'm on the bleachersDreaming about the day when you wake up and find
That what you're looking for has been here the whole time.



Related




 






11 Delicious Misheard Lyrics About Food







NEW SONG: Taylor Swift - 'Lover' - LYRICS







Prime Day concert by Amazon Music will be headlined by Taylor Swift






 
 

If you could see
That I'm the one
Who understands you
Been here all along
So, why can't you see
You belong with me
You belong with me?Walk in the streets with you in your worn out jeans
I can't help thinking this is how it ought to be.
Laughing on a park bench thinking to myself
"Hey, isn't this easy?"And you've got a smile
That can light up this whole town
I haven't seen it in awhile
Since she brought you down.You say you're fine—I know you better than that
Hey, what you doing with a girl like that?







 




She wears high heels
I wear sneakers
She's cheer captain
And I'm on the bleachersDreaming about the day when you wake up and find
That what you're looking for has been here the whole timeIf you could see
That I'm the one
Who understands you
Been here all along
So, why can't you see
You belong with me?Standing by and waiting at your backdoor.
All this time how could you not know, baby?
You belong with me
You belong with meOh, I remember you driving to my house
In the middle of the night
I'm the one who makes you laugh
When you know you're 'bout to cryI know your favorite songs
And you tell me about your dreams
Think I know where you belong
Think I know it's with meCan't you se that I'm the one
Who understands you?
Been here all along
So, why can't you see
You belong with me?Standing by and waiting at your backdoor.
All this time how could you not know, baby?
You belong with me
You belong with me
You belong with me
Have you ever thought just maybe
You belong with me?
You belong with me




 



Step 4: split text into individual words

In [4]:
words = text.split()
print(words)
["You're", 'on', 'the', 'phone', 'with', 'your', "girlfriend—she's", 'upset', "She's", 'going', 'off', 'about', 'something', 'that', 'you', 'said', "'Cause", 'she', "doesn't", 'get', 'your', 'humor', 'like', 'I', "do.I'm", 'in', 'the', 'room,', "it's", 'a', 'typical', 'Tuesday', 'night.', "I'm", 'listening', 'to', 'the', 'kind', 'of', 'music', 'she', "doesn't", 'like.', 'And', "she'll", 'never', 'know', 'your', 'story', 'like', 'I', 'doBut', 'she', 'wears', 'short', 'skirts', 'I', 'wear', 't-shirt', "She's", 'cheer', 'captain', 'And', "I'm", 'on', 'the', 'bleachersDreaming', 'about', 'the', 'day', 'when', 'you', 'wake', 'up', 'and', 'find', 'That', 'what', "you're", 'looking', 'for', 'has', 'been', 'here', 'the', 'whole', 'time.', 'Related', '11', 'Delicious', 'Misheard', 'Lyrics', 'About', 'Food', 'NEW', 'SONG:', 'Taylor', 'Swift', '-', "'Lover'", '-', 'LYRICS', 'Prime', 'Day', 'concert', 'by', 'Amazon', 'Music', 'will', 'be', 'headlined', 'by', 'Taylor', 'Swift', 'If', 'you', 'could', 'see', 'That', "I'm", 'the', 'one', 'Who', 'understands', 'you', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me?Walk', 'in', 'the', 'streets', 'with', 'you', 'in', 'your', 'worn', 'out', 'jeans', 'I', "can't", 'help', 'thinking', 'this', 'is', 'how', 'it', 'ought', 'to', 'be.', 'Laughing', 'on', 'a', 'park', 'bench', 'thinking', 'to', 'myself', '"Hey,', "isn't", 'this', 'easy?"And', "you've", 'got', 'a', 'smile', 'That', 'can', 'light', 'up', 'this', 'whole', 'town', 'I', "haven't", 'seen', 'it', 'in', 'awhile', 'Since', 'she', 'brought', 'you', 'down.You', 'say', "you're", 'fine—I', 'know', 'you', 'better', 'than', 'that', 'Hey,', 'what', 'you', 'doing', 'with', 'a', 'girl', 'like', 'that?', 'She', 'wears', 'high', 'heels', 'I', 'wear', 'sneakers', "She's", 'cheer', 'captain', 'And', "I'm", 'on', 'the', 'bleachersDreaming', 'about', 'the', 'day', 'when', 'you', 'wake', 'up', 'and', 'find', 'That', 'what', "you're", 'looking', 'for', 'has', 'been', 'here', 'the', 'whole', 'timeIf', 'you', 'could', 'see', 'That', "I'm", 'the', 'one', 'Who', 'understands', 'you', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me?Standing', 'by', 'and', 'waiting', 'at', 'your', 'backdoor.', 'All', 'this', 'time', 'how', 'could', 'you', 'not', 'know,', 'baby?', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'meOh,', 'I', 'remember', 'you', 'driving', 'to', 'my', 'house', 'In', 'the', 'middle', 'of', 'the', 'night', "I'm", 'the', 'one', 'who', 'makes', 'you', 'laugh', 'When', 'you', 'know', "you're", "'bout", 'to', 'cryI', 'know', 'your', 'favorite', 'songs', 'And', 'you', 'tell', 'me', 'about', 'your', 'dreams', 'Think', 'I', 'know', 'where', 'you', 'belong', 'Think', 'I', 'know', "it's", 'with', "meCan't", 'you', 'se', 'that', "I'm", 'the', 'one', 'Who', 'understands', 'you?', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me?Standing', 'by', 'and', 'waiting', 'at', 'your', 'backdoor.', 'All', 'this', 'time', 'how', 'could', 'you', 'not', 'know,', 'baby?', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me', 'Have', 'you', 'ever', 'thought', 'just', 'maybe', 'You', 'belong', 'with', 'me?', 'You', 'belong', 'with', 'me']

Remove stopwords

In [6]:
stopwords = ['is', 'are', 'the', 'a', 'an']
def removeStopwords(wordlist, stopwords):
    return [w for w in wordlist if w not in stopwords]
words = removeStopwords(words, stopwords)
print(words)
["You're", 'on', 'phone', 'with', 'your', "girlfriend—she's", 'upset', "She's", 'going', 'off', 'about', 'something', 'that', 'you', 'said', "'Cause", 'she', "doesn't", 'get', 'your', 'humor', 'like', 'I', "do.I'm", 'in', 'room,', "it's", 'typical', 'Tuesday', 'night.', "I'm", 'listening', 'to', 'kind', 'of', 'music', 'she', "doesn't", 'like.', 'And', "she'll", 'never', 'know', 'your', 'story', 'like', 'I', 'doBut', 'she', 'wears', 'short', 'skirts', 'I', 'wear', 't-shirt', "She's", 'cheer', 'captain', 'And', "I'm", 'on', 'bleachersDreaming', 'about', 'day', 'when', 'you', 'wake', 'up', 'and', 'find', 'That', 'what', "you're", 'looking', 'for', 'has', 'been', 'here', 'whole', 'time.', 'Related', '11', 'Delicious', 'Misheard', 'Lyrics', 'About', 'Food', 'NEW', 'SONG:', 'Taylor', 'Swift', '-', "'Lover'", '-', 'LYRICS', 'Prime', 'Day', 'concert', 'by', 'Amazon', 'Music', 'will', 'be', 'headlined', 'by', 'Taylor', 'Swift', 'If', 'you', 'could', 'see', 'That', "I'm", 'one', 'Who', 'understands', 'you', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me?Walk', 'in', 'streets', 'with', 'you', 'in', 'your', 'worn', 'out', 'jeans', 'I', "can't", 'help', 'thinking', 'this', 'how', 'it', 'ought', 'to', 'be.', 'Laughing', 'on', 'park', 'bench', 'thinking', 'to', 'myself', '"Hey,', "isn't", 'this', 'easy?"And', "you've", 'got', 'smile', 'That', 'can', 'light', 'up', 'this', 'whole', 'town', 'I', "haven't", 'seen', 'it', 'in', 'awhile', 'Since', 'she', 'brought', 'you', 'down.You', 'say', "you're", 'fine—I', 'know', 'you', 'better', 'than', 'that', 'Hey,', 'what', 'you', 'doing', 'with', 'girl', 'like', 'that?', 'She', 'wears', 'high', 'heels', 'I', 'wear', 'sneakers', "She's", 'cheer', 'captain', 'And', "I'm", 'on', 'bleachersDreaming', 'about', 'day', 'when', 'you', 'wake', 'up', 'and', 'find', 'That', 'what', "you're", 'looking', 'for', 'has', 'been', 'here', 'whole', 'timeIf', 'you', 'could', 'see', 'That', "I'm", 'one', 'Who', 'understands', 'you', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me?Standing', 'by', 'and', 'waiting', 'at', 'your', 'backdoor.', 'All', 'this', 'time', 'how', 'could', 'you', 'not', 'know,', 'baby?', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'meOh,', 'I', 'remember', 'you', 'driving', 'to', 'my', 'house', 'In', 'middle', 'of', 'night', "I'm", 'one', 'who', 'makes', 'you', 'laugh', 'When', 'you', 'know', "you're", "'bout", 'to', 'cryI', 'know', 'your', 'favorite', 'songs', 'And', 'you', 'tell', 'me', 'about', 'your', 'dreams', 'Think', 'I', 'know', 'where', 'you', 'belong', 'Think', 'I', 'know', "it's", 'with', "meCan't", 'you', 'se', 'that', "I'm", 'one', 'Who', 'understands', 'you?', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me?Standing', 'by', 'and', 'waiting', 'at', 'your', 'backdoor.', 'All', 'this', 'time', 'how', 'could', 'you', 'not', 'know,', 'baby?', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me', 'Have', 'you', 'ever', 'thought', 'just', 'maybe', 'You', 'belong', 'with', 'me?', 'You', 'belong', 'with', 'me']

count word frequency

In [7]:
counts = dict()
for word in words:
    counts[word] = counts.get(word,0) + 1
sorted(counts, key=counts.__getitem__, reverse=True)
pprint.pprint(counts)
{'"Hey,': 1,
 "'Cause": 1,
 "'Lover'": 1,
 "'bout": 1,
 '-': 2,
 '11': 1,
 'About': 1,
 'All': 2,
 'Amazon': 1,
 'And': 4,
 'Been': 3,
 'Day': 1,
 'Delicious': 1,
 'Food': 1,
 'Have': 1,
 'Hey,': 1,
 'I': 9,
 "I'm": 7,
 'If': 1,
 'In': 1,
 'LYRICS': 1,
 'Laughing': 1,
 'Lyrics': 1,
 'Misheard': 1,
 'Music': 1,
 'NEW': 1,
 'Prime': 1,
 'Related': 1,
 'SONG:': 1,
 'She': 1,
 "She's": 3,
 'Since': 1,
 'So,': 3,
 'Swift': 2,
 'Taylor': 2,
 'That': 5,
 'Think': 2,
 'Tuesday': 1,
 'When': 1,
 'Who': 3,
 'You': 11,
 "You're": 1,
 'about': 4,
 'all': 3,
 'along': 3,
 'and': 4,
 'at': 2,
 'awhile': 1,
 'baby?': 2,
 'backdoor.': 2,
 'be': 1,
 'be.': 1,
 'been': 2,
 'belong': 12,
 'bench': 1,
 'better': 1,
 'bleachersDreaming': 2,
 'brought': 1,
 'by': 4,
 'can': 1,
 "can't": 4,
 'captain': 2,
 'cheer': 2,
 'concert': 1,
 'could': 4,
 'cryI': 1,
 'day': 2,
 "do.I'm": 1,
 'doBut': 1,
 "doesn't": 2,
 'doing': 1,
 'down.You': 1,
 'dreams': 1,
 'driving': 1,
 'easy?"And': 1,
 'ever': 1,
 'favorite': 1,
 'find': 2,
 'fine—I': 1,
 'for': 2,
 'get': 1,
 'girl': 1,
 "girlfriend—she's": 1,
 'going': 1,
 'got': 1,
 'has': 2,
 "haven't": 1,
 'headlined': 1,
 'heels': 1,
 'help': 1,
 'here': 5,
 'high': 1,
 'house': 1,
 'how': 3,
 'humor': 1,
 'in': 4,
 "isn't": 1,
 'it': 2,
 "it's": 2,
 'jeans': 1,
 'just': 1,
 'kind': 1,
 'know': 6,
 'know,': 2,
 'laugh': 1,
 'light': 1,
 'like': 3,
 'like.': 1,
 'listening': 1,
 'looking': 2,
 'makes': 1,
 'maybe': 1,
 'me': 7,
 'me?': 1,
 'me?Standing': 2,
 'me?Walk': 1,
 "meCan't": 1,
 'meOh,': 1,
 'middle': 1,
 'music': 1,
 'my': 1,
 'myself': 1,
 'never': 1,
 'night': 1,
 'night.': 1,
 'not': 2,
 'of': 2,
 'off': 1,
 'on': 4,
 'one': 4,
 'ought': 1,
 'out': 1,
 'park': 1,
 'phone': 1,
 'remember': 1,
 'room,': 1,
 'said': 1,
 'say': 1,
 'se': 1,
 'see': 5,
 'seen': 1,
 'she': 4,
 "she'll": 1,
 'short': 1,
 'skirts': 1,
 'smile': 1,
 'sneakers': 1,
 'something': 1,
 'songs': 1,
 'story': 1,
 'streets': 1,
 't-shirt': 1,
 'tell': 1,
 'than': 1,
 'that': 3,
 'that?': 1,
 'thinking': 2,
 'this': 5,
 'thought': 1,
 'time': 2,
 'time.': 1,
 'timeIf': 1,
 'to': 5,
 'town': 1,
 'typical': 1,
 'understands': 3,
 'up': 3,
 'upset': 1,
 'waiting': 2,
 'wake': 2,
 'wear': 2,
 'wears': 2,
 'what': 3,
 'when': 2,
 'where': 1,
 'who': 1,
 'whole': 3,
 'why': 3,
 'will': 1,
 'with': 15,
 'worn': 1,
 'you': 23,
 "you're": 4,
 "you've": 1,
 'you?': 1,
 'your': 8}

sort words by frequency

The above method uses loop, which needs quite a lot of programming, and is also slow. The following method uses the dataframe data structure in the pandas package to quickly count and sort words by frequencies. Pandas documentation includes more details on its powerful data structure https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html

In [8]:
df=pd.DataFrame(words, columns=['word'])
x=df["word"].value_counts()
pprint.pprint(x)
you          23
with         15
belong       12
You          11
I             9
             ..
listening     1
short         1
do.I'm        1
time.         1
just          1
Name: word, Length: 186, dtype: int64
In [11]:
import nltk
tokens = nltk.word_tokenize(text)
tags = nltk.pos_tag(tokens)
print(tags[0][0], tags[0][1])
You PRP

Remove Stopwords

In [14]:
def removeStopwords(wordlist, stopwords):
    return [w for w in wordlist if w not in stopwords]
words = removeStopwords(words, stopwords)
print(words)
["You're", 'on', 'phone', 'with', 'your', "girlfriend—she's", 'upset', "She's", 'going', 'off', 'about', 'something', 'that', 'you', 'said', "'Cause", 'she', "doesn't", 'get', 'your', 'humor', 'like', 'I', "do.I'm", 'in', 'room,', "it's", 'typical', 'Tuesday', 'night.', "I'm", 'listening', 'to', 'kind', 'of', 'music', 'she', "doesn't", 'like.', 'And', "she'll", 'never', 'know', 'your', 'story', 'like', 'I', 'doBut', 'she', 'wears', 'short', 'skirts', 'I', 'wear', 't-shirt', "She's", 'cheer', 'captain', 'And', "I'm", 'on', 'bleachersDreaming', 'about', 'day', 'when', 'you', 'wake', 'up', 'and', 'find', 'That', 'what', "you're", 'looking', 'for', 'has', 'been', 'here', 'whole', 'time.', 'Related', '11', 'Delicious', 'Misheard', 'Lyrics', 'About', 'Food', 'NEW', 'SONG:', 'Taylor', 'Swift', '-', "'Lover'", '-', 'LYRICS', 'Prime', 'Day', 'concert', 'by', 'Amazon', 'Music', 'will', 'be', 'headlined', 'by', 'Taylor', 'Swift', 'If', 'you', 'could', 'see', 'That', "I'm", 'one', 'Who', 'understands', 'you', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me?Walk', 'in', 'streets', 'with', 'you', 'in', 'your', 'worn', 'out', 'jeans', 'I', "can't", 'help', 'thinking', 'this', 'how', 'it', 'ought', 'to', 'be.', 'Laughing', 'on', 'park', 'bench', 'thinking', 'to', 'myself', '"Hey,', "isn't", 'this', 'easy?"And', "you've", 'got', 'smile', 'That', 'can', 'light', 'up', 'this', 'whole', 'town', 'I', "haven't", 'seen', 'it', 'in', 'awhile', 'Since', 'she', 'brought', 'you', 'down.You', 'say', "you're", 'fine—I', 'know', 'you', 'better', 'than', 'that', 'Hey,', 'what', 'you', 'doing', 'with', 'girl', 'like', 'that?', 'She', 'wears', 'high', 'heels', 'I', 'wear', 'sneakers', "She's", 'cheer', 'captain', 'And', "I'm", 'on', 'bleachersDreaming', 'about', 'day', 'when', 'you', 'wake', 'up', 'and', 'find', 'That', 'what', "you're", 'looking', 'for', 'has', 'been', 'here', 'whole', 'timeIf', 'you', 'could', 'see', 'That', "I'm", 'one', 'Who', 'understands', 'you', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me?Standing', 'by', 'and', 'waiting', 'at', 'your', 'backdoor.', 'All', 'this', 'time', 'how', 'could', 'you', 'not', 'know,', 'baby?', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'meOh,', 'I', 'remember', 'you', 'driving', 'to', 'my', 'house', 'In', 'middle', 'of', 'night', "I'm", 'one', 'who', 'makes', 'you', 'laugh', 'When', 'you', 'know', "you're", "'bout", 'to', 'cryI', 'know', 'your', 'favorite', 'songs', 'And', 'you', 'tell', 'me', 'about', 'your', 'dreams', 'Think', 'I', 'know', 'where', 'you', 'belong', 'Think', 'I', 'know', "it's", 'with', "meCan't", 'you', 'se', 'that', "I'm", 'one', 'Who', 'understands', 'you?', 'Been', 'here', 'all', 'along', 'So,', 'why', "can't", 'you', 'see', 'You', 'belong', 'with', 'me?Standing', 'by', 'and', 'waiting', 'at', 'your', 'backdoor.', 'All', 'this', 'time', 'how', 'could', 'you', 'not', 'know,', 'baby?', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me', 'You', 'belong', 'with', 'me', 'Have', 'you', 'ever', 'thought', 'just', 'maybe', 'You', 'belong', 'with', 'me?', 'You', 'belong', 'with', 'me']
In [15]:
counts = dict()
for word in words:
    counts[word] = counts.get(word, 0) + 1
sorted(counts, key=counts.__getitem__, reverse=True)
pprint.pprint(counts)
{'"Hey,': 1,
 "'Cause": 1,
 "'Lover'": 1,
 "'bout": 1,
 '-': 2,
 '11': 1,
 'About': 1,
 'All': 2,
 'Amazon': 1,
 'And': 4,
 'Been': 3,
 'Day': 1,
 'Delicious': 1,
 'Food': 1,
 'Have': 1,
 'Hey,': 1,
 'I': 9,
 "I'm": 7,
 'If': 1,
 'In': 1,
 'LYRICS': 1,
 'Laughing': 1,
 'Lyrics': 1,
 'Misheard': 1,
 'Music': 1,
 'NEW': 1,
 'Prime': 1,
 'Related': 1,
 'SONG:': 1,
 'She': 1,
 "She's": 3,
 'Since': 1,
 'So,': 3,
 'Swift': 2,
 'Taylor': 2,
 'That': 5,
 'Think': 2,
 'Tuesday': 1,
 'When': 1,
 'Who': 3,
 'You': 11,
 "You're": 1,
 'about': 4,
 'all': 3,
 'along': 3,
 'and': 4,
 'at': 2,
 'awhile': 1,
 'baby?': 2,
 'backdoor.': 2,
 'be': 1,
 'be.': 1,
 'been': 2,
 'belong': 12,
 'bench': 1,
 'better': 1,
 'bleachersDreaming': 2,
 'brought': 1,
 'by': 4,
 'can': 1,
 "can't": 4,
 'captain': 2,
 'cheer': 2,
 'concert': 1,
 'could': 4,
 'cryI': 1,
 'day': 2,
 "do.I'm": 1,
 'doBut': 1,
 "doesn't": 2,
 'doing': 1,
 'down.You': 1,
 'dreams': 1,
 'driving': 1,
 'easy?"And': 1,
 'ever': 1,
 'favorite': 1,
 'find': 2,
 'fine—I': 1,
 'for': 2,
 'get': 1,
 'girl': 1,
 "girlfriend—she's": 1,
 'going': 1,
 'got': 1,
 'has': 2,
 "haven't": 1,
 'headlined': 1,
 'heels': 1,
 'help': 1,
 'here': 5,
 'high': 1,
 'house': 1,
 'how': 3,
 'humor': 1,
 'in': 4,
 "isn't": 1,
 'it': 2,
 "it's": 2,
 'jeans': 1,
 'just': 1,
 'kind': 1,
 'know': 6,
 'know,': 2,
 'laugh': 1,
 'light': 1,
 'like': 3,
 'like.': 1,
 'listening': 1,
 'looking': 2,
 'makes': 1,
 'maybe': 1,
 'me': 7,
 'me?': 1,
 'me?Standing': 2,
 'me?Walk': 1,
 "meCan't": 1,
 'meOh,': 1,
 'middle': 1,
 'music': 1,
 'my': 1,
 'myself': 1,
 'never': 1,
 'night': 1,
 'night.': 1,
 'not': 2,
 'of': 2,
 'off': 1,
 'on': 4,
 'one': 4,
 'ought': 1,
 'out': 1,
 'park': 1,
 'phone': 1,
 'remember': 1,
 'room,': 1,
 'said': 1,
 'say': 1,
 'se': 1,
 'see': 5,
 'seen': 1,
 'she': 4,
 "she'll": 1,
 'short': 1,
 'skirts': 1,
 'smile': 1,
 'sneakers': 1,
 'something': 1,
 'songs': 1,
 'story': 1,
 'streets': 1,
 't-shirt': 1,
 'tell': 1,
 'than': 1,
 'that': 3,
 'that?': 1,
 'thinking': 2,
 'this': 5,
 'thought': 1,
 'time': 2,
 'time.': 1,
 'timeIf': 1,
 'to': 5,
 'town': 1,
 'typical': 1,
 'understands': 3,
 'up': 3,
 'upset': 1,
 'waiting': 2,
 'wake': 2,
 'wear': 2,
 'wears': 2,
 'what': 3,
 'when': 2,
 'where': 1,
 'who': 1,
 'whole': 3,
 'why': 3,
 'will': 1,
 'with': 15,
 'worn': 1,
 'you': 23,
 "you're": 4,
 "you've": 1,
 'you?': 1,
 'your': 8}
In [18]:
df = pd.DataFrame(words, columns=['word'])
x = df["word"].value_counts()
pprint.pprint(x)
you          23
with         15
belong       12
You          11
I             9
             ..
listening     1
short         1
do.I'm        1
time.         1
just          1
Name: word, Length: 186, dtype: int64
In [ ]: