Vanity Fair Oscar Predictions 2020

In [25]:
import re
import urllib
from bs4 import BeautifulSoup
def get_reviews():
    """Scrape Vanity Fair's awards-season guide into per-category predictions.

    Downloads the guide page, locates every slide ``div`` (one per award
    category) and, for each nested ``div`` inside the slide's entry container,
    tries to read a movie title and a nominee name.

    Returns:
        list[dict]: one single-key dict per slide, in the order the slides
        were found. The key is the text of the slide's ``<h3>`` heading and
        the value is a list of ``{'movie': str, 'nominated': str}`` dicts.
        A field is left as ``''`` when the expected ``<span>`` is missing, so
        the list contains blank rows and rows with only ``'movie'`` filled in.
        NOTE(review): entries appear to repeat because ``findAll('div')``
        also matches nested divs; callers are expected to de-duplicate.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
        AttributeError, IndexError: if a slide lacks the ``<h3>`` heading or
            the expected container layout (the page markup has changed).
    """
    # A bare ``import urllib`` does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    url = "https://www.vanityfair.com/hollywood/2019/11/ultimate-guide-to-awards-season"
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')
    slides = soup.findAll("div", {"class": "slide-module--slide--m310h"})
    all_nominations = []

    for slide in slides:
        category = slide.find('h3').text
        # The third div found under the slide's first div holds the entries.
        entry_divs = slide.find('div').findAll('div')[2].findAll('div')
        all_in_category = []
        for entry in entry_divs:
            spans = entry.findAll('span')
            temp = {'movie': '', 'nominated': ''}
            try:
                # The fourth span wraps the title span and the nominee span.
                # Assign one field at a time so a missing nominee still keeps
                # the movie title (e.g. categories that list films only).
                details = spans[3].findAll('span')
                temp['movie'] = details[0].text
                temp['nominated'] = details[1].text
            except (IndexError, AttributeError):
                # Best effort: divs without the expected spans stay blank.
                pass
            all_in_category.append(temp)
        all_nominations.append({category: all_in_category})
    return all_nominations
In [26]:
# Scrape the guide once; `results` is a list with one single-key dict per
# category slide ({category heading: [{'movie': ..., 'nominated': ...}, ...]}).
results = get_reviews()
In [27]:
import pandas as pd
import numpy as np
In [28]:
# Best Picture predictions (the first category returned by the scraper).
# The scraped list contains repeated rows and blank placeholders, so
# de-duplicate and then drop entries that have no movie title.
# The blank-to-NaN step uses DataFrame.replace with a per-column mapping rather
# than `df['movie'].replace(..., inplace=True)`: that form mutates a temporary
# column selection and is not guaranteed to update the frame (it is a no-op
# under pandas Copy-on-Write), which would leave the blank rows in place.
best_picture = (
    pd.DataFrame(results[0]['BestPicture'])
    .drop_duplicates()
    .replace({'movie': {'': np.nan}})
    .dropna()
)
In [29]:
best_picture
Out[29]:
movie nominated
0 Once Upon a Time...In Hollywood
3 The Irishman
6 1917
9 Parasite
12 Marriage Story
15 Joker
18 The Farewell
21 Jojo Rabbit
24 Little Women
In [30]:
# Best Director predictions (the second category returned by the scraper).
# The scraped list contains repeated rows and blank placeholders, so
# de-duplicate and then drop entries that have no movie title.
# The blank-to-NaN step uses DataFrame.replace with a per-column mapping rather
# than `df['movie'].replace(..., inplace=True)`: that form mutates a temporary
# column selection and is not guaranteed to update the frame (it is a no-op
# under pandas Copy-on-Write), which would leave the blank rows in place.
best_director = (
    pd.DataFrame(results[1]['BestDirector'])
    .drop_duplicates()
    .replace({'movie': {'': np.nan}})
    .dropna()
)
In [31]:
best_director
Out[31]:
movie nominated
0 Once Upon a Time...In Hollywood Quentin Tarantino
3 The Irishman Martin Scorsese
6 1917 Sam Mendes
9 Parasite Bon Joon-ho
12 Marriage Story Noah Baumbach
In [32]:
# Best Actor predictions (the third category returned by the scraper).
# The scraped list contains repeated rows and blank placeholders, so
# de-duplicate and then drop entries that have no movie title.
# The blank-to-NaN step uses DataFrame.replace with a per-column mapping rather
# than `df['movie'].replace(..., inplace=True)`: that form mutates a temporary
# column selection and is not guaranteed to update the frame (it is a no-op
# under pandas Copy-on-Write), which would leave the blank rows in place.
best_actor = (
    pd.DataFrame(results[2]['BestActor'])
    .drop_duplicates()
    .replace({'movie': {'': np.nan}})
    .dropna()
)
In [33]:
best_actor
Out[33]:
movie nominated
0 Joker Joaquin Phoenix
3 Marriage Story Adam Driver
6 Pain and Glory Antonio Banderas
9 Once Upon a Time...In Hollywood Leonardo DiCaprio
12 The Two Popes Jonathan Pryce
In [34]:
# Best Actress predictions (the fourth category returned by the scraper).
# The scraped list contains repeated rows and blank placeholders, so
# de-duplicate and then drop entries that have no movie title.
# The blank-to-NaN step uses DataFrame.replace with a per-column mapping rather
# than `df['movie'].replace(..., inplace=True)`: that form mutates a temporary
# column selection and is not guaranteed to update the frame (it is a no-op
# under pandas Copy-on-Write), which would leave the blank rows in place.
best_actress = (
    pd.DataFrame(results[3]['BestActress'])
    .drop_duplicates()
    .replace({'movie': {'': np.nan}})
    .dropna()
)
In [35]:
best_actress
Out[35]:
movie nominated
0 Judy Renée Zellweger
3 Marriage Story Scarlett Johansson
6 Bombshell Charlize Theron
9 Harriet Cynthia Erivo
12 Us Lupita Nyong'o
In [ ]:
 
In [ ]: