Getting the information about executed prisoners from the Texas Department of Criminal Justice

https://www.tdcj.texas.gov/death_row/dr_executed_offenders.html

In [1]:
import requests
import lxml.html as lh
import pandas as pd
from bs4 import BeautifulSoup as bs
In [2]:
url = "https://www.tdcj.texas.gov/death_row/dr_executed_offenders.html"
# Download the executed-offenders listing page.  An explicit timeout is
# passed because requests otherwise waits indefinitely on a stalled server.
page = requests.get(url, timeout=30)
In [3]:
print(page.status_code)  # expect 200, meaning the page was retrieved successfully
200
In [4]:
# Parse the downloaded HTML into a BeautifulSoup tree.
# To inspect the markup in a readable, indented form, uncomment the
# prettify() line below.
soup= bs(page.content, "html.parser")
# print(soup.prettify())
In [5]:
# The table we want is the <table> tag with class "tdcj_table indent".
table = soup.find("table", class_="tdcj_table indent")
# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell if the page layout changes.
assert table is not None, 'no <table class="tdcj_table indent"> found on the page'
In [6]:
# The first <tr> of the table holds the column titles.  Its text is split on
# newlines and the first and last fragments (presumably the empty strings
# produced by the surrounding newlines) are discarded.
info = table.tr
header_fragments = info.text.split('\n')
headers = header_fragments[1:-1]
headers
Out[6]:
['Execution',
 'Link',
 'Link',
 'Last Name',
 'First Name',
 'TDCJNumber',
 'Age',
 'Date',
 'Race',
 'County']
In [120]:
# Turn every <tr> of the table into a dict (one entry per <td>) and collect
# the dicts in all_dics.
#
# Cells 1 and 2 hold links: they are stored as {anchor text: href}.  Every
# other cell is stored as {header title: cell text}.
#
# NOTE(review): because the link columns are keyed by the anchor's visible
# text, a row whose anchor text differs from the others adds a new column
# to the resulting frame -- this looks like the source of the extra trailing
# columns in the DataFrame output; confirm and consider fixed column names.
all_info = table.find_all("tr")
all_dics = []
for info in all_info:
    row = info.find_all("td")
    row_obj = {}
    for i, column_value in enumerate(row):
        if i in (1, 2):
            anchor = column_value.find('a')
            row_obj[anchor.text] = anchor.get('href')
        else:
            row_obj[headers[i]] = column_value.text
    all_dics.append(row_obj)
# Uncomment to inspect the raw records:
# all_dics
In [121]:
# Build one DataFrame row per collected dict; the columns are the union of
# the keys seen across all dicts, and missing keys become NaN.
df = pd.DataFrame(all_dics)
In [122]:
# Row 0 is all NaN: it comes from the header <tr>, which presumably has no
# <td> cells and therefore produced an empty dict in the parsing loop.
df
Out[122]:
Execution Offender Information Last Statement Last Name First Name TDCJNumber Age Date Race County Offender Information Last Statement Last Statemen
0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 566 dr_info/halljusten.html dr_info/halljustenlast.html Hall Justen 999497 38 11/6/2019 White El Paso NaN NaN NaN
2 565 dr_info/sparksrobert.html dr_info/sparksrobertlast.html Sparks Robert 999542 45 9/25/2019 Black Dallas NaN NaN NaN
3 564 dr_info/solizmarkanthony.html dr_info/solizmarkanthonylast.html Soliz Mark 999571 37 9/10/2019 Hispanic Johnson NaN NaN NaN
4 563 dr_info/crutsingerbilly.html dr_info/crutsingerbillylast.html Crutsinger Billy 999459 64 9/4/2019 White Tarrant NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
562 5 dr_info/skillerndoyle.jpg dr_info/skillerndoylelast.html Skillern Doyle 518 49 01/16/1985 White Lubbock NaN NaN NaN
563 4 dr_info/barefootthomas.jpg dr_info/barefootthomaslast.html Barefoot Thomas 621 39 10/30/1984 White Bell NaN NaN NaN
564 3 dr_info/obryanronald.jpg dr_info/obryanronaldlast.html O'Bryan Ronald 529 39 03/31/1984 White Harris NaN NaN NaN
565 2 dr_info/autryjames.html dr_info/no_last_statement.html Autry James 670 29 03/14/1984 White Jefferson NaN NaN NaN
566 1 dr_info/brookscharlie.html dr_info/brookscharlielast.html Brooks, Jr. Charlie 592 40 12/07/1982 Black Tarrant NaN NaN NaN

567 rows × 13 columns

In [142]:
# Keep everything except the first row (all NaN) and the last three columns,
# as an independent copy so later column assignments do not touch df.
# NOTE(review): the three dropped columns are selected by position; they
# appear to be link columns created from variant anchor texts, so any links
# stored only there are discarded -- confirm this is intended.
clean_df = df.iloc[1:, :-3].copy()
clean_df
Out[142]:
Execution Offender Information Last Statement Last Name First Name TDCJNumber Age Date Race County
1 566 dr_info/halljusten.html dr_info/halljustenlast.html Hall Justen 999497 38 11/6/2019 White El Paso
2 565 dr_info/sparksrobert.html dr_info/sparksrobertlast.html Sparks Robert 999542 45 9/25/2019 Black Dallas
3 564 dr_info/solizmarkanthony.html dr_info/solizmarkanthonylast.html Soliz Mark 999571 37 9/10/2019 Hispanic Johnson
4 563 dr_info/crutsingerbilly.html dr_info/crutsingerbillylast.html Crutsinger Billy 999459 64 9/4/2019 White Tarrant
5 562 dr_info/swearingenlarry.html dr_info/swearingenlarrylast.html Swearingen Larry 999361 48 8/21/2019 White Montgomery
... ... ... ... ... ... ... ... ... ... ...
562 5 dr_info/skillerndoyle.jpg dr_info/skillerndoylelast.html Skillern Doyle 518 49 01/16/1985 White Lubbock
563 4 dr_info/barefootthomas.jpg dr_info/barefootthomaslast.html Barefoot Thomas 621 39 10/30/1984 White Bell
564 3 dr_info/obryanronald.jpg dr_info/obryanronaldlast.html O'Bryan Ronald 529 39 03/31/1984 White Harris
565 2 dr_info/autryjames.html dr_info/no_last_statement.html Autry James 670 29 03/14/1984 White Jefferson
566 1 dr_info/brookscharlie.html dr_info/brookscharlielast.html Brooks, Jr. Charlie 592 40 12/07/1982 Black Tarrant

566 rows × 10 columns

In [147]:
# Rebuilds clean_df from df exactly as the earlier cell does (drop the
# all-NaN first row and the last three columns, then copy).  Because it
# starts from df again, re-running it simply resets clean_df.
clean_df = df.iloc[1:, :-3].copy()
clean_df
Out[147]:
Execution Offender Information Last Statement Last Name First Name TDCJNumber Age Date Race County
1 566 dr_info/halljusten.html dr_info/halljustenlast.html Hall Justen 999497 38 11/6/2019 White El Paso
2 565 dr_info/sparksrobert.html dr_info/sparksrobertlast.html Sparks Robert 999542 45 9/25/2019 Black Dallas
3 564 dr_info/solizmarkanthony.html dr_info/solizmarkanthonylast.html Soliz Mark 999571 37 9/10/2019 Hispanic Johnson
4 563 dr_info/crutsingerbilly.html dr_info/crutsingerbillylast.html Crutsinger Billy 999459 64 9/4/2019 White Tarrant
5 562 dr_info/swearingenlarry.html dr_info/swearingenlarrylast.html Swearingen Larry 999361 48 8/21/2019 White Montgomery
... ... ... ... ... ... ... ... ... ... ...
562 5 dr_info/skillerndoyle.jpg dr_info/skillerndoylelast.html Skillern Doyle 518 49 01/16/1985 White Lubbock
563 4 dr_info/barefootthomas.jpg dr_info/barefootthomaslast.html Barefoot Thomas 621 39 10/30/1984 White Bell
564 3 dr_info/obryanronald.jpg dr_info/obryanronaldlast.html O'Bryan Ronald 529 39 03/31/1984 White Harris
565 2 dr_info/autryjames.html dr_info/no_last_statement.html Autry James 670 29 03/14/1984 White Jefferson
566 1 dr_info/brookscharlie.html dr_info/brookscharlielast.html Brooks, Jr. Charlie 592 40 12/07/1982 Black Tarrant

566 rows × 10 columns

In [149]:
# for testing
# clean_df = clean_df[:5]
# for link in clean_df['Last Statement'].tolist():
    
def get_last_statement(link):
    """Download an offender's last-statement page and return the statement text.

    Parameters
    ----------
    link : str
        Page path relative to ``https://www.tdcj.texas.gov/death_row/``,
        e.g. ``"dr_info/halljustenlast.html"``.

    Returns
    -------
    str
        * The text of the node two siblings after the first ``<p>`` whose
          text contains ``'Last Statement'``.
        * ``'no statement'`` when the page has no such paragraph, or that
          paragraph is not followed by a usable node.
        * ``'no link'`` when ``link`` is not a non-empty string (for example
          NaN from a missing cell) or the page cannot be fetched.
    """
    # A missing cell shows up as NaN/None; there is nothing to request.
    if not isinstance(link, str) or not link:
        return 'no link'

    url = "https://www.tdcj.texas.gov/death_row/" + link
    try:
        # The timeout keeps one stalled request from blocking the whole run.
        page = requests.get(url, timeout=30)
        # Treat 4xx/5xx responses like an unreachable link.
        page.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are handled here, so KeyboardInterrupt
        # and programming errors still surface.
        return 'no link'

    soup = bs(page.content, "html.parser")
    for p in soup.find_all("p"):
        if 'Last Statement' not in p.text:
            continue
        try:
            # The node right after the heading paragraph is skipped
            # (presumably inter-tag whitespace); the one after it is taken
            # as the statement.
            return p.next_sibling.next_sibling.text
        except AttributeError:
            # A sibling is missing (None) or has no .text.
            return 'no statement'

    # No paragraph mentioned 'Last Statement' at all.
    return 'no statement'
            
    
# Fetch the statement text for every row (one HTTP request per row, so this
# cell is slow).  The function only needs the link, so it is applied to that
# single column instead of building a Series for every row with axis=1.
clean_df['Last Statement Text'] = clean_df['Last Statement'].apply(get_last_statement)
In [150]:
clean_df
Out[150]:
Execution Offender Information Last Statement Last Name First Name TDCJNumber Age Date Race County Last Statement Text
1 566 dr_info/halljusten.html dr_info/halljustenlast.html Hall Justen 999497 38 11/6/2019 White El Paso Yeah, I want to address the Roundtree family ...
2 565 dr_info/sparksrobert.html dr_info/sparksrobertlast.html Sparks Robert 999542 45 9/25/2019 Black Dallas Umm, Pamela can you hear me Stephanie, Hardy,...
3 564 dr_info/solizmarkanthony.html dr_info/solizmarkanthonylast.html Soliz Mark 999571 37 9/10/2019 Hispanic Johnson It’s 6:09 on September 10th, Kayla and David,...
4 563 dr_info/crutsingerbilly.html dr_info/crutsingerbillylast.html Crutsinger Billy 999459 64 9/4/2019 White Tarrant Hi ladies I wanted to tell ya’ll how much I l...
5 562 dr_info/swearingenlarry.html dr_info/swearingenlarrylast.html Swearingen Larry 999361 48 8/21/2019 White Montgomery Lord forgive them. They don’t know what they ...
... ... ... ... ... ... ... ... ... ... ... ...
562 5 dr_info/skillerndoyle.jpg dr_info/skillerndoylelast.html Skillern Doyle 518 49 01/16/1985 White Lubbock I pray that my family will rejoice and will fo...
563 4 dr_info/barefootthomas.jpg dr_info/barefootthomaslast.html Barefoot Thomas 621 39 10/30/1984 White Bell When asked if he had a last statement, he rep...
564 3 dr_info/obryanronald.jpg dr_info/obryanronaldlast.html O'Bryan Ronald 529 39 03/31/1984 White Harris What is about to transpire in a few moments is...
565 2 dr_info/autryjames.html dr_info/no_last_statement.html Autry James 670 29 03/14/1984 White Jefferson This offender declined to make a last statemen...
566 1 dr_info/brookscharlie.html dr_info/brookscharlielast.html Brooks, Jr. Charlie 592 40 12/07/1982 Black Tarrant Statement to the Media: I, at this very moment...

566 rows × 11 columns

In [ ]: