{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [
  "# Getting the information about executed prisoners from the Texas Department of Criminal Justice \n",
  "### https://www.tdcj.texas.gov/death_row/dr_executed_offenders.html" ] },
 { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [
  "import requests\n",
  "import lxml.html as lh\n",
  "import pandas as pd\n",
  "from bs4 import BeautifulSoup as bs" ] },
 { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [
  "url = \"https://www.tdcj.texas.gov/death_row/dr_executed_offenders.html\"\n",
  "# Download the page. The timeout keeps this cell from hanging forever on a stalled connection.\n",
  "page = requests.get(url, timeout=30)\n",
  "# Stop here on an HTTP error status instead of parsing an error page further down.\n",
  "page.raise_for_status()" ] },
 { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "200\n" ] } ], "source": [
  "print(page.status_code)  # 200 confirms that the page was retrieved successfully" ] },
 { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [
  "# Parse the downloaded HTML with BeautifulSoup.\n",
  "soup = bs(page.content, \"html.parser\")\n",
  "# Uncomment to print the parsed document in a readable, indented form:\n",
  "# print(soup.prettify())" ] },
 { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [
  "# The table we want is the <table> tag with class \"tdcj_table indent\".\n",
  "table = soup.find(\"table\", class_=\"tdcj_table indent\")\n",
  "# find() returns None when nothing matches; report that clearly instead of\n",
  "# letting a later cell fail with an unrelated AttributeError.\n",
  "if table is None:\n",
  "    raise ValueError(\"Could not find the 'tdcj_table indent' table on the page\")" ] },
 { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Execution',\n", " 'Link',\n", " 'Link',\n", " 'Last Name',\n", " 'First Name',\n", " 'TDCJNumber',\n", " 'Age',\n", " 'Date',\n", " 'Race',\n", " 'County']" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# The first <tr> of the table is used as the header row. Its text is split on\n",
  "# newlines and the first and last pieces are discarded by the [1:-1] slice\n",
  "# (presumably empty strings from the leading/trailing newline - confirm).\n",
  "# Note that both link columns come back with the same name, 'Link'.\n",
  "info = table.find(\"tr\")\n",
  "headers = info.text.split('\\n')[1:-1]\n",
  "headers" ] },
 { "cell_type": "code", "execution_count": 25, "metadata": 
{}, "outputs": [], "source": [
  "# Fixed column names for the two link cells (positions 1 and 2 of each row).\n",
  "# The anchor text must not be used as the key: it is not identical in every\n",
  "# row (e.g. 'Last Statemen'), which scattered the hrefs over extra,\n",
  "# mostly-NaN columns in the resulting DataFrame.\n",
  "LINK_COLUMNS = {1: \"Offender Information\", 2: \"Last Statement\"}\n",
  "\n",
  "all_info = table.find_all(\"tr\")\n",
  "all_dics = []\n",
  "for info in all_info:\n",
  "    row = info.find_all(\"td\")\n",
  "    row_obj = {}\n",
  "    for i, column_value in enumerate(row):\n",
  "        if i in LINK_COLUMNS:\n",
  "            # Keep the link target rather than the link text; None if the cell has no <a> tag.\n",
  "            link = column_value.find(\"a\")\n",
  "            row_obj[LINK_COLUMNS[i]] = link.get(\"href\") if link is not None else None\n",
  "        else:\n",
  "            row_obj[headers[i]] = column_value.text\n",
  "    # The header row contains no <td> cells, so it contributes an empty record\n",
  "    # (an all-NaN row 0 in the DataFrame). It is kept on purpose so that row\n",
  "    # positions are not shifted.\n",
  "    # NOTE(review): later outputs start at index 1, so this row appears to be\n",
  "    # dropped further down - confirm before filtering it out here.\n",
  "    all_dics.append(row_obj)" ] },
 { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
  "df = pd.DataFrame(all_dics)" ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Execution | \n", "Offender Information | \n", "Last Statement | \n", "Last Name | \n", "First Name | \n", "TDCJNumber | \n", "Age | \n", "Date | \n", "Race | \n", "County | \n", "Offender Information | \n", "Last Statement | \n", "Last Statemen | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "566 | \n", "dr_info/halljusten.html | \n", "dr_info/halljustenlast.html | \n", "Hall | \n", "Justen | \n", "999497 | \n", "38 | \n", "11/6/2019 | \n", "White | \n", "El Paso | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "565 | \n", "dr_info/sparksrobert.html | \n", "dr_info/sparksrobertlast.html | \n", "Sparks | \n", "Robert | \n", "999542 | \n", "45 | \n", "9/25/2019 | \n", "Black | \n", "Dallas | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "564 | \n", "dr_info/solizmarkanthony.html | \n", "dr_info/solizmarkanthonylast.html | \n", "Soliz | \n", "Mark | \n", "999571 | \n", "37 | \n", "9/10/2019 | \n", "Hispanic | \n", "Johnson | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "563 | \n", "dr_info/crutsingerbilly.html | \n", "dr_info/crutsingerbillylast.html | \n", "Crutsinger | \n", "Billy | \n", "999459 | \n", "64 | \n", "9/4/2019 | \n", "White | \n", "Tarrant | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
562 | \n", "5 | \n", "dr_info/skillerndoyle.jpg | \n", "dr_info/skillerndoylelast.html | \n", "Skillern | \n", "Doyle | \n", "518 | \n", "49 | \n", "01/16/1985 | \n", "White | \n", "Lubbock | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
563 | \n", "4 | \n", "dr_info/barefootthomas.jpg | \n", "dr_info/barefootthomaslast.html | \n", "Barefoot | \n", "Thomas | \n", "621 | \n", "39 | \n", "10/30/1984 | \n", "White | \n", "Bell | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
564 | \n", "3 | \n", "dr_info/obryanronald.jpg | \n", "dr_info/obryanronaldlast.html | \n", "O'Bryan | \n", "Ronald | \n", "529 | \n", "39 | \n", "03/31/1984 | \n", "White | \n", "Harris | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
565 | \n", "2 | \n", "dr_info/autryjames.html | \n", "dr_info/no_last_statement.html | \n", "Autry | \n", "James | \n", "670 | \n", "29 | \n", "03/14/1984 | \n", "White | \n", "Jefferson | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
566 | \n", "1 | \n", "dr_info/brookscharlie.html | \n", "dr_info/brookscharlielast.html | \n", "Brooks, Jr. | \n", "Charlie | \n", "592 | \n", "40 | \n", "12/07/1982 | \n", "Black | \n", "Tarrant | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
567 rows × 13 columns
\n", "\n", " | Execution | \n", "Offender Information | \n", "Last Statement | \n", "Last Name | \n", "First Name | \n", "TDCJNumber | \n", "Age | \n", "Date | \n", "Race | \n", "County | \n", "
---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "566 | \n", "dr_info/halljusten.html | \n", "dr_info/halljustenlast.html | \n", "Hall | \n", "Justen | \n", "999497 | \n", "38 | \n", "11/6/2019 | \n", "White | \n", "El Paso | \n", "
2 | \n", "565 | \n", "dr_info/sparksrobert.html | \n", "dr_info/sparksrobertlast.html | \n", "Sparks | \n", "Robert | \n", "999542 | \n", "45 | \n", "9/25/2019 | \n", "Black | \n", "Dallas | \n", "
3 | \n", "564 | \n", "dr_info/solizmarkanthony.html | \n", "dr_info/solizmarkanthonylast.html | \n", "Soliz | \n", "Mark | \n", "999571 | \n", "37 | \n", "9/10/2019 | \n", "Hispanic | \n", "Johnson | \n", "
4 | \n", "563 | \n", "dr_info/crutsingerbilly.html | \n", "dr_info/crutsingerbillylast.html | \n", "Crutsinger | \n", "Billy | \n", "999459 | \n", "64 | \n", "9/4/2019 | \n", "White | \n", "Tarrant | \n", "
5 | \n", "562 | \n", "dr_info/swearingenlarry.html | \n", "dr_info/swearingenlarrylast.html | \n", "Swearingen | \n", "Larry | \n", "999361 | \n", "48 | \n", "8/21/2019 | \n", "White | \n", "Montgomery | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
562 | \n", "5 | \n", "dr_info/skillerndoyle.jpg | \n", "dr_info/skillerndoylelast.html | \n", "Skillern | \n", "Doyle | \n", "518 | \n", "49 | \n", "01/16/1985 | \n", "White | \n", "Lubbock | \n", "
563 | \n", "4 | \n", "dr_info/barefootthomas.jpg | \n", "dr_info/barefootthomaslast.html | \n", "Barefoot | \n", "Thomas | \n", "621 | \n", "39 | \n", "10/30/1984 | \n", "White | \n", "Bell | \n", "
564 | \n", "3 | \n", "dr_info/obryanronald.jpg | \n", "dr_info/obryanronaldlast.html | \n", "O'Bryan | \n", "Ronald | \n", "529 | \n", "39 | \n", "03/31/1984 | \n", "White | \n", "Harris | \n", "
565 | \n", "2 | \n", "dr_info/autryjames.html | \n", "dr_info/no_last_statement.html | \n", "Autry | \n", "James | \n", "670 | \n", "29 | \n", "03/14/1984 | \n", "White | \n", "Jefferson | \n", "
566 | \n", "1 | \n", "dr_info/brookscharlie.html | \n", "dr_info/brookscharlielast.html | \n", "Brooks, Jr. | \n", "Charlie | \n", "592 | \n", "40 | \n", "12/07/1982 | \n", "Black | \n", "Tarrant | \n", "
566 rows × 10 columns
\n", "\n", " | Execution | \n", "Offender Information | \n", "Last Statement | \n", "Last Name | \n", "First Name | \n", "TDCJNumber | \n", "Age | \n", "Date | \n", "Race | \n", "County | \n", "
---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "566 | \n", "dr_info/halljusten.html | \n", "dr_info/halljustenlast.html | \n", "Hall | \n", "Justen | \n", "999497 | \n", "38 | \n", "11/6/2019 | \n", "White | \n", "El Paso | \n", "
2 | \n", "565 | \n", "dr_info/sparksrobert.html | \n", "dr_info/sparksrobertlast.html | \n", "Sparks | \n", "Robert | \n", "999542 | \n", "45 | \n", "9/25/2019 | \n", "Black | \n", "Dallas | \n", "
3 | \n", "564 | \n", "dr_info/solizmarkanthony.html | \n", "dr_info/solizmarkanthonylast.html | \n", "Soliz | \n", "Mark | \n", "999571 | \n", "37 | \n", "9/10/2019 | \n", "Hispanic | \n", "Johnson | \n", "
4 | \n", "563 | \n", "dr_info/crutsingerbilly.html | \n", "dr_info/crutsingerbillylast.html | \n", "Crutsinger | \n", "Billy | \n", "999459 | \n", "64 | \n", "9/4/2019 | \n", "White | \n", "Tarrant | \n", "
5 | \n", "562 | \n", "dr_info/swearingenlarry.html | \n", "dr_info/swearingenlarrylast.html | \n", "Swearingen | \n", "Larry | \n", "999361 | \n", "48 | \n", "8/21/2019 | \n", "White | \n", "Montgomery | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
562 | \n", "5 | \n", "dr_info/skillerndoyle.jpg | \n", "dr_info/skillerndoylelast.html | \n", "Skillern | \n", "Doyle | \n", "518 | \n", "49 | \n", "01/16/1985 | \n", "White | \n", "Lubbock | \n", "
563 | \n", "4 | \n", "dr_info/barefootthomas.jpg | \n", "dr_info/barefootthomaslast.html | \n", "Barefoot | \n", "Thomas | \n", "621 | \n", "39 | \n", "10/30/1984 | \n", "White | \n", "Bell | \n", "
564 | \n", "3 | \n", "dr_info/obryanronald.jpg | \n", "dr_info/obryanronaldlast.html | \n", "O'Bryan | \n", "Ronald | \n", "529 | \n", "39 | \n", "03/31/1984 | \n", "White | \n", "Harris | \n", "
565 | \n", "2 | \n", "dr_info/autryjames.html | \n", "dr_info/no_last_statement.html | \n", "Autry | \n", "James | \n", "670 | \n", "29 | \n", "03/14/1984 | \n", "White | \n", "Jefferson | \n", "
566 | \n", "1 | \n", "dr_info/brookscharlie.html | \n", "dr_info/brookscharlielast.html | \n", "Brooks, Jr. | \n", "Charlie | \n", "592 | \n", "40 | \n", "12/07/1982 | \n", "Black | \n", "Tarrant | \n", "
566 rows × 10 columns
\n", "\n", " | Unnamed: 0 | \n", "Execution | \n", "Offender Information | \n", "Last Statement | \n", "Last Name | \n", "First Name | \n", "TDCJNumber | \n", "Age | \n", "Date | \n", "Race | \n", "County | \n", "Last Statement Text | \n", "Offender Info | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "566 | \n", "dr_info/halljusten.html | \n", "dr_info/halljustenlast.html | \n", "Hall | \n", "Justen | \n", "999497 | \n", "38 | \n", "11/6/2019 | \n", "White | \n", "El Paso | \n", "Yeah, I want to address the Roundtree family ... | \n", "{'Name': 'Hall, Justen Grant', 'TDCJ Number': ... | \n", "
1 | \n", "2 | \n", "565 | \n", "dr_info/sparksrobert.html | \n", "dr_info/sparksrobertlast.html | \n", "Sparks | \n", "Robert | \n", "999542 | \n", "45 | \n", "9/25/2019 | \n", "Black | \n", "Dallas | \n", "Umm, Pamela can you hear me Stephanie, Hardy,... | \n", "{'Name': 'Sparks, Robert', 'TDCJ Number': '999... | \n", "
2 | \n", "3 | \n", "564 | \n", "dr_info/solizmarkanthony.html | \n", "dr_info/solizmarkanthonylast.html | \n", "Soliz | \n", "Mark | \n", "999571 | \n", "37 | \n", "9/10/2019 | \n", "Hispanic | \n", "Johnson | \n", "It’s 6:09 on September 10th, Kayla and David,... | \n", "{'Name': 'Soliz, Mark Anthony', 'TDCJ Number':... | \n", "
3 | \n", "4 | \n", "563 | \n", "dr_info/crutsingerbilly.html | \n", "dr_info/crutsingerbillylast.html | \n", "Crutsinger | \n", "Billy | \n", "999459 | \n", "64 | \n", "9/4/2019 | \n", "White | \n", "Tarrant | \n", "Hi ladies I wanted to tell ya’ll how much I l... | \n", "{'Name': 'Crutsinger, Billy Jack', 'TDCJ Numbe... | \n", "
4 | \n", "5 | \n", "562 | \n", "dr_info/swearingenlarry.html | \n", "dr_info/swearingenlarrylast.html | \n", "Swearingen | \n", "Larry | \n", "999361 | \n", "48 | \n", "8/21/2019 | \n", "White | \n", "Montgomery | \n", "Lord forgive them. They don’t know what they ... | \n", "{'Name': 'Larry Ray Swearingen', 'TDCJ Number'... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
561 | \n", "562 | \n", "5 | \n", "dr_info/skillerndoyle.jpg | \n", "dr_info/skillerndoylelast.html | \n", "Skillern | \n", "Doyle | \n", "518 | \n", "49 | \n", "01/16/1985 | \n", "White | \n", "Lubbock | \n", "I pray that my family will rejoice and will fo... | \n", "{} | \n", "
562 | \n", "563 | \n", "4 | \n", "dr_info/barefootthomas.jpg | \n", "dr_info/barefootthomaslast.html | \n", "Barefoot | \n", "Thomas | \n", "621 | \n", "39 | \n", "10/30/1984 | \n", "White | \n", "Bell | \n", "When asked if he had a last statement, he rep... | \n", "{} | \n", "
563 | \n", "564 | \n", "3 | \n", "dr_info/obryanronald.jpg | \n", "dr_info/obryanronaldlast.html | \n", "O'Bryan | \n", "Ronald | \n", "529 | \n", "39 | \n", "03/31/1984 | \n", "White | \n", "Harris | \n", "What is about to transpire in a few moments is... | \n", "{} | \n", "
564 | \n", "565 | \n", "2 | \n", "dr_info/autryjames.html | \n", "dr_info/no_last_statement.html | \n", "Autry | \n", "James | \n", "670 | \n", "29 | \n", "03/14/1984 | \n", "White | \n", "Jefferson | \n", "This offender declined to make a last statemen... | \n", "{'Name': 'Autry, James David', 'TDCJ Number': ... | \n", "
565 | \n", "566 | \n", "1 | \n", "dr_info/brookscharlie.html | \n", "dr_info/brookscharlielast.html | \n", "Brooks, Jr. | \n", "Charlie | \n", "592 | \n", "40 | \n", "12/07/1982 | \n", "Black | \n", "Tarrant | \n", "Statement to the Media: I, at this very moment... | \n", "{'Photo not available': 'Name', 'TDCJ Number':... | \n", "
566 rows × 13 columns
\n", "