{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os \n",
"import csv \n",
"import pandas as pd "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Reading in the df and looking at the first 5 rows "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" execution \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" ... \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" inmate_number \n",
" age \n",
" date_executed \n",
" race \n",
" county \n",
" last_statement \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 566 \n",
" Hall \n",
" Justen \n",
" 23 \n",
" 9 \n",
" 21 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999497 \n",
" 38 \n",
" 11/6/2019 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" \n",
" \n",
" 1 \n",
" 565 \n",
" Sparks \n",
" Robert \n",
" 34 \n",
" 8 \n",
" 33 \n",
" machine operator \n",
" yes \n",
" 3 \n",
" murder \n",
" ... \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 999542 \n",
" 45 \n",
" 9/25/2019 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" \n",
" \n",
" 2 \n",
" 564 \n",
" Soliz \n",
" Mark \n",
" 30 \n",
" 8 \n",
" 28 \n",
" cabinet maker \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999571 \n",
" 37 \n",
" 9/10/2019 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" \n",
" \n",
" 3 \n",
" 563 \n",
" Crutsinger \n",
" Billy \n",
" 49 \n",
" 11 \n",
" 48 \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" ... \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 999459 \n",
" 64 \n",
" 9/4/2019 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" \n",
" \n",
" 4 \n",
" 562 \n",
" Swearingen \n",
" Larry \n",
" 29 \n",
" 11 \n",
" 27 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, kidnapping \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999361 \n",
" 48 \n",
" 8/21/2019 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" \n",
" \n",
"
\n",
"
5 rows × 24 columns
\n",
"
"
],
"text/plain": [
" execution last_name first_name age_received education_level age_crime \\\n",
"0 566 Hall Justen 23 9 21 \n",
"1 565 Sparks Robert 34 8 33 \n",
"2 564 Soliz Mark 30 8 28 \n",
"3 563 Crutsinger Billy 49 11 48 \n",
"4 562 Swearingen Larry 29 11 27 \n",
"\n",
" occupation prior_record num_of_vic main_crime ... vic_kid \\\n",
"0 laborer yes 1 murder ... 0 \n",
"1 machine operator yes 3 murder ... 2 \n",
"2 cabinet maker yes 1 murder, robbery ... 0 \n",
"3 laborer yes 2 murder ... 0 \n",
"4 laborer yes 1 murder, kidnapping ... 0 \n",
"\n",
" vic_male vic_female vic_police inmate_number age date_executed race \\\n",
"0 0 1 no 999497 38 11/6/2019 White \n",
"1 2 1 no 999542 45 9/25/2019 Black \n",
"2 0 1 no 999571 37 9/10/2019 Hispanic \n",
"3 0 2 no 999459 64 9/4/2019 White \n",
"4 0 1 no 999361 48 8/21/2019 White \n",
"\n",
" county last_statement \n",
"0 El Paso Yeah, I want to address the Roundtree family ... \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... \n",
"4 Montgomery Lord forgive them. They don't know what they ... \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row = pd.read_csv(\"death_row_final_project.csv\")\n",
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(566, 24)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# getting the shape of the df \n",
"death_row.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" execution \n",
" inmate_number \n",
" age \n",
" \n",
" \n",
" \n",
" \n",
" count \n",
" 566.0000 \n",
" 566.000000 \n",
" 566.000000 \n",
" \n",
" \n",
" mean \n",
" 283.5000 \n",
" 531777.630742 \n",
" 39.726148 \n",
" \n",
" \n",
" std \n",
" 163.5344 \n",
" 498661.405354 \n",
" 8.828008 \n",
" \n",
" \n",
" min \n",
" 1.0000 \n",
" 511.000000 \n",
" 24.000000 \n",
" \n",
" \n",
" 25% \n",
" 142.2500 \n",
" 819.250000 \n",
" 33.000000 \n",
" \n",
" \n",
" 50% \n",
" 283.5000 \n",
" 999033.000000 \n",
" 38.000000 \n",
" \n",
" \n",
" 75% \n",
" 424.7500 \n",
" 999269.750000 \n",
" 45.000000 \n",
" \n",
" \n",
" max \n",
" 566.0000 \n",
" 999571.000000 \n",
" 70.000000 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" execution inmate_number age\n",
"count 566.0000 566.000000 566.000000\n",
"mean 283.5000 531777.630742 39.726148\n",
"std 163.5344 498661.405354 8.828008\n",
"min 1.0000 511.000000 24.000000\n",
"25% 142.2500 819.250000 33.000000\n",
"50% 283.5000 999033.000000 38.000000\n",
"75% 424.7500 999269.750000 45.000000\n",
"max 566.0000 999571.000000 70.000000"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#looking at descriptive stats for the numeric columns\n",
"death_row.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"execution int64\n",
"last_name object\n",
"first_name object\n",
"age_received object\n",
"education_level object\n",
"age_crime object\n",
"occupation object\n",
"prior_record object\n",
"num_of_vic object\n",
"main_crime object\n",
"type_of_crime object\n",
"weapon object\n",
"co_defendants object\n",
"race_vic object\n",
"vic_kid object\n",
"vic_male object\n",
"vic_female object\n",
"vic_police object\n",
"inmate_number int64\n",
"age int64\n",
"date_executed object\n",
"race object\n",
"county object\n",
"last_statement object\n",
"dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#looking at the data types for each column in the df \n",
"death_row.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Things I want to change: \n",
"1. age_received, age-crime, num_of_vic, vic_kid, vic_male, vic_female need to be changed to int \n",
"2. education level might make sense to discretize \n",
"3. occupation, main_crime, type_of_crime, weapon, race_vic, race, county, late_name, first_name, prior_record, vic_police boolean turn to a factor\n",
"4. can remove execution, date_executed and inmate_number as it serves no useful purpose and is unique to each prisoner "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"23 43\n",
"20 38\n",
"21 37\n",
"25 34\n",
"24 31\n",
"29 29\n",
"19 28\n",
"22 28\n",
"27 25\n",
"30 22\n",
"26 22\n",
"31 21\n",
"32 21\n",
"28 19\n",
"38 17\n",
"36 17\n",
"33 14\n",
"40 14\n",
"35 14\n",
"34 13\n",
"39 13\n",
"18 11\n",
"37 9\n",
"45 5\n",
"43 5\n",
"41 4\n",
"46 4\n",
"49 4\n",
"42 3\n",
"51 3\n",
"44 3\n",
"47 3\n",
"53 3\n",
"48 2\n",
"unknown 2\n",
"52 1\n",
"54 1\n",
"50 1\n",
"17 1\n",
"57 1\n",
"Name: age_received, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# looking at the values for age_received to ensure all are numeric.\n",
"death_row.age_received.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# need to remove unknown to n/a for all the columns that I want to change to numeric. \n",
"numeric_columns = [\"age_received\", \"age_crime\", \"num_of_vic\", \"vic_kid\", \"vic_male\", \"vic_female\", \"co_defendants\"]\n",
"\n",
"for column in numeric_columns: \n",
" death_row[column] = death_row[column].str.replace(\"unknown\", \"\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"23 43\n",
"20 38\n",
"21 37\n",
"25 34\n",
"24 31\n",
"29 29\n",
"19 28\n",
"22 28\n",
"27 25\n",
"30 22\n",
"26 22\n",
"31 21\n",
"32 21\n",
"28 19\n",
"38 17\n",
"36 17\n",
"33 14\n",
"40 14\n",
"35 14\n",
"34 13\n",
"39 13\n",
"18 11\n",
"37 9\n",
"45 5\n",
"43 5\n",
"41 4\n",
"46 4\n",
"49 4\n",
"42 3\n",
"51 3\n",
"44 3\n",
"47 3\n",
"53 3\n",
"48 2\n",
" 2\n",
"52 1\n",
"54 1\n",
"50 1\n",
"17 1\n",
"57 1\n",
"Name: age_received, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# checking to make sure that worked... \n",
"death_row.age_received.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"death_row[numeric_columns] = death_row[numeric_columns].apply(pd.to_numeric) #changes everything to float"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" execution \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" ... \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" inmate_number \n",
" age \n",
" date_executed \n",
" race \n",
" county \n",
" last_statement \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 566 \n",
" Hall \n",
" Justen \n",
" 23.0 \n",
" 9 \n",
" 21.0 \n",
" laborer \n",
" yes \n",
" 1.0 \n",
" murder \n",
" ... \n",
" 0.0 \n",
" 0.0 \n",
" 1.0 \n",
" no \n",
" 999497 \n",
" 38 \n",
" 11/6/2019 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" \n",
" \n",
" 1 \n",
" 565 \n",
" Sparks \n",
" Robert \n",
" 34.0 \n",
" 8 \n",
" 33.0 \n",
" machine operator \n",
" yes \n",
" 3.0 \n",
" murder \n",
" ... \n",
" 2.0 \n",
" 2.0 \n",
" 1.0 \n",
" no \n",
" 999542 \n",
" 45 \n",
" 9/25/2019 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" \n",
" \n",
" 2 \n",
" 564 \n",
" Soliz \n",
" Mark \n",
" 30.0 \n",
" 8 \n",
" 28.0 \n",
" cabinet maker \n",
" yes \n",
" 1.0 \n",
" murder, robbery \n",
" ... \n",
" 0.0 \n",
" 0.0 \n",
" 1.0 \n",
" no \n",
" 999571 \n",
" 37 \n",
" 9/10/2019 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" \n",
" \n",
" 3 \n",
" 563 \n",
" Crutsinger \n",
" Billy \n",
" 49.0 \n",
" 11 \n",
" 48.0 \n",
" laborer \n",
" yes \n",
" 2.0 \n",
" murder \n",
" ... \n",
" 0.0 \n",
" 0.0 \n",
" 2.0 \n",
" no \n",
" 999459 \n",
" 64 \n",
" 9/4/2019 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" \n",
" \n",
" 4 \n",
" 562 \n",
" Swearingen \n",
" Larry \n",
" 29.0 \n",
" 11 \n",
" 27.0 \n",
" laborer \n",
" yes \n",
" 1.0 \n",
" murder, kidnapping \n",
" ... \n",
" 0.0 \n",
" 0.0 \n",
" 1.0 \n",
" no \n",
" 999361 \n",
" 48 \n",
" 8/21/2019 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" \n",
" \n",
"
\n",
"
5 rows × 24 columns
\n",
"
"
],
"text/plain": [
" execution last_name first_name age_received education_level age_crime \\\n",
"0 566 Hall Justen 23.0 9 21.0 \n",
"1 565 Sparks Robert 34.0 8 33.0 \n",
"2 564 Soliz Mark 30.0 8 28.0 \n",
"3 563 Crutsinger Billy 49.0 11 48.0 \n",
"4 562 Swearingen Larry 29.0 11 27.0 \n",
"\n",
" occupation prior_record num_of_vic main_crime ... vic_kid \\\n",
"0 laborer yes 1.0 murder ... 0.0 \n",
"1 machine operator yes 3.0 murder ... 2.0 \n",
"2 cabinet maker yes 1.0 murder, robbery ... 0.0 \n",
"3 laborer yes 2.0 murder ... 0.0 \n",
"4 laborer yes 1.0 murder, kidnapping ... 0.0 \n",
"\n",
" vic_male vic_female vic_police inmate_number age date_executed \\\n",
"0 0.0 1.0 no 999497 38 11/6/2019 \n",
"1 2.0 1.0 no 999542 45 9/25/2019 \n",
"2 0.0 1.0 no 999571 37 9/10/2019 \n",
"3 0.0 2.0 no 999459 64 9/4/2019 \n",
"4 0.0 1.0 no 999361 48 8/21/2019 \n",
"\n",
" race county last_statement \n",
"0 White El Paso Yeah, I want to address the Roundtree family ... \n",
"1 Black Dallas Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 Hispanic Johnson It's 6:09 on September 10th, Kayla and David,... \n",
"3 White Tarrant Hi ladies I wanted to tell ya'll how much I l... \n",
"4 White Montgomery Lord forgive them. They don't know what they ... \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# I do not want to see the decimal places in the columns \n",
"pd.options.display.float_format = \"{:,.0f}\".format"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Replacing all missing values with the mean of each column"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The number of missing values in age_received is 2\n",
"Now the number of missing values in age_received is 0\n",
"The number of missing values in age_crime is 2\n",
"Now the number of missing values in age_crime is 0\n",
"The number of missing values in num_of_vic is 1\n",
"Now the number of missing values in num_of_vic is 0\n",
"The number of missing values in vic_kid is 1\n",
"Now the number of missing values in vic_kid is 0\n",
"The number of missing values in vic_male is 2\n",
"Now the number of missing values in vic_male is 0\n",
"The number of missing values in vic_female is 2\n",
"Now the number of missing values in vic_female is 0\n",
"The number of missing values in co_defendants is 1\n",
"Now the number of missing values in co_defendants is 0\n"
]
}
],
"source": [
"for column in numeric_columns: \n",
" print(\"The number of missing values in\", column, \"is\", death_row[column].isna().sum())\n",
" death_row[column] = death_row[column].fillna(death_row[column].mean())\n",
" print(\"Now the number of missing values in\", column, \"is\", death_row[column].isna().sum())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" execution \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" ... \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" inmate_number \n",
" age \n",
" date_executed \n",
" race \n",
" county \n",
" last_statement \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 566 \n",
" Hall \n",
" Justen \n",
" 23 \n",
" 9 \n",
" 21 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999497 \n",
" 38 \n",
" 11/6/2019 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" \n",
" \n",
" 1 \n",
" 565 \n",
" Sparks \n",
" Robert \n",
" 34 \n",
" 8 \n",
" 33 \n",
" machine operator \n",
" yes \n",
" 3 \n",
" murder \n",
" ... \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 999542 \n",
" 45 \n",
" 9/25/2019 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" \n",
" \n",
" 2 \n",
" 564 \n",
" Soliz \n",
" Mark \n",
" 30 \n",
" 8 \n",
" 28 \n",
" cabinet maker \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999571 \n",
" 37 \n",
" 9/10/2019 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" \n",
" \n",
" 3 \n",
" 563 \n",
" Crutsinger \n",
" Billy \n",
" 49 \n",
" 11 \n",
" 48 \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" ... \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 999459 \n",
" 64 \n",
" 9/4/2019 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" \n",
" \n",
" 4 \n",
" 562 \n",
" Swearingen \n",
" Larry \n",
" 29 \n",
" 11 \n",
" 27 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, kidnapping \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999361 \n",
" 48 \n",
" 8/21/2019 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" \n",
" \n",
"
\n",
"
5 rows × 24 columns
\n",
"
"
],
"text/plain": [
" execution last_name first_name age_received education_level age_crime \\\n",
"0 566 Hall Justen 23 9 21 \n",
"1 565 Sparks Robert 34 8 33 \n",
"2 564 Soliz Mark 30 8 28 \n",
"3 563 Crutsinger Billy 49 11 48 \n",
"4 562 Swearingen Larry 29 11 27 \n",
"\n",
" occupation prior_record num_of_vic main_crime ... vic_kid \\\n",
"0 laborer yes 1 murder ... 0 \n",
"1 machine operator yes 3 murder ... 2 \n",
"2 cabinet maker yes 1 murder, robbery ... 0 \n",
"3 laborer yes 2 murder ... 0 \n",
"4 laborer yes 1 murder, kidnapping ... 0 \n",
"\n",
" vic_male vic_female vic_police inmate_number age date_executed \\\n",
"0 0 1 no 999497 38 11/6/2019 \n",
"1 2 1 no 999542 45 9/25/2019 \n",
"2 0 1 no 999571 37 9/10/2019 \n",
"3 0 2 no 999459 64 9/4/2019 \n",
"4 0 1 no 999361 48 8/21/2019 \n",
"\n",
" race county last_statement \n",
"0 White El Paso Yeah, I want to address the Roundtree family ... \n",
"1 Black Dallas Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 Hispanic Johnson It's 6:09 on September 10th, Kayla and David,... \n",
"3 White Tarrant Hi ladies I wanted to tell ya'll how much I l... \n",
"4 White Montgomery Lord forgive them. They don't know what they ... \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Discretizing education level "
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12 110\n",
"11 75\n",
"10 75\n",
"9 72\n",
"ged 63\n",
"8 50\n",
"unknown 41\n",
"7 27\n",
"14 17\n",
"6 9\n",
"13 8\n",
"15 5\n",
"5 4\n",
"16 4\n",
"3 2\n",
"12.5 1\n",
"0 1\n",
"college 1\n",
"4 1\n",
"Name: education_level, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.education_level.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For discretizing, anything above 12 will be changed to college, 12 and ged will be changed to highschool, 9 - 11 to some highschool, less than 9 is not highschool"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def replace_items_in_column_from_list(a_list_of_items_to_replace, df, column, word_to_be_changed_to):\n",
" for item in a_list_of_items_to_replace: \n",
" df[column] = df[column].str.replace(item, word_to_be_changed_to)\n",
" return df[column]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"some_highschool 222\n",
"highschool 173\n",
"no_highschool 94\n",
"unknown 41\n",
"college 36\n",
"Name: education_level, dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"high_school = [\"12\", \"ged\"]\n",
"some_highschool = [\"11\", \"10\", \"9\"]\n",
"no_highschool = [\"8\", \"7\", \"6\", \"5\", \"4\", \"3\", \"2\", \"1\", \"0\"]\n",
"college = [\"13\", \"14\", \"15\", \"12.5\", \"16\"]\n",
"death_row[\"education_level\"] = replace_items_in_column_from_list(college, death_row, \"education_level\", \"college\")\n",
"death_row[\"education_level\"] = replace_items_in_column_from_list(high_school, death_row, \"education_level\", \"highschool\")\n",
"death_row[\"education_level\"] = replace_items_in_column_from_list(some_highschool, death_row, \"education_level\", \"some_highschool\")\n",
"death_row[\"education_level\"] = replace_items_in_column_from_list(no_highschool, death_row, \"education_level\", \"no_highschool\")\n",
"death_row.education_level.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CategoricalDtype(categories=['unknown', 'no_highschool', 'some_highschool', 'highschool',\n",
" 'college'],\n",
" ordered=True)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Changing the education level to an ordered category \n",
"cat = [\"unknown\", \"no_highschool\", \"some_highschool\", \"highschool\", \"college\"]\n",
"#Changing the month data type from int to ordered category \n",
"death_row[\"education_level\"] = pd.Categorical(death_row[\"education_level\"], ordered = True, categories = cat)\n",
"#Checking to see if it worked \n",
"death_row.education_level.dtype"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" execution \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" ... \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" inmate_number \n",
" age \n",
" date_executed \n",
" race \n",
" county \n",
" last_statement \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 566 \n",
" Hall \n",
" Justen \n",
" 23 \n",
" some_highschool \n",
" 21 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999497 \n",
" 38 \n",
" 11/6/2019 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" \n",
" \n",
" 1 \n",
" 565 \n",
" Sparks \n",
" Robert \n",
" 34 \n",
" no_highschool \n",
" 33 \n",
" machine operator \n",
" yes \n",
" 3 \n",
" murder \n",
" ... \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 999542 \n",
" 45 \n",
" 9/25/2019 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" \n",
" \n",
" 2 \n",
" 564 \n",
" Soliz \n",
" Mark \n",
" 30 \n",
" no_highschool \n",
" 28 \n",
" cabinet maker \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999571 \n",
" 37 \n",
" 9/10/2019 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" \n",
" \n",
" 3 \n",
" 563 \n",
" Crutsinger \n",
" Billy \n",
" 49 \n",
" some_highschool \n",
" 48 \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" ... \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 999459 \n",
" 64 \n",
" 9/4/2019 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" \n",
" \n",
" 4 \n",
" 562 \n",
" Swearingen \n",
" Larry \n",
" 29 \n",
" some_highschool \n",
" 27 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, kidnapping \n",
" ... \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 999361 \n",
" 48 \n",
" 8/21/2019 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" \n",
" \n",
"
\n",
"
5 rows × 24 columns
\n",
"
"
],
"text/plain": [
" execution last_name first_name age_received education_level age_crime \\\n",
"0 566 Hall Justen 23 some_highschool 21 \n",
"1 565 Sparks Robert 34 no_highschool 33 \n",
"2 564 Soliz Mark 30 no_highschool 28 \n",
"3 563 Crutsinger Billy 49 some_highschool 48 \n",
"4 562 Swearingen Larry 29 some_highschool 27 \n",
"\n",
" occupation prior_record num_of_vic main_crime ... vic_kid \\\n",
"0 laborer yes 1 murder ... 0 \n",
"1 machine operator yes 3 murder ... 2 \n",
"2 cabinet maker yes 1 murder, robbery ... 0 \n",
"3 laborer yes 2 murder ... 0 \n",
"4 laborer yes 1 murder, kidnapping ... 0 \n",
"\n",
" vic_male vic_female vic_police inmate_number age date_executed \\\n",
"0 0 1 no 999497 38 11/6/2019 \n",
"1 2 1 no 999542 45 9/25/2019 \n",
"2 0 1 no 999571 37 9/10/2019 \n",
"3 0 2 no 999459 64 9/4/2019 \n",
"4 0 1 no 999361 48 8/21/2019 \n",
"\n",
" race county last_statement \n",
"0 White El Paso Yeah, I want to address the Roundtree family ... \n",
"1 Black Dallas Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 Hispanic Johnson It's 6:09 on September 10th, Kayla and David,... \n",
"3 White Tarrant Hi ladies I wanted to tell ya'll how much I l... \n",
"4 White Montgomery Lord forgive them. They don't know what they ... \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Changing the other columns that should be a category (factor)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def cat_fun(df, column): \n",
" df[column] = df[column].astype(\"category\") \n",
" return(df[column])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"category_columns = [\"occupation\", \"main_crime\", \"type_of_crime\", \"weapon\", \"race\", \"race_vic\", \"county\", \"last_name\", \"first_name\", \"prior_record\", \"vic_police\"]\n",
"for column in category_columns: \n",
" death_row[column] = cat_fun(death_row, column)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"execution int64\n",
"last_name category\n",
"first_name category\n",
"age_received float64\n",
"education_level category\n",
"age_crime float64\n",
"occupation category\n",
"prior_record category\n",
"num_of_vic float64\n",
"main_crime category\n",
"type_of_crime category\n",
"weapon category\n",
"co_defendants float64\n",
"race_vic category\n",
"vic_kid float64\n",
"vic_male float64\n",
"vic_female float64\n",
"vic_police category\n",
"inmate_number int64\n",
"age int64\n",
"date_executed object\n",
"race category\n",
"county category\n",
"last_statement object\n",
"dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Checking the data types\n",
"death_row.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Removing execution and inmate number "
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" co_defendants \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" 23 \n",
" some_highschool \n",
" 21 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder \n",
" strangling \n",
" ... \n",
" 0 \n",
" unkown \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" 34 \n",
" no_highschool \n",
" 33 \n",
" machine operator \n",
" yes \n",
" 3 \n",
" murder \n",
" stabbing \n",
" ... \n",
" 0 \n",
" black \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" 30 \n",
" no_highschool \n",
" 28 \n",
" cabinet maker \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" 1 \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" 49 \n",
" some_highschool \n",
" 48 \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" stabbing \n",
" ... \n",
" 0 \n",
" white \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" 29 \n",
" some_highschool \n",
" 27 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" 0 \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" \n",
" \n",
"
\n",
"
5 rows × 21 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen 23 some_highschool 21 \n",
"1 Sparks Robert 34 no_highschool 33 \n",
"2 Soliz Mark 30 no_highschool 28 \n",
"3 Crutsinger Billy 49 some_highschool 48 \n",
"4 Swearingen Larry 29 some_highschool 27 \n",
"\n",
" occupation prior_record num_of_vic main_crime \\\n",
"0 laborer yes 1 murder \n",
"1 machine operator yes 3 murder \n",
"2 cabinet maker yes 1 murder, robbery \n",
"3 laborer yes 2 murder \n",
"4 laborer yes 1 murder, kidnapping \n",
"\n",
" type_of_crime ... co_defendants race_vic vic_kid vic_male vic_female \\\n",
"0 strangling ... 0 unkown 0 0 1 \n",
"1 stabbing ... 0 black 2 2 1 \n",
"2 shooting ... 1 white 0 0 1 \n",
"3 stabbing ... 0 white 0 0 2 \n",
"4 strangling ... 0 white 0 0 1 \n",
"\n",
" vic_police age race county \\\n",
"0 no 38 White El Paso \n",
"1 no 45 Black Dallas \n",
"2 no 37 Hispanic Johnson \n",
"3 no 64 White Tarrant \n",
"4 no 48 White Montgomery \n",
"\n",
" last_statement \n",
"0 Yeah, I want to address the Roundtree family ... \n",
"1 Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 It's 6:09 on September 10th, Kayla and David,... \n",
"3 Hi ladies I wanted to tell ya'll how much I l... \n",
"4 Lord forgive them. They don't know what they ... \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.drop([\"execution\", \"inmate_number\", \"date_executed\"], axis = 1, inplace = True)\n",
"death_row.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Aggregating a column: time_on_death_row"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"death_row[\"time_spent\"] = death_row[\"age\"] - death_row[\"age_received\"]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 153 \n",
" Rodriguez \n",
" Michael \n",
" 39 \n",
" highschool \n",
" 40 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, escape \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 1 \n",
" 0 \n",
" yes \n",
" 40 \n",
" Hispanic \n",
" Dallas \n",
" Yes I do, I know this no way makes up for all... \n",
" 1 \n",
" \n",
" \n",
" 184 \n",
" Swift \n",
" Christopher \n",
" 30 \n",
" some_highschool \n",
" 28 \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" strangling \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 31 \n",
" White \n",
" Denton \n",
" This offender declined to make a last statemen... \n",
" 1 \n",
" \n",
" \n",
" 344 \n",
" Graham \n",
" Gary \n",
" 38 \n",
" some_highschool \n",
" 18 \n",
" laborer \n",
" no \n",
" 1 \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 1 \n",
" 0 \n",
" no \n",
" 39 \n",
" Black \n",
" Harris \n",
" I would like to say that I did not kill Bobby... \n",
" 1 \n",
" \n",
" \n",
" 392 \n",
" Foust \n",
" Aaron \n",
" 25 \n",
" highschool \n",
" 24 \n",
" laborer \n",
" no \n",
" 1 \n",
" murder, car theft, robbery \n",
" strangling \n",
" ... \n",
" white \n",
" 0 \n",
" 1 \n",
" 0 \n",
" no \n",
" 26 \n",
" White \n",
" Tarrant \n",
" Adios, amigos, I'll see ya'll on the other sid... \n",
" 1 \n",
" \n",
" \n",
" 420 \n",
" Renfro \n",
" Steven \n",
" 39 \n",
" unknown \n",
" 38 \n",
" laborer \n",
" no \n",
" 4 \n",
" murder \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 2 \n",
" 2 \n",
" yes \n",
" 40 \n",
" White \n",
" Harrison \n",
" I would like to tell the victims' families tha... \n",
" 1 \n",
" \n",
" \n",
" 459 \n",
" Gonzales, Jr. \n",
" Joe \n",
" 35 \n",
" highschool \n",
" 31 \n",
" construction \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 1 \n",
" 0 \n",
" no \n",
" 36 \n",
" Hispanic \n",
" Potter \n",
" There are people all over the world who face t... \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
6 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"153 Rodriguez Michael 39 highschool 40 \n",
"184 Swift Christopher 30 some_highschool 28 \n",
"344 Graham Gary 38 some_highschool 18 \n",
"392 Foust Aaron 25 highschool 24 \n",
"420 Renfro Steven 39 unknown 38 \n",
"459 Gonzales, Jr. Joe 35 highschool 31 \n",
"\n",
" occupation prior_record num_of_vic main_crime \\\n",
"153 laborer yes 1 murder, escape \n",
"184 laborer yes 2 murder \n",
"344 laborer no 1 murder, robbery \n",
"392 laborer no 1 murder, car theft, robbery \n",
"420 laborer no 4 murder \n",
"459 construction yes 1 murder, robbery \n",
"\n",
" type_of_crime ... race_vic vic_kid vic_male vic_female vic_police \\\n",
"153 shooting ... white 0 1 0 yes \n",
"184 strangling ... white 0 0 2 no \n",
"344 shooting ... white 0 1 0 no \n",
"392 strangling ... white 0 1 0 no \n",
"420 shooting ... white 0 2 2 yes \n",
"459 shooting ... white 0 1 0 no \n",
"\n",
" age race county \\\n",
"153 40 Hispanic Dallas \n",
"184 31 White Denton \n",
"344 39 Black Harris \n",
"392 26 White Tarrant \n",
"420 40 White Harrison \n",
"459 36 Hispanic Potter \n",
"\n",
" last_statement time_spent \n",
"153 Yes I do, I know this no way makes up for all... 1 \n",
"184 This offender declined to make a last statemen... 1 \n",
"344 I would like to say that I did not kill Bobby... 1 \n",
"392 Adios, amigos, I'll see ya'll on the other sid... 1 \n",
"420 I would like to tell the victims' families tha... 1 \n",
"459 There are people all over the world who face t... 1 \n",
"\n",
"[6 rows x 22 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"looking = death_row[death_row[\"time_spent\"] == 1]\n",
"looking\n",
"#double checking that the data is correct, as 1 year is a small amount of time spent on death row. \n",
"#There were a few records that were incorrect based on data entry from the website we scraped. Those records were updated. "
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" 23 \n",
" some_highschool \n",
" 21 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 15 \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" 34 \n",
" no_highschool \n",
" 33 \n",
" machine operator \n",
" yes \n",
" 3 \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 11 \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" 30 \n",
" no_highschool \n",
" 28 \n",
" cabinet maker \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 7 \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" 49 \n",
" some_highschool \n",
" 48 \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 15 \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" 29 \n",
" some_highschool \n",
" 27 \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 19 \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen 23 some_highschool 21 \n",
"1 Sparks Robert 34 no_highschool 33 \n",
"2 Soliz Mark 30 no_highschool 28 \n",
"3 Crutsinger Billy 49 some_highschool 48 \n",
"4 Swearingen Larry 29 some_highschool 27 \n",
"\n",
" occupation prior_record num_of_vic main_crime \\\n",
"0 laborer yes 1 murder \n",
"1 machine operator yes 3 murder \n",
"2 cabinet maker yes 1 murder, robbery \n",
"3 laborer yes 2 murder \n",
"4 laborer yes 1 murder, kidnapping \n",
"\n",
" type_of_crime ... race_vic vic_kid vic_male vic_female vic_police age \\\n",
"0 strangling ... unkown 0 0 1 no 38 \n",
"1 stabbing ... black 2 2 1 no 45 \n",
"2 shooting ... white 0 0 1 no 37 \n",
"3 stabbing ... white 0 0 2 no 64 \n",
"4 strangling ... white 0 0 1 no 48 \n",
"\n",
" race county last_statement \\\n",
"0 White El Paso Yeah, I want to address the Roundtree family ... \n",
"1 Black Dallas Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 Hispanic Johnson It's 6:09 on September 10th, Kayla and David,... \n",
"3 White Tarrant Hi ladies I wanted to tell ya'll how much I l... \n",
"4 White Montgomery Lord forgive them. They don't know what they ... \n",
"\n",
" time_spent \n",
"0 15 \n",
"1 11 \n",
"2 7 \n",
"3 15 \n",
"4 19 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### On further thought, since all of the columns except for last_statement are different labels. I am going to discretize everything. "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age_received \n",
" age_crime \n",
" num_of_vic \n",
" co_defendants \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" age \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" count \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" \n",
" \n",
" mean \n",
" 29 \n",
" 27 \n",
" 2 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 40 \n",
" 11 \n",
" \n",
" \n",
" std \n",
" 8 \n",
" 8 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 1 \n",
" 9 \n",
" 5 \n",
" \n",
" \n",
" min \n",
" 17 \n",
" 17 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 24 \n",
" -1 \n",
" \n",
" \n",
" 25% \n",
" 22 \n",
" 21 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 33 \n",
" 8 \n",
" \n",
" \n",
" 50% \n",
" 27 \n",
" 25 \n",
" 1 \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 38 \n",
" 11 \n",
" \n",
" \n",
" 75% \n",
" 33 \n",
" 32 \n",
" 2 \n",
" 1 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" 45 \n",
" 13 \n",
" \n",
" \n",
" max \n",
" 57 \n",
" 56 \n",
" 15 \n",
" 9 \n",
" 5 \n",
" 5 \n",
" 15 \n",
" 70 \n",
" 32 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" age_received age_crime num_of_vic co_defendants vic_kid vic_male \\\n",
"count 566 566 566 566 566 566 \n",
"mean 29 27 2 1 0 1 \n",
"std 8 8 1 1 1 1 \n",
"min 17 17 1 0 0 0 \n",
"25% 22 21 1 0 0 0 \n",
"50% 27 25 1 0 0 1 \n",
"75% 33 32 2 1 0 1 \n",
"max 57 56 15 9 5 5 \n",
"\n",
" vic_female age time_spent \n",
"count 566 566 566 \n",
"mean 1 40 11 \n",
"std 1 9 5 \n",
"min 0 24 -1 \n",
"25% 0 33 8 \n",
"50% 1 38 11 \n",
"75% 1 45 13 \n",
"max 15 70 32 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.describe()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 566\n",
"mean 29\n",
"std 8\n",
"min 17\n",
"25% 22\n",
"50% 27\n",
"75% 33\n",
"max 57\n",
"Name: age_received, dtype: float64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.age_received.describe()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"categories = [\"teens\", \"twenties\", \"thirty+\"]\n",
"death_row[\"age_received\"] = pd.cut(death_row[\"age_received\"], [0, 19, 29, 99], labels = categories)\n",
"death_row[\"age_crime\"] = pd.cut(death_row[\"age_crime\"], [0, 19, 29, 99], labels = categories)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 15 \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" 3 \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 11 \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" 1 \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 7 \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" 2 \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 15 \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" 1 \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 19 \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime \\\n",
"0 laborer yes 1 murder \n",
"1 machine operator yes 3 murder \n",
"2 cabinet maker yes 1 murder, robbery \n",
"3 laborer yes 2 murder \n",
"4 laborer yes 1 murder, kidnapping \n",
"\n",
" type_of_crime ... race_vic vic_kid vic_male vic_female vic_police age \\\n",
"0 strangling ... unkown 0 0 1 no 38 \n",
"1 stabbing ... black 2 2 1 no 45 \n",
"2 shooting ... white 0 0 1 no 37 \n",
"3 stabbing ... white 0 0 2 no 64 \n",
"4 strangling ... white 0 0 1 no 48 \n",
"\n",
" race county last_statement \\\n",
"0 White El Paso Yeah, I want to address the Roundtree family ... \n",
"1 Black Dallas Umm, Pamela can you hear me Stephanie, Hardy,... \n",
"2 Hispanic Johnson It's 6:09 on September 10th, Kayla and David,... \n",
"3 White Tarrant Hi ladies I wanted to tell ya'll how much I l... \n",
"4 White Montgomery Lord forgive them. They don't know what they ... \n",
"\n",
" time_spent \n",
"0 15 \n",
"1 11 \n",
"2 7 \n",
"3 15 \n",
"4 19 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 566\n",
"mean 2\n",
"std 1\n",
"min 1\n",
"25% 1\n",
"50% 1\n",
"75% 2\n",
"max 15\n",
"Name: num_of_vic, dtype: float64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.num_of_vic.describe()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"categories = [\"one\", \"two+\"]\n",
"death_row[\"num_of_vic\"] = pd.cut(death_row[\"num_of_vic\"], [0,1,99], labels = categories)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 15 \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" 2 \n",
" 2 \n",
" 1 \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 11 \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 7 \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 2 \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 15 \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" 0 \n",
" 0 \n",
" 1 \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 19 \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime type_of_crime \\\n",
"0 laborer yes one murder strangling \n",
"1 machine operator yes two+ murder stabbing \n",
"2 cabinet maker yes one murder, robbery shooting \n",
"3 laborer yes two+ murder stabbing \n",
"4 laborer yes one murder, kidnapping strangling \n",
"\n",
" ... race_vic vic_kid vic_male vic_female vic_police age race \\\n",
"0 ... unkown 0 0 1 no 38 White \n",
"1 ... black 2 2 1 no 45 Black \n",
"2 ... white 0 0 1 no 37 Hispanic \n",
"3 ... white 0 0 2 no 64 White \n",
"4 ... white 0 0 1 no 48 White \n",
"\n",
" county last_statement time_spent \n",
"0 El Paso Yeah, I want to address the Roundtree family ... 15 \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... 11 \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... 7 \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... 15 \n",
"4 Montgomery Lord forgive them. They don't know what they ... 19 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Before discretizing vic_kid, vic_male, vic_female I am going to get a count of the total number of victims for each column"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The number of children victims is 154.0\n",
"The number of male victims is 458.0\n",
"The number of female victims is 466.0\n"
]
}
],
"source": [
"sum_kid_victims = death_row.vic_kid.sum(axis = 0, skipna = True).round()\n",
"print(\"The number of children victims is\", sum_kid_victims)\n",
"sum_male_victims = death_row.vic_male.sum(axis = 0, skipna = True).round()\n",
"print(\"The number of male victims is\", sum_male_victims)\n",
"sum_female_victims = death_row.vic_female.sum(axis = 0, skipna = True).round()\n",
"print(\"The number of female victims is\", sum_female_victims)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# #Changing vic_kid, vic_male, vic_female back to object \n",
"# columns = [\"vic_kid\", \"vic_male\", \"vic_female\"]\n",
"# for column in columns: \n",
"# death_row[column] = death_row[column].astype(\"object\")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"last_name category\n",
"first_name category\n",
"age_received category\n",
"education_level category\n",
"age_crime category\n",
"occupation category\n",
"prior_record category\n",
"num_of_vic category\n",
"main_crime category\n",
"type_of_crime category\n",
"weapon category\n",
"co_defendants float64\n",
"race_vic category\n",
"vic_kid float64\n",
"vic_male float64\n",
"vic_female float64\n",
"vic_police category\n",
"age int64\n",
"race category\n",
"county category\n",
"last_statement object\n",
"time_spent float64\n",
"dtype: object"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 460\n",
"1 67\n",
"2 30\n",
"3 6\n",
"0 1\n",
"5 1\n",
"4 1\n",
"Name: vic_kid, dtype: int64"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.vic_kid.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# numeric_columns = [\"vic_kid\", \"vic_male\", \"vic_female\"]\n",
"# death_row[numeric_columns] = death_row[numeric_columns].apply(pd.to_numeric)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"categories = [\"no\", \"yes\"]\n",
"death_row[\"vic_kid\"] = pd.cut(death_row[\"vic_kid\"], [-1, 0, 99], labels = categories)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" no \n",
" 0 \n",
" 1 \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 15 \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" yes \n",
" 2 \n",
" 1 \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 11 \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" no \n",
" 0 \n",
" 1 \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 7 \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" no \n",
" 0 \n",
" 2 \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 15 \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" no \n",
" 0 \n",
" 1 \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 19 \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime type_of_crime \\\n",
"0 laborer yes one murder strangling \n",
"1 machine operator yes two+ murder stabbing \n",
"2 cabinet maker yes one murder, robbery shooting \n",
"3 laborer yes two+ murder stabbing \n",
"4 laborer yes one murder, kidnapping strangling \n",
"\n",
" ... race_vic vic_kid vic_male vic_female vic_police age race \\\n",
"0 ... unkown no 0 1 no 38 White \n",
"1 ... black yes 2 1 no 45 Black \n",
"2 ... white no 0 1 no 37 Hispanic \n",
"3 ... white no 0 2 no 64 White \n",
"4 ... white no 0 1 no 48 White \n",
"\n",
" county last_statement time_spent \n",
"0 El Paso Yeah, I want to address the Roundtree family ... 15 \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... 11 \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... 7 \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... 15 \n",
"4 Montgomery Lord forgive them. They don't know what they ... 19 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 15 \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" yes \n",
" yes \n",
" yes \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 11 \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 7 \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 15 \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 19 \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime type_of_crime \\\n",
"0 laborer yes one murder strangling \n",
"1 machine operator yes two+ murder stabbing \n",
"2 cabinet maker yes one murder, robbery shooting \n",
"3 laborer yes two+ murder stabbing \n",
"4 laborer yes one murder, kidnapping strangling \n",
"\n",
" ... race_vic vic_kid vic_male vic_female vic_police age race \\\n",
"0 ... unkown no no yes no 38 White \n",
"1 ... black yes yes yes no 45 Black \n",
"2 ... white no no yes no 37 Hispanic \n",
"3 ... white no no yes no 64 White \n",
"4 ... white no no yes no 48 White \n",
"\n",
" county last_statement time_spent \n",
"0 El Paso Yeah, I want to address the Roundtree family ... 15 \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... 11 \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... 7 \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... 15 \n",
"4 Montgomery Lord forgive them. They don't know what they ... 19 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row[\"vic_male\"] = pd.cut(death_row[\"vic_male\"], [-1, 0, 99], labels = categories)\n",
"death_row[\"vic_female\"] = pd.cut(death_row[\"vic_female\"], [-1, 0, 99], labels = categories)\n",
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 566\n",
"mean 11\n",
"std 5\n",
"min -1\n",
"25% 8\n",
"50% 11\n",
"75% 13\n",
"max 32\n",
"Name: time_spent, dtype: float64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.time_spent.describe()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"categories = [\"10_or_less\", \"10+\"]\n",
"death_row[\"time_spent\"] = pd.cut(death_row[\"time_spent\"], [-1, 10, 99], labels = categories)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 38 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 10+ \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" yes \n",
" yes \n",
" yes \n",
" no \n",
" 45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 10+ \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 37 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 10_or_less \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 64 \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 10+ \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 48 \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 10+ \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime type_of_crime \\\n",
"0 laborer yes one murder strangling \n",
"1 machine operator yes two+ murder stabbing \n",
"2 cabinet maker yes one murder, robbery shooting \n",
"3 laborer yes two+ murder stabbing \n",
"4 laborer yes one murder, kidnapping strangling \n",
"\n",
" ... race_vic vic_kid vic_male vic_female vic_police age race \\\n",
"0 ... unkown no no yes no 38 White \n",
"1 ... black yes yes yes no 45 Black \n",
"2 ... white no no yes no 37 Hispanic \n",
"3 ... white no no yes no 64 White \n",
"4 ... white no no yes no 48 White \n",
"\n",
" county last_statement time_spent \n",
"0 El Paso Yeah, I want to address the Roundtree family ... 10+ \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... 10+ \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... 10_or_less \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... 10+ \n",
"4 Montgomery Lord forgive them. They don't know what they ... 10+ \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 566\n",
"mean 40\n",
"std 9\n",
"min 24\n",
"25% 33\n",
"50% 38\n",
"75% 45\n",
"max 70\n",
"Name: age, dtype: float64"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.age.describe()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"categories = [\"18-34\", \"35-45\", \"45+\"]\n",
"death_row[\"age\"] = pd.cut(death_row[\"age\"], [18, 34, 45, 99], labels = categories)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 10+ \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" yes \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 10+ \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 10_or_less \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 10+ \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 10+ \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime type_of_crime \\\n",
"0 laborer yes one murder strangling \n",
"1 machine operator yes two+ murder stabbing \n",
"2 cabinet maker yes one murder, robbery shooting \n",
"3 laborer yes two+ murder stabbing \n",
"4 laborer yes one murder, kidnapping strangling \n",
"\n",
" ... race_vic vic_kid vic_male vic_female vic_police age race \\\n",
"0 ... unkown no no yes no 35-45 White \n",
"1 ... black yes yes yes no 35-45 Black \n",
"2 ... white no no yes no 35-45 Hispanic \n",
"3 ... white no no yes no 45+ White \n",
"4 ... white no no yes no 45+ White \n",
"\n",
" county last_statement time_spent \n",
"0 El Paso Yeah, I want to address the Roundtree family ... 10+ \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... 10+ \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... 10_or_less \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... 10+ \n",
"4 Montgomery Lord forgive them. They don't know what they ... 10+ \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"last_name category\n",
"first_name category\n",
"age_received category\n",
"education_level category\n",
"age_crime category\n",
"occupation category\n",
"prior_record category\n",
"num_of_vic category\n",
"main_crime category\n",
"type_of_crime category\n",
"weapon category\n",
"co_defendants float64\n",
"race_vic category\n",
"vic_kid category\n",
"vic_male category\n",
"vic_female category\n",
"vic_police category\n",
"age category\n",
"race category\n",
"county category\n",
"last_statement object\n",
"time_spent category\n",
"dtype: object"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 566\n",
"mean 1\n",
"std 1\n",
"min 0\n",
"25% 0\n",
"50% 0\n",
"75% 1\n",
"max 9\n",
"Name: co_defendants, dtype: float64"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.co_defendants.describe()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"categories = [\"no\", \"yes\"]\n",
"death_row[\"co_defendants\"] = pd.cut(death_row[\"co_defendants\"], [-1, 0, 99], labels = categories)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" last_name \n",
" first_name \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" ... \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" Hall \n",
" Justen \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" ... \n",
" unkown \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 10+ \n",
" \n",
" \n",
" 1 \n",
" Sparks \n",
" Robert \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" black \n",
" yes \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 10+ \n",
" \n",
" \n",
" 2 \n",
" Soliz \n",
" Mark \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 10_or_less \n",
" \n",
" \n",
" 3 \n",
" Crutsinger \n",
" Billy \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 10+ \n",
" \n",
" \n",
" 4 \n",
" Swearingen \n",
" Larry \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" ... \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 10+ \n",
" \n",
" \n",
"
\n",
"
5 rows × 22 columns
\n",
"
"
],
"text/plain": [
" last_name first_name age_received education_level age_crime \\\n",
"0 Hall Justen twenties some_highschool twenties \n",
"1 Sparks Robert thirty+ no_highschool thirty+ \n",
"2 Soliz Mark thirty+ no_highschool twenties \n",
"3 Crutsinger Billy thirty+ some_highschool thirty+ \n",
"4 Swearingen Larry twenties some_highschool twenties \n",
"\n",
" occupation prior_record num_of_vic main_crime type_of_crime \\\n",
"0 laborer yes one murder strangling \n",
"1 machine operator yes two+ murder stabbing \n",
"2 cabinet maker yes one murder, robbery shooting \n",
"3 laborer yes two+ murder stabbing \n",
"4 laborer yes one murder, kidnapping strangling \n",
"\n",
" ... race_vic vic_kid vic_male vic_female vic_police age race \\\n",
"0 ... unkown no no yes no 35-45 White \n",
"1 ... black yes yes yes no 35-45 Black \n",
"2 ... white no no yes no 35-45 Hispanic \n",
"3 ... white no no yes no 45+ White \n",
"4 ... white no no yes no 45+ White \n",
"\n",
" county last_statement time_spent \n",
"0 El Paso Yeah, I want to address the Roundtree family ... 10+ \n",
"1 Dallas Umm, Pamela can you hear me Stephanie, Hardy,... 10+ \n",
"2 Johnson It's 6:09 on September 10th, Kayla and David,... 10_or_less \n",
"3 Tarrant Hi ladies I wanted to tell ya'll how much I l... 10+ \n",
"4 Montgomery Lord forgive them. They don't know what they ... 10+ \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"last_name category\n",
"first_name category\n",
"age_received category\n",
"education_level category\n",
"age_crime category\n",
"occupation category\n",
"prior_record category\n",
"num_of_vic category\n",
"main_crime category\n",
"type_of_crime category\n",
"weapon category\n",
"co_defendants category\n",
"race_vic category\n",
"vic_kid category\n",
"vic_male category\n",
"vic_female category\n",
"vic_police category\n",
"age category\n",
"race category\n",
"county category\n",
"last_statement object\n",
"time_spent category\n",
"dtype: object"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ultimately we do not need the prisoner's first and last name unless we want to look to see if there are any specific names that occur more frequently than others. Therefore, I am removing those two columns"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" weapon \n",
" co_defendants \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" strangling \n",
" cord \n",
" no \n",
" unkown \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 10+ \n",
" \n",
" \n",
" 1 \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" knife \n",
" no \n",
" black \n",
" yes \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 10+ \n",
" \n",
" \n",
" 2 \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" gun \n",
" yes \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 10_or_less \n",
" \n",
" \n",
" 3 \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" stabbing \n",
" knife \n",
" no \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 10+ \n",
" \n",
" \n",
" 4 \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" strangling \n",
" hands \n",
" no \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 10+ \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" age_received education_level age_crime occupation prior_record \\\n",
"0 twenties some_highschool twenties laborer yes \n",
"1 thirty+ no_highschool thirty+ machine operator yes \n",
"2 thirty+ no_highschool twenties cabinet maker yes \n",
"3 thirty+ some_highschool thirty+ laborer yes \n",
"4 twenties some_highschool twenties laborer yes \n",
"\n",
" num_of_vic main_crime type_of_crime weapon co_defendants race_vic \\\n",
"0 one murder strangling cord no unkown \n",
"1 two+ murder stabbing knife no black \n",
"2 one murder, robbery shooting gun yes white \n",
"3 two+ murder stabbing knife no white \n",
"4 one murder, kidnapping strangling hands no white \n",
"\n",
" vic_kid vic_male vic_female vic_police age race county \\\n",
"0 no no yes no 35-45 White El Paso \n",
"1 yes yes yes no 35-45 Black Dallas \n",
"2 no no yes no 35-45 Hispanic Johnson \n",
"3 no no yes no 45+ White Tarrant \n",
"4 no no yes no 45+ White Montgomery \n",
"\n",
" last_statement time_spent \n",
"0 Yeah, I want to address the Roundtree family ... 10+ \n",
"1 Umm, Pamela can you hear me Stephanie, Hardy,... 10+ \n",
"2 It's 6:09 on September 10th, Kayla and David,... 10_or_less \n",
"3 Hi ladies I wanted to tell ya'll how much I l... 10+ \n",
"4 Lord forgive them. They don't know what they ... 10+ "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.drop([\"last_name\", \"first_name\"], axis = 1, inplace = True)\n",
"death_row.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Now the data is ready to analyze "
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Yeah, I want to address the Roundtree family ...\n",
"1 Umm, Pamela can you hear me Stephanie, Hardy,...\n",
"2 It's 6:09 on September 10th, Kayla and David,...\n",
"3 Hi ladies I wanted to tell ya'll how much I l...\n",
"4 Lord forgive them. They don't know what they ...\n",
"5 Spoken: No.\n",
"6 Yes Sir, that will be five Dollars I love you,...\n",
"7 To my friends and family it was a nice journey...\n",
"8 Yes Sir, I would like to thank the Shape Commu...\n",
"9 Yes Sir. Dear Heavenly Father please forgive t...\n",
"10 I am very thankful for all the hard work the M...\n",
"11 No statement given.\n",
"12 Thank you I love you all. Sandra, nice meeting...\n",
"13 l want to make sure the Patel family knows I l...\n",
"14 no statement\n",
"15 To everyone that has been there for me you kno...\n",
"16 Yes, I would like to say nephew it burns huh. ...\n",
"17 First I would like to say I have been here sin...\n",
"18 No, Well, Hi Mary Jean. See y'all later. Go ah...\n",
"19 First I would like to praise my Lord Jesus Ch...\n",
"20 I'd like to take a moment to say I'm sorry. N...\n",
"21 NaN\n",
"22 NaN\n",
"23 First and foremost I'd like to say, \"Justice h...\n",
"24 Yes, I do, Grace Kehler is that you? I have gi...\n",
"Name: last_statement, dtype: object"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.last_statement.head(25)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"541 This offender declined to make a last statemen...\n",
"542 This offender declined to make a last statemen...\n",
"543 Mother, I am sorry for all the pain I've cause...\n",
"544 This offender declined to make a last statemen...\n",
"545 This offender declined to make a last statemen...\n",
"546 This offender declined to make a last statemen...\n",
"547 I want to say I'm sorry for the things I've do...\n",
"548 This offender declined to make a last statemen...\n",
"549 Tell my mother I love her and continue on with...\n",
"550 Goodbye to my family; I love all of you, I'm s...\n",
"551 I have no last words. I am ready.\n",
"552 Goodbye to all my friends; be cool. Thank you ...\n",
"553 \"Be strong for me,\" Pinkerton told his father,...\n",
"554 This offender declined to make a last statemen...\n",
"555 I deserve this. Tell everyone I said goodbye.\n",
"556 D.J., Laurie, Dr. Wheat, about all I can say i...\n",
"557 I want to thank Father Walsh for his spiritual...\n",
"558 There's no God but Allah, and unto thy I belo...\n",
"559 This offender declined to make a last statemen...\n",
"560 Heavenly Father, I give thanks for this time, ...\n",
"561 I pray that my family will rejoice and will fo...\n",
"562 When asked if he had a last statement, he rep...\n",
"563 What is about to transpire in a few moments is...\n",
"564 This offender declined to make a last statemen...\n",
"565 Statement to the Media: I, at this very moment...\n",
"Name: last_statement, dtype: object"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.last_statement.tail(25)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"''' different ways that no statment is represented: \n",
"1. Spoken: No. \n",
"2. No statement given. \n",
"3. no statement \n",
"4. This offender declined to make a last statement.\n",
"\n",
"going to replace all of these with nothing'''\n",
"\n",
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(\"Spoken: No.\", \"none\")\n",
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(\"No statement given.\", \"none\")\n",
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(\"no statement\", \"none\")\n",
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(\"This offender declined to make a last statement.\", \"none\")\n"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Yeah, I want to address the Roundtree family ...\n",
"1 Umm, Pamela can you hear me Stephanie, Hardy,...\n",
"2 It's 6:09 on September 10th, Kayla and David,...\n",
"3 Hi ladies I wanted to tell ya'll how much I l...\n",
"4 Lord forgive them. They don't know what they ...\n",
"5 none\n",
"6 Yes Sir, that will be five Dollars I love you,...\n",
"7 To my friends and family it was a nice journey...\n",
"8 Yes Sir, I would like to thank the Shape Commu...\n",
"9 Yes Sir. Dear Heavenly Father please forgive t...\n",
"10 I am very thankful for all the hard work the M...\n",
"11 none\n",
"12 Thank you I love you all. Sandra, nice meeting...\n",
"13 l want to make sure the Patel family knows I l...\n",
"14 none\n",
"15 To everyone that has been there for me you kno...\n",
"16 Yes, I would like to say nephew it burns huh. ...\n",
"17 First I would like to say I have been here sin...\n",
"18 No, Well, Hi Mary Jean. See y'all later. Go ah...\n",
"19 First I would like to praise my Lord Jesus Ch...\n",
"20 I'd like to take a moment to say I'm sorry. N...\n",
"21 NaN\n",
"22 NaN\n",
"23 First and foremost I'd like to say, \"Justice h...\n",
"24 Yes, I do, Grace Kehler is that you? I have gi...\n",
"Name: last_statement, dtype: object"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.last_statement.head(25)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"541 none \n",
"542 none \n",
"543 Mother, I am sorry for all the pain I've cause...\n",
"544 none \n",
"545 none \n",
"546 none \n",
"547 I want to say I'm sorry for the things I've do...\n",
"548 none \n",
"549 Tell my mother I love her and continue on with...\n",
"550 Goodbye to my family; I love all of you, I'm s...\n",
"551 I have no last words. I am ready.\n",
"552 Goodbye to all my friends; be cool. Thank you ...\n",
"553 \"Be strong for me,\" Pinkerton told his father,...\n",
"554 none \n",
"555 I deserve this. Tell everyone I said goodbye.\n",
"556 D.J., Laurie, Dr. Wheat, about all I can say i...\n",
"557 I want to thank Father Walsh for his spiritual...\n",
"558 There's no God but Allah, and unto thy I belo...\n",
"559 none \n",
"560 Heavenly Father, I give thanks for this time, ...\n",
"561 I pray that my family will rejoice and will fo...\n",
"562 When asked if he had a last statement, he rep...\n",
"563 What is about to transpire in a few moments is...\n",
"564 none \n",
"565 Statement to the Media: I, at this very moment...\n",
"Name: last_statement, dtype: object"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.last_statement.tail(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Now we can analyze (hopefully)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" weapon \n",
" co_defendants \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" count \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 564 \n",
" 565 \n",
" \n",
" \n",
" unique \n",
" 3 \n",
" 5 \n",
" 3 \n",
" 78 \n",
" 4 \n",
" 2 \n",
" 38 \n",
" 44 \n",
" 82 \n",
" 2 \n",
" 10 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 6 \n",
" 3 \n",
" 6 \n",
" 113 \n",
" 454 \n",
" 2 \n",
" \n",
" \n",
" top \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, robbery \n",
" shooting \n",
" gun \n",
" no \n",
" white \n",
" no \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" White \n",
" Harris \n",
" none \n",
" 10+ \n",
" \n",
" \n",
" freq \n",
" 308 \n",
" 222 \n",
" 299 \n",
" 206 \n",
" 298 \n",
" 354 \n",
" 209 \n",
" 290 \n",
" 297 \n",
" 328 \n",
" 298 \n",
" 460 \n",
" 356 \n",
" 329 \n",
" 478 \n",
" 245 \n",
" 250 \n",
" 128 \n",
" 101 \n",
" 284 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" age_received education_level age_crime occupation prior_record \\\n",
"count 566 566 566 566 566 \n",
"unique 3 5 3 78 4 \n",
"top twenties some_highschool twenties laborer yes \n",
"freq 308 222 299 206 298 \n",
"\n",
" num_of_vic main_crime type_of_crime weapon co_defendants \\\n",
"count 566 566 566 566 566 \n",
"unique 2 38 44 82 2 \n",
"top one murder, robbery shooting gun no \n",
"freq 354 209 290 297 328 \n",
"\n",
" race_vic vic_kid vic_male vic_female vic_police age race county \\\n",
"count 566 566 566 566 566 566 566 566 \n",
"unique 10 2 2 2 6 3 6 113 \n",
"top white no yes yes no 35-45 White Harris \n",
"freq 298 460 356 329 478 245 250 128 \n",
"\n",
" last_statement time_spent \n",
"count 564 565 \n",
"unique 454 2 \n",
"top none 10+ \n",
"freq 101 284 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Going through each column and looking at the categories"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"twenties 308\n",
"thirty+ 218\n",
"teens 40\n",
"Name: age_received, dtype: int64\n"
]
}
],
"source": [
"print(death_row.age_received.value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"column_names = list(death_row.columns)\n",
"column_names.remove(\"last_statement\")"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['age_received',\n",
" 'education_level',\n",
" 'age_crime',\n",
" 'occupation',\n",
" 'prior_record',\n",
" 'num_of_vic',\n",
" 'main_crime',\n",
" 'type_of_crime',\n",
" 'weapon',\n",
" 'co_defendants',\n",
" 'race_vic',\n",
" 'vic_kid',\n",
" 'vic_male',\n",
" 'vic_female',\n",
" 'vic_police',\n",
" 'age',\n",
" 'race',\n",
" 'county',\n",
" 'time_spent']"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"column_names"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------- age_received ------------- \n",
" twenties 308\n",
"thirty+ 218\n",
"teens 40\n",
"Name: age_received, dtype: int64\n",
"------------- education_level ------------- \n",
" some_highschool 222\n",
"highschool 173\n",
"no_highschool 94\n",
"unknown 41\n",
"college 36\n",
"Name: education_level, dtype: int64\n",
"------------- age_crime ------------- \n",
" twenties 299\n",
"thirty+ 180\n",
"teens 87\n",
"Name: age_crime, dtype: int64\n",
"------------- occupation ------------- \n",
" laborer 206\n",
"unknown 47\n",
"mechanic 32\n",
"construction 28\n",
"food service 22\n",
" ... \n",
"press operator 1\n",
"farm worker 1\n",
"factory worker 1\n",
"produce broker 1\n",
"ac/heating tech 1\n",
"Name: occupation, Length: 78, dtype: int64\n",
"------------- prior_record ------------- \n",
" yes 298\n",
"no 253\n",
"unknown 14\n",
"no 1\n",
"Name: prior_record, dtype: int64\n",
"------------- num_of_vic ------------- \n",
" one 354\n",
"two+ 212\n",
"Name: num_of_vic, dtype: int64\n",
"------------- main_crime ------------- \n",
" murder, robbery 209\n",
"murder 115\n",
"murder, rape 47\n",
"murder, kidnapping, rape 30\n",
"murder, rape, robbery 28\n",
"murder, car theft 25\n",
"murder, eluding arrest 20\n",
"murder, kidnapping 20\n",
"murder, kidnapping, robbery 14\n",
"murder, kidnapping, rape, robbery 10\n",
"murder, car theft, robbery 8\n",
"murder, insurance scam 6\n",
"murder, escape 4\n",
"murder-serial, rape-serial 2\n",
"murder, eluding arrest, robbery 2\n",
"murder, car theft, rape 2\n",
"murder-serial, rape 2\n",
"murder, for hire, rape 2\n",
"murder, car theft, rape, robbery 1\n",
"murder, car theft, kidnappy, robbery 1\n",
"murder, car theft, kidnapping, rape 1\n",
"murder, car theft, kidnapping 1\n",
"murder, eluding arrest, kidnapping 1\n",
"murder, eluding arrest, rape 1\n",
"murder, car theft, eluding arrest, kidnapping, robbery 1\n",
"unknown 1\n",
"murder, for hire 1\n",
"murder, identity theft 1\n",
"murder-serial, robbery-serial, rape 1\n",
"murder, kidnapping, ransom 1\n",
"murder, mutilation-sexual 1\n",
"murder-attempted, escape 1\n",
"murder-attempted, robbery 1\n",
"murder-serial 1\n",
"murder-serial, rape, robbery 1\n",
"murder-serial, rape-serial, kidnapping 1\n",
"murder-serial, robbery-serial 1\n",
"murder, insurance scam, rape 1\n",
"Name: main_crime, dtype: int64\n",
"------------- type_of_crime ------------- \n",
" shooting 290\n",
"stabbing 82\n",
"strangling 44\n",
"beating 41\n",
"beating, stabbing 20\n",
"beating, strangling 15\n",
"shooting, stabbing 8\n",
"stabbing, strangling 4\n",
"shooting, strangling 4\n",
"beating, stabbing, strangling 4\n",
"drowning 3\n",
"beating, shooting, stabbing 3\n",
"car 3\n",
"arson 3\n",
"shooting, stabbing, strangling 3\n",
"shooting 3\n",
"unknown 2\n",
"beating, shooting 2\n",
"arson, strangling 2\n",
"arson, stabbing 2\n",
"drowning, strangling 2\n",
"arson, shooting 2\n",
"hate 2\n",
"beating, shooting, strangling 2\n",
"arson, shooting, stabbing 1\n",
"arson, shooting, strangling 1\n",
"stabbing, strangulation 1\n",
"beating, drowning, stabbing 1\n",
"beating, rape 1\n",
"arson, beating, stabbing 1\n",
"beating, rape, strangling 1\n",
"rape, shooting 1\n",
"stabbing, strangling 1\n",
"poisoning 1\n",
"shooting, stabbing 1\n",
"broke neck 1\n",
"buried alive, strangling 1\n",
"contract 1\n",
"rape, stabbing 1\n",
"drowning, shooting 1\n",
"drowning, shooting, strangling 1\n",
"drugs 1\n",
"neglect 1\n",
"shotting 1\n",
"Name: type_of_crime, dtype: int64\n",
"------------- weapon ------------- \n",
" gun 297\n",
"knife 78\n",
"hands 41\n",
"hands, knife 12\n",
"gun, knife 10\n",
" ... \n",
"cord, fireplace brush 1\n",
"cord, fire, gun 1\n",
"concrete 1\n",
"coat hangers 1\n",
"ace bandage 1\n",
"Name: weapon, Length: 82, dtype: int64\n",
"------------- co_defendants ------------- \n",
" no 328\n",
"yes 238\n",
"Name: co_defendants, dtype: int64\n",
"------------- race_vic ------------- \n",
" white 298\n",
"unknown 106\n",
"hispanic 86\n",
"black 57\n",
"asian 9\n",
"unkown 4\n",
"middle eastern 2\n",
"black 2\n",
"white 1\n",
"samoan 1\n",
"Name: race_vic, dtype: int64\n",
"------------- vic_kid ------------- \n",
" no 460\n",
"yes 106\n",
"Name: vic_kid, dtype: int64\n",
"------------- vic_male ------------- \n",
" yes 356\n",
"no 210\n",
"Name: vic_male, dtype: int64\n",
"------------- vic_female ------------- \n",
" yes 329\n",
"no 237\n",
"Name: vic_female, dtype: int64\n",
"------------- vic_police ------------- \n",
" no 478\n",
"yes 50\n",
"no 32\n",
"yes 3\n",
" no 2\n",
"unknown 1\n",
"Name: vic_police, dtype: int64\n",
"------------- age ------------- \n",
" 35-45 245\n",
"18-34 188\n",
"45+ 133\n",
"Name: age, dtype: int64\n",
"------------- race ------------- \n",
" White 250\n",
"Black 204\n",
"Hispanic 107\n",
"White 2\n",
"Other 2\n",
"Hispanic 1\n",
"Name: race, dtype: int64\n",
"------------- county ------------- \n",
" Harris 128\n",
"Dallas 59\n",
"Bexar 46\n",
"Tarrant 42\n",
"Montgomery 15\n",
" ... \n",
"Llano 1\n",
"Lubbock 1\n",
"Madison 1\n",
"McLennan 1\n",
"Kaufman 1\n",
"Name: county, Length: 113, dtype: int64\n",
"------------- time_spent ------------- \n",
" 10+ 284\n",
"10_or_less 281\n",
"Name: time_spent, dtype: int64\n"
]
}
],
"source": [
"for column in column_names: \n",
" print(\"-------------\", column, \"-------------\", \"\\n\", death_row[column].value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nNeed to change shotting to shooting for type of crime\\nIn order to predict weapon or type_of_crime a main crime needs to be decided\\n\\nneed to remove the space after white, black, in race_vic and also might want to change it to white, and non-white\\n\\nneed to remove the space after yesm and no for vic police and for the unknown changing it to no, because our belief is if it was it would have said yes \\n\\nneed to remove the space after White, and Hispanic for race. Also might make sense to change to white, non-white\\n\\n'"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''\n",
"Need to change shotting to shooting for type of crime\n",
"In order to predict weapon or type_of_crime a main crime needs to be decided\n",
"\n",
"need to remove the space after white, black, in race_vic and also might want to change it to white, and non-white\n",
"\n",
"need to remove the space after yesm and no for vic police and for the unknown changing it to no, because our belief is if it was it would have said yes \n",
"\n",
"need to remove the space after White, and Hispanic for race. Also might make sense to change to white, non-white\n",
"\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"shooting 291\n",
"stabbing 82\n",
"strangling 44\n",
"beating 41\n",
"beating, stabbing 20\n",
"beating, strangling 15\n",
"shooting, stabbing 8\n",
"beating, stabbing, strangling 4\n",
"shooting, strangling 4\n",
"stabbing, strangling 4\n",
"shooting, stabbing, strangling 3\n",
"drowning 3\n",
"beating, shooting, stabbing 3\n",
"arson 3\n",
"shooting 3\n",
"car 3\n",
"hate 2\n",
"arson, stabbing 2\n",
"beating, shooting 2\n",
"drowning, strangling 2\n",
"beating, shooting, strangling 2\n",
"arson, strangling 2\n",
"arson, shooting 2\n",
"unknown 2\n",
"neglect 1\n",
"rape, shooting 1\n",
"beating, rape 1\n",
"shooting, stabbing 1\n",
"arson, shooting, stabbing 1\n",
"broke neck 1\n",
"poisoning 1\n",
"drowning, shooting, strangling 1\n",
"buried alive, strangling 1\n",
"beating, drowning, stabbing 1\n",
"arson, shooting, strangling 1\n",
"beating, rape, strangling 1\n",
"drowning, shooting 1\n",
"arson, beating, stabbing 1\n",
"drugs 1\n",
"contract 1\n",
"stabbing, strangling 1\n",
"stabbing, strangulation 1\n",
"rape, stabbing 1\n",
"Name: type_of_crime, dtype: int64"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#It looks like there is a space after one no therefore removing that and looking again to see if we just have 3 categories\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"shotting\", \"shooting\")\n",
"death_row.type_of_crime.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because the majority of the crimes deal with guns, I think that it might be beneficial, in order to try to preduct type_of_crime, to change it to gun or non-gun. Non-gun would encompass any crime that did not use a gun so arson, stabbing, beating, strangling, drowning, car, neglect, rape. Contract will be included in gun. "
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gun 294\n",
"stabbing 82\n",
"strangling 44\n",
"beating 41\n",
"beating, stabbing 20\n",
"beating, strangling 15\n",
"gun, stabbing 8\n",
"beating, stabbing, strangling 4\n",
"gun, strangling 4\n",
"stabbing, strangling 4\n",
"car 3\n",
"drowning 3\n",
"arson 3\n",
"gun, stabbing, strangling 3\n",
"beating, gun, stabbing 3\n",
"hate 2\n",
"beating, gun 2\n",
"arson, strangling 2\n",
"beating, gun, strangling 2\n",
"arson, gun 2\n",
"drowning, strangling 2\n",
"arson, stabbing 2\n",
"unknown 2\n",
"poisoning 1\n",
"rape, stabbing 1\n",
"drowning, gun, strangling 1\n",
"arson, gun, strangling 1\n",
"beating, rape 1\n",
"arson, gun, stabbing 1\n",
"buried alive, strangling 1\n",
"broke neck 1\n",
"stabbing, strangulation 1\n",
"arson, beating, stabbing 1\n",
"beating, drowning, stabbing 1\n",
"rape, gun 1\n",
"beating, rape, strangling 1\n",
"gun, stabbing 1\n",
"stabbing, strangling 1\n",
"drugs 1\n",
"contract 1\n",
"drowning, gun 1\n",
"neglect 1\n",
"Name: type_of_crime, dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"shooting\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"gun \", \"gun\")\n",
"death_row.type_of_crime.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].astype(\"object\")\n",
"\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"gun, stabbing\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"gun, strangling\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"beating, gun, stabbing\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"gun, stabbing, strangling\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"beating, gun\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"arson, gun\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"beating, gun, strangling\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"rape, gun\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"drowning, gun, strangling\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"arson, gun, stabbing\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"arson, gun, strangling\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"drowning, gun\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"contract\", \"gun\")\n",
"death_row[\"type_of_crime\"] = death_row[\"type_of_crime\"].str.replace(\"gun, stabbing\", \"gun\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gun 324\n",
"stabbing 82\n",
"strangling 44\n",
"beating 41\n",
"beating, stabbing 20\n",
"beating, strangling 15\n",
"beating, stabbing, strangling 4\n",
"stabbing, strangling 4\n",
"drowning 3\n",
"arson 3\n",
"car 3\n",
"hate 2\n",
"drowning, strangling 2\n",
"unknown 2\n",
"arson, strangling 2\n",
"arson, stabbing 2\n",
"poisoning 1\n",
"arson, beating, stabbing 1\n",
"beating, rape 1\n",
"gun 1\n",
"broke neck 1\n",
"beating, drowning, stabbing 1\n",
"stabbing, strangulation 1\n",
"rape, stabbing 1\n",
"beating, rape, strangling 1\n",
"stabbing, strangling 1\n",
"drugs 1\n",
"buried alive, strangling 1\n",
"neglect 1\n",
"Name: type_of_crime, dtype: int64"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.type_of_crime.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('O')"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.type_of_crime.dtype"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"death_row.loc[death_row[\"type_of_crime\"] != \"gun\", \"type_of_crime\"] = \"other\""
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gun 324\n",
"other 242\n",
"Name: type_of_crime, dtype: int64"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.type_of_crime.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nneed to remove the space after white, black, in race_vic \\n\\nneed to remove the space after yes and no for vic police and for the unknown changing it to no, because our belief is if it was it would have said yes \\n\\nneed to remove the space after White, and Hispanic for race. Also might make sense to change to white, non-white\\n\\n'"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''\n",
"need to remove the space after white, black, in race_vic \n",
"\n",
"need to remove the space after yes and no for vic police and for the unknown changing it to no, because our belief is if it was it would have said yes \n",
"\n",
"need to remove the space after White, and Hispanic for race. Also might make sense to change to white, non-white\n",
"\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" weapon \n",
" co_defendants \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder \n",
" other \n",
" cord \n",
" no \n",
" unkown \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" White \n",
" El Paso \n",
" Yeah, I want to address the Roundtree family ... \n",
" 10+ \n",
" \n",
" \n",
" 1 \n",
" thirty+ \n",
" no_highschool \n",
" thirty+ \n",
" machine operator \n",
" yes \n",
" two+ \n",
" murder \n",
" other \n",
" knife \n",
" no \n",
" black \n",
" yes \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" Black \n",
" Dallas \n",
" Umm, Pamela can you hear me Stephanie, Hardy,... \n",
" 10+ \n",
" \n",
" \n",
" 2 \n",
" thirty+ \n",
" no_highschool \n",
" twenties \n",
" cabinet maker \n",
" yes \n",
" one \n",
" murder, robbery \n",
" gun \n",
" gun \n",
" yes \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 35-45 \n",
" Hispanic \n",
" Johnson \n",
" It's 6:09 on September 10th, Kayla and David,... \n",
" 10_or_less \n",
" \n",
" \n",
" 3 \n",
" thirty+ \n",
" some_highschool \n",
" thirty+ \n",
" laborer \n",
" yes \n",
" two+ \n",
" murder \n",
" other \n",
" knife \n",
" no \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Tarrant \n",
" Hi ladies I wanted to tell ya'll how much I l... \n",
" 10+ \n",
" \n",
" \n",
" 4 \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder, kidnapping \n",
" other \n",
" hands \n",
" no \n",
" white \n",
" no \n",
" no \n",
" yes \n",
" no \n",
" 45+ \n",
" White \n",
" Montgomery \n",
" Lord forgive them. They don't know what they ... \n",
" 10+ \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" age_received education_level age_crime occupation prior_record \\\n",
"0 twenties some_highschool twenties laborer yes \n",
"1 thirty+ no_highschool thirty+ machine operator yes \n",
"2 thirty+ no_highschool twenties cabinet maker yes \n",
"3 thirty+ some_highschool thirty+ laborer yes \n",
"4 twenties some_highschool twenties laborer yes \n",
"\n",
" num_of_vic main_crime type_of_crime weapon co_defendants race_vic \\\n",
"0 one murder other cord no unkown \n",
"1 two+ murder other knife no black \n",
"2 one murder, robbery gun gun yes white \n",
"3 two+ murder other knife no white \n",
"4 one murder, kidnapping other hands no white \n",
"\n",
" vic_kid vic_male vic_female vic_police age race county \\\n",
"0 no no yes no 35-45 White El Paso \n",
"1 yes yes yes no 35-45 Black Dallas \n",
"2 no no yes no 35-45 Hispanic Johnson \n",
"3 no no yes no 45+ White Tarrant \n",
"4 no no yes no 45+ White Montgomery \n",
"\n",
" last_statement time_spent \n",
"0 Yeah, I want to address the Roundtree family ... 10+ \n",
"1 Umm, Pamela can you hear me Stephanie, Hardy,... 10+ \n",
"2 It's 6:09 on September 10th, Kayla and David,... 10_or_less \n",
"3 Hi ladies I wanted to tell ya'll how much I l... 10+ \n",
"4 Lord forgive them. They don't know what they ... 10+ "
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.head()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"yes 298\n",
"no 253\n",
"unknown 14\n",
"no 1\n",
"Name: prior_record, dtype: int64"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.prior_record.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"yes 298\n",
"no 254\n",
"unknown 14\n",
"Name: prior_record, dtype: int64"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#It looks like there is a space after one no therefore removing that and looking again to see if we just have 3 categories\n",
"death_row[\"prior_record\"] = death_row[\"prior_record\"].str.replace(\"no \", \"no\")\n",
"death_row.prior_record.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"white 298\n",
"unknown 106\n",
"hispanic 86\n",
"black 57\n",
"asian 9\n",
"unkown 4\n",
"middle eastern 2\n",
"black 2\n",
"white 1\n",
"samoan 1\n",
"Name: race_vic, dtype: int64"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.race_vic.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"white 299\n",
"unknown 110\n",
"hispanic 86\n",
"black 59\n",
"other 12\n",
"Name: race_vic, dtype: int64"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#We have decided to break race into white, hispanic, black, other, and unknown \n",
"death_row[\"race_vic\"] = death_row[\"race_vic\"].str.replace(\"white \", \"white\")\n",
"death_row[\"race_vic\"] = death_row[\"race_vic\"].str.replace(\"black \", \"black\")\n",
"death_row[\"race_vic\"] = death_row[\"race_vic\"].str.replace(\"unkown\", \"unknown\")\n",
"death_row[\"race_vic\"] = death_row[\"race_vic\"].str.replace(\"asian\", \"other\")\n",
"death_row[\"race_vic\"] = death_row[\"race_vic\"].str.replace(\"middle eastern\", \"other\")\n",
"death_row[\"race_vic\"] = death_row[\"race_vic\"].str.replace(\"samoan\", \"other\")\n",
"death_row.race_vic.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"White 250\n",
"Black 204\n",
"Hispanic 107\n",
"White 2\n",
"Other 2\n",
"Hispanic 1\n",
"Name: race, dtype: int64"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.race.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"white 252\n",
"black 204\n",
"hispanic 108\n",
"other 2\n",
"Name: race, dtype: int64"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#We have decided to break race into white, hispanic, black, other, and unknown \n",
"death_row[\"race\"] = death_row[\"race\"].str.replace(\"White\", \"white\")\n",
"death_row[\"race\"] = death_row[\"race\"].str.replace(\"white \", \"white\")\n",
"death_row[\"race\"] = death_row[\"race\"].str.replace(\"Black\", \"black\")\n",
"death_row[\"race\"] = death_row[\"race\"].str.replace(\"Hispanic\", \"hispanic\")\n",
"death_row[\"race\"] = death_row[\"race\"].str.replace(\"hispanic \", \"hispanic\")\n",
"death_row[\"race\"] = death_row[\"race\"].str.replace(\"Other\", \"other\")\n",
"\n",
"death_row.race.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"no 478\n",
"yes 50\n",
"no 32\n",
"yes 3\n",
" no 2\n",
"unknown 1\n",
"Name: vic_police, dtype: int64"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.vic_police.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"no 513\n",
"yes 53\n",
"Name: vic_police, dtype: int64"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row[\"vic_police\"] = death_row[\"vic_police\"].str.replace(\"no \", \"no\")\n",
"death_row[\"vic_police\"] = death_row[\"vic_police\"].str.replace(\"yes \", \"yes\")\n",
"death_row[\"vic_police\"] = death_row[\"vic_police\"].str.replace(\" no\", \"no\")\n",
"death_row[\"vic_police\"] = death_row[\"vic_police\"].str.replace(\"unknown\", \"no\")\n",
"death_row.vic_police.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gun 297\n",
"knife 78\n",
"hands 41\n",
"hands, knife 12\n",
"gun, knife 10\n",
" ... \n",
"cord, fireplace brush 1\n",
"cord, fire, gun 1\n",
"concrete 1\n",
"coat hangers 1\n",
"ace bandage 1\n",
"Name: weapon, Length: 82, dtype: int64"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.weapon.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"#Changing weapon to gun, knife, strangulation_item, other \n",
"#If a gun is used it will be the main weapon, then goes knife, then strangulation_item... \n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hands, knife\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"gun, knife\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"cord, fireplace brush\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"cord, fire, gun\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"coat hangers\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"ace bandage\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"clothes\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"concrete\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"blunt object\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"gun, water\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bag, gun\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"knife, rope\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"belt, club\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"sword\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"screwdriver\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"rock\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"gun, lamp\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"asphalt\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"board, strangulation_item\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"sissors\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"club, gun\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"strangulation_item, gun\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"gun, pipe\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hammer\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"car\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"gun, hands\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"cord\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"tool\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hammer, knife\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"fire, gun\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bat\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"fire\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, hands\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"unknown\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"gun, wire\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"board\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"knife, other\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"knife, pipe\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bar\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bathtub\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"rope\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"strangulation_item, hammer\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bar, knife\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"belt, fire\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, knife\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"pillow\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"strangulation_item, other\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"belt, other\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"axe\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"otherhtub\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"heroin\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"knife, water\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bag, other\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, other, knife\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"starvation\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"statuette\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"steel lock\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"pickax\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hands, strangulation_item\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"strangulation_item, hands\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"plastic tie wrap\", \"strangulation_item\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"poison\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"cellophane, gun, sink\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"beer bottle, hands\", \"hands\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hands, water\", \"hands\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"strangulation_item, ice pick\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hands, sand\", \"hands\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"club\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"knife, mug\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"ice pick, knife\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"pipe\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hatchet\", \"knife\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"chain\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"bumper jack\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"frying pan\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"river\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, gun, other\", \"gun\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, other\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other \", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, knife\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"other, hands\", \"other\")"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gun 324\n",
"knife 114\n",
"other 55\n",
"hands 44\n",
"strangulation_item 29\n",
"Name: weapon, dtype: int64"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.weapon.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gun 324\n",
"other 128\n",
"knife 114\n",
"Name: weapon, dtype: int64"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"hands\", \"other\")\n",
"death_row[\"weapon\"] = death_row[\"weapon\"].str.replace(\"strangulation_item\", \"other\")\n",
"death_row.weapon.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [],
"source": [
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial, robbery-serial, rape\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial, robbery-serial\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial, rape-serial, kidnapping\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial, rape, robbery\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial, rape-serial\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial, rape\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-serial\", \"murder\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"unknown\", \"murder\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-attempted, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder-attempted, escape\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, eluding arrest, kidnapping, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, kidnapping, rape\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, kidnappy, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, rape, robbery\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, kidnapping\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, eluding arrest, kidnapping\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, eluding arrest, rape\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, kidnapping, rape, robbery\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, kidnapping, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, kidnapping, ransom\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, kidnapping, rape\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, kidnapping\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, for hire, rape\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, for hire\", \"murder\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, insurance scam, rape\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, insurance scam\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft, rape\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, car theft\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, eluding arrest, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, eluding arrest\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, rape, robbery\", \"murder_rape_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, rape\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, mutilation-sexual\", \"murder_rape\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, identity theft\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, escape\", \"murder_other\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder, robbery\", \"murder_robbery\")\n",
"death_row[\"main_crime\"] = death_row[\"main_crime\"].str.replace(\"murder_robbery \", \"murder_robbery\")"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"murder_robbery 263\n",
"murder 118\n",
"murder_rape 87\n",
"murder_other 54\n",
"murder_rape_robbery 44\n",
"Name: main_crime, dtype: int64"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.main_crime.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" weapon \n",
" co_defendants \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" count \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 564 \n",
" 565 \n",
" \n",
" \n",
" unique \n",
" 3 \n",
" 5 \n",
" 3 \n",
" 78 \n",
" 3 \n",
" 2 \n",
" 5 \n",
" 2 \n",
" 3 \n",
" 2 \n",
" 5 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 113 \n",
" 454 \n",
" 2 \n",
" \n",
" \n",
" top \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" laborer \n",
" yes \n",
" one \n",
" murder_robbery \n",
" gun \n",
" gun \n",
" no \n",
" white \n",
" no \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" white \n",
" Harris \n",
" none \n",
" 10+ \n",
" \n",
" \n",
" freq \n",
" 308 \n",
" 222 \n",
" 299 \n",
" 206 \n",
" 298 \n",
" 354 \n",
" 263 \n",
" 324 \n",
" 324 \n",
" 328 \n",
" 299 \n",
" 460 \n",
" 356 \n",
" 329 \n",
" 513 \n",
" 245 \n",
" 252 \n",
" 128 \n",
" 101 \n",
" 284 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" age_received education_level age_crime occupation prior_record \\\n",
"count 566 566 566 566 566 \n",
"unique 3 5 3 78 3 \n",
"top twenties some_highschool twenties laborer yes \n",
"freq 308 222 299 206 298 \n",
"\n",
" num_of_vic main_crime type_of_crime weapon co_defendants race_vic \\\n",
"count 566 566 566 566 566 566 \n",
"unique 2 5 2 3 2 5 \n",
"top one murder_robbery gun gun no white \n",
"freq 354 263 324 324 328 299 \n",
"\n",
" vic_kid vic_male vic_female vic_police age race county \\\n",
"count 566 566 566 566 566 566 566 \n",
"unique 2 2 2 2 3 4 113 \n",
"top no yes yes no 35-45 white Harris \n",
"freq 460 356 329 513 245 252 128 \n",
"\n",
" last_statement time_spent \n",
"count 564 565 \n",
"unique 454 2 \n",
"top none 10+ \n",
"freq 101 284 "
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.describe()"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"death_row[\"occupation\"] = death_row[\"occupation\"].astype(\"object\")\n",
"death_row.loc[death_row[\"occupation\"] != \"laborer\", \"occupation\"] = \"other\""
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" age_received \n",
" education_level \n",
" age_crime \n",
" occupation \n",
" prior_record \n",
" num_of_vic \n",
" main_crime \n",
" type_of_crime \n",
" weapon \n",
" co_defendants \n",
" race_vic \n",
" vic_kid \n",
" vic_male \n",
" vic_female \n",
" vic_police \n",
" age \n",
" race \n",
" county \n",
" last_statement \n",
" time_spent \n",
" \n",
" \n",
" \n",
" \n",
" count \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 566 \n",
" 564 \n",
" 565 \n",
" \n",
" \n",
" unique \n",
" 3 \n",
" 5 \n",
" 3 \n",
" 2 \n",
" 3 \n",
" 2 \n",
" 5 \n",
" 2 \n",
" 3 \n",
" 2 \n",
" 5 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 113 \n",
" 454 \n",
" 2 \n",
" \n",
" \n",
" top \n",
" twenties \n",
" some_highschool \n",
" twenties \n",
" other \n",
" yes \n",
" one \n",
" murder_robbery \n",
" gun \n",
" gun \n",
" no \n",
" white \n",
" no \n",
" yes \n",
" yes \n",
" no \n",
" 35-45 \n",
" white \n",
" Harris \n",
" none \n",
" 10+ \n",
" \n",
" \n",
" freq \n",
" 308 \n",
" 222 \n",
" 299 \n",
" 360 \n",
" 298 \n",
" 354 \n",
" 263 \n",
" 324 \n",
" 324 \n",
" 328 \n",
" 299 \n",
" 460 \n",
" 356 \n",
" 329 \n",
" 513 \n",
" 245 \n",
" 252 \n",
" 128 \n",
" 101 \n",
" 284 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" age_received education_level age_crime occupation prior_record \\\n",
"count 566 566 566 566 566 \n",
"unique 3 5 3 2 3 \n",
"top twenties some_highschool twenties other yes \n",
"freq 308 222 299 360 298 \n",
"\n",
" num_of_vic main_crime type_of_crime weapon co_defendants race_vic \\\n",
"count 566 566 566 566 566 566 \n",
"unique 2 5 2 3 2 5 \n",
"top one murder_robbery gun gun no white \n",
"freq 354 263 324 324 328 299 \n",
"\n",
" vic_kid vic_male vic_female vic_police age race county \\\n",
"count 566 566 566 566 566 566 566 \n",
"unique 2 2 2 2 3 4 113 \n",
"top no yes yes no 35-45 white Harris \n",
"freq 460 356 329 513 245 252 128 \n",
"\n",
" last_statement time_spent \n",
"count 564 565 \n",
"unique 454 2 \n",
"top none 10+ \n",
"freq 101 284 "
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Now the data is ready to analyze/visualize/play with "
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['age_received',\n",
" 'education_level',\n",
" 'age_crime',\n",
" 'occupation',\n",
" 'prior_record',\n",
" 'num_of_vic',\n",
" 'main_crime',\n",
" 'type_of_crime',\n",
" 'weapon',\n",
" 'co_defendants',\n",
" 'race_vic',\n",
" 'vic_kid',\n",
" 'vic_male',\n",
" 'vic_female',\n",
" 'vic_police',\n",
" 'age',\n",
" 'race',\n",
" 'county',\n",
" 'time_spent']"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"column_names = list(death_row.columns)\n",
"column_names.remove(\"last_statement\")\n",
"column_names"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"def get_value_counts(df, column): \n",
" new_df = pd.DataFrame(df[column].value_counts())\n",
" new_df.columns = [\"count\"]\n",
" new_df[\"category\"] = new_df.index \n",
" new_df.reset_index(drop = True, inplace = True)\n",
" return new_df"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 308 \n",
" twenties \n",
" \n",
" \n",
" 1 \n",
" 218 \n",
" thirty+ \n",
" \n",
" \n",
" 2 \n",
" 40 \n",
" teens \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 308 twenties\n",
"1 218 thirty+\n",
"2 40 teens"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"age_received_df = get_value_counts(death_row, \"age_received\")\n",
"age_received_df"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"from wordcloud import WordCloud, ImageColorGenerator\n",
"from PIL import Image\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt \n",
"from matplotlib import cm \n",
"from colorspacious import cspace_converter\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: creates a bar graph\n",
"#Input: the df and title of the graph \n",
"#Output: the bar graph\n",
"def category_bar_plot(df, title, rotation): \n",
" with sns.plotting_context(\"talk\"):\n",
" graph = sns.barplot(y = \"count\", x = \"category\", data = df, \n",
" palette = \"GnBu_d\")\n",
" plt.title(title)\n",
" plt.xlabel(\"Category\")\n",
" plt.ylabel(\"Count\")\n",
" plt.xticks(rotation = rotation)\n",
" return plt"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(age_received_df, \"Age Received Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 222 \n",
" some_highschool \n",
" \n",
" \n",
" 1 \n",
" 173 \n",
" highschool \n",
" \n",
" \n",
" 2 \n",
" 94 \n",
" no_highschool \n",
" \n",
" \n",
" 3 \n",
" 41 \n",
" unknown \n",
" \n",
" \n",
" 4 \n",
" 36 \n",
" college \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 222 some_highschool\n",
"1 173 highschool\n",
"2 94 no_highschool\n",
"3 41 unknown\n",
"4 36 college"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"education_level_df = get_value_counts(death_row, \"education_level\")\n",
"education_level_df"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(education_level_df, \"Education Level Breakdown\", 90)"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 299 \n",
" twenties \n",
" \n",
" \n",
" 1 \n",
" 180 \n",
" thirty+ \n",
" \n",
" \n",
" 2 \n",
" 87 \n",
" teens \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 299 twenties\n",
"1 180 thirty+\n",
"2 87 teens"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"age_crime_df = get_value_counts(death_row, \"age_crime\")\n",
"age_crime_df"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(age_crime_df, \"Age at Time of Crime Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 360 \n",
" other \n",
" \n",
" \n",
" 1 \n",
" 206 \n",
" laborer \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 360 other\n",
"1 206 laborer"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"occupation_df = get_value_counts(death_row, \"occupation\")\n",
"occupation_df"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(occupation_df, \"Occupation Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 298 \n",
" yes \n",
" \n",
" \n",
" 1 \n",
" 254 \n",
" no \n",
" \n",
" \n",
" 2 \n",
" 14 \n",
" unknown \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 298 yes\n",
"1 254 no\n",
"2 14 unknown"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prior_record_df = get_value_counts(death_row, \"prior_record\")\n",
"prior_record_df"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(prior_record_df, \"Prior Record Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 354 \n",
" one \n",
" \n",
" \n",
" 1 \n",
" 212 \n",
" two+ \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 354 one\n",
"1 212 two+"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num_of_vic_df = get_value_counts(death_row, \"num_of_vic\")\n",
"num_of_vic_df"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(num_of_vic_df, \"Number of Crimes Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 263 \n",
" murder_robbery \n",
" \n",
" \n",
" 1 \n",
" 118 \n",
" murder \n",
" \n",
" \n",
" 2 \n",
" 87 \n",
" murder_rape \n",
" \n",
" \n",
" 3 \n",
" 54 \n",
" murder_other \n",
" \n",
" \n",
" 4 \n",
" 44 \n",
" murder_rape_robbery \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 263 murder_robbery\n",
"1 118 murder\n",
"2 87 murder_rape\n",
"3 54 murder_other\n",
"4 44 murder_rape_robbery"
]
},
"execution_count": 129,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_crime_df = get_value_counts(death_row, \"main_crime\")\n",
"main_crime_df"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(main_crime_df, \"Main Crime Breakdown\", 90)"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 324 \n",
" gun \n",
" \n",
" \n",
" 1 \n",
" 242 \n",
" other \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 324 gun\n",
"1 242 other"
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type_of_crime_df = get_value_counts(death_row, \"type_of_crime\")\n",
"type_of_crime_df"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 132,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(type_of_crime_df, \"Type of Crime Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 324 \n",
" gun \n",
" \n",
" \n",
" 1 \n",
" 128 \n",
" other \n",
" \n",
" \n",
" 2 \n",
" 114 \n",
" knife \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 324 gun\n",
"1 128 other\n",
"2 114 knife"
]
},
"execution_count": 133,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"weapon_df = get_value_counts(death_row, \"weapon\")\n",
"weapon_df"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(weapon_df, \"Main Weapon Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 135,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 328 \n",
" no \n",
" \n",
" \n",
" 1 \n",
" 238 \n",
" yes \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 328 no\n",
"1 238 yes"
]
},
"execution_count": 135,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"co_defendants_df = get_value_counts(death_row, \"co_defendants\")\n",
"co_defendants_df"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(co_defendants_df, \"Codefendant Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 299 \n",
" white \n",
" \n",
" \n",
" 1 \n",
" 110 \n",
" unknown \n",
" \n",
" \n",
" 2 \n",
" 86 \n",
" hispanic \n",
" \n",
" \n",
" 3 \n",
" 59 \n",
" black \n",
" \n",
" \n",
" 4 \n",
" 12 \n",
" other \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 299 white\n",
"1 110 unknown\n",
"2 86 hispanic \n",
"3 59 black\n",
"4 12 other"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"race_vic_df = get_value_counts(death_row, \"race_vic\")\n",
"race_vic_df"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(race_vic_df, \"Race of Victim Breakdown\", 90)"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 460 \n",
" no \n",
" \n",
" \n",
" 1 \n",
" 106 \n",
" yes \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 460 no\n",
"1 106 yes"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vic_kid_df = get_value_counts(death_row, \"vic_kid\")\n",
"vic_kid_df"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(vic_kid_df, \"Victim a Child Breakdown\", 90)"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 356 \n",
" yes \n",
" \n",
" \n",
" 1 \n",
" 210 \n",
" no \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 356 yes\n",
"1 210 no"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vic_male_df = get_value_counts(death_row, \"vic_male\")\n",
"vic_male_df"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 142,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(vic_male_df, \"Victim a Male Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 329 \n",
" yes \n",
" \n",
" \n",
" 1 \n",
" 237 \n",
" no \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 329 yes\n",
"1 237 no"
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vic_female_df = get_value_counts(death_row, \"vic_female\")\n",
"vic_female_df"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(vic_female_df, \"Victim a Female Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 513 \n",
" no \n",
" \n",
" \n",
" 1 \n",
" 53 \n",
" yes \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 513 no\n",
"1 53 yes"
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vic_police_df = get_value_counts(death_row, \"vic_police\")\n",
"vic_police_df"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(vic_police_df, \"Victim a Police Office Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 245 \n",
" 35-45 \n",
" \n",
" \n",
" 1 \n",
" 188 \n",
" 18-34 \n",
" \n",
" \n",
" 2 \n",
" 133 \n",
" 45+ \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 245 35-45\n",
"1 188 18-34\n",
"2 133 45+"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"age_df = get_value_counts(death_row, \"age\")\n",
"age_df"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(age_df, \"Age at Time of Execution Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 252 \n",
" white \n",
" \n",
" \n",
" 1 \n",
" 204 \n",
" black \n",
" \n",
" \n",
" 2 \n",
" 108 \n",
" hispanic \n",
" \n",
" \n",
" 3 \n",
" 2 \n",
" other \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 252 white\n",
"1 204 black\n",
"2 108 hispanic\n",
"3 2 other"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"race_df = get_value_counts(death_row, \"race\")\n",
"race_df"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(race_df, \"Race of Prisoner Breakdown\", 0)"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 128 \n",
" Harris \n",
" \n",
" \n",
" 1 \n",
" 59 \n",
" Dallas \n",
" \n",
" \n",
" 2 \n",
" 46 \n",
" Bexar \n",
" \n",
" \n",
" 3 \n",
" 42 \n",
" Tarrant \n",
" \n",
" \n",
" 4 \n",
" 15 \n",
" Montgomery \n",
" \n",
" \n",
" 5 \n",
" 14 \n",
" Jefferson \n",
" \n",
" \n",
" 6 \n",
" 13 \n",
" Nueces \n",
" \n",
" \n",
" 7 \n",
" 12 \n",
" Lubbock \n",
" \n",
" \n",
" 8 \n",
" 11 \n",
" Brazos \n",
" \n",
" \n",
" 9 \n",
" 11 \n",
" Smith \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 128 Harris\n",
"1 59 Dallas\n",
"2 46 Bexar\n",
"3 42 Tarrant\n",
"4 15 Montgomery\n",
"5 14 Jefferson\n",
"6 13 Nueces\n",
"7 12 Lubbock\n",
"8 11 Brazos\n",
"9 11 Smith"
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"county_df = get_value_counts(death_row, \"county\")\n",
"county_df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" category \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 284 \n",
" 10+ \n",
" \n",
" \n",
" 1 \n",
" 281 \n",
" 10_or_less \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count category\n",
"0 284 10+\n",
"1 281 10_or_less"
]
},
"execution_count": 152,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"time_spent_df = get_value_counts(death_row, \"time_spent\")\n",
"time_spent_df"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"category_bar_plot(time_spent_df, \"Time Spent on Death Row Breakdown\", 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualizing the last statements "
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter \n",
"import numpy as np\n",
"from wordcloud import WordCloud, ImageColorGenerator\n",
"from PIL import Image"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: to be creating a list of reviews, then joining the reviews together to a string and \n",
" #getting a count for each word in the string\n",
"#Input: df and column \n",
"#Output: a dictionary with each word and the count of the word\n",
"def creating_freq_list_from_df_to_dict(df, column):\n",
" reviews = df[column].tolist() \n",
" review_string = \" \".join(reviews)\n",
" review_string = review_string.split()\n",
" review_dict = Counter(review_string)\n",
" return review_dict"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: creates a word cloud that is in the shape of the mask passed in\n",
"#Input: the location where the mask image is saved, the frequency word dictionary, and the max # of words to include\n",
" #and the title of the plot \n",
"def create_word_cloud_with_mask(path_of_mask_image, dictionary, \n",
" max_num_words, title):\n",
" mask = np.array(Image.open(path_of_mask_image))\n",
" #creating the word cloud \n",
" word_cloud = WordCloud(background_color = \"white\", \n",
" max_words = max_num_words, \n",
" mask = mask, max_font_size = 125)\n",
" word_cloud.generate_from_frequencies(dictionary)\n",
" #creating the coloring for the word cloud \n",
" image_colors = ImageColorGenerator(mask)\n",
" plt.figure(figsize = [8,8])\n",
" plt.imshow(word_cloud.recolor(color_func = image_colors), \n",
" interpolation = \"bilinear\")\n",
" plt.title(title)\n",
" sns.set_context(\"poster\")\n",
" plt.axis(\"off\")\n",
" return plt"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: creates a df with two columns: word and count of the top 12 words\n",
"#Input: the word frequency dictionary \n",
"#Output: a df with the top x words \n",
"def word_freq_dict_to_df_top_words(dictionary, number_of_words_wanted): \n",
" df = pd.DataFrame.from_dict(dictionary,orient='index')\n",
" df.columns = [\"count\"]\n",
" df[\"word\"] = df.index\n",
" df.reset_index(drop = True, inplace = True)\n",
" df.sort_values(by=[\"count\"], ascending = False, inplace = True)\n",
" df = df[:number_of_words_wanted]\n",
" return(df)"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: creates a bar graph\n",
"#Input: the df and title of the graph \n",
"#Output: the bar graph\n",
"def top_words_bar_plot(df, title): \n",
" with sns.plotting_context(\"talk\"):\n",
" graph = sns.barplot(y = \"count\", x = \"word\", data = df, \n",
" palette = \"GnBu_d\")\n",
" plt.title(title)\n",
" plt.xlabel(\"Word\")\n",
" plt.ylabel(\"Count\")\n",
" plt.xticks(rotation = 90)\n",
" return plt"
]
},
{
"cell_type": "code",
"execution_count": 159,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: creates a df with two columns: word and count \n",
"#Input: the word frequency dictionary \n",
"#Output: a df\n",
"def word_freq_dict_to_df_all_words(dictionary): \n",
" df = pd.DataFrame.from_dict(dictionary,orient='index')\n",
" df.columns = [\"count\"]\n",
" df[\"word\"] = df.index\n",
" df.reset_index(drop = True, inplace = True)\n",
" df.sort_values(by=[\"count\"], ascending = False, inplace = True)\n",
" return(df)"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {},
"outputs": [],
"source": [
"#What the function does: Returns 2 statements: One with the total number of words and the other with the number \n",
" #of unique words \n",
"#Input: the frequency count dictionary \n",
"#output: 2 statements \n",
"def total_words_unique_words(dictionary): \n",
" eda_reviews_all_words = word_freq_dict_to_df_all_words(dictionary)\n",
" print(\"The total number of words is\", sum(eda_reviews_all_words[\"count\"]))\n",
" print(\"The total number of unique words is\", len(dictionary)) "
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [],
"source": [
"def creating_freq_list_from_df_to_dict_2(df, column):\n",
" reviews = df[column].tolist()\n",
" reviews = [review if (type(review) == str) else 'number' for review in reviews]\n",
" review_string = \" \".join(reviews)\n",
"# print(review_string)\n",
" review_string = review_string.split()\n",
" review_dict = Counter(review_string)\n",
" return review_dict\n",
"\n",
"last_statements_dic = creating_freq_list_from_df_to_dict_2(death_row, \"last_statement\")"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {},
"outputs": [],
"source": [
"#http://www.transparentpng.com/details/scroll-transparent-image-_4493.html\n",
"# create_word_cloud_with_mask(\"scroll3.png\", last_statements_dic, 750, \"Word Cloud Prior to Cleaning\")"
]
},
{
"cell_type": "code",
"execution_count": 185,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" word \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" 3085 \n",
" i \n",
" \n",
" \n",
" 18 \n",
" 1608 \n",
" you \n",
" \n",
" \n",
" 3 \n",
" 1569 \n",
" to \n",
" \n",
" \n",
" 8 \n",
" 1325 \n",
" and \n",
" \n",
" \n",
" 5 \n",
" 1177 \n",
" the \n",
" \n",
" \n",
" 26 \n",
" 837 \n",
" my \n",
" \n",
" \n",
" 10 \n",
" 760 \n",
" for \n",
" \n",
" \n",
" 15 \n",
" 725 \n",
" that \n",
" \n",
" \n",
" 29 \n",
" 705 \n",
" love \n",
" \n",
" \n",
" 33 \n",
" 659 \n",
" all \n",
" \n",
" \n",
" 39 \n",
" 616 \n",
" me \n",
" \n",
" \n",
" 111 \n",
" 598 \n",
" of \n",
" \n",
" \n",
" 45 \n",
" 489 \n",
" am \n",
" \n",
" \n",
" 23 \n",
" 460 \n",
" have \n",
" \n",
" \n",
" 51 \n",
" 451 \n",
" is \n",
" \n",
" \n",
" 94 \n",
" 432 \n",
" a \n",
" \n",
" \n",
" 123 \n",
" 423 \n",
" in \n",
" \n",
" \n",
" 21 \n",
" 377 \n",
" it \n",
" \n",
" \n",
" 20 \n",
" 373 \n",
" this \n",
" \n",
" \n",
" 7 \n",
" 325 \n",
" family \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count word\n",
"1 3085 i\n",
"18 1608 you\n",
"3 1569 to\n",
"8 1325 and\n",
"5 1177 the\n",
"26 837 my\n",
"10 760 for\n",
"15 725 that\n",
"29 705 love\n",
"33 659 all\n",
"39 616 me\n",
"111 598 of\n",
"45 489 am\n",
"23 460 have\n",
"51 451 is\n",
"94 432 a\n",
"123 423 in\n",
"21 377 it\n",
"20 373 this\n",
"7 325 family"
]
},
"execution_count": 185,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_words = word_freq_dict_to_df_top_words(last_statements_dic, 20)\n",
"top_words"
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 186,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"top_words_bar_plot(top_words, \"Top 20 Words \\n Prior to Cleaning and Separating\")"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The total number of words is 41690\n",
"The total number of unique words is 3098\n"
]
}
],
"source": [
"total_words_unique_words(last_statements_dic)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It was decided to change all personal pronouns to \"first_person_pronounds\" and all other pronouns to \"pronoun\". The belief is that different types of criminal might speak of themselves versus other criminals. Punctuation will be removed prior to any changes and all words will be converted to lowercase. "
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.lower()\n",
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(r\"[^\\w^\\s]\", \"\")\n",
"death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(r\"[0-9]+\", \"\")"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {},
"outputs": [],
"source": [
"first_person_pronouns = [\" i \", \" me \", \" mine \", \" my \", \" we \", \" our \", \" us \", \" ours \"]\n",
"pronouns = [\" you \", \" he \", \" she \", \" it \", \" they \", \" him \", \" her \", \" them \", \" your \", \" yours \", \" his \", \" hers \", \" its \"]"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [],
"source": [
"for word in first_person_pronouns: \n",
" death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(word, \" first_person_pronoun \") "
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [],
"source": [
"for word in pronouns: \n",
" death_row[\"last_statement\"] = death_row[\"last_statement\"].str.replace(word, \" pronoun \") "
]
},
{
"cell_type": "code",
"execution_count": 194,
"metadata": {},
"outputs": [],
"source": [
"last_statements_dic = creating_freq_list_from_df_to_dict_2(death_row, \"last_statement\")\n",
"#http://www.transparentpng.com/details/scroll-transparent-image-_4493.html\n",
"# create_word_cloud_with_mask(\"scroll3.png\", last_statements_dic, 750, \"Word Cloud Prior to Cleaning\")"
]
},
{
"cell_type": "code",
"execution_count": 195,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" count \n",
" word \n",
" \n",
" \n",
" \n",
" \n",
" 1 \n",
" 4732 \n",
" first_person_pronoun \n",
" \n",
" \n",
" 18 \n",
" 2924 \n",
" pronoun \n",
" \n",
" \n",
" 3 \n",
" 1569 \n",
" to \n",
" \n",
" \n",
" 8 \n",
" 1325 \n",
" and \n",
" \n",
" \n",
" 5 \n",
" 1177 \n",
" the \n",
" \n",
" \n",
" 10 \n",
" 760 \n",
" for \n",
" \n",
" \n",
" 15 \n",
" 725 \n",
" that \n",
" \n",
" \n",
" 27 \n",
" 705 \n",
" love \n",
" \n",
" \n",
" 31 \n",
" 659 \n",
" all \n",
" \n",
" \n",
" 105 \n",
" 598 \n",
" of \n",
" \n",
" \n",
" 41 \n",
" 489 \n",
" am \n",
" \n",
" \n",
" 22 \n",
" 460 \n",
" have \n",
" \n",
" \n",
" 47 \n",
" 451 \n",
" is \n",
" \n",
" \n",
" 88 \n",
" 432 \n",
" a \n",
" \n",
" \n",
" 117 \n",
" 423 \n",
" in \n",
" \n",
" \n",
" 20 \n",
" 373 \n",
" this \n",
" \n",
" \n",
" 7 \n",
" 325 \n",
" family \n",
" \n",
" \n",
" 76 \n",
" 314 \n",
" know \n",
" \n",
" \n",
" 135 \n",
" 299 \n",
" be \n",
" \n",
" \n",
" 103 \n",
" 281 \n",
" not \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" count word\n",
"1 4732 first_person_pronoun\n",
"18 2924 pronoun\n",
"3 1569 to\n",
"8 1325 and\n",
"5 1177 the\n",
"10 760 for\n",
"15 725 that\n",
"27 705 love\n",
"31 659 all\n",
"105 598 of\n",
"41 489 am\n",
"22 460 have\n",
"47 451 is\n",
"88 432 a\n",
"117 423 in\n",
"20 373 this\n",
"7 325 family\n",
"76 314 know\n",
"135 299 be\n",
"103 281 not"
]
},
"execution_count": 195,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_words = word_freq_dict_to_df_top_words(last_statements_dic, 20)\n",
"top_words"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 196,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"top_words_bar_plot(top_words, \"Top 20 Words\")"
]
},
{
"cell_type": "code",
"execution_count": 197,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The total number of words is 41690\n",
"The total number of unique words is 3092\n"
]
}
],
"source": [
"total_words_unique_words(last_statements_dic)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The last statements will be tokenized and any words less than 3 character will be removed. The last statements will then be stemmed using the snowball stemmer. "
]
},
{
"cell_type": "code",
"execution_count": 201,
"metadata": {},
"outputs": [],
"source": [
"import nltk\n",
"from nltk.stem.snowball import SnowballStemmer\n",
"\n",
"def tokenize_last_statement(statement):\n",
" try:\n",
" return nltk.word_tokenize(statement)\n",
" except:\n",
" return 'error'\n",
"death_row[\"last_statement\"] = death_row.apply(lambda row: tokenize_last_statement(row[\"last_statement\"]), axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": 202,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 [yeah, first_person_pronoun, want, to, address...\n",
"1 [umm, pamela, can, pronoun, hear, first_person...\n",
"2 [its, on, september, th, kayla, and, david, fi...\n",
"3 [hi, ladies, first_person_pronoun, wanted, to,...\n",
"4 [lord, forgive, pronoun, pronoun, dont, know, ...\n",
"5 [none]\n",
"6 [yes, sir, that, will, be, five, dollars, firs...\n",
"7 [to, first_person_pronoun, friends, and, famil...\n",
"8 [yes, sir, first_person_pronoun, would, like, ...\n",
"9 [yes, sir, dear, heavenly, father, please, for...\n",
"Name: last_statement, dtype: object"
]
},
"execution_count": 202,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"death_row.last_statement.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
]
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [],
"source": [
"death_row.to_csv('death_row_discritized.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}