{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import os\n", "def get_data_from_files(path):\n", " directory = os.listdir(path)\n", " results = []\n", " filenames = []\n", " for file in directory:\n", " f=open(path+file)\n", " filenames.append(file)\n", " results.append(f.read())\n", " f.close()\n", " return results, filenames\n", "\n", "inmates, filenames = get_data_from_files('FinalProject/inmates/')\n", "\n", "import pandas as pd\n", "import numpy as np\n", "df = pd.DataFrame(inmates, filenames)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df.reset_index(inplace=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_name
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnny
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBetty
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDaniel
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohn
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohn
..................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGerald
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharles
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaude
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolando
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliver
\n", "

380 rows × 5 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name \n", "0 Anderson Johnny \n", "1 Beets Betty \n", "2 Hittle Daniel \n", "3 Satterwhite John \n", "4 Chavez John \n", ".. ... ... \n", "375 Mitchell Gerald \n", "376 Rumbaugh Charles \n", "377 Jones Claude \n", "378 Ruiz Rolando \n", "379 Cruz Oliver \n", "\n", "[380 rows x 5 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['inmate_number'] = df.apply(lambda x: x['index'].split('_')[2], axis=1)\n", "df['last_name'] = df.apply(lambda x: x['index'].split('_')[4].split('.')[0], axis=1)\n", "df['first_name'] = df.apply(lambda x: x['index'].split('_')[3], axis=1)\n", "df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import re\n", "df['clean'] = df.apply(lambda x: re.sub(r'[\\W_]+', ' ', x[0].lower()), axis=1)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_nameclean
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnnyname dohnny anderson pop 12 28 59 received cou...
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBettyname bettyloubeets rio dob 03 12 37 received 1...
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDanielname daniel joe hittle d r 981 v pop 3 1 50 re...
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohnname john thomas satterwhite d r 651 dob 12 29...
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohnname john chavez d r 999186 dob 04 27 68 recei...
.....................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGeraldname gerald lee mitchell d r 838 dob 12 27 67 ...
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharlescharles francis rumbaugh execution 555 date se...
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaudeclaude howard jones 980 name d r pop 9 24 40 r...
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolandoname roland ruiz jr d r 999145 dob 07 04 72 re...
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliverdavid oliver cruz d r ao name dob 5 18 67 rece...
\n", "

380 rows × 6 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name clean \n", "0 Anderson Johnny name dohnny anderson pop 12 28 59 received cou... \n", "1 Beets Betty name bettyloubeets rio dob 03 12 37 received 1... \n", "2 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re... \n", "3 Satterwhite John name john thomas satterwhite d r 651 dob 12 29... \n", "4 Chavez John name john chavez d r 999186 dob 04 27 68 recei... \n", ".. ... ... ... \n", "375 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ... \n", "376 Rumbaugh Charles charles francis rumbaugh execution 555 date se... \n", "377 Jones Claude claude howard jones 980 name d r pop 9 24 40 r... \n", "378 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re... \n", "379 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece... \n", "\n", "[380 rows x 6 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "def get_occupation(summary):\n", " try:\n", " p = re.compile(r'(?<=occupation)(\\W.*?)(?=\\s)')\n", " r = p.search(summary).group().strip()\n", " if 'prior' in r or len(r) < 3:\n", " return 'none_listed'\n", " else:\n", " return r\n", " except:\n", " return 'none_listed'\n", "\n", "occupations = [get_occupation(summary) for summary in df['clean'].values]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['none_listed',\n", " 'cashier',\n", " 'welder',\n", " 'mechanic',\n", " 'painter',\n", " 'laborer',\n", " 'education',\n", " 'laborer',\n", " 'borex',\n", " 'press',\n", " 'truck',\n", " 'mechanic',\n", " 'millwright',\n", " 'laborer',\n", " 'construction',\n", " 'education',\n", " 'laborer',\n", " 'none_listed',\n", " 'plumber',\n", " 'sculptor',\n", " 'iron',\n", " 'bull',\n", " 'laborer',\n", " 'none_listed',\n", " 'carpenter',\n", " 'brick',\n", " 'truck',\n", " 'auto',\n", " 'direct',\n", " 'restaurant',\n", " 'mechanic',\n", " 'fork',\n", " 'forklift',\n", " 'janitor',\n", " 'construction',\n", " 'machinist',\n", " 'salesman',\n", " 'construction',\n", " 'musician',\n", " 'musicia',\n", " 'plumber',\n", " 'construction',\n", " 'roofer',\n", " 'education',\n", " 'laborer',\n", " 'press',\n", " 'laborer',\n", " 'drywaller',\n", " 'none_listed',\n", " 'barber',\n", " 'construction',\n", " 'education',\n", " 'tankerman',\n", " 'bartender',\n", " 'roofer',\n", " 'welder',\n", " 'mechanic',\n", " 'computer',\n", " 'laborer',\n", " 'laborer',\n", " 'roofer',\n", " 'laborer',\n", " 'education',\n", " 'none_listed',\n", " 'laborer',\n", " 'produce',\n", " 'gardener',\n", " 'truck',\n", " 'landscaping',\n", " 'laborer',\n", " '2porer',\n", " 'analyst',\n", " 'antique',\n", " 'auto',\n", " 'welder',\n", " 'clectr',\n", " 'none_listed',\n", " 'clecitician',\n", " 'nurses',\n", " 'laborer',\n", " 'painter',\n", " 'laborer',\n", " 'oachi',\n", " 'electrician',\n", " 'carpenter',\n", " 'maintenance',\n", " 'truck',\n", " 'cabinet',\n", " 'security',\n", " 'laborer',\n", " 'waiter',\n", " 'construction',\n", " 'truck',\n", " 'laborer',\n", " 'electrician',\n", " 'laborer',\n", " 'laborer',\n", " 'correctional',\n", " 'construction',\n", " 'laborer',\n", " 'laborer',\n", " 'education',\n", " 'edckisyes',\n", " 'unemployed',\n", " 'landscaper',\n", " 'jaborer',\n", " 'laborer',\n", " 'none_listed',\n", " 'none_listed',\n", " 'paint',\n", " 'general',\n", " 'telemarketing',\n", " 'laborer',\n", " 'none_listed',\n", " 'construction',\n", " 'auto',\n", " 'laborer',\n", " 'informant',\n", " 'roofer',\n", " 'laborer',\n", " 'truck',\n", " 'telephone',\n", " 'unemployed',\n", " 'electricianfmarketing',\n", " 'student',\n", " 'none_listed',\n", " 'laborer',\n", " 'none_listed',\n", " 'laborer',\n", " 'laborer',\n", " 'salesman',\n", " 'welder',\n", " 'welder',\n", " 'clectrician',\n", " 'brick',\n", " 'food',\n", " 'food',\n", " 'laborer',\n", " 'metal',\n", " 'none_listed',\n", " 'deliveryman',\n", " 'laborer',\n", " 'construction',\n", " 'warehouse',\n", " 'jaborer',\n", " 'laborer',\n", " 'delivery',\n", " 'cement',\n", " 'construction',\n", " 'carpenter',\n", " 'food',\n", " 'paint',\n", " 'brick',\n", " 'machine',\n", " 'education',\n", " 'heavy',\n", " 'accounting',\n", " 'asst',\n", " 'laborer',\n", " 'mechanic',\n", " 'govt',\n", " 'sales',\n", " 'clerk',\n", " 'cook',\n", " 'construction',\n", " 'laborer',\n", " 'none_listed',\n", " 'mechanic',\n", " '1aborer',\n", " 'meat',\n", " 'painter',\n", " 'none_listed',\n", " 'none_listed',\n", " 'retail',\n", " 'laborer',\n", " 'laborer',\n", " 'electrician',\n", " 'car',\n", " 'none_listed',\n", " 'none_listed',\n", " 'mechanic',\n", " 'farmer',\n", " 'machinist',\n", " 'baker',\n", " 'commercial',\n", " 'video',\n", " 'machinist',\n", " 'machinist',\n", " 'laborer',\n", " 'none_listed',\n", " 'education',\n", " 'janitor',\n", " 'electrician',\n", " 'cashier',\n", " 'small',\n", " 'cable',\n", " 'laborer',\n", " 'welder',\n", " 'laborer',\n", " 'mechanic',\n", " 'radiator',\n", " 'mechanic',\n", " 'shrimper',\n", " 'paint',\n", " 'none_listed',\n", " 'landscaping',\n", " 'none_listed',\n", " 'none_listed',\n", " 'cook',\n", " 'none_listed',\n", " 'feed',\n", " 'auto',\n", " 'sales',\n", " 'laborer',\n", " 'student',\n", " 'cement',\n", " 'none_listed',\n", " 'laborer',\n", " 'laborer',\n", " 'cook',\n", " 'none_listed',\n", " 'none_listed',\n", " 'auto',\n", " 'drywaller',\n", " 'none_listed',\n", " 'painter',\n", " 'construction',\n", " 'laborer',\n", " 'clectrician',\n", " 'laborer',\n", " 'pax',\n", " 'electrician',\n", " 'laborer',\n", " 'nursing',\n", " 'laborer',\n", " 'produce',\n", " 'jaborer',\n", " 'none_listed',\n", " 'sales',\n", " 'anto',\n", " 'custom',\n", " 'electrician',\n", " 'roofer',\n", " 'truck',\n", " 'none_listed',\n", " 'warehouseman',\n", " 'auto',\n", " 'laborer',\n", " 'welder',\n", " 'laborer',\n", " 'laborer',\n", " 'carpenter',\n", " 'laborer',\n", " 'stocker',\n", " 'roofer',\n", " 'cashier',\n", " 'laborer',\n", " 'labor',\n", " 'stocker',\n", " 'oilfield',\n", " 'none_listed',\n", " 'painter',\n", " 'clectrician',\n", " 'restaurant',\n", " 'laborer',\n", " 'computer',\n", " 'none_listed',\n", " 'electrician',\n", " 'sales',\n", " 'maintenance',\n", " 'welder',\n", " 'carpenter',\n", " 'physical',\n", " 'mechanic',\n", " 'gump',\n", " 'jaborer',\n", " 'echanic',\n", " 'none_listed',\n", " 'carpenter',\n", " 'laborer',\n", " 'apprentice',\n", " 'electrician',\n", " 'welder',\n", " 'sales',\n", " 'laborer',\n", " 'laborer',\n", " 'truck',\n", " 'carpenter',\n", " 'auto',\n", " 'welder',\n", " 'education',\n", " 'stockbroker',\n", " 'general',\n", " 'landscaping',\n", " 'construct',\n", " 'construction',\n", " 'none_listed',\n", " 'carpenter',\n", " 'laborer',\n", " 'oil',\n", " 'laborer',\n", " 'paint',\n", " 'mechanic',\n", " 'mover',\n", " 'student',\n", " 'electrician',\n", " 'fork',\n", " 'ast',\n", " 'janitorial',\n", " 'receiving',\n", " 'auto',\n", " 'chemical',\n", " 'welder',\n", " 'motorcycle',\n", " 'mechanic',\n", " 'laborer',\n", " 'cook',\n", " 'electrician',\n", " 'heavy',\n", " 'none_listed',\n", " 'mechanic',\n", " 'mechanic',\n", " 'iron',\n", " 'auto',\n", " 'roofer',\n", " 'laborer',\n", " 'farm',\n", " 'electrician',\n", " 'cook',\n", " 'laborer',\n", " 'none_listed',\n", " 'cook',\n", " 'manager',\n", " 'general',\n", " 'laborer',\n", " 'insurance',\n", " 'cook',\n", " 'none_listed',\n", " 'laborer',\n", " 'painter',\n", " 'mechanic',\n", " 'education',\n", " 'office',\n", " 'ghneck',\n", " 'carpenter',\n", " 'delivery',\n", " 'security',\n", " 'cook',\n", " 'landscaping',\n", " 'diesel',\n", " 'laborer',\n", " 'laborer',\n", " 'construction',\n", " 'roughneck',\n", " 'construction',\n", " 'auto',\n", " 'education',\n", " 'construction',\n", " 'dishwasher',\n", " 'factory',\n", " 'laborer',\n", " 'carpenter',\n", " 'drywall',\n", " 'cement',\n", " 'none_listed',\n", " 'brickmason',\n", " 'laborer',\n", " 'roofer',\n", " 'construction',\n", " 'telemarketing',\n", " 'laborer',\n", " 'oilfield',\n", " 'clerical',\n", " 'laborer',\n", " 'dispatcher',\n", " 'carpenter',\n", " 'none_listed',\n", " 'electrician',\n", " 'laborer',\n", " 'laborer']" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "occupations" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "df['occupation'] = occupations" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_namecleanoccupation
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnnyname dohnny anderson pop 12 28 59 received cou...none_listed
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBettyname bettyloubeets rio dob 03 12 37 received 1...cashier
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDanielname daniel joe hittle d r 981 v pop 3 1 50 re...welder
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohnname john thomas satterwhite d r 651 dob 12 29...mechanic
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohnname john chavez d r 999186 dob 04 27 68 recei...painter
........................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGeraldname gerald lee mitchell d r 838 dob 12 27 67 ...carpenter
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharlescharles francis rumbaugh execution 555 date se...none_listed
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaudeclaude howard jones 980 name d r pop 9 24 40 r...electrician
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolandoname roland ruiz jr d r 999145 dob 07 04 72 re...laborer
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliverdavid oliver cruz d r ao name dob 5 18 67 rece...laborer
\n", "

380 rows × 7 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name \\\n", "0 Anderson Johnny \n", "1 Beets Betty \n", "2 Hittle Daniel \n", "3 Satterwhite John \n", "4 Chavez John \n", ".. ... ... \n", "375 Mitchell Gerald \n", "376 Rumbaugh Charles \n", "377 Jones Claude \n", "378 Ruiz Rolando \n", "379 Cruz Oliver \n", "\n", " clean occupation \n", "0 name dohnny anderson pop 12 28 59 received cou... none_listed \n", "1 name bettyloubeets rio dob 03 12 37 received 1... cashier \n", "2 name daniel joe hittle d r 981 v pop 3 1 50 re... welder \n", "3 name john thomas satterwhite d r 651 dob 12 29... mechanic \n", "4 name john chavez d r 999186 dob 04 27 68 recei... painter \n", ".. ... ... \n", "375 name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter \n", "376 charles francis rumbaugh execution 555 date se... none_listed \n", "377 claude howard jones 980 name d r pop 9 24 40 r... electrician \n", "378 name roland ruiz jr d r 999145 dob 07 04 72 re... laborer \n", "379 david oliver cruz d r ao name dob 5 18 67 rece... laborer \n", "\n", "[380 rows x 7 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_namecleanoccupationprior_record
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnnyname dohnny anderson pop 12 28 59 received cou...none_listedno
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBettyname bettyloubeets rio dob 03 12 37 received 1...cashierno
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDanielname daniel joe hittle d r 981 v pop 3 1 50 re...welderyes
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohnname john thomas satterwhite d r 651 dob 12 29...mechanicyes
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohnname john chavez d r 999186 dob 04 27 68 recei...painteryes
...........................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGeraldname gerald lee mitchell d r 838 dob 12 27 67 ...carpenteryes
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharlescharles francis rumbaugh execution 555 date se...none_listednone_listed
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaudeclaude howard jones 980 name d r pop 9 24 40 r...electricianyes
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolandoname roland ruiz jr d r 999145 dob 07 04 72 re...laboreryes
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliverdavid oliver cruz d r ao name dob 5 18 67 rece...laborerno
\n", "

380 rows × 8 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name \\\n", "0 Anderson Johnny \n", "1 Beets Betty \n", "2 Hittle Daniel \n", "3 Satterwhite John \n", "4 Chavez John \n", ".. ... ... \n", "375 Mitchell Gerald \n", "376 Rumbaugh Charles \n", "377 Jones Claude \n", "378 Ruiz Rolando \n", "379 Cruz Oliver \n", "\n", " clean occupation \\\n", "0 name dohnny anderson pop 12 28 59 received cou... none_listed \n", "1 name bettyloubeets rio dob 03 12 37 received 1... cashier \n", "2 name daniel joe hittle d r 981 v pop 3 1 50 re... welder \n", "3 name john thomas satterwhite d r 651 dob 12 29... mechanic \n", "4 name john chavez d r 999186 dob 04 27 68 recei... painter \n", ".. ... ... \n", "375 name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter \n", "376 charles francis rumbaugh execution 555 date se... none_listed \n", "377 claude howard jones 980 name d r pop 9 24 40 r... electrician \n", "378 name roland ruiz jr d r 999145 dob 07 04 72 re... laborer \n", "379 david oliver cruz d r ao name dob 5 18 67 rece... laborer \n", "\n", " prior_record \n", "0 no \n", "1 no \n", "2 yes \n", "3 yes \n", "4 yes \n", ".. ... \n", "375 yes \n", "376 none_listed \n", "377 yes \n", "378 yes \n", "379 no \n", "\n", "[380 rows x 8 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_priors(summary):\n", " try:\n", " text = re.compile(r'(?<=record)(\\W.*?)(?=\\s)')\n", " result = text.search(summary).group().strip()\n", " return 'no' if 'none' in result else 'yes'\n", " except:\n", " return 'none_listed'\n", "\n", "priors = [get_priors(summary) for summary in df['clean'].values]\n", "df['prior_record'] = priors\n", "df" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "def get_edu(summary):\n", " try:\n", " text = re.compile(r'(?<=education)(.*?)(years|yrs|ged|prior)')\n", " result = text.search(summary).group().strip()\n", " number = re.compile(r'\\d+')\n", " number_result = number.search(result).group()\n", "# print(number_result)\n", " return str(number_result) + \" years\"\n", "# return 'no' if 'none' in result else 'yes'\n", " except:\n", " return 'none_listed'\n", "\n", "edu = [get_edu(summary) for summary in df['clean'].values]\n", "df['education_level'] = edu\n", "df.to_csv('V8_fromphotos.csv')" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "1\n", "1\n", "3\n", "1\n", "0\n", "3\n", "0\n", "3\n", "3\n", "1\n", "2\n", "4\n", "4\n", "1\n", "2\n", "2\n", "2\n", "1\n", "2\n", "0\n", "1\n", "0\n", "2\n", "1\n", "1\n", "0\n", "2\n", "2\n", "2\n", "1\n", "3\n", "0\n", "1\n", "3\n", "0\n", "2\n", "0\n", "3\n", "3\n", "2\n", "1\n", "2\n", "2\n", "1\n", "0\n", "0\n", "1\n", "6\n", "2\n", "1\n", "2\n", "8\n", "3\n", "0\n", "0\n", "3\n", "1\n", "4\n", "1\n", "3\n", "2\n", "3\n", "6\n", "4\n", "0\n", "0\n", "1\n", "0\n", "0\n", "3\n", "1\n", "0\n", "1\n", "7\n", "3\n", "0\n", "0\n", "1\n", "3\n", "0\n", "2\n", "3\n", "0\n", "1\n", "3\n", "0\n", "1\n", "1\n", "3\n", "4\n", "8\n", "1\n", "0\n", "4\n", "4\n", "0\n", "0\n", "2\n", "2\n", "0\n", "0\n", "1\n", "0\n", "5\n", "1\n", "2\n", "1\n", "2\n", "2\n", "0\n", "2\n", "1\n", "1\n", "0\n", "2\n", "0\n", "1\n", "1\n", "0\n", "8\n", "5\n", "2\n", "2\n", "2\n", "3\n", "4\n", "1\n", "6\n", "1\n", "1\n", "1\n", "0\n", "2\n", "0\n", "4\n", "3\n", "1\n", "2\n", "0\n", "3\n", "0\n", "1\n", "5\n", "1\n", "1\n", "2\n", "2\n", "4\n", "0\n", "1\n", "2\n", "1\n", "1\n", "2\n", "3\n", "0\n", "3\n", "1\n", "0\n", "0\n", "2\n", "1\n", "1\n", "1\n", "0\n", "1\n", "1\n", "4\n", "1\n", "5\n", "1\n", "7\n", "6\n", "2\n", "1\n", "1\n", "1\n", "4\n", "0\n", "1\n", "1\n", "2\n", "0\n", "0\n", "3\n", "0\n", "5\n", "1\n", "2\n", "0\n", "2\n", "2\n", "0\n", "2\n", "3\n", "0\n", "2\n", "5\n", "1\n", "1\n", "1\n", "4\n", "0\n", "1\n", "2\n", "1\n", "1\n", "1\n", "0\n", "6\n", "5\n", "0\n", "1\n", "2\n", "1\n", "0\n", "1\n", "1\n", "1\n", "1\n", "1\n", "3\n", "0\n", "1\n", "2\n", "3\n", "0\n", "2\n", "2\n", "3\n", "0\n", "2\n", "5\n", "2\n", "2\n", "0\n", "2\n", "1\n", "1\n", "1\n", "2\n", "2\n", "3\n", "2\n", "3\n", "1\n", "3\n", "1\n", "2\n", "1\n", "1\n", "2\n", "7\n", "4\n", "0\n", "5\n", "1\n", "1\n", "2\n", "0\n", "1\n", "0\n", "10\n", "3\n", "2\n", "1\n", "2\n", "0\n", "1\n", "0\n", "1\n", "3\n", "0\n", "3\n", "3\n", "2\n", "1\n", "2\n", "3\n", "1\n", "0\n", "2\n", "3\n", "2\n", "1\n", "3\n", "3\n", "0\n", "2\n", "0\n", "2\n", "3\n", "0\n", "0\n", "0\n", "0\n", "1\n", "1\n", "0\n", "1\n", "2\n", "1\n", "1\n", "2\n", "3\n", "3\n", "0\n", "6\n", "2\n", "4\n", "2\n", "6\n", "2\n", "0\n", "1\n", "2\n", "2\n", "3\n", "3\n", "0\n", "3\n", "1\n", "1\n", "2\n", "1\n", "3\n", "4\n", "2\n", "1\n", "1\n", "3\n", "9\n", "3\n", "1\n", "1\n", "0\n", "1\n", "0\n", "0\n", "1\n", "2\n", "2\n", "0\n", "2\n", "2\n", "0\n", "2\n", "0\n", "1\n", "0\n", "1\n", "2\n", "1\n", "0\n", "1\n", "0\n", "8\n", "3\n", "1\n", "6\n", "2\n", "4\n", "2\n", "0\n", "2\n", "1\n", "2\n", "0\n", "0\n", "3\n", "2\n", "0\n", "0\n", "2\n", "0\n", "0\n", "1\n", "1\n", "2\n" ] } ], "source": [ "def get_vics(summary):\n", " try:\n", " text = re.compile(r'(male|men|man)')\n", " vics = text.findall(summary)\n", " print(len(vics))\n", "\n", " except:\n", " print('nope')\n", " \n", " \n", "vic_f = [get_vics(summary) for summary in df['clean'].values] " ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "def get_vics(summary):\n", " try:\n", "# text = re.compile(r'(?<=race of victim s)\\W(black|white|hispanic|hite|asian)(.*?)(male|female)')\n", " text = re.compile(r'(?<=race of victim s)(.*?)(male|female)')\n", " result = text.search(summary).group().strip()\n", " result_s = result.split(' ')\n", " if len(result_s) > 3:\n", " return 'error'\n", " else:\n", " return result_s\n", "# if len(result_s) >\n", "# print(result)\n", "# number = re.compile(r'\\d+')\n", "# number_result = number.search(result).group()\n", "# # print(number_result)\n", "# return str(number_result) + \" years\"\n", "# # return 'no' if 'none' in result else 'yes'\n", " except:\n", " return ['none_listed']\n", "\n", "vic_deets = [get_vics(summary) for summary in df['clean'].values]\n", "df['vic_deets'] = vic_deets\n", "# df['race_vic'] = df.apply()\n", "\n", "\n", "# if len(summary) == 3:\n", "# summary[0]\n", "multiple_vics = [summary[0] if len(summary) == 3 else 'no' for summary in df['vic_deets'].values]\n", "female_vics = ['yes' if 'female' in summary else 'no' for summary in df['vic_deets'].values]\n", "male_vics = ['yes' if 'male' in summary else 'no' for summary in df['vic_deets'].values]\n" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_namecleanoccupationprior_recordeducation_levelvic_deetsmultiple_vicsvic_femalevic_male
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnnyname dohnny anderson pop 12 28 59 received cou...none_listedno6 years[white, male]nonoyes
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBettyname bettyloubeets rio dob 03 12 37 received 1...cashierno10 years[white, male]nonoyes
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDanielname daniel joe hittle d r 981 v pop 3 1 50 re...welderyes14 years[white, male]nonoyes
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohnname john thomas satterwhite d r 651 dob 12 29...mechanicyesnone_listed[none_listed]nonono
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohnname john chavez d r 999186 dob 04 27 68 recei...painteryes8 years[hispanic, male]nonoyes
..........................................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGeraldname gerald lee mitchell d r 838 dob 12 27 67 ...carpenteryes10 years[none_listed]nonono
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharlescharles francis rumbaugh execution 555 date se...none_listednone_listednone_listed[none_listed]nonono
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaudeclaude howard jones 980 name d r pop 9 24 40 r...electricianyes9 years[white, male]nonoyes
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolandoname roland ruiz jr d r 999145 dob 07 04 72 re...laboreryes10 years[hispanic, female]noyesno
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliverdavid oliver cruz d r ao name dob 5 18 67 rece...laborerno7 years[white, female]noyesno
\n", "

380 rows × 13 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name \\\n", "0 Anderson Johnny \n", "1 Beets Betty \n", "2 Hittle Daniel \n", "3 Satterwhite John \n", "4 Chavez John \n", ".. ... ... \n", "375 Mitchell Gerald \n", "376 Rumbaugh Charles \n", "377 Jones Claude \n", "378 Ruiz Rolando \n", "379 Cruz Oliver \n", "\n", " clean occupation \\\n", "0 name dohnny anderson pop 12 28 59 received cou... none_listed \n", "1 name bettyloubeets rio dob 03 12 37 received 1... cashier \n", "2 name daniel joe hittle d r 981 v pop 3 1 50 re... welder \n", "3 name john thomas satterwhite d r 651 dob 12 29... mechanic \n", "4 name john chavez d r 999186 dob 04 27 68 recei... painter \n", ".. ... ... \n", "375 name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter \n", "376 charles francis rumbaugh execution 555 date se... none_listed \n", "377 claude howard jones 980 name d r pop 9 24 40 r... electrician \n", "378 name roland ruiz jr d r 999145 dob 07 04 72 re... laborer \n", "379 david oliver cruz d r ao name dob 5 18 67 rece... laborer \n", "\n", " prior_record education_level vic_deets multiple_vics vic_female \\\n", "0 no 6 years [white, male] no no \n", "1 no 10 years [white, male] no no \n", "2 yes 14 years [white, male] no no \n", "3 yes none_listed [none_listed] no no \n", "4 yes 8 years [hispanic, male] no no \n", ".. ... ... ... ... ... \n", "375 yes 10 years [none_listed] no no \n", "376 none_listed none_listed [none_listed] no no \n", "377 yes 9 years [white, male] no no \n", "378 yes 10 years [hispanic, female] no yes \n", "379 no 7 years [white, female] no yes \n", "\n", " vic_male \n", "0 yes \n", "1 yes \n", "2 yes \n", "3 no \n", "4 yes \n", ".. ... \n", "375 no \n", "376 no \n", "377 yes \n", "378 no \n", "379 no \n", "\n", "[380 rows x 13 columns]" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['multiple_vics'] = multiple_vics\n", "df['vic_female'] = female_vics\n", "df['vic_male'] = male_vics\n", "df" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [ "race_vics = [summary[1] if len(summary) == 3 else summary[0] for summary in df['vic_deets'].values]" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'hispanic',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'hispanic',\n", " 'male',\n", " 'black',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'black',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'hite',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'hispanic',\n", " 'hispanic',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'black',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'black',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'plack',\n", " 'waite',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'black',\n", " 'none_listed',\n", " 'hispanic',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'hispanic',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'e',\n", " 'e',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'black',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'hispanic',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'black',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'hispanic',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'hispanic',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'black',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'e',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'hispanic',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'e',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'black',\n", " 'black',\n", " 'white',\n", " 'asian',\n", " 'white',\n", " 'hispanic',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'black',\n", " 'black',\n", " 'none_listed',\n", " 'black',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'hispanic',\n", " 'none_listed',\n", " 'biack',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'white',\n", " '1',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'black',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'e',\n", " 'hite',\n", " 'e',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'black',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'white',\n", " 'hispanic',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'asian',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " 'hite',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'none_listed',\n", " 'none_listed',\n", " 'white',\n", " 'hispanic',\n", " 'white']" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "race_vics" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "df['race_vic'] = race_vics" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_namecleanoccupationprior_recordeducation_levelvic_deetsmultiple_vicsvic_femalevic_malerace_vic
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnnyname dohnny anderson pop 12 28 59 received cou...none_listedno6 years[white, male]nonoyeswhite
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBettyname bettyloubeets rio dob 03 12 37 received 1...cashierno10 years[white, male]nonoyeswhite
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDanielname daniel joe hittle d r 981 v pop 3 1 50 re...welderyes14 years[white, male]nonoyeswhite
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohnname john thomas satterwhite d r 651 dob 12 29...mechanicyesnone_listed[none_listed]nonononone_listed
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohnname john chavez d r 999186 dob 04 27 68 recei...painteryes8 years[hispanic, male]nonoyeshispanic
.............................................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGeraldname gerald lee mitchell d r 838 dob 12 27 67 ...carpenteryes10 years[none_listed]nonononone_listed
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharlescharles francis rumbaugh execution 555 date se...none_listednone_listednone_listed[none_listed]nonononone_listed
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaudeclaude howard jones 980 name d r pop 9 24 40 r...electricianyes9 years[white, male]nonoyeswhite
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolandoname roland ruiz jr d r 999145 dob 07 04 72 re...laboreryes10 years[hispanic, female]noyesnohispanic
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliverdavid oliver cruz d r ao name dob 5 18 67 rece...laborerno7 years[white, female]noyesnowhite
\n", "

380 rows × 14 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name \\\n", "0 Anderson Johnny \n", "1 Beets Betty \n", "2 Hittle Daniel \n", "3 Satterwhite John \n", "4 Chavez John \n", ".. ... ... \n", "375 Mitchell Gerald \n", "376 Rumbaugh Charles \n", "377 Jones Claude \n", "378 Ruiz Rolando \n", "379 Cruz Oliver \n", "\n", " clean occupation \\\n", "0 name dohnny anderson pop 12 28 59 received cou... none_listed \n", "1 name bettyloubeets rio dob 03 12 37 received 1... cashier \n", "2 name daniel joe hittle d r 981 v pop 3 1 50 re... welder \n", "3 name john thomas satterwhite d r 651 dob 12 29... mechanic \n", "4 name john chavez d r 999186 dob 04 27 68 recei... painter \n", ".. ... ... \n", "375 name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter \n", "376 charles francis rumbaugh execution 555 date se... none_listed \n", "377 claude howard jones 980 name d r pop 9 24 40 r... electrician \n", "378 name roland ruiz jr d r 999145 dob 07 04 72 re... laborer \n", "379 david oliver cruz d r ao name dob 5 18 67 rece... laborer \n", "\n", " prior_record education_level vic_deets multiple_vics vic_female \\\n", "0 no 6 years [white, male] no no \n", "1 no 10 years [white, male] no no \n", "2 yes 14 years [white, male] no no \n", "3 yes none_listed [none_listed] no no \n", "4 yes 8 years [hispanic, male] no no \n", ".. ... ... ... ... ... \n", "375 yes 10 years [none_listed] no no \n", "376 none_listed none_listed [none_listed] no no \n", "377 yes 9 years [white, male] no no \n", "378 yes 10 years [hispanic, female] no yes \n", "379 no 7 years [white, female] no yes \n", "\n", " vic_male race_vic \n", "0 yes white \n", "1 yes white \n", "2 yes white \n", "3 no none_listed \n", "4 yes hispanic \n", ".. ... ... \n", "375 no none_listed \n", "376 no none_listed \n", "377 yes white \n", "378 no hispanic \n", "379 no white \n", "\n", "[380 rows x 14 columns]" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "def get_age_crime(summary):\n", " try:\n", " text = re.compile(r'(?<=age at time of offense )(\\d.*?)\\W')\n", " result = text.search(summary).group().strip()\n", " if len(result) < 2:\n", " return 'none_listed'\n", " else:\n", " return result\n", " except:\n", " return 'none_listed'\n", "\n", "age_crime = [get_age_crime(summary) for summary in df['clean'].values] " ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['21',\n", " '46',\n", " '39',\n", " '32',\n", " '27',\n", " '30',\n", " '18',\n", " '19',\n", " '23',\n", " '22',\n", " '23',\n", " '27',\n", " '27',\n", " '22',\n", " '22',\n", " '43',\n", " '18',\n", " '19',\n", " '20',\n", " '21',\n", " '23',\n", " '26',\n", " '17',\n", " '34',\n", " '28',\n", " '25',\n", " 'none_listed',\n", " '18',\n", " '20',\n", " 'none_listed',\n", " '22',\n", " '30',\n", " '37',\n", " '27',\n", " '29',\n", " '33',\n", " '31',\n", " 'none_listed',\n", " '35',\n", " '28',\n", " 'none_listed',\n", " '24',\n", " '22',\n", " 'none_listed',\n", " '17',\n", " '26',\n", " '39',\n", " '39',\n", " 'none_listed',\n", " '38',\n", " '34',\n", " 'none_listed',\n", " '26',\n", " '21',\n", " '20',\n", " '51',\n", " '43',\n", " '27',\n", " '25',\n", " '18',\n", " '24',\n", " 'none_listed',\n", " '24',\n", " 'none_listed',\n", " '30',\n", " '33',\n", " '53',\n", " '42',\n", " '35',\n", " '27',\n", " 'none_listed',\n", " 'none_listed',\n", " '39',\n", " 'none_listed',\n", " '29',\n", " '32',\n", " '56',\n", " 'none_listed',\n", " '20',\n", " 'none_listed',\n", " '32',\n", " '22',\n", " '24',\n", " '33',\n", " '23',\n", " '26',\n", " '32',\n", " '29',\n", " '26',\n", " '35',\n", " '21',\n", " '29',\n", " '28',\n", " 'none_listed',\n", " '24',\n", " 'none_listed',\n", " '24',\n", " '26',\n", " 'none_listed',\n", " '20',\n", " '17',\n", " '20',\n", " '27',\n", " '19',\n", " '27',\n", " '29',\n", " '19',\n", " 'none_listed',\n", " '26',\n", " '33',\n", " '31',\n", " '21',\n", " '21',\n", " 'none_listed',\n", " '28',\n", " '28',\n", " '38',\n", " '34',\n", " '23',\n", " '20',\n", " 'none_listed',\n", " '38',\n", " '24',\n", " '25',\n", " '18',\n", " '30',\n", " '26',\n", " 'none_listed',\n", " '19',\n", " '18',\n", " '33',\n", " '39',\n", " '30',\n", " '23',\n", " 'none_listed',\n", " '31',\n", " '18',\n", " '21',\n", " '27',\n", " '36',\n", " '23',\n", " '18',\n", " '25',\n", " '22',\n", " '23',\n", " 'none_listed',\n", " 'none_listed',\n", " '39',\n", " '29',\n", " '44',\n", " '19',\n", " '20',\n", " '21',\n", " '45',\n", " '29',\n", " '27',\n", " 'none_listed',\n", " '19',\n", " '29',\n", " '21',\n", " '45',\n", " '24',\n", " '25',\n", " '19',\n", " '30',\n", " '17',\n", " '44',\n", " '18',\n", " '18',\n", " '33',\n", " '21',\n", " '32',\n", " '19',\n", " '30',\n", " '22',\n", " 'none_listed',\n", " '44',\n", " 'none_listed',\n", " 'none_listed',\n", " '27',\n", " '34',\n", " '32',\n", " '24',\n", " '29',\n", " '25',\n", " '19',\n", " '22',\n", " '27',\n", " '24',\n", " 'none_listed',\n", " '31',\n", " '19',\n", " '40',\n", " '22',\n", " 'none_listed',\n", " '33',\n", " '18',\n", " '32',\n", " '19',\n", " '24',\n", " '26',\n", " '22',\n", " '31',\n", " 'none_listed',\n", " 'none_listed',\n", " '31',\n", " 'none_listed',\n", " 'none_listed',\n", " '22',\n", " 'none_listed',\n", " '33',\n", " '24',\n", " '44',\n", " '17',\n", " 'none_listed',\n", " '20',\n", " 'none_listed',\n", " '19',\n", " '19',\n", " '29',\n", " 'none_listed',\n", " '44',\n", " '37',\n", " '26',\n", " '19',\n", " '23',\n", " '19',\n", " '19',\n", " '28',\n", " '20',\n", " '23',\n", " '19',\n", " '38',\n", " '24',\n", " '20',\n", " '26',\n", " '18',\n", " '23',\n", " '19',\n", " '37',\n", " '22',\n", " 'none_listed',\n", " '31',\n", " '26',\n", " 'none_listed',\n", " '21',\n", " '23',\n", " 'none_listed',\n", " 'none_listed',\n", " 'none_listed',\n", " '28',\n", " '24',\n", " '18',\n", " '20',\n", " '20',\n", " '19',\n", " '25',\n", " '22',\n", " '20',\n", " '34',\n", " 'none_listed',\n", " '19',\n", " '21',\n", " '22',\n", " '37',\n", " '45',\n", " '28',\n", " '37',\n", " '30',\n", " '24',\n", " '22',\n", " '22',\n", " '37',\n", " '26',\n", " '18',\n", " '17',\n", " '26',\n", " '40',\n", " '37',\n", " '22',\n", " '24',\n", " '36',\n", " '36',\n", " '23',\n", " 'none_listed',\n", " 'none_listed',\n", " '31',\n", " '26',\n", " '31',\n", " '24',\n", " '20',\n", " '32',\n", " '32',\n", " 'none_listed',\n", " '28',\n", " '22',\n", " 'none_listed',\n", " '30',\n", " '29',\n", " '28',\n", " '31',\n", " 'none_listed',\n", " '23',\n", " '20',\n", " 'none_listed',\n", " '29',\n", " '19',\n", " '17',\n", " '52',\n", " '27',\n", " '927',\n", " '35',\n", " '35',\n", " '27',\n", " 'none_listed',\n", " '20',\n", " '25',\n", " '22',\n", " '33',\n", " '30',\n", " '22',\n", " '24',\n", " '34',\n", " '23',\n", " '21',\n", " '26',\n", " 'none_listed',\n", " '19',\n", " '34',\n", " '22',\n", " 'none_listed',\n", " '23',\n", " '35',\n", " '47',\n", " '19',\n", " '28',\n", " '19',\n", " 'none_listed',\n", " '19',\n", " '30',\n", " '30',\n", " 'none_listed',\n", " '23',\n", " '26',\n", " '48',\n", " '50',\n", " '18',\n", " '28',\n", " '31',\n", " 'none_listed',\n", " 'none_listed',\n", " '20',\n", " '25',\n", " 'none_listed',\n", " '44',\n", " '32',\n", " '35',\n", " '54',\n", " '23',\n", " '19',\n", " '19',\n", " '24',\n", " '31',\n", " '33',\n", " 'none_listed',\n", " '34',\n", " '19',\n", " '25',\n", " '36',\n", " '20',\n", " 'none_listed',\n", " '28',\n", " '20',\n", " '24',\n", " '50',\n", " 'none_listed',\n", " 'none_listed',\n", " '49',\n", " '20',\n", " '21']" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "age_crime" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0inmate_numberlast_namefirst_namecleanoccupationprior_recordeducation_levelvic_deetsmultiple_vicsvic_femalevic_malerace_vicage_crime
0img_text_732_Johnny_Anderson.txtName: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...732AndersonJohnnyname dohnny anderson pop 12 28 59 received cou...none_listedno6 years[white, male]nonoyeswhite21
1img_text_810_Betty_Beets.txtName: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...810BeetsBettyname bettyloubeets rio dob 03 12 37 received 1...cashierno10 years[white, male]nonoyeswhite46
2img_text_981_Daniel_Hittle.txtName: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...981HittleDanielname daniel joe hittle d r 981 v pop 3 1 50 re...welderyes14 years[white, male]nonoyeswhite39
3img_text_651_John_Satterwhite.txtName: John Thomas Satterwhite D.R.# 651\\n\\nDOB...651SatterwhiteJohnname john thomas satterwhite d r 651 dob 12 29...mechanicyesnone_listed[none_listed]nonononone_listed32
4img_text_999186_John_Chavez.txtName: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...999186ChavezJohnname john chavez d r 999186 dob 04 27 68 recei...painteryes8 years[hispanic, male]nonoyeshispanic27
................................................
375img_text_838_Gerald_Mitchell.txtName: Gerald Lee Mitchell ____._.___ D.R.#838_...838MitchellGeraldname gerald lee mitchell d r 838 dob 12 27 67 ...carpenteryes10 years[none_listed]nonononone_listednone_listed
376img_text_555_Charles_Rumbaugh.txtCHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...555RumbaughCharlescharles francis rumbaugh execution 555 date se...none_listednone_listednone_listed[none_listed]nonononone_listednone_listed
377img_text_980_Claude_Jones.txtClaude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...980JonesClaudeclaude howard jones 980 name d r pop 9 24 40 r...electricianyes9 years[white, male]nonoyeswhite49
378img_text_999145_Rolando_Ruiz.txtName: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...999145RuizRolandoname roland ruiz jr d r 999145 dob 07 04 72 re...laboreryes10 years[hispanic, female]noyesnohispanic20
379img_text_954_Oliver_Cruz.txtDavid Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...954CruzOliverdavid oliver cruz d r ao name dob 5 18 67 rece...laborerno7 years[white, female]noyesnowhite21
\n", "

380 rows × 15 columns

\n", "
" ], "text/plain": [ " index \\\n", "0 img_text_732_Johnny_Anderson.txt \n", "1 img_text_810_Betty_Beets.txt \n", "2 img_text_981_Daniel_Hittle.txt \n", "3 img_text_651_John_Satterwhite.txt \n", "4 img_text_999186_John_Chavez.txt \n", ".. ... \n", "375 img_text_838_Gerald_Mitchell.txt \n", "376 img_text_555_Charles_Rumbaugh.txt \n", "377 img_text_980_Claude_Jones.txt \n", "378 img_text_999145_Rolando_Ruiz.txt \n", "379 img_text_954_Oliver_Cruz.txt \n", "\n", " 0 inmate_number \\\n", "0 Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec... 732 \n", "1 Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive... 810 \n", "2 Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n... 981 \n", "3 Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB... 651 \n", "4 Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6... 999186 \n", ".. ... ... \n", "375 Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 \n", "376 CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat... 555 \n", "377 Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName... 980 \n", "378 Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ... 999145 \n", "379 David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n... 954 \n", "\n", " last_name first_name \\\n", "0 Anderson Johnny \n", "1 Beets Betty \n", "2 Hittle Daniel \n", "3 Satterwhite John \n", "4 Chavez John \n", ".. ... ... \n", "375 Mitchell Gerald \n", "376 Rumbaugh Charles \n", "377 Jones Claude \n", "378 Ruiz Rolando \n", "379 Cruz Oliver \n", "\n", " clean occupation \\\n", "0 name dohnny anderson pop 12 28 59 received cou... none_listed \n", "1 name bettyloubeets rio dob 03 12 37 received 1... cashier \n", "2 name daniel joe hittle d r 981 v pop 3 1 50 re... welder \n", "3 name john thomas satterwhite d r 651 dob 12 29... mechanic \n", "4 name john chavez d r 999186 dob 04 27 68 recei... painter \n", ".. ... ... \n", "375 name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter \n", "376 charles francis rumbaugh execution 555 date se... none_listed \n", "377 claude howard jones 980 name d r pop 9 24 40 r... electrician \n", "378 name roland ruiz jr d r 999145 dob 07 04 72 re... laborer \n", "379 david oliver cruz d r ao name dob 5 18 67 rece... laborer \n", "\n", " prior_record education_level vic_deets multiple_vics vic_female \\\n", "0 no 6 years [white, male] no no \n", "1 no 10 years [white, male] no no \n", "2 yes 14 years [white, male] no no \n", "3 yes none_listed [none_listed] no no \n", "4 yes 8 years [hispanic, male] no no \n", ".. ... ... ... ... ... \n", "375 yes 10 years [none_listed] no no \n", "376 none_listed none_listed [none_listed] no no \n", "377 yes 9 years [white, male] no no \n", "378 yes 10 years [hispanic, female] no yes \n", "379 no 7 years [white, female] no yes \n", "\n", " vic_male race_vic age_crime \n", "0 yes white 21 \n", "1 yes white 46 \n", "2 yes white 39 \n", "3 no none_listed 32 \n", "4 yes hispanic 27 \n", ".. ... ... ... \n", "375 no none_listed none_listed \n", "376 no none_listed none_listed \n", "377 yes white 49 \n", "378 no hispanic 20 \n", "379 no white 21 \n", "\n", "[380 rows x 15 columns]" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['age_crime'] = age_crime\n", "df" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "def get_weapon(summary):\n", " try:\n", " if 'knife' in summary:\n", " return 'knife'\n", "# weapon = 'knife'\n", " elif 'gun' in summary:\n", " return 'gun'\n", "# weapon = 'gun'\n", " elif 'cord ' in summary:\n", "# print(summary.split('cord')[1])\n", " return 'cord'\n", " elif 'blunt object':\n", " return 'blunt object'\n", " else:\n", " return 'other'\n", " except:\n", " return 'none_listed'\n", "\n", "df['clean_summary'] = [summary.split('summary')[1] if 'summary' in summary else 'nope' for summary in df['clean'].values]\n", "weapon = [get_weapon(summary) for summary in df['clean_summary'].values] " ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "97" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df[df['clean_summary'] == 'nope'])" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'gun',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'cord',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'cord',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'gun',\n", " 'knife',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'knife',\n", " 'gun',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'cord',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'cord',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'knife',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'cord',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'cord',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'knife',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'gun',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object',\n", " 'gun',\n", " 'blunt object',\n", " 'blunt object',\n", " 'blunt object']" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weapon" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [], "source": [ "df['weapon'] = weapon" ] }, { "cell_type": "code", "execution_count": 126, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index([ 'index', 0, 'inmate_number',\n", " 'last_name', 'first_name', 'clean',\n", " 'occupation', 'prior_record', 'education_level',\n", " 'vic_deets', 'multiple_vics', 'vic_female',\n", " 'vic_male', 'race_vic', 'age_crime',\n", " 'clean_summary', 'weapon'],\n", " dtype='object')" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "columns = ['inmate_number','last_name', 'first_name','education_level','age_crime',\n", " 'occupation','prior_record','multiple_vics','weapon','race_vic','vic_male','vic_female']" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
inmate_numberlast_namefirst_nameeducation_levelage_crimeoccupationprior_recordmultiple_vicsweaponrace_vicvic_malevic_female
0732AndersonJohnny6 years21none_listednonoblunt objectwhiteyesno
1810BeetsBetty10 years46cashiernonoblunt objectwhiteyesno
2981HittleDaniel14 years39welderyesnogunwhiteyesno
3651SatterwhiteJohnnone_listed32mechanicyesnogunnone_listednono
4999186ChavezJohn8 years27painteryesnoblunt objecthispanicyesno
.......................................
375838MitchellGerald10 yearsnone_listedcarpenteryesnoblunt objectnone_listednono
376555RumbaughCharlesnone_listednone_listednone_listednone_listednogunnone_listednono
377980JonesClaude9 years49electricianyesnoblunt objectwhiteyesno
378999145RuizRolando10 years20laboreryesnoblunt objecthispanicnoyes
379954CruzOliver7 years21laborernonoblunt objectwhitenoyes
\n", "

380 rows × 12 columns

\n", "
" ], "text/plain": [ " inmate_number last_name first_name education_level age_crime \\\n", "0 732 Anderson Johnny 6 years 21 \n", "1 810 Beets Betty 10 years 46 \n", "2 981 Hittle Daniel 14 years 39 \n", "3 651 Satterwhite John none_listed 32 \n", "4 999186 Chavez John 8 years 27 \n", ".. ... ... ... ... ... \n", "375 838 Mitchell Gerald 10 years none_listed \n", "376 555 Rumbaugh Charles none_listed none_listed \n", "377 980 Jones Claude 9 years 49 \n", "378 999145 Ruiz Rolando 10 years 20 \n", "379 954 Cruz Oliver 7 years 21 \n", "\n", " occupation prior_record multiple_vics weapon race_vic \\\n", "0 none_listed no no blunt object white \n", "1 cashier no no blunt object white \n", "2 welder yes no gun white \n", "3 mechanic yes no gun none_listed \n", "4 painter yes no blunt object hispanic \n", ".. ... ... ... ... ... \n", "375 carpenter yes no blunt object none_listed \n", "376 none_listed none_listed no gun none_listed \n", "377 electrician yes no blunt object white \n", "378 laborer yes no blunt object hispanic \n", "379 laborer no no blunt object white \n", "\n", " vic_male vic_female \n", "0 yes no \n", "1 yes no \n", "2 yes no \n", "3 no no \n", "4 yes no \n", ".. ... ... \n", "375 no no \n", "376 no no \n", "377 yes no \n", "378 no yes \n", "379 no yes \n", "\n", "[380 rows x 12 columns]" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame(df, columns=columns)\n", "df1" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [], "source": [ "df1.to_csv('V9_photo_inmates.csv')" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "380" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }