{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "def get_data_from_files(path):\n",
    "    \"\"\"Read every regular file found directly inside a directory.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    path : str\n",
    "        Directory to read from; a trailing path separator is optional.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple of (list of str, list of str)\n",
    "        The file contents and the matching filenames, index-aligned and\n",
    "        ordered by filename.\n",
    "    \"\"\"\n",
    "    results = []\n",
    "    filenames = []\n",
    "    # os.listdir returns entries in arbitrary order; sort for reproducible rows.\n",
    "    for file in sorted(os.listdir(path)):\n",
    "        full_path = os.path.join(path, file)\n",
    "        # Skip sub-directories (e.g. .ipynb_checkpoints), which open() cannot read.\n",
    "        if not os.path.isfile(full_path):\n",
    "            continue\n",
    "        # The context manager closes the handle even if read() raises.\n",
    "        with open(full_path) as f:\n",
    "            results.append(f.read())\n",
    "        filenames.append(file)\n",
    "    return results, filenames\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Read each inmate text file together with the name of the file it came from.\n",
    "inmates, filenames = get_data_from_files('FinalProject/inmates/')\n",
    "\n",
    "# One row per file: the raw text lands in column 0 and the filenames form the index.\n",
    "df = pd.DataFrame(inmates, index=filenames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move the filenames out of the index into a regular 'index' column.\n",
    "# Guarded so that re-running this cell does not add a spurious 'level_0' column.\n",
    "if 'index' not in df.columns:\n",
    "    df = df.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name  \n",
       "0       Anderson     Johnny  \n",
       "1          Beets      Betty  \n",
       "2         Hittle     Daniel  \n",
       "3    Satterwhite       John  \n",
       "4         Chavez       John  \n",
       "..           ...        ...  \n",
       "375     Mitchell     Gerald  \n",
       "376     Rumbaugh    Charles  \n",
       "377        Jones     Claude  \n",
       "378         Ruiz    Rolando  \n",
       "379         Cruz     Oliver  \n",
       "\n",
       "[380 rows x 5 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Filenames look like 'img_text_<number>_<first>_<last>.txt'; split them once and\n",
    "# pick the pieces by position. The vectorised .str accessor yields NaN for a\n",
    "# filename with too few parts instead of aborting the whole cell.\n",
    "name_parts = df['index'].str.split('_')\n",
    "df['inmate_number'] = name_parts.str[2]\n",
    "df['last_name'] = name_parts.str[4].str.split('.').str[0]\n",
    "df['first_name'] = name_parts.str[3]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "# Normalise the raw text held in column 0: lowercase it and collapse every run of\n",
    "# non-word characters and underscores into a single space. The column is selected\n",
    "# explicitly so the integer key 0 cannot be read as a position within a row.\n",
    "df['clean'] = df[0].str.lower().str.replace(r'[\\W_]+', ' ', regex=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>clean</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>name dohnny anderson pop 12 28 59 received cou...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>name bettyloubeets rio dob 03 12 37 received 1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>name daniel joe hittle d r 981 v pop 3 1 50 re...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>name john thomas satterwhite d r 651 dob 12 29...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>name john chavez d r 999186 dob 04 27 68 recei...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>name gerald lee mitchell d r 838 dob 12 27 67 ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>charles francis rumbaugh execution 555 date se...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>claude howard jones 980 name d r pop 9 24 40 r...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>name roland ruiz jr d r 999145 dob 07 04 72 re...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>david oliver cruz d r ao name dob 5 18 67 rece...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name                                              clean  \n",
       "0       Anderson     Johnny  name dohnny anderson pop 12 28 59 received cou...  \n",
       "1          Beets      Betty  name bettyloubeets rio dob 03 12 37 received 1...  \n",
       "2         Hittle     Daniel  name daniel joe hittle d r 981 v pop 3 1 50 re...  \n",
       "3    Satterwhite       John  name john thomas satterwhite d r 651 dob 12 29...  \n",
       "4         Chavez       John  name john chavez d r 999186 dob 04 27 68 recei...  \n",
       "..           ...        ...                                                ...  \n",
       "375     Mitchell     Gerald  name gerald lee mitchell d r 838 dob 12 27 67 ...  \n",
       "376     Rumbaugh    Charles  charles francis rumbaugh execution 555 date se...  \n",
       "377        Jones     Claude  claude howard jones 980 name d r pop 9 24 40 r...  \n",
       "378         Ruiz    Rolando  name roland ruiz jr d r 999145 dob 07 04 72 re...  \n",
       "379         Cruz     Oliver  david oliver cruz d r ao name dob 5 18 67 rece...  \n",
       "\n",
       "[380 rows x 6 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the frame again now that the 'clean' column has been added.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text that follows the first 'occupation' keyword, up to the next whitespace\n",
    "# character or the end of the summary. Compiled once rather than on every call.\n",
    "_OCCUPATION_RE = re.compile(r'(?<=occupation)(\\W.*?)(?=\\s|$)')\n",
    "\n",
    "\n",
    "def get_occupation(summary):\n",
    "    \"\"\"Extract the token that follows the word 'occupation' in a summary.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    summary : str\n",
    "        Text to search. Any non-string value is treated as having no occupation.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The stripped token after the first 'occupation', or 'none_listed' when\n",
    "        the keyword is absent, the token is shorter than three characters, or\n",
    "        the token contains 'prior'.\n",
    "    \"\"\"\n",
    "    if not isinstance(summary, str):\n",
    "        return 'none_listed'\n",
    "    match = _OCCUPATION_RE.search(summary)\n",
    "    if match is None:\n",
    "        return 'none_listed'\n",
    "    occupation = match.group().strip()\n",
    "    # NOTE(review): 'prior' is presumably the start of the next form field when the\n",
    "    # occupation was left blank - confirm against the source documents.\n",
    "    if 'prior' in occupation or len(occupation) < 3:\n",
    "        return 'none_listed'\n",
    "    return occupation\n",
    "\n",
    "# Run the extractor over every cleaned summary, keeping a plain list of labels.\n",
    "occupations = df['clean'].map(get_occupation).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['none_listed',\n",
       " 'cashier',\n",
       " 'welder',\n",
       " 'mechanic',\n",
       " 'painter',\n",
       " 'laborer',\n",
       " 'education',\n",
       " 'laborer',\n",
       " 'borex',\n",
       " 'press',\n",
       " 'truck',\n",
       " 'mechanic',\n",
       " 'millwright',\n",
       " 'laborer',\n",
       " 'construction',\n",
       " 'education',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'plumber',\n",
       " 'sculptor',\n",
       " 'iron',\n",
       " 'bull',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'carpenter',\n",
       " 'brick',\n",
       " 'truck',\n",
       " 'auto',\n",
       " 'direct',\n",
       " 'restaurant',\n",
       " 'mechanic',\n",
       " 'fork',\n",
       " 'forklift',\n",
       " 'janitor',\n",
       " 'construction',\n",
       " 'machinist',\n",
       " 'salesman',\n",
       " 'construction',\n",
       " 'musician',\n",
       " 'musicia',\n",
       " 'plumber',\n",
       " 'construction',\n",
       " 'roofer',\n",
       " 'education',\n",
       " 'laborer',\n",
       " 'press',\n",
       " 'laborer',\n",
       " 'drywaller',\n",
       " 'none_listed',\n",
       " 'barber',\n",
       " 'construction',\n",
       " 'education',\n",
       " 'tankerman',\n",
       " 'bartender',\n",
       " 'roofer',\n",
       " 'welder',\n",
       " 'mechanic',\n",
       " 'computer',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'roofer',\n",
       " 'laborer',\n",
       " 'education',\n",
       " 'none_listed',\n",
       " 'laborer',\n",
       " 'produce',\n",
       " 'gardener',\n",
       " 'truck',\n",
       " 'landscaping',\n",
       " 'laborer',\n",
       " '2porer',\n",
       " 'analyst',\n",
       " 'antique',\n",
       " 'auto',\n",
       " 'welder',\n",
       " 'clectr',\n",
       " 'none_listed',\n",
       " 'clecitician',\n",
       " 'nurses',\n",
       " 'laborer',\n",
       " 'painter',\n",
       " 'laborer',\n",
       " 'oachi',\n",
       " 'electrician',\n",
       " 'carpenter',\n",
       " 'maintenance',\n",
       " 'truck',\n",
       " 'cabinet',\n",
       " 'security',\n",
       " 'laborer',\n",
       " 'waiter',\n",
       " 'construction',\n",
       " 'truck',\n",
       " 'laborer',\n",
       " 'electrician',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'correctional',\n",
       " 'construction',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'education',\n",
       " 'edckisyes',\n",
       " 'unemployed',\n",
       " 'landscaper',\n",
       " 'jaborer',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'paint',\n",
       " 'general',\n",
       " 'telemarketing',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'construction',\n",
       " 'auto',\n",
       " 'laborer',\n",
       " 'informant',\n",
       " 'roofer',\n",
       " 'laborer',\n",
       " 'truck',\n",
       " 'telephone',\n",
       " 'unemployed',\n",
       " 'electricianfmarketing',\n",
       " 'student',\n",
       " 'none_listed',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'salesman',\n",
       " 'welder',\n",
       " 'welder',\n",
       " 'clectrician',\n",
       " 'brick',\n",
       " 'food',\n",
       " 'food',\n",
       " 'laborer',\n",
       " 'metal',\n",
       " 'none_listed',\n",
       " 'deliveryman',\n",
       " 'laborer',\n",
       " 'construction',\n",
       " 'warehouse',\n",
       " 'jaborer',\n",
       " 'laborer',\n",
       " 'delivery',\n",
       " 'cement',\n",
       " 'construction',\n",
       " 'carpenter',\n",
       " 'food',\n",
       " 'paint',\n",
       " 'brick',\n",
       " 'machine',\n",
       " 'education',\n",
       " 'heavy',\n",
       " 'accounting',\n",
       " 'asst',\n",
       " 'laborer',\n",
       " 'mechanic',\n",
       " 'govt',\n",
       " 'sales',\n",
       " 'clerk',\n",
       " 'cook',\n",
       " 'construction',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'mechanic',\n",
       " '1aborer',\n",
       " 'meat',\n",
       " 'painter',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'retail',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'electrician',\n",
       " 'car',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'mechanic',\n",
       " 'farmer',\n",
       " 'machinist',\n",
       " 'baker',\n",
       " 'commercial',\n",
       " 'video',\n",
       " 'machinist',\n",
       " 'machinist',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'education',\n",
       " 'janitor',\n",
       " 'electrician',\n",
       " 'cashier',\n",
       " 'small',\n",
       " 'cable',\n",
       " 'laborer',\n",
       " 'welder',\n",
       " 'laborer',\n",
       " 'mechanic',\n",
       " 'radiator',\n",
       " 'mechanic',\n",
       " 'shrimper',\n",
       " 'paint',\n",
       " 'none_listed',\n",
       " 'landscaping',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'cook',\n",
       " 'none_listed',\n",
       " 'feed',\n",
       " 'auto',\n",
       " 'sales',\n",
       " 'laborer',\n",
       " 'student',\n",
       " 'cement',\n",
       " 'none_listed',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'cook',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'auto',\n",
       " 'drywaller',\n",
       " 'none_listed',\n",
       " 'painter',\n",
       " 'construction',\n",
       " 'laborer',\n",
       " 'clectrician',\n",
       " 'laborer',\n",
       " 'pax',\n",
       " 'electrician',\n",
       " 'laborer',\n",
       " 'nursing',\n",
       " 'laborer',\n",
       " 'produce',\n",
       " 'jaborer',\n",
       " 'none_listed',\n",
       " 'sales',\n",
       " 'anto',\n",
       " 'custom',\n",
       " 'electrician',\n",
       " 'roofer',\n",
       " 'truck',\n",
       " 'none_listed',\n",
       " 'warehouseman',\n",
       " 'auto',\n",
       " 'laborer',\n",
       " 'welder',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'carpenter',\n",
       " 'laborer',\n",
       " 'stocker',\n",
       " 'roofer',\n",
       " 'cashier',\n",
       " 'laborer',\n",
       " 'labor',\n",
       " 'stocker',\n",
       " 'oilfield',\n",
       " 'none_listed',\n",
       " 'painter',\n",
       " 'clectrician',\n",
       " 'restaurant',\n",
       " 'laborer',\n",
       " 'computer',\n",
       " 'none_listed',\n",
       " 'electrician',\n",
       " 'sales',\n",
       " 'maintenance',\n",
       " 'welder',\n",
       " 'carpenter',\n",
       " 'physical',\n",
       " 'mechanic',\n",
       " 'gump',\n",
       " 'jaborer',\n",
       " 'echanic',\n",
       " 'none_listed',\n",
       " 'carpenter',\n",
       " 'laborer',\n",
       " 'apprentice',\n",
       " 'electrician',\n",
       " 'welder',\n",
       " 'sales',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'truck',\n",
       " 'carpenter',\n",
       " 'auto',\n",
       " 'welder',\n",
       " 'education',\n",
       " 'stockbroker',\n",
       " 'general',\n",
       " 'landscaping',\n",
       " 'construct',\n",
       " 'construction',\n",
       " 'none_listed',\n",
       " 'carpenter',\n",
       " 'laborer',\n",
       " 'oil',\n",
       " 'laborer',\n",
       " 'paint',\n",
       " 'mechanic',\n",
       " 'mover',\n",
       " 'student',\n",
       " 'electrician',\n",
       " 'fork',\n",
       " 'ast',\n",
       " 'janitorial',\n",
       " 'receiving',\n",
       " 'auto',\n",
       " 'chemical',\n",
       " 'welder',\n",
       " 'motorcycle',\n",
       " 'mechanic',\n",
       " 'laborer',\n",
       " 'cook',\n",
       " 'electrician',\n",
       " 'heavy',\n",
       " 'none_listed',\n",
       " 'mechanic',\n",
       " 'mechanic',\n",
       " 'iron',\n",
       " 'auto',\n",
       " 'roofer',\n",
       " 'laborer',\n",
       " 'farm',\n",
       " 'electrician',\n",
       " 'cook',\n",
       " 'laborer',\n",
       " 'none_listed',\n",
       " 'cook',\n",
       " 'manager',\n",
       " 'general',\n",
       " 'laborer',\n",
       " 'insurance',\n",
       " 'cook',\n",
       " 'none_listed',\n",
       " 'laborer',\n",
       " 'painter',\n",
       " 'mechanic',\n",
       " 'education',\n",
       " 'office',\n",
       " 'ghneck',\n",
       " 'carpenter',\n",
       " 'delivery',\n",
       " 'security',\n",
       " 'cook',\n",
       " 'landscaping',\n",
       " 'diesel',\n",
       " 'laborer',\n",
       " 'laborer',\n",
       " 'construction',\n",
       " 'roughneck',\n",
       " 'construction',\n",
       " 'auto',\n",
       " 'education',\n",
       " 'construction',\n",
       " 'dishwasher',\n",
       " 'factory',\n",
       " 'laborer',\n",
       " 'carpenter',\n",
       " 'drywall',\n",
       " 'cement',\n",
       " 'none_listed',\n",
       " 'brickmason',\n",
       " 'laborer',\n",
       " 'roofer',\n",
       " 'construction',\n",
       " 'telemarketing',\n",
       " 'laborer',\n",
       " 'oilfield',\n",
       " 'clerical',\n",
       " 'laborer',\n",
       " 'dispatcher',\n",
       " 'carpenter',\n",
       " 'none_listed',\n",
       " 'electrician',\n",
       " 'laborer',\n",
       " 'laborer']"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the extracted occupation labels (one entry per row of df).\n",
    "occupations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the extracted labels to the frame as a new 'occupation' column.\n",
    "df['occupation'] = occupations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>clean</th>\n",
       "      <th>occupation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>name dohnny anderson pop 12 28 59 received cou...</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>name bettyloubeets rio dob 03 12 37 received 1...</td>\n",
       "      <td>cashier</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>name daniel joe hittle d r 981 v pop 3 1 50 re...</td>\n",
       "      <td>welder</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>name john thomas satterwhite d r 651 dob 12 29...</td>\n",
       "      <td>mechanic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>name john chavez d r 999186 dob 04 27 68 recei...</td>\n",
       "      <td>painter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>name gerald lee mitchell d r 838 dob 12 27 67 ...</td>\n",
       "      <td>carpenter</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>charles francis rumbaugh execution 555 date se...</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>claude howard jones 980 name d r pop 9 24 40 r...</td>\n",
       "      <td>electrician</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>name roland ruiz jr d r 999145 dob 07 04 72 re...</td>\n",
       "      <td>laborer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>david oliver cruz d r ao name dob 5 18 67 rece...</td>\n",
       "      <td>laborer</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name  \\\n",
       "0       Anderson     Johnny   \n",
       "1          Beets      Betty   \n",
       "2         Hittle     Daniel   \n",
       "3    Satterwhite       John   \n",
       "4         Chavez       John   \n",
       "..           ...        ...   \n",
       "375     Mitchell     Gerald   \n",
       "376     Rumbaugh    Charles   \n",
       "377        Jones     Claude   \n",
       "378         Ruiz    Rolando   \n",
       "379         Cruz     Oliver   \n",
       "\n",
       "                                                 clean   occupation  \n",
       "0    name dohnny anderson pop 12 28 59 received cou...  none_listed  \n",
       "1    name bettyloubeets rio dob 03 12 37 received 1...      cashier  \n",
       "2    name daniel joe hittle d r 981 v pop 3 1 50 re...       welder  \n",
       "3    name john thomas satterwhite d r 651 dob 12 29...     mechanic  \n",
       "4    name john chavez d r 999186 dob 04 27 68 recei...      painter  \n",
       "..                                                 ...          ...  \n",
       "375  name gerald lee mitchell d r 838 dob 12 27 67 ...    carpenter  \n",
       "376  charles francis rumbaugh execution 555 date se...  none_listed  \n",
       "377  claude howard jones 980 name d r pop 9 24 40 r...  electrician  \n",
       "378  name roland ruiz jr d r 999145 dob 07 04 72 re...      laborer  \n",
       "379  david oliver cruz d r ao name dob 5 18 67 rece...      laborer  \n",
       "\n",
       "[380 rows x 7 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bare last expression: render the DataFrame as this cell's output.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>clean</th>\n",
       "      <th>occupation</th>\n",
       "      <th>prior_record</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>name dohnny anderson pop 12 28 59 received cou...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>name bettyloubeets rio dob 03 12 37 received 1...</td>\n",
       "      <td>cashier</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>name daniel joe hittle d r 981 v pop 3 1 50 re...</td>\n",
       "      <td>welder</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>name john thomas satterwhite d r 651 dob 12 29...</td>\n",
       "      <td>mechanic</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>name john chavez d r 999186 dob 04 27 68 recei...</td>\n",
       "      <td>painter</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>name gerald lee mitchell d r 838 dob 12 27 67 ...</td>\n",
       "      <td>carpenter</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>charles francis rumbaugh execution 555 date se...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>claude howard jones 980 name d r pop 9 24 40 r...</td>\n",
       "      <td>electrician</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>name roland ruiz jr d r 999145 dob 07 04 72 re...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>david oliver cruz d r ao name dob 5 18 67 rece...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name  \\\n",
       "0       Anderson     Johnny   \n",
       "1          Beets      Betty   \n",
       "2         Hittle     Daniel   \n",
       "3    Satterwhite       John   \n",
       "4         Chavez       John   \n",
       "..           ...        ...   \n",
       "375     Mitchell     Gerald   \n",
       "376     Rumbaugh    Charles   \n",
       "377        Jones     Claude   \n",
       "378         Ruiz    Rolando   \n",
       "379         Cruz     Oliver   \n",
       "\n",
       "                                                 clean   occupation  \\\n",
       "0    name dohnny anderson pop 12 28 59 received cou...  none_listed   \n",
       "1    name bettyloubeets rio dob 03 12 37 received 1...      cashier   \n",
       "2    name daniel joe hittle d r 981 v pop 3 1 50 re...       welder   \n",
       "3    name john thomas satterwhite d r 651 dob 12 29...     mechanic   \n",
       "4    name john chavez d r 999186 dob 04 27 68 recei...      painter   \n",
       "..                                                 ...          ...   \n",
       "375  name gerald lee mitchell d r 838 dob 12 27 67 ...    carpenter   \n",
       "376  charles francis rumbaugh execution 555 date se...  none_listed   \n",
       "377  claude howard jones 980 name d r pop 9 24 40 r...  electrician   \n",
       "378  name roland ruiz jr d r 999145 dob 07 04 72 re...      laborer   \n",
       "379  david oliver cruz d r ao name dob 5 18 67 rece...      laborer   \n",
       "\n",
       "    prior_record  \n",
       "0             no  \n",
       "1             no  \n",
       "2            yes  \n",
       "3            yes  \n",
       "4            yes  \n",
       "..           ...  \n",
       "375          yes  \n",
       "376  none_listed  \n",
       "377          yes  \n",
       "378          yes  \n",
       "379           no  \n",
       "\n",
       "[380 rows x 8 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_priors(summary):\n",
    "    \"\"\"Classify whether a cleaned summary reports a prior record.\n",
    "\n",
    "    Searches ``summary`` for the text that immediately follows the word\n",
    "    'record': one non-word character plus the shortest run of characters\n",
    "    that is followed by whitespace.\n",
    "\n",
    "    Args:\n",
    "        summary: Cleaned summary text to search.\n",
    "\n",
    "    Returns:\n",
    "        'no' if the matched text contains 'none', 'yes' for any other\n",
    "        matched text, and 'none_listed' when ``summary`` is not a string\n",
    "        or nothing matches.\n",
    "    \"\"\"\n",
    "    # Non-string cells (e.g. missing values) cannot be searched.\n",
    "    if not isinstance(summary, str):\n",
    "        return 'none_listed'\n",
    "    match = re.search(r'(?<=record)(\\W.*?)(?=\\s)', summary)\n",
    "    # Handle the no-match case explicitly instead of swallowing every\n",
    "    # exception, so genuine errors (e.g. a missing import) still surface.\n",
    "    if match is None:\n",
    "        return 'none_listed'\n",
    "    return 'no' if 'none' in match.group().strip() else 'yes'\n",
    "\n",
    "# Apply get_priors to every cleaned summary and attach the labels as a column.\n",
    "priors = list(map(get_priors, df['clean'].values))\n",
    "df['prior_record'] = priors\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_edu(summary):\n",
    "    \"\"\"Extract the years of education reported in a cleaned summary.\n",
    "\n",
    "    Takes the text from just after the word 'education' through the first\n",
    "    following 'years', 'yrs', 'ged' or 'prior', and reads the first run of\n",
    "    digits found in it.\n",
    "\n",
    "    Args:\n",
    "        summary: Cleaned summary text to search.\n",
    "\n",
    "    Returns:\n",
    "        '<digits> years' when a number is found; otherwise 'none_listed'\n",
    "        (non-string input, no 'education' span, or no digits in the span).\n",
    "    \"\"\"\n",
    "    # Non-string cells (e.g. missing values) cannot be searched.\n",
    "    if not isinstance(summary, str):\n",
    "        return 'none_listed'\n",
    "    span = re.search(r'(?<=education)(.*?)(years|yrs|ged|prior)', summary)\n",
    "    if span is None:\n",
    "        return 'none_listed'\n",
    "    # The span may end at 'ged' or 'prior' without containing any number.\n",
    "    digits = re.search(r'\\d+', span.group().strip())\n",
    "    if digits is None:\n",
    "        return 'none_listed'\n",
    "    return digits.group() + ' years'\n",
    "\n",
    "# Apply get_edu to every cleaned summary, attach the column, then write the frame to CSV.\n",
    "edu = list(map(get_edu, df['clean'].values))\n",
    "df['education_level'] = edu\n",
    "df.to_csv('V8_fromphotos.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "1\n",
      "1\n",
      "3\n",
      "1\n",
      "0\n",
      "3\n",
      "0\n",
      "3\n",
      "3\n",
      "1\n",
      "2\n",
      "4\n",
      "4\n",
      "1\n",
      "2\n",
      "2\n",
      "2\n",
      "1\n",
      "2\n",
      "0\n",
      "1\n",
      "0\n",
      "2\n",
      "1\n",
      "1\n",
      "0\n",
      "2\n",
      "2\n",
      "2\n",
      "1\n",
      "3\n",
      "0\n",
      "1\n",
      "3\n",
      "0\n",
      "2\n",
      "0\n",
      "3\n",
      "3\n",
      "2\n",
      "1\n",
      "2\n",
      "2\n",
      "1\n",
      "0\n",
      "0\n",
      "1\n",
      "6\n",
      "2\n",
      "1\n",
      "2\n",
      "8\n",
      "3\n",
      "0\n",
      "0\n",
      "3\n",
      "1\n",
      "4\n",
      "1\n",
      "3\n",
      "2\n",
      "3\n",
      "6\n",
      "4\n",
      "0\n",
      "0\n",
      "1\n",
      "0\n",
      "0\n",
      "3\n",
      "1\n",
      "0\n",
      "1\n",
      "7\n",
      "3\n",
      "0\n",
      "0\n",
      "1\n",
      "3\n",
      "0\n",
      "2\n",
      "3\n",
      "0\n",
      "1\n",
      "3\n",
      "0\n",
      "1\n",
      "1\n",
      "3\n",
      "4\n",
      "8\n",
      "1\n",
      "0\n",
      "4\n",
      "4\n",
      "0\n",
      "0\n",
      "2\n",
      "2\n",
      "0\n",
      "0\n",
      "1\n",
      "0\n",
      "5\n",
      "1\n",
      "2\n",
      "1\n",
      "2\n",
      "2\n",
      "0\n",
      "2\n",
      "1\n",
      "1\n",
      "0\n",
      "2\n",
      "0\n",
      "1\n",
      "1\n",
      "0\n",
      "8\n",
      "5\n",
      "2\n",
      "2\n",
      "2\n",
      "3\n",
      "4\n",
      "1\n",
      "6\n",
      "1\n",
      "1\n",
      "1\n",
      "0\n",
      "2\n",
      "0\n",
      "4\n",
      "3\n",
      "1\n",
      "2\n",
      "0\n",
      "3\n",
      "0\n",
      "1\n",
      "5\n",
      "1\n",
      "1\n",
      "2\n",
      "2\n",
      "4\n",
      "0\n",
      "1\n",
      "2\n",
      "1\n",
      "1\n",
      "2\n",
      "3\n",
      "0\n",
      "3\n",
      "1\n",
      "0\n",
      "0\n",
      "2\n",
      "1\n",
      "1\n",
      "1\n",
      "0\n",
      "1\n",
      "1\n",
      "4\n",
      "1\n",
      "5\n",
      "1\n",
      "7\n",
      "6\n",
      "2\n",
      "1\n",
      "1\n",
      "1\n",
      "4\n",
      "0\n",
      "1\n",
      "1\n",
      "2\n",
      "0\n",
      "0\n",
      "3\n",
      "0\n",
      "5\n",
      "1\n",
      "2\n",
      "0\n",
      "2\n",
      "2\n",
      "0\n",
      "2\n",
      "3\n",
      "0\n",
      "2\n",
      "5\n",
      "1\n",
      "1\n",
      "1\n",
      "4\n",
      "0\n",
      "1\n",
      "2\n",
      "1\n",
      "1\n",
      "1\n",
      "0\n",
      "6\n",
      "5\n",
      "0\n",
      "1\n",
      "2\n",
      "1\n",
      "0\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "3\n",
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "0\n",
      "2\n",
      "2\n",
      "3\n",
      "0\n",
      "2\n",
      "5\n",
      "2\n",
      "2\n",
      "0\n",
      "2\n",
      "1\n",
      "1\n",
      "1\n",
      "2\n",
      "2\n",
      "3\n",
      "2\n",
      "3\n",
      "1\n",
      "3\n",
      "1\n",
      "2\n",
      "1\n",
      "1\n",
      "2\n",
      "7\n",
      "4\n",
      "0\n",
      "5\n",
      "1\n",
      "1\n",
      "2\n",
      "0\n",
      "1\n",
      "0\n",
      "10\n",
      "3\n",
      "2\n",
      "1\n",
      "2\n",
      "0\n",
      "1\n",
      "0\n",
      "1\n",
      "3\n",
      "0\n",
      "3\n",
      "3\n",
      "2\n",
      "1\n",
      "2\n",
      "3\n",
      "1\n",
      "0\n",
      "2\n",
      "3\n",
      "2\n",
      "1\n",
      "3\n",
      "3\n",
      "0\n",
      "2\n",
      "0\n",
      "2\n",
      "3\n",
      "0\n",
      "0\n",
      "0\n",
      "0\n",
      "1\n",
      "1\n",
      "0\n",
      "1\n",
      "2\n",
      "1\n",
      "1\n",
      "2\n",
      "3\n",
      "3\n",
      "0\n",
      "6\n",
      "2\n",
      "4\n",
      "2\n",
      "6\n",
      "2\n",
      "0\n",
      "1\n",
      "2\n",
      "2\n",
      "3\n",
      "3\n",
      "0\n",
      "3\n",
      "1\n",
      "1\n",
      "2\n",
      "1\n",
      "3\n",
      "4\n",
      "2\n",
      "1\n",
      "1\n",
      "3\n",
      "9\n",
      "3\n",
      "1\n",
      "1\n",
      "0\n",
      "1\n",
      "0\n",
      "0\n",
      "1\n",
      "2\n",
      "2\n",
      "0\n",
      "2\n",
      "2\n",
      "0\n",
      "2\n",
      "0\n",
      "1\n",
      "0\n",
      "1\n",
      "2\n",
      "1\n",
      "0\n",
      "1\n",
      "0\n",
      "8\n",
      "3\n",
      "1\n",
      "6\n",
      "2\n",
      "4\n",
      "2\n",
      "0\n",
      "2\n",
      "1\n",
      "2\n",
      "0\n",
      "0\n",
      "3\n",
      "2\n",
      "0\n",
      "0\n",
      "2\n",
      "0\n",
      "0\n",
      "1\n",
      "1\n",
      "2\n"
     ]
    }
   ],
   "source": [
    "def get_vics(summary):\n",
    "    \"\"\"Count occurrences of 'male', 'men' or 'man' in a summary.\n",
    "\n",
    "    The count is printed and also returned, so callers receive a value\n",
    "    rather than ``None``.\n",
    "\n",
    "    NOTE(review): the pattern has no word boundaries, so it also matches\n",
    "    these letters inside longer words (e.g. 'female', 'woman') -- confirm\n",
    "    whether that is intended.\n",
    "\n",
    "    Args:\n",
    "        summary: Cleaned summary text to scan.\n",
    "\n",
    "    Returns:\n",
    "        The number of matches, or None (after printing 'nope') when\n",
    "        ``summary`` is not text the pattern can scan.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        vics = re.findall(r'(male|men|man)', summary)\n",
    "    except TypeError:\n",
    "        # Non-text input (e.g. a missing value) cannot be scanned.\n",
    "        print('nope')\n",
    "        return None\n",
    "    print(len(vics))\n",
    "    return len(vics)\n",
    "        \n",
    "    \n",
    "vic_f = list(map(get_vics, df['clean'].values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_vics(summary):\n",
    "    \"\"\"Pull the victim description that follows 'race of victim s'.\n",
    "\n",
    "    Captures the text from just after 'race of victim s' through the first\n",
    "    'male' or 'female', strips it, and splits it on single spaces.\n",
    "\n",
    "    Args:\n",
    "        summary: Cleaned summary text to search.\n",
    "\n",
    "    Returns:\n",
    "        The list of space-separated tokens when there are at most three,\n",
    "        the string 'error' when there are more than three, and\n",
    "        ['none_listed'] when ``summary`` is not a string or the pattern is\n",
    "        not found.\n",
    "    \"\"\"\n",
    "    # Non-string cells (e.g. missing values) cannot be searched.\n",
    "    if not isinstance(summary, str):\n",
    "        return ['none_listed']\n",
    "    match = re.search(r'(?<=race of victim s)(.*?)(male|female)', summary)\n",
    "    if match is None:\n",
    "        return ['none_listed']\n",
    "    tokens = match.group().strip().split(' ')\n",
    "    # More than three tokens is reported as 'error' (a plain string, not a\n",
    "    # list) -- kept as-is so existing consumers of the column see no change.\n",
    "    return 'error' if len(tokens) > 3 else tokens\n",
    "\n",
    "# Parse the victim details of every cleaned summary and store them on the frame.\n",
    "vic_deets = list(map(get_vics, df['clean'].values))\n",
    "df['vic_deets'] = vic_deets\n",
    "\n",
    "# Entries of length three contribute their leading element; all others 'no'.\n",
    "multiple_vics = ['no' if len(tokens) != 3 else tokens[0] for tokens in df['vic_deets'].values]\n",
    "# 'yes'/'no' flags for whether 'female' / 'male' appears in each parsed entry.\n",
    "female_vics = ['yes' if 'female' in tokens else 'no' for tokens in df['vic_deets'].values]\n",
    "male_vics = ['yes' if 'male' in tokens else 'no' for tokens in df['vic_deets'].values]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>clean</th>\n",
       "      <th>occupation</th>\n",
       "      <th>prior_record</th>\n",
       "      <th>education_level</th>\n",
       "      <th>vic_deets</th>\n",
       "      <th>multiple_vics</th>\n",
       "      <th>vic_female</th>\n",
       "      <th>vic_male</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>name dohnny anderson pop 12 28 59 received cou...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>6 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>name bettyloubeets rio dob 03 12 37 received 1...</td>\n",
       "      <td>cashier</td>\n",
       "      <td>no</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>name daniel joe hittle d r 981 v pop 3 1 50 re...</td>\n",
       "      <td>welder</td>\n",
       "      <td>yes</td>\n",
       "      <td>14 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>name john thomas satterwhite d r 651 dob 12 29...</td>\n",
       "      <td>mechanic</td>\n",
       "      <td>yes</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>name john chavez d r 999186 dob 04 27 68 recei...</td>\n",
       "      <td>painter</td>\n",
       "      <td>yes</td>\n",
       "      <td>8 years</td>\n",
       "      <td>[hispanic, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>name gerald lee mitchell d r 838 dob 12 27 67 ...</td>\n",
       "      <td>carpenter</td>\n",
       "      <td>yes</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>charles francis rumbaugh execution 555 date se...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>claude howard jones 980 name d r pop 9 24 40 r...</td>\n",
       "      <td>electrician</td>\n",
       "      <td>yes</td>\n",
       "      <td>9 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>name roland ruiz jr d r 999145 dob 07 04 72 re...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>yes</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[hispanic, female]</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>david oliver cruz d r ao name dob 5 18 67 rece...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>no</td>\n",
       "      <td>7 years</td>\n",
       "      <td>[white, female]</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name  \\\n",
       "0       Anderson     Johnny   \n",
       "1          Beets      Betty   \n",
       "2         Hittle     Daniel   \n",
       "3    Satterwhite       John   \n",
       "4         Chavez       John   \n",
       "..           ...        ...   \n",
       "375     Mitchell     Gerald   \n",
       "376     Rumbaugh    Charles   \n",
       "377        Jones     Claude   \n",
       "378         Ruiz    Rolando   \n",
       "379         Cruz     Oliver   \n",
       "\n",
       "                                                 clean   occupation  \\\n",
       "0    name dohnny anderson pop 12 28 59 received cou...  none_listed   \n",
       "1    name bettyloubeets rio dob 03 12 37 received 1...      cashier   \n",
       "2    name daniel joe hittle d r 981 v pop 3 1 50 re...       welder   \n",
       "3    name john thomas satterwhite d r 651 dob 12 29...     mechanic   \n",
       "4    name john chavez d r 999186 dob 04 27 68 recei...      painter   \n",
       "..                                                 ...          ...   \n",
       "375  name gerald lee mitchell d r 838 dob 12 27 67 ...    carpenter   \n",
       "376  charles francis rumbaugh execution 555 date se...  none_listed   \n",
       "377  claude howard jones 980 name d r pop 9 24 40 r...  electrician   \n",
       "378  name roland ruiz jr d r 999145 dob 07 04 72 re...      laborer   \n",
       "379  david oliver cruz d r ao name dob 5 18 67 rece...      laborer   \n",
       "\n",
       "    prior_record education_level           vic_deets multiple_vics vic_female  \\\n",
       "0             no         6 years       [white, male]            no         no   \n",
       "1             no        10 years       [white, male]            no         no   \n",
       "2            yes        14 years       [white, male]            no         no   \n",
       "3            yes     none_listed       [none_listed]            no         no   \n",
       "4            yes         8 years    [hispanic, male]            no         no   \n",
       "..           ...             ...                 ...           ...        ...   \n",
       "375          yes        10 years       [none_listed]            no         no   \n",
       "376  none_listed     none_listed       [none_listed]            no         no   \n",
       "377          yes         9 years       [white, male]            no         no   \n",
       "378          yes        10 years  [hispanic, female]            no        yes   \n",
       "379           no         7 years     [white, female]            no        yes   \n",
       "\n",
       "    vic_male  \n",
       "0        yes  \n",
       "1        yes  \n",
       "2        yes  \n",
       "3         no  \n",
       "4        yes  \n",
       "..       ...  \n",
       "375       no  \n",
       "376       no  \n",
       "377      yes  \n",
       "378       no  \n",
       "379       no  \n",
       "\n",
       "[380 rows x 13 columns]"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach the victim flag lists (multiple_vics, female_vics, male_vics; defined in\n",
    "# earlier cells) as new columns, then display the frame to check the result.\n",
    "df['multiple_vics'] = multiple_vics\n",
    "df['vic_female'] = female_vics\n",
    "df['vic_male'] = male_vics\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick element 1 of a three-element vic_deets entry, otherwise element 0.\n",
    "# NOTE(review): the labels are not normalised here -- the displayed result includes\n",
    "# OCR variants such as 'hite', 'plack' and 'biack' and non-race tokens such as\n",
    "# 'male', 'e' and '1'.\n",
    "race_vics = [deets[1 if len(deets) == 3 else 0] for deets in df['vic_deets'].values]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'male',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'black',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hite',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'plack',\n",
       " 'waite',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'e',\n",
       " 'e',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'e',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'e',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'black',\n",
       " 'white',\n",
       " 'asian',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'biack',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'white',\n",
       " '1',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'black',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'e',\n",
       " 'hite',\n",
       " 'e',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'black',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'asian',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'hite',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'white',\n",
       " 'hispanic',\n",
       " 'white']"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Echo the extracted victim-race labels so they can be inspected.\n",
    "race_vics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the extracted victim-race labels as the race_vic column.\n",
    "df['race_vic'] = race_vics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>clean</th>\n",
       "      <th>occupation</th>\n",
       "      <th>prior_record</th>\n",
       "      <th>education_level</th>\n",
       "      <th>vic_deets</th>\n",
       "      <th>multiple_vics</th>\n",
       "      <th>vic_female</th>\n",
       "      <th>vic_male</th>\n",
       "      <th>race_vic</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>name dohnny anderson pop 12 28 59 received cou...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>6 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>name bettyloubeets rio dob 03 12 37 received 1...</td>\n",
       "      <td>cashier</td>\n",
       "      <td>no</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>name daniel joe hittle d r 981 v pop 3 1 50 re...</td>\n",
       "      <td>welder</td>\n",
       "      <td>yes</td>\n",
       "      <td>14 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>name john thomas satterwhite d r 651 dob 12 29...</td>\n",
       "      <td>mechanic</td>\n",
       "      <td>yes</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>name john chavez d r 999186 dob 04 27 68 recei...</td>\n",
       "      <td>painter</td>\n",
       "      <td>yes</td>\n",
       "      <td>8 years</td>\n",
       "      <td>[hispanic, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>hispanic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>name gerald lee mitchell d r 838 dob 12 27 67 ...</td>\n",
       "      <td>carpenter</td>\n",
       "      <td>yes</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>charles francis rumbaugh execution 555 date se...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>claude howard jones 980 name d r pop 9 24 40 r...</td>\n",
       "      <td>electrician</td>\n",
       "      <td>yes</td>\n",
       "      <td>9 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>name roland ruiz jr d r 999145 dob 07 04 72 re...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>yes</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[hispanic, female]</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>hispanic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>david oliver cruz d r ao name dob 5 18 67 rece...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>no</td>\n",
       "      <td>7 years</td>\n",
       "      <td>[white, female]</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>white</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name  \\\n",
       "0       Anderson     Johnny   \n",
       "1          Beets      Betty   \n",
       "2         Hittle     Daniel   \n",
       "3    Satterwhite       John   \n",
       "4         Chavez       John   \n",
       "..           ...        ...   \n",
       "375     Mitchell     Gerald   \n",
       "376     Rumbaugh    Charles   \n",
       "377        Jones     Claude   \n",
       "378         Ruiz    Rolando   \n",
       "379         Cruz     Oliver   \n",
       "\n",
       "                                                 clean   occupation  \\\n",
       "0    name dohnny anderson pop 12 28 59 received cou...  none_listed   \n",
       "1    name bettyloubeets rio dob 03 12 37 received 1...      cashier   \n",
       "2    name daniel joe hittle d r 981 v pop 3 1 50 re...       welder   \n",
       "3    name john thomas satterwhite d r 651 dob 12 29...     mechanic   \n",
       "4    name john chavez d r 999186 dob 04 27 68 recei...      painter   \n",
       "..                                                 ...          ...   \n",
       "375  name gerald lee mitchell d r 838 dob 12 27 67 ...    carpenter   \n",
       "376  charles francis rumbaugh execution 555 date se...  none_listed   \n",
       "377  claude howard jones 980 name d r pop 9 24 40 r...  electrician   \n",
       "378  name roland ruiz jr d r 999145 dob 07 04 72 re...      laborer   \n",
       "379  david oliver cruz d r ao name dob 5 18 67 rece...      laborer   \n",
       "\n",
       "    prior_record education_level           vic_deets multiple_vics vic_female  \\\n",
       "0             no         6 years       [white, male]            no         no   \n",
       "1             no        10 years       [white, male]            no         no   \n",
       "2            yes        14 years       [white, male]            no         no   \n",
       "3            yes     none_listed       [none_listed]            no         no   \n",
       "4            yes         8 years    [hispanic, male]            no         no   \n",
       "..           ...             ...                 ...           ...        ...   \n",
       "375          yes        10 years       [none_listed]            no         no   \n",
       "376  none_listed     none_listed       [none_listed]            no         no   \n",
       "377          yes         9 years       [white, male]            no         no   \n",
       "378          yes        10 years  [hispanic, female]            no        yes   \n",
       "379           no         7 years     [white, female]            no        yes   \n",
       "\n",
       "    vic_male     race_vic  \n",
       "0        yes        white  \n",
       "1        yes        white  \n",
       "2        yes        white  \n",
       "3         no  none_listed  \n",
       "4        yes     hispanic  \n",
       "..       ...          ...  \n",
       "375       no  none_listed  \n",
       "376       no  none_listed  \n",
       "377      yes        white  \n",
       "378       no     hispanic  \n",
       "379       no        white  \n",
       "\n",
       "[380 rows x 14 columns]"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the frame; race_vic now appears as the 14th column.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_age_crime(summary):\n",
    "    \"\"\"Extract the age at the time of the offense from a cleaned summary.\n",
    "\n",
    "    Searches for the phrase 'age at time of offense ' and returns the token that\n",
    "    follows it, provided the token starts with a digit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    summary : str\n",
    "        Cleaned summary text, e.g. one entry of df['clean'].\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The matched token (for example '21'), or 'none_listed' when summary is\n",
    "        not a string, the phrase is not found, or the token has fewer than two\n",
    "        characters.\n",
    "    \"\"\"\n",
    "    # Non-string entries (e.g. NaN) cannot be searched; report them as missing\n",
    "    # explicitly instead of relying on a bare except to swallow the TypeError.\n",
    "    if not isinstance(summary, str):\n",
    "        return 'none_listed'\n",
    "\n",
    "    # The token only matches when a non-word character follows it.\n",
    "    match = re.search(r'(?<=age at time of offense )(\\d.*?)\\W', summary)\n",
    "    if match is None:\n",
    "        return 'none_listed'\n",
    "\n",
    "    # group(1) excludes the terminating non-word character, so punctuation such\n",
    "    # as a trailing comma never ends up in the returned age.\n",
    "    result = match.group(1)\n",
    "    return result if len(result) >= 2 else 'none_listed'\n",
    "\n",
    "# Run the extractor over every cleaned summary, keeping row order.\n",
    "age_crime = list(map(get_age_crime, df['clean'].values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['21',\n",
       " '46',\n",
       " '39',\n",
       " '32',\n",
       " '27',\n",
       " '30',\n",
       " '18',\n",
       " '19',\n",
       " '23',\n",
       " '22',\n",
       " '23',\n",
       " '27',\n",
       " '27',\n",
       " '22',\n",
       " '22',\n",
       " '43',\n",
       " '18',\n",
       " '19',\n",
       " '20',\n",
       " '21',\n",
       " '23',\n",
       " '26',\n",
       " '17',\n",
       " '34',\n",
       " '28',\n",
       " '25',\n",
       " 'none_listed',\n",
       " '18',\n",
       " '20',\n",
       " 'none_listed',\n",
       " '22',\n",
       " '30',\n",
       " '37',\n",
       " '27',\n",
       " '29',\n",
       " '33',\n",
       " '31',\n",
       " 'none_listed',\n",
       " '35',\n",
       " '28',\n",
       " 'none_listed',\n",
       " '24',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '17',\n",
       " '26',\n",
       " '39',\n",
       " '39',\n",
       " 'none_listed',\n",
       " '38',\n",
       " '34',\n",
       " 'none_listed',\n",
       " '26',\n",
       " '21',\n",
       " '20',\n",
       " '51',\n",
       " '43',\n",
       " '27',\n",
       " '25',\n",
       " '18',\n",
       " '24',\n",
       " 'none_listed',\n",
       " '24',\n",
       " 'none_listed',\n",
       " '30',\n",
       " '33',\n",
       " '53',\n",
       " '42',\n",
       " '35',\n",
       " '27',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '39',\n",
       " 'none_listed',\n",
       " '29',\n",
       " '32',\n",
       " '56',\n",
       " 'none_listed',\n",
       " '20',\n",
       " 'none_listed',\n",
       " '32',\n",
       " '22',\n",
       " '24',\n",
       " '33',\n",
       " '23',\n",
       " '26',\n",
       " '32',\n",
       " '29',\n",
       " '26',\n",
       " '35',\n",
       " '21',\n",
       " '29',\n",
       " '28',\n",
       " 'none_listed',\n",
       " '24',\n",
       " 'none_listed',\n",
       " '24',\n",
       " '26',\n",
       " 'none_listed',\n",
       " '20',\n",
       " '17',\n",
       " '20',\n",
       " '27',\n",
       " '19',\n",
       " '27',\n",
       " '29',\n",
       " '19',\n",
       " 'none_listed',\n",
       " '26',\n",
       " '33',\n",
       " '31',\n",
       " '21',\n",
       " '21',\n",
       " 'none_listed',\n",
       " '28',\n",
       " '28',\n",
       " '38',\n",
       " '34',\n",
       " '23',\n",
       " '20',\n",
       " 'none_listed',\n",
       " '38',\n",
       " '24',\n",
       " '25',\n",
       " '18',\n",
       " '30',\n",
       " '26',\n",
       " 'none_listed',\n",
       " '19',\n",
       " '18',\n",
       " '33',\n",
       " '39',\n",
       " '30',\n",
       " '23',\n",
       " 'none_listed',\n",
       " '31',\n",
       " '18',\n",
       " '21',\n",
       " '27',\n",
       " '36',\n",
       " '23',\n",
       " '18',\n",
       " '25',\n",
       " '22',\n",
       " '23',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '39',\n",
       " '29',\n",
       " '44',\n",
       " '19',\n",
       " '20',\n",
       " '21',\n",
       " '45',\n",
       " '29',\n",
       " '27',\n",
       " 'none_listed',\n",
       " '19',\n",
       " '29',\n",
       " '21',\n",
       " '45',\n",
       " '24',\n",
       " '25',\n",
       " '19',\n",
       " '30',\n",
       " '17',\n",
       " '44',\n",
       " '18',\n",
       " '18',\n",
       " '33',\n",
       " '21',\n",
       " '32',\n",
       " '19',\n",
       " '30',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '44',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '27',\n",
       " '34',\n",
       " '32',\n",
       " '24',\n",
       " '29',\n",
       " '25',\n",
       " '19',\n",
       " '22',\n",
       " '27',\n",
       " '24',\n",
       " 'none_listed',\n",
       " '31',\n",
       " '19',\n",
       " '40',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '33',\n",
       " '18',\n",
       " '32',\n",
       " '19',\n",
       " '24',\n",
       " '26',\n",
       " '22',\n",
       " '31',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '31',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '33',\n",
       " '24',\n",
       " '44',\n",
       " '17',\n",
       " 'none_listed',\n",
       " '20',\n",
       " 'none_listed',\n",
       " '19',\n",
       " '19',\n",
       " '29',\n",
       " 'none_listed',\n",
       " '44',\n",
       " '37',\n",
       " '26',\n",
       " '19',\n",
       " '23',\n",
       " '19',\n",
       " '19',\n",
       " '28',\n",
       " '20',\n",
       " '23',\n",
       " '19',\n",
       " '38',\n",
       " '24',\n",
       " '20',\n",
       " '26',\n",
       " '18',\n",
       " '23',\n",
       " '19',\n",
       " '37',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '31',\n",
       " '26',\n",
       " 'none_listed',\n",
       " '21',\n",
       " '23',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '28',\n",
       " '24',\n",
       " '18',\n",
       " '20',\n",
       " '20',\n",
       " '19',\n",
       " '25',\n",
       " '22',\n",
       " '20',\n",
       " '34',\n",
       " 'none_listed',\n",
       " '19',\n",
       " '21',\n",
       " '22',\n",
       " '37',\n",
       " '45',\n",
       " '28',\n",
       " '37',\n",
       " '30',\n",
       " '24',\n",
       " '22',\n",
       " '22',\n",
       " '37',\n",
       " '26',\n",
       " '18',\n",
       " '17',\n",
       " '26',\n",
       " '40',\n",
       " '37',\n",
       " '22',\n",
       " '24',\n",
       " '36',\n",
       " '36',\n",
       " '23',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '31',\n",
       " '26',\n",
       " '31',\n",
       " '24',\n",
       " '20',\n",
       " '32',\n",
       " '32',\n",
       " 'none_listed',\n",
       " '28',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '30',\n",
       " '29',\n",
       " '28',\n",
       " '31',\n",
       " 'none_listed',\n",
       " '23',\n",
       " '20',\n",
       " 'none_listed',\n",
       " '29',\n",
       " '19',\n",
       " '17',\n",
       " '52',\n",
       " '27',\n",
       " '927',\n",
       " '35',\n",
       " '35',\n",
       " '27',\n",
       " 'none_listed',\n",
       " '20',\n",
       " '25',\n",
       " '22',\n",
       " '33',\n",
       " '30',\n",
       " '22',\n",
       " '24',\n",
       " '34',\n",
       " '23',\n",
       " '21',\n",
       " '26',\n",
       " 'none_listed',\n",
       " '19',\n",
       " '34',\n",
       " '22',\n",
       " 'none_listed',\n",
       " '23',\n",
       " '35',\n",
       " '47',\n",
       " '19',\n",
       " '28',\n",
       " '19',\n",
       " 'none_listed',\n",
       " '19',\n",
       " '30',\n",
       " '30',\n",
       " 'none_listed',\n",
       " '23',\n",
       " '26',\n",
       " '48',\n",
       " '50',\n",
       " '18',\n",
       " '28',\n",
       " '31',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '20',\n",
       " '25',\n",
       " 'none_listed',\n",
       " '44',\n",
       " '32',\n",
       " '35',\n",
       " '54',\n",
       " '23',\n",
       " '19',\n",
       " '19',\n",
       " '24',\n",
       " '31',\n",
       " '33',\n",
       " 'none_listed',\n",
       " '34',\n",
       " '19',\n",
       " '25',\n",
       " '36',\n",
       " '20',\n",
       " 'none_listed',\n",
       " '28',\n",
       " '20',\n",
       " '24',\n",
       " '50',\n",
       " 'none_listed',\n",
       " 'none_listed',\n",
       " '49',\n",
       " '20',\n",
       " '21']"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "age_crime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>0</th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>clean</th>\n",
       "      <th>occupation</th>\n",
       "      <th>prior_record</th>\n",
       "      <th>education_level</th>\n",
       "      <th>vic_deets</th>\n",
       "      <th>multiple_vics</th>\n",
       "      <th>vic_female</th>\n",
       "      <th>vic_male</th>\n",
       "      <th>race_vic</th>\n",
       "      <th>age_crime</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>img_text_732_Johnny_Anderson.txt</td>\n",
       "      <td>Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>name dohnny anderson pop 12 28 59 received cou...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>6 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>img_text_810_Betty_Beets.txt</td>\n",
       "      <td>Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>name bettyloubeets rio dob 03 12 37 received 1...</td>\n",
       "      <td>cashier</td>\n",
       "      <td>no</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "      <td>46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>img_text_981_Daniel_Hittle.txt</td>\n",
       "      <td>Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>name daniel joe hittle d r 981 v pop 3 1 50 re...</td>\n",
       "      <td>welder</td>\n",
       "      <td>yes</td>\n",
       "      <td>14 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>img_text_651_John_Satterwhite.txt</td>\n",
       "      <td>Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>name john thomas satterwhite d r 651 dob 12 29...</td>\n",
       "      <td>mechanic</td>\n",
       "      <td>yes</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>img_text_999186_John_Chavez.txt</td>\n",
       "      <td>Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>name john chavez d r 999186 dob 04 27 68 recei...</td>\n",
       "      <td>painter</td>\n",
       "      <td>yes</td>\n",
       "      <td>8 years</td>\n",
       "      <td>[hispanic, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>hispanic</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>img_text_838_Gerald_Mitchell.txt</td>\n",
       "      <td>Name: Gerald Lee Mitchell ____._.___ D.R.#838_...</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>name gerald lee mitchell d r 838 dob 12 27 67 ...</td>\n",
       "      <td>carpenter</td>\n",
       "      <td>yes</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>img_text_555_Charles_Rumbaugh.txt</td>\n",
       "      <td>CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>charles francis rumbaugh execution 555 date se...</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>[none_listed]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>img_text_980_Claude_Jones.txt</td>\n",
       "      <td>Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>claude howard jones 980 name d r pop 9 24 40 r...</td>\n",
       "      <td>electrician</td>\n",
       "      <td>yes</td>\n",
       "      <td>9 years</td>\n",
       "      <td>[white, male]</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>white</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>img_text_999145_Rolando_Ruiz.txt</td>\n",
       "      <td>Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>name roland ruiz jr d r 999145 dob 07 04 72 re...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>yes</td>\n",
       "      <td>10 years</td>\n",
       "      <td>[hispanic, female]</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>hispanic</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>img_text_954_Oliver_Cruz.txt</td>\n",
       "      <td>David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>david oliver cruz d r ao name dob 5 18 67 rece...</td>\n",
       "      <td>laborer</td>\n",
       "      <td>no</td>\n",
       "      <td>7 years</td>\n",
       "      <td>[white, female]</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>white</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 index  \\\n",
       "0     img_text_732_Johnny_Anderson.txt   \n",
       "1         img_text_810_Betty_Beets.txt   \n",
       "2       img_text_981_Daniel_Hittle.txt   \n",
       "3    img_text_651_John_Satterwhite.txt   \n",
       "4      img_text_999186_John_Chavez.txt   \n",
       "..                                 ...   \n",
       "375   img_text_838_Gerald_Mitchell.txt   \n",
       "376  img_text_555_Charles_Rumbaugh.txt   \n",
       "377      img_text_980_Claude_Jones.txt   \n",
       "378   img_text_999145_Rolando_Ruiz.txt   \n",
       "379       img_text_954_Oliver_Cruz.txt   \n",
       "\n",
       "                                                     0 inmate_number  \\\n",
       "0    Name: dohnny Anderson\\n\\npop: 12/_ 28 /_59 Rec...           732   \n",
       "1    Name: BettyLouBeets RIO\\nDOB:03/12/37_ Receive...           810   \n",
       "2    Name: Daniel Joe Hittle D.R. # 981\\n\\n \\n\\n \\n...           981   \n",
       "3    Name: John Thomas Satterwhite D.R.# 651\\n\\nDOB...           651   \n",
       "4    Name: John Chavez D.R.# 999186\\nDOB: _ 04/27/6...        999186   \n",
       "..                                                 ...           ...   \n",
       "375  Name: Gerald Lee Mitchell ____._.___ D.R.#838_...           838   \n",
       "376  CHARLES FRANCIS RUMBAUGH\\n\\nEXECUTION #555 Dat...           555   \n",
       "377  Claude Howard Jones 980\\n\\n \\n\\n \\n\\n \\n\\nName...           980   \n",
       "378  Name: Roland Ruiz, Jr. D.R. #999145\\n\\n@ DOB: ...        999145   \n",
       "379  David Oliver Cruz\\nD.R.# ao\\n\\n \\n\\n \\n\\n \\n\\n...           954   \n",
       "\n",
       "       last_name first_name  \\\n",
       "0       Anderson     Johnny   \n",
       "1          Beets      Betty   \n",
       "2         Hittle     Daniel   \n",
       "3    Satterwhite       John   \n",
       "4         Chavez       John   \n",
       "..           ...        ...   \n",
       "375     Mitchell     Gerald   \n",
       "376     Rumbaugh    Charles   \n",
       "377        Jones     Claude   \n",
       "378         Ruiz    Rolando   \n",
       "379         Cruz     Oliver   \n",
       "\n",
       "                                                 clean   occupation  \\\n",
       "0    name dohnny anderson pop 12 28 59 received cou...  none_listed   \n",
       "1    name bettyloubeets rio dob 03 12 37 received 1...      cashier   \n",
       "2    name daniel joe hittle d r 981 v pop 3 1 50 re...       welder   \n",
       "3    name john thomas satterwhite d r 651 dob 12 29...     mechanic   \n",
       "4    name john chavez d r 999186 dob 04 27 68 recei...      painter   \n",
       "..                                                 ...          ...   \n",
       "375  name gerald lee mitchell d r 838 dob 12 27 67 ...    carpenter   \n",
       "376  charles francis rumbaugh execution 555 date se...  none_listed   \n",
       "377  claude howard jones 980 name d r pop 9 24 40 r...  electrician   \n",
       "378  name roland ruiz jr d r 999145 dob 07 04 72 re...      laborer   \n",
       "379  david oliver cruz d r ao name dob 5 18 67 rece...      laborer   \n",
       "\n",
       "    prior_record education_level           vic_deets multiple_vics vic_female  \\\n",
       "0             no         6 years       [white, male]            no         no   \n",
       "1             no        10 years       [white, male]            no         no   \n",
       "2            yes        14 years       [white, male]            no         no   \n",
       "3            yes     none_listed       [none_listed]            no         no   \n",
       "4            yes         8 years    [hispanic, male]            no         no   \n",
       "..           ...             ...                 ...           ...        ...   \n",
       "375          yes        10 years       [none_listed]            no         no   \n",
       "376  none_listed     none_listed       [none_listed]            no         no   \n",
       "377          yes         9 years       [white, male]            no         no   \n",
       "378          yes        10 years  [hispanic, female]            no        yes   \n",
       "379           no         7 years     [white, female]            no        yes   \n",
       "\n",
       "    vic_male     race_vic    age_crime  \n",
       "0        yes        white           21  \n",
       "1        yes        white           46  \n",
       "2        yes        white           39  \n",
       "3         no  none_listed           32  \n",
       "4        yes     hispanic           27  \n",
       "..       ...          ...          ...  \n",
       "375       no  none_listed  none_listed  \n",
       "376       no  none_listed  none_listed  \n",
       "377      yes        white           49  \n",
       "378       no     hispanic           20  \n",
       "379       no        white           21  \n",
       "\n",
       "[380 rows x 15 columns]"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['age_crime'] = age_crime\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_weapon(summary):\n",
    "    try:\n",
    "        if 'knife' in summary:\n",
    "            return 'knife'\n",
    "#             weapon = 'knife'\n",
    "        elif 'gun' in summary:\n",
    "            return 'gun'\n",
    "#             weapon = 'gun'\n",
    "        elif 'cord ' in summary:\n",
    "#             print(summary.split('cord')[1])\n",
    "            return 'cord'\n",
    "        elif 'blunt object':\n",
    "            return 'blunt object'\n",
    "        else:\n",
    "            return 'other'\n",
    "    except:\n",
    "        return 'none_listed'\n",
    "\n",
    "# Text after the first 'summary' label; 'nope' marks records that have none.\n",
    "# maxsplit=1 keeps the whole remainder even if the word 'summary' appears again.\n",
    "df['clean_summary'] = [text.split('summary', 1)[1] if 'summary' in text else 'nope' for text in df['clean'].values]\n",
    "# A record without summary text carries no weapon information, so label it\n",
    "# 'none_listed' instead of running keyword matching on the 'nope' sentinel.\n",
    "weapon = ['none_listed' if summary == 'nope' else get_weapon(summary) for summary in df['clean_summary'].values]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "97"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df[df['clean_summary'] == 'nope'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'cord',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'cord',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'knife',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'knife',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'cord',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'cord',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'cord',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'cord',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'knife',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'gun',\n",
       " 'blunt object',\n",
       " 'blunt object',\n",
       " 'blunt object']"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "weapon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['weapon'] = weapon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([          'index',                 0,   'inmate_number',\n",
       "             'last_name',      'first_name',           'clean',\n",
       "            'occupation',    'prior_record', 'education_level',\n",
       "             'vic_deets',   'multiple_vics',      'vic_female',\n",
       "              'vic_male',        'race_vic',       'age_crime',\n",
       "         'clean_summary',          'weapon'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "columns = ['inmate_number','last_name', 'first_name','education_level','age_crime',\n",
    "           'occupation','prior_record','multiple_vics','weapon','race_vic','vic_male','vic_female']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>inmate_number</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>education_level</th>\n",
       "      <th>age_crime</th>\n",
       "      <th>occupation</th>\n",
       "      <th>prior_record</th>\n",
       "      <th>multiple_vics</th>\n",
       "      <th>weapon</th>\n",
       "      <th>race_vic</th>\n",
       "      <th>vic_male</th>\n",
       "      <th>vic_female</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>732</td>\n",
       "      <td>Anderson</td>\n",
       "      <td>Johnny</td>\n",
       "      <td>6 years</td>\n",
       "      <td>21</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>white</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>810</td>\n",
       "      <td>Beets</td>\n",
       "      <td>Betty</td>\n",
       "      <td>10 years</td>\n",
       "      <td>46</td>\n",
       "      <td>cashier</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>white</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>981</td>\n",
       "      <td>Hittle</td>\n",
       "      <td>Daniel</td>\n",
       "      <td>14 years</td>\n",
       "      <td>39</td>\n",
       "      <td>welder</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>gun</td>\n",
       "      <td>white</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>651</td>\n",
       "      <td>Satterwhite</td>\n",
       "      <td>John</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>32</td>\n",
       "      <td>mechanic</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>gun</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>999186</td>\n",
       "      <td>Chavez</td>\n",
       "      <td>John</td>\n",
       "      <td>8 years</td>\n",
       "      <td>27</td>\n",
       "      <td>painter</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>hispanic</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>375</td>\n",
       "      <td>838</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Gerald</td>\n",
       "      <td>10 years</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>carpenter</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>376</td>\n",
       "      <td>555</td>\n",
       "      <td>Rumbaugh</td>\n",
       "      <td>Charles</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>gun</td>\n",
       "      <td>none_listed</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>377</td>\n",
       "      <td>980</td>\n",
       "      <td>Jones</td>\n",
       "      <td>Claude</td>\n",
       "      <td>9 years</td>\n",
       "      <td>49</td>\n",
       "      <td>electrician</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>white</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>378</td>\n",
       "      <td>999145</td>\n",
       "      <td>Ruiz</td>\n",
       "      <td>Rolando</td>\n",
       "      <td>10 years</td>\n",
       "      <td>20</td>\n",
       "      <td>laborer</td>\n",
       "      <td>yes</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>hispanic</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>379</td>\n",
       "      <td>954</td>\n",
       "      <td>Cruz</td>\n",
       "      <td>Oliver</td>\n",
       "      <td>7 years</td>\n",
       "      <td>21</td>\n",
       "      <td>laborer</td>\n",
       "      <td>no</td>\n",
       "      <td>no</td>\n",
       "      <td>blunt object</td>\n",
       "      <td>white</td>\n",
       "      <td>no</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>380 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    inmate_number    last_name first_name education_level    age_crime  \\\n",
       "0             732     Anderson     Johnny         6 years           21   \n",
       "1             810        Beets      Betty        10 years           46   \n",
       "2             981       Hittle     Daniel        14 years           39   \n",
       "3             651  Satterwhite       John     none_listed           32   \n",
       "4          999186       Chavez       John         8 years           27   \n",
       "..            ...          ...        ...             ...          ...   \n",
       "375           838     Mitchell     Gerald        10 years  none_listed   \n",
       "376           555     Rumbaugh    Charles     none_listed  none_listed   \n",
       "377           980        Jones     Claude         9 years           49   \n",
       "378        999145         Ruiz    Rolando        10 years           20   \n",
       "379           954         Cruz     Oliver         7 years           21   \n",
       "\n",
       "      occupation prior_record multiple_vics        weapon     race_vic  \\\n",
       "0    none_listed           no            no  blunt object        white   \n",
       "1        cashier           no            no  blunt object        white   \n",
       "2         welder          yes            no           gun        white   \n",
       "3       mechanic          yes            no           gun  none_listed   \n",
       "4        painter          yes            no  blunt object     hispanic   \n",
       "..           ...          ...           ...           ...          ...   \n",
       "375    carpenter          yes            no  blunt object  none_listed   \n",
       "376  none_listed  none_listed            no           gun  none_listed   \n",
       "377  electrician          yes            no  blunt object        white   \n",
       "378      laborer          yes            no  blunt object     hispanic   \n",
       "379      laborer           no            no  blunt object        white   \n",
       "\n",
       "    vic_male vic_female  \n",
       "0        yes         no  \n",
       "1        yes         no  \n",
       "2        yes         no  \n",
       "3         no         no  \n",
       "4        yes         no  \n",
       "..       ...        ...  \n",
       "375       no         no  \n",
       "376       no         no  \n",
       "377      yes         no  \n",
       "378       no        yes  \n",
       "379       no        yes  \n",
       "\n",
       "[380 rows x 12 columns]"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select the final columns by label. Unlike pd.DataFrame(df, columns=...),\n",
    "# which reindexes and fills a missing name with an all-NaN column, this\n",
    "# raises KeyError if any requested column is absent from df.\n",
    "# .copy() makes df1 independent of df for any later edits.\n",
    "df1 = df[columns].copy()\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "# index=True is the to_csv default, spelled out here: the row index is\n",
    "# written as the first (unnamed) column of the file.\n",
    "df1.to_csv('V9_photo_inmates.csv', index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "380"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in df1.\n",
    "df1.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
