In [4]:
import os
def get_data_from_files(path):
    directory = os.listdir(path)
    results = []
    filenames = []
    for file in directory:
        f=open(path+file)
        filenames.append(file)
        results.append(f.read())
        f.close()
    return results, filenames

inmates, filenames = get_data_from_files('FinalProject/inmates/')

import pandas as pd
import numpy as np
df = pd.DataFrame(inmates, filenames)
In [5]:
df.reset_index(inplace=True)
In [6]:
df['inmate_number'] = df.apply(lambda x: x['index'].split('_')[2], axis=1)
df['last_name'] = df.apply(lambda x: x['index'].split('_')[4].split('.')[0], axis=1)
df['first_name'] = df.apply(lambda x: x['index'].split('_')[3], axis=1)
df
Out[6]:
index 0 inmate_number last_name first_name
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John
... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver

380 rows × 5 columns

In [7]:
import re
df['clean'] = df.apply(lambda x: re.sub(r'[\W_]+', ' ', x[0].lower()), axis=1)
In [8]:
df
Out[8]:
index 0 inmate_number last_name first_name clean
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny name dohnny anderson pop 12 28 59 received cou...
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty name bettyloubeets rio dob 03 12 37 received 1...
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re...
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John name john thomas satterwhite d r 651 dob 12 29...
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John name john chavez d r 999186 dob 04 27 68 recei...
... ... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ...
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles charles francis rumbaugh execution 555 date se...
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude claude howard jones 980 name d r pop 9 24 40 r...
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re...
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece...

380 rows × 6 columns

In [23]:
def get_occupation(summary):
    try:
        p = re.compile(r'(?<=occupation)(\W.*?)(?=\s)')
        r = p.search(summary).group().strip()
        if 'prior' in r or len(r) < 3:
            return 'none_listed'
        else:
            return r
    except:
        return 'none_listed'

occupations = [get_occupation(summary) for summary in df['clean'].values]
In [24]:
occupations
Out[24]:
['none_listed',
 'cashier',
 'welder',
 'mechanic',
 'painter',
 'laborer',
 'education',
 'laborer',
 'borex',
 'press',
 'truck',
 'mechanic',
 'millwright',
 'laborer',
 'construction',
 'education',
 'laborer',
 'none_listed',
 'plumber',
 'sculptor',
 'iron',
 'bull',
 'laborer',
 'none_listed',
 'carpenter',
 'brick',
 'truck',
 'auto',
 'direct',
 'restaurant',
 'mechanic',
 'fork',
 'forklift',
 'janitor',
 'construction',
 'machinist',
 'salesman',
 'construction',
 'musician',
 'musicia',
 'plumber',
 'construction',
 'roofer',
 'education',
 'laborer',
 'press',
 'laborer',
 'drywaller',
 'none_listed',
 'barber',
 'construction',
 'education',
 'tankerman',
 'bartender',
 'roofer',
 'welder',
 'mechanic',
 'computer',
 'laborer',
 'laborer',
 'roofer',
 'laborer',
 'education',
 'none_listed',
 'laborer',
 'produce',
 'gardener',
 'truck',
 'landscaping',
 'laborer',
 '2porer',
 'analyst',
 'antique',
 'auto',
 'welder',
 'clectr',
 'none_listed',
 'clecitician',
 'nurses',
 'laborer',
 'painter',
 'laborer',
 'oachi',
 'electrician',
 'carpenter',
 'maintenance',
 'truck',
 'cabinet',
 'security',
 'laborer',
 'waiter',
 'construction',
 'truck',
 'laborer',
 'electrician',
 'laborer',
 'laborer',
 'correctional',
 'construction',
 'laborer',
 'laborer',
 'education',
 'edckisyes',
 'unemployed',
 'landscaper',
 'jaborer',
 'laborer',
 'none_listed',
 'none_listed',
 'paint',
 'general',
 'telemarketing',
 'laborer',
 'none_listed',
 'construction',
 'auto',
 'laborer',
 'informant',
 'roofer',
 'laborer',
 'truck',
 'telephone',
 'unemployed',
 'electricianfmarketing',
 'student',
 'none_listed',
 'laborer',
 'none_listed',
 'laborer',
 'laborer',
 'salesman',
 'welder',
 'welder',
 'clectrician',
 'brick',
 'food',
 'food',
 'laborer',
 'metal',
 'none_listed',
 'deliveryman',
 'laborer',
 'construction',
 'warehouse',
 'jaborer',
 'laborer',
 'delivery',
 'cement',
 'construction',
 'carpenter',
 'food',
 'paint',
 'brick',
 'machine',
 'education',
 'heavy',
 'accounting',
 'asst',
 'laborer',
 'mechanic',
 'govt',
 'sales',
 'clerk',
 'cook',
 'construction',
 'laborer',
 'none_listed',
 'mechanic',
 '1aborer',
 'meat',
 'painter',
 'none_listed',
 'none_listed',
 'retail',
 'laborer',
 'laborer',
 'electrician',
 'car',
 'none_listed',
 'none_listed',
 'mechanic',
 'farmer',
 'machinist',
 'baker',
 'commercial',
 'video',
 'machinist',
 'machinist',
 'laborer',
 'none_listed',
 'education',
 'janitor',
 'electrician',
 'cashier',
 'small',
 'cable',
 'laborer',
 'welder',
 'laborer',
 'mechanic',
 'radiator',
 'mechanic',
 'shrimper',
 'paint',
 'none_listed',
 'landscaping',
 'none_listed',
 'none_listed',
 'cook',
 'none_listed',
 'feed',
 'auto',
 'sales',
 'laborer',
 'student',
 'cement',
 'none_listed',
 'laborer',
 'laborer',
 'cook',
 'none_listed',
 'none_listed',
 'auto',
 'drywaller',
 'none_listed',
 'painter',
 'construction',
 'laborer',
 'clectrician',
 'laborer',
 'pax',
 'electrician',
 'laborer',
 'nursing',
 'laborer',
 'produce',
 'jaborer',
 'none_listed',
 'sales',
 'anto',
 'custom',
 'electrician',
 'roofer',
 'truck',
 'none_listed',
 'warehouseman',
 'auto',
 'laborer',
 'welder',
 'laborer',
 'laborer',
 'carpenter',
 'laborer',
 'stocker',
 'roofer',
 'cashier',
 'laborer',
 'labor',
 'stocker',
 'oilfield',
 'none_listed',
 'painter',
 'clectrician',
 'restaurant',
 'laborer',
 'computer',
 'none_listed',
 'electrician',
 'sales',
 'maintenance',
 'welder',
 'carpenter',
 'physical',
 'mechanic',
 'gump',
 'jaborer',
 'echanic',
 'none_listed',
 'carpenter',
 'laborer',
 'apprentice',
 'electrician',
 'welder',
 'sales',
 'laborer',
 'laborer',
 'truck',
 'carpenter',
 'auto',
 'welder',
 'education',
 'stockbroker',
 'general',
 'landscaping',
 'construct',
 'construction',
 'none_listed',
 'carpenter',
 'laborer',
 'oil',
 'laborer',
 'paint',
 'mechanic',
 'mover',
 'student',
 'electrician',
 'fork',
 'ast',
 'janitorial',
 'receiving',
 'auto',
 'chemical',
 'welder',
 'motorcycle',
 'mechanic',
 'laborer',
 'cook',
 'electrician',
 'heavy',
 'none_listed',
 'mechanic',
 'mechanic',
 'iron',
 'auto',
 'roofer',
 'laborer',
 'farm',
 'electrician',
 'cook',
 'laborer',
 'none_listed',
 'cook',
 'manager',
 'general',
 'laborer',
 'insurance',
 'cook',
 'none_listed',
 'laborer',
 'painter',
 'mechanic',
 'education',
 'office',
 'ghneck',
 'carpenter',
 'delivery',
 'security',
 'cook',
 'landscaping',
 'diesel',
 'laborer',
 'laborer',
 'construction',
 'roughneck',
 'construction',
 'auto',
 'education',
 'construction',
 'dishwasher',
 'factory',
 'laborer',
 'carpenter',
 'drywall',
 'cement',
 'none_listed',
 'brickmason',
 'laborer',
 'roofer',
 'construction',
 'telemarketing',
 'laborer',
 'oilfield',
 'clerical',
 'laborer',
 'dispatcher',
 'carpenter',
 'none_listed',
 'electrician',
 'laborer',
 'laborer']
In [25]:
df['occupation'] = occupations
In [26]:
df
Out[26]:
index 0 inmate_number last_name first_name clean occupation
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny name dohnny anderson pop 12 28 59 received cou... none_listed
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty name bettyloubeets rio dob 03 12 37 received 1... cashier
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re... welder
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John name john thomas satterwhite d r 651 dob 12 29... mechanic
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John name john chavez d r 999186 dob 04 27 68 recei... painter
... ... ... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles charles francis rumbaugh execution 555 date se... none_listed
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude claude howard jones 980 name d r pop 9 24 40 r... electrician
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re... laborer
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece... laborer

380 rows × 7 columns

In [33]:
def get_priors(summary):
    try:
        text = re.compile(r'(?<=record)(\W.*?)(?=\s)')
        result = text.search(summary).group().strip()
        return 'no' if 'none' in result else 'yes'
    except:
        return 'none_listed'

priors = [get_priors(summary) for summary in df['clean'].values]
df['prior_record'] = priors
df
Out[33]:
index 0 inmate_number last_name first_name clean occupation prior_record
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny name dohnny anderson pop 12 28 59 received cou... none_listed no
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty name bettyloubeets rio dob 03 12 37 received 1... cashier no
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re... welder yes
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John name john thomas satterwhite d r 651 dob 12 29... mechanic yes
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John name john chavez d r 999186 dob 04 27 68 recei... painter yes
... ... ... ... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter yes
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles charles francis rumbaugh execution 555 date se... none_listed none_listed
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude claude howard jones 980 name d r pop 9 24 40 r... electrician yes
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re... laborer yes
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece... laborer no

380 rows × 8 columns

In [51]:
def get_edu(summary):
    try:
        text = re.compile(r'(?<=education)(.*?)(years|yrs|ged|prior)')
        result = text.search(summary).group().strip()
        number = re.compile(r'\d+')
        number_result = number.search(result).group()
#         print(number_result)
        return str(number_result) + " years"
#         return 'no' if 'none' in result else 'yes'
    except:
        return 'none_listed'

edu = [get_edu(summary) for summary in df['clean'].values]
df['education_level'] = edu
df.to_csv('V8_fromphotos.csv')
In [59]:
def get_vics(summary):
    try:
        text = re.compile(r'(male|men|man)')
        vics = text.findall(summary)
        print(len(vics))

    except:
        print('nope')
        
    
vic_f = [get_vics(summary) for summary in df['clean'].values]                          
1
1
1
3
1
0
3
0
3
3
1
2
4
4
1
2
2
2
1
2
0
1
0
2
1
1
0
2
2
2
1
3
0
1
3
0
2
0
3
3
2
1
2
2
1
0
0
1
6
2
1
2
8
3
0
0
3
1
4
1
3
2
3
6
4
0
0
1
0
0
3
1
0
1
7
3
0
0
1
3
0
2
3
0
1
3
0
1
1
3
4
8
1
0
4
4
0
0
2
2
0
0
1
0
5
1
2
1
2
2
0
2
1
1
0
2
0
1
1
0
8
5
2
2
2
3
4
1
6
1
1
1
0
2
0
4
3
1
2
0
3
0
1
5
1
1
2
2
4
0
1
2
1
1
2
3
0
3
1
0
0
2
1
1
1
0
1
1
4
1
5
1
7
6
2
1
1
1
4
0
1
1
2
0
0
3
0
5
1
2
0
2
2
0
2
3
0
2
5
1
1
1
4
0
1
2
1
1
1
0
6
5
0
1
2
1
0
1
1
1
1
1
3
0
1
2
3
0
2
2
3
0
2
5
2
2
0
2
1
1
1
2
2
3
2
3
1
3
1
2
1
1
2
7
4
0
5
1
1
2
0
1
0
10
3
2
1
2
0
1
0
1
3
0
3
3
2
1
2
3
1
0
2
3
2
1
3
3
0
2
0
2
3
0
0
0
0
1
1
0
1
2
1
1
2
3
3
0
6
2
4
2
6
2
0
1
2
2
3
3
0
3
1
1
2
1
3
4
2
1
1
3
9
3
1
1
0
1
0
0
1
2
2
0
2
2
0
2
0
1
0
1
2
1
0
1
0
8
3
1
6
2
4
2
0
2
1
2
0
0
3
2
0
0
2
0
0
1
1
2
In [87]:
def get_vics(summary):
    try:
#         text = re.compile(r'(?<=race of victim s)\W(black|white|hispanic|hite|asian)(.*?)(male|female)')
        text = re.compile(r'(?<=race of victim s)(.*?)(male|female)')
        result = text.search(summary).group().strip()
        result_s = result.split(' ')
        if len(result_s) > 3:
            return 'error'
        else:
            return result_s
#         if len(result_s) >
#         print(result)
#         number = re.compile(r'\d+')
#         number_result = number.search(result).group()
# #         print(number_result)
#         return str(number_result) + " years"
# #         return 'no' if 'none' in result else 'yes'
    except:
        return ['none_listed']

vic_deets = [get_vics(summary) for summary in df['clean'].values]
df['vic_deets'] = vic_deets
# df['race_vic'] = df.apply()


# if len(summary) == 3:
#     summary[0]
multiple_vics = [summary[0] if len(summary) == 3 else 'no' for summary in df['vic_deets'].values]
female_vics = ['yes' if 'female' in summary else 'no' for summary in df['vic_deets'].values]
male_vics = ['yes' if 'male' in summary else 'no' for summary in df['vic_deets'].values]
In [91]:
df['multiple_vics'] = multiple_vics
df['vic_female'] = female_vics
df['vic_male'] = male_vics
df
Out[91]:
index 0 inmate_number last_name first_name clean occupation prior_record education_level vic_deets multiple_vics vic_female vic_male
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny name dohnny anderson pop 12 28 59 received cou... none_listed no 6 years [white, male] no no yes
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty name bettyloubeets rio dob 03 12 37 received 1... cashier no 10 years [white, male] no no yes
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re... welder yes 14 years [white, male] no no yes
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John name john thomas satterwhite d r 651 dob 12 29... mechanic yes none_listed [none_listed] no no no
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John name john chavez d r 999186 dob 04 27 68 recei... painter yes 8 years [hispanic, male] no no yes
... ... ... ... ... ... ... ... ... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter yes 10 years [none_listed] no no no
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles charles francis rumbaugh execution 555 date se... none_listed none_listed none_listed [none_listed] no no no
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude claude howard jones 980 name d r pop 9 24 40 r... electrician yes 9 years [white, male] no no yes
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re... laborer yes 10 years [hispanic, female] no yes no
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece... laborer no 7 years [white, female] no yes no

380 rows × 13 columns

In [92]:
race_vics = [summary[1] if len(summary) == 3 else summary[0] for summary in df['vic_deets'].values]
In [93]:
race_vics
Out[93]:
['white',
 'white',
 'white',
 'none_listed',
 'hispanic',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'hispanic',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'hispanic',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'hispanic',
 'male',
 'black',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'hispanic',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'hispanic',
 'black',
 'white',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'hispanic',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'hite',
 'none_listed',
 'none_listed',
 'none_listed',
 'hispanic',
 'hispanic',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'black',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'white',
 'white',
 'white',
 'black',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'plack',
 'waite',
 'none_listed',
 'white',
 'none_listed',
 'black',
 'none_listed',
 'hispanic',
 'none_listed',
 'white',
 'white',
 'white',
 'white',
 'white',
 'none_listed',
 'hispanic',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'hispanic',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'e',
 'e',
 'none_listed',
 'white',
 'white',
 'white',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'black',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'hispanic',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'black',
 'white',
 'white',
 'none_listed',
 'hispanic',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'hispanic',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 'white',
 'none_listed',
 'white',
 'black',
 'none_listed',
 'none_listed',
 'none_listed',
 'e',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'hispanic',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'e',
 'none_listed',
 'white',
 'none_listed',
 'black',
 'black',
 'white',
 'asian',
 'white',
 'hispanic',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'black',
 'black',
 'none_listed',
 'black',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'hispanic',
 'none_listed',
 'biack',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'white',
 '1',
 'white',
 'white',
 'none_listed',
 'black',
 'white',
 'none_listed',
 'none_listed',
 'e',
 'hite',
 'e',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'white',
 'black',
 'none_listed',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'none_listed',
 'white',
 'hispanic',
 'none_listed',
 'none_listed',
 'none_listed',
 'asian',
 'white',
 'none_listed',
 'none_listed',
 'none_listed',
 'hite',
 'none_listed',
 'none_listed',
 'white',
 'none_listed',
 'none_listed',
 'white',
 'hispanic',
 'white']
In [94]:
df['race_vic'] = race_vics
In [95]:
df
Out[95]:
index 0 inmate_number last_name first_name clean occupation prior_record education_level vic_deets multiple_vics vic_female vic_male race_vic
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny name dohnny anderson pop 12 28 59 received cou... none_listed no 6 years [white, male] no no yes white
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty name bettyloubeets rio dob 03 12 37 received 1... cashier no 10 years [white, male] no no yes white
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re... welder yes 14 years [white, male] no no yes white
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John name john thomas satterwhite d r 651 dob 12 29... mechanic yes none_listed [none_listed] no no no none_listed
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John name john chavez d r 999186 dob 04 27 68 recei... painter yes 8 years [hispanic, male] no no yes hispanic
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter yes 10 years [none_listed] no no no none_listed
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles charles francis rumbaugh execution 555 date se... none_listed none_listed none_listed [none_listed] no no no none_listed
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude claude howard jones 980 name d r pop 9 24 40 r... electrician yes 9 years [white, male] no no yes white
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re... laborer yes 10 years [hispanic, female] no yes no hispanic
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece... laborer no 7 years [white, female] no yes no white

380 rows × 14 columns

In [104]:
def get_age_crime(summary):
    try:
        text = re.compile(r'(?<=age at time of offense )(\d.*?)\W')
        result = text.search(summary).group().strip()
        if len(result) < 2:
            return 'none_listed'
        else:
            return result
    except:
        return 'none_listed'

age_crime = [get_age_crime(summary) for summary in df['clean'].values]    
In [105]:
age_crime
Out[105]:
['21',
 '46',
 '39',
 '32',
 '27',
 '30',
 '18',
 '19',
 '23',
 '22',
 '23',
 '27',
 '27',
 '22',
 '22',
 '43',
 '18',
 '19',
 '20',
 '21',
 '23',
 '26',
 '17',
 '34',
 '28',
 '25',
 'none_listed',
 '18',
 '20',
 'none_listed',
 '22',
 '30',
 '37',
 '27',
 '29',
 '33',
 '31',
 'none_listed',
 '35',
 '28',
 'none_listed',
 '24',
 '22',
 'none_listed',
 '17',
 '26',
 '39',
 '39',
 'none_listed',
 '38',
 '34',
 'none_listed',
 '26',
 '21',
 '20',
 '51',
 '43',
 '27',
 '25',
 '18',
 '24',
 'none_listed',
 '24',
 'none_listed',
 '30',
 '33',
 '53',
 '42',
 '35',
 '27',
 'none_listed',
 'none_listed',
 '39',
 'none_listed',
 '29',
 '32',
 '56',
 'none_listed',
 '20',
 'none_listed',
 '32',
 '22',
 '24',
 '33',
 '23',
 '26',
 '32',
 '29',
 '26',
 '35',
 '21',
 '29',
 '28',
 'none_listed',
 '24',
 'none_listed',
 '24',
 '26',
 'none_listed',
 '20',
 '17',
 '20',
 '27',
 '19',
 '27',
 '29',
 '19',
 'none_listed',
 '26',
 '33',
 '31',
 '21',
 '21',
 'none_listed',
 '28',
 '28',
 '38',
 '34',
 '23',
 '20',
 'none_listed',
 '38',
 '24',
 '25',
 '18',
 '30',
 '26',
 'none_listed',
 '19',
 '18',
 '33',
 '39',
 '30',
 '23',
 'none_listed',
 '31',
 '18',
 '21',
 '27',
 '36',
 '23',
 '18',
 '25',
 '22',
 '23',
 'none_listed',
 'none_listed',
 '39',
 '29',
 '44',
 '19',
 '20',
 '21',
 '45',
 '29',
 '27',
 'none_listed',
 '19',
 '29',
 '21',
 '45',
 '24',
 '25',
 '19',
 '30',
 '17',
 '44',
 '18',
 '18',
 '33',
 '21',
 '32',
 '19',
 '30',
 '22',
 'none_listed',
 '44',
 'none_listed',
 'none_listed',
 '27',
 '34',
 '32',
 '24',
 '29',
 '25',
 '19',
 '22',
 '27',
 '24',
 'none_listed',
 '31',
 '19',
 '40',
 '22',
 'none_listed',
 '33',
 '18',
 '32',
 '19',
 '24',
 '26',
 '22',
 '31',
 'none_listed',
 'none_listed',
 '31',
 'none_listed',
 'none_listed',
 '22',
 'none_listed',
 '33',
 '24',
 '44',
 '17',
 'none_listed',
 '20',
 'none_listed',
 '19',
 '19',
 '29',
 'none_listed',
 '44',
 '37',
 '26',
 '19',
 '23',
 '19',
 '19',
 '28',
 '20',
 '23',
 '19',
 '38',
 '24',
 '20',
 '26',
 '18',
 '23',
 '19',
 '37',
 '22',
 'none_listed',
 '31',
 '26',
 'none_listed',
 '21',
 '23',
 'none_listed',
 'none_listed',
 'none_listed',
 '28',
 '24',
 '18',
 '20',
 '20',
 '19',
 '25',
 '22',
 '20',
 '34',
 'none_listed',
 '19',
 '21',
 '22',
 '37',
 '45',
 '28',
 '37',
 '30',
 '24',
 '22',
 '22',
 '37',
 '26',
 '18',
 '17',
 '26',
 '40',
 '37',
 '22',
 '24',
 '36',
 '36',
 '23',
 'none_listed',
 'none_listed',
 '31',
 '26',
 '31',
 '24',
 '20',
 '32',
 '32',
 'none_listed',
 '28',
 '22',
 'none_listed',
 '30',
 '29',
 '28',
 '31',
 'none_listed',
 '23',
 '20',
 'none_listed',
 '29',
 '19',
 '17',
 '52',
 '27',
 '927',
 '35',
 '35',
 '27',
 'none_listed',
 '20',
 '25',
 '22',
 '33',
 '30',
 '22',
 '24',
 '34',
 '23',
 '21',
 '26',
 'none_listed',
 '19',
 '34',
 '22',
 'none_listed',
 '23',
 '35',
 '47',
 '19',
 '28',
 '19',
 'none_listed',
 '19',
 '30',
 '30',
 'none_listed',
 '23',
 '26',
 '48',
 '50',
 '18',
 '28',
 '31',
 'none_listed',
 'none_listed',
 '20',
 '25',
 'none_listed',
 '44',
 '32',
 '35',
 '54',
 '23',
 '19',
 '19',
 '24',
 '31',
 '33',
 'none_listed',
 '34',
 '19',
 '25',
 '36',
 '20',
 'none_listed',
 '28',
 '20',
 '24',
 '50',
 'none_listed',
 'none_listed',
 '49',
 '20',
 '21']
In [106]:
df['age_crime'] = age_crime
df
Out[106]:
index 0 inmate_number last_name first_name clean occupation prior_record education_level vic_deets multiple_vics vic_female vic_male race_vic age_crime
0 img_text_732_Johnny_Anderson.txt Name: dohnny Anderson\n\npop: 12/_ 28 /_59 Rec... 732 Anderson Johnny name dohnny anderson pop 12 28 59 received cou... none_listed no 6 years [white, male] no no yes white 21
1 img_text_810_Betty_Beets.txt Name: BettyLouBeets RIO\nDOB:03/12/37_ Receive... 810 Beets Betty name bettyloubeets rio dob 03 12 37 received 1... cashier no 10 years [white, male] no no yes white 46
2 img_text_981_Daniel_Hittle.txt Name: Daniel Joe Hittle D.R. # 981\n\n \n\n \n... 981 Hittle Daniel name daniel joe hittle d r 981 v pop 3 1 50 re... welder yes 14 years [white, male] no no yes white 39
3 img_text_651_John_Satterwhite.txt Name: John Thomas Satterwhite D.R.# 651\n\nDOB... 651 Satterwhite John name john thomas satterwhite d r 651 dob 12 29... mechanic yes none_listed [none_listed] no no no none_listed 32
4 img_text_999186_John_Chavez.txt Name: John Chavez D.R.# 999186\nDOB: _ 04/27/6... 999186 Chavez John name john chavez d r 999186 dob 04 27 68 recei... painter yes 8 years [hispanic, male] no no yes hispanic 27
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
375 img_text_838_Gerald_Mitchell.txt Name: Gerald Lee Mitchell ____._.___ D.R.#838_... 838 Mitchell Gerald name gerald lee mitchell d r 838 dob 12 27 67 ... carpenter yes 10 years [none_listed] no no no none_listed none_listed
376 img_text_555_Charles_Rumbaugh.txt CHARLES FRANCIS RUMBAUGH\n\nEXECUTION #555 Dat... 555 Rumbaugh Charles charles francis rumbaugh execution 555 date se... none_listed none_listed none_listed [none_listed] no no no none_listed none_listed
377 img_text_980_Claude_Jones.txt Claude Howard Jones 980\n\n \n\n \n\n \n\nName... 980 Jones Claude claude howard jones 980 name d r pop 9 24 40 r... electrician yes 9 years [white, male] no no yes white 49
378 img_text_999145_Rolando_Ruiz.txt Name: Roland Ruiz, Jr. D.R. #999145\n\n@ DOB: ... 999145 Ruiz Rolando name roland ruiz jr d r 999145 dob 07 04 72 re... laborer yes 10 years [hispanic, female] no yes no hispanic 20
379 img_text_954_Oliver_Cruz.txt David Oliver Cruz\nD.R.# ao\n\n \n\n \n\n \n\n... 954 Cruz Oliver david oliver cruz d r ao name dob 5 18 67 rece... laborer no 7 years [white, female] no yes no white 21

380 rows × 15 columns

In [121]:
def get_weapon(summary):
    try:
        if 'knife' in summary:
            return 'knife'
#             weapon = 'knife'
        elif 'gun' in summary:
            return 'gun'
#             weapon = 'gun'
        elif 'cord ' in summary:
#             print(summary.split('cord')[1])
            return 'cord'
        elif 'blunt object':
            return 'blunt object'
        else:
            return 'other'
    except:
        return 'none_listed'

df['clean_summary'] = [summary.split('summary')[1] if 'summary' in summary else 'nope' for summary in df['clean'].values]
weapon = [get_weapon(summary) for summary in df['clean_summary'].values]    
In [122]:
len(df[df['clean_summary'] == 'nope'])
Out[122]:
97
In [123]:
weapon
Out[123]:
['blunt object',
 'blunt object',
 'gun',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'gun',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'knife',
 'gun',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'gun',
 'blunt object',
 'blunt object',
 'gun',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'cord',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'cord',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'knife',
 'blunt object',
 'gun',
 'knife',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'knife',
 'gun',
 'blunt object',
 'gun',
 'gun',
 'cord',
 'blunt object',
 'blunt object',
 'blunt object',
 'cord',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'knife',
 'blunt object',
 'gun',
 'gun',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'cord',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'knife',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'gun',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'gun',
 'blunt object',
 'blunt object',
 'knife',
 'blunt object',
 'cord',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'knife',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'gun',
 'gun',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'blunt object',
 'gun',
 'blunt object',
 'blunt object',
 'blunt object']
In [124]:
df['weapon'] = weapon
In [126]:
df.columns
Out[126]:
Index([          'index',                 0,   'inmate_number',
             'last_name',      'first_name',           'clean',
            'occupation',    'prior_record', 'education_level',
             'vic_deets',   'multiple_vics',      'vic_female',
              'vic_male',        'race_vic',       'age_crime',
         'clean_summary',          'weapon'],
      dtype='object')
In [128]:
columns = ['inmate_number','last_name', 'first_name','education_level','age_crime',
           'occupation','prior_record','multiple_vics','weapon','race_vic','vic_male','vic_female']
In [131]:
df1 = pd.DataFrame(df, columns=columns)
df1
Out[131]:
inmate_number last_name first_name education_level age_crime occupation prior_record multiple_vics weapon race_vic vic_male vic_female
0 732 Anderson Johnny 6 years 21 none_listed no no blunt object white yes no
1 810 Beets Betty 10 years 46 cashier no no blunt object white yes no
2 981 Hittle Daniel 14 years 39 welder yes no gun white yes no
3 651 Satterwhite John none_listed 32 mechanic yes no gun none_listed no no
4 999186 Chavez John 8 years 27 painter yes no blunt object hispanic yes no
... ... ... ... ... ... ... ... ... ... ... ... ...
375 838 Mitchell Gerald 10 years none_listed carpenter yes no blunt object none_listed no no
376 555 Rumbaugh Charles none_listed none_listed none_listed none_listed no gun none_listed no no
377 980 Jones Claude 9 years 49 electrician yes no blunt object white yes no
378 999145 Ruiz Rolando 10 years 20 laborer yes no blunt object hispanic no yes
379 954 Cruz Oliver 7 years 21 laborer no no blunt object white no yes

380 rows × 12 columns

In [132]:
df1.to_csv('V9_photo_inmates.csv')
In [134]:
len(df1)
Out[134]:
380
In [ ]: