daily log 10.20.20

2 minute read

python - How to select rows from a DataFrame based on column values - Stack Overflow

python - AttributeError: Series object has no attribute value - Stack Overflow

def get_report_chain(employee):
    if employee == THE_CEO:
        return 'done'
    else:
        curr_employee = df.loc[df['first_last'] == employee]
        print(curr_employee['First Name'].values[0], 
              curr_employee['Last Name'].values[0], ":",
              curr_employee['Job'].values[0])
        supervisor = df.loc[df['first_last'] == employee]['supervisor'].values[0]
        return get_report_chain(supervisor)
        
# EXAMPLE
get_report_chain(PERSON_IN_QUETSION)

import pandas as pd
df = pd.read_csv('VG_P10_ZIPS_V3.csv', encoding='latin-1')
df = df.dropna(subset=['Zip Code (Formatted)'])
df = df[df['Employment Type'] == 'Regular']

## ==========================================================
## ADDING LOCATION COLUMNS
## ==========================================================
## --- lat and lon for zip code visuals
## --- city and state for easy subsetting

us_zips = pd.read_csv('us-zip-code-latitude-and-longitude.csv', sep=';')

zip_lat = dict(zip(us_zips['Zip'].astype(str), us_zips['Latitude']))
zip_lon = dict(zip(us_zips['Zip'].astype(str), us_zips['Longitude']))

def get_city(city_str):
    try:
        city = city_str.split(',')[0]
        city_formatted = [x[0].upper() + x[1:].lower() for x in city.lower().split(' ')]
        return ' '.join(city_formatted)
    except:
        return 'no data'

def get_zip(city_str):
    try:
        zip_code = city_str.split(' ')[-1].split('-')[0]
        return zip_code
#         print(state_str)
#         return state_str.split(',')[1].split(' ')[1]
    except:
        return 'no data'
    
def get_city_zip(city_str):
    try:
        zip_code = city_str.split(' ')[-1].split('-')[0]
        city = city_str.split(',')[0]
        city_formatted = [x[0].upper() + x[1:].lower() for x in city.lower().split(' ')]
        return ' '.join(city_formatted) + " (" + zip_code + ")"
#         print(state_str)
#         return state_str.split(',')[1].split(' ')[1]
    except:
        return 'no data'
    
def get_state(state_str):
    try:
#         print(state_str)
        return state_str.split(',')[1].split(' ')[1]
    except:
        return 'no data'
        
def get_lat(zip_code):
    zc = str(zip_code)
    while len(zc) < 5:
        zc = '0'+zc
    try:
        return zip_lat[zc]
    except:
        return 'no data'

def get_lon(zip_code):
    zc = str(zip_code)
    while len(zc) < 5:
        zc = '0'+zc
    try:
        return zip_lon[zc]
    except:
        return 'no data'
    
df['city'] = df.apply(lambda x: get_city(x['City, State Zip Code (Formatted)']), axis=1)
df['zip_code'] = df.apply(lambda x: get_zip(x['City, State Zip Code (Formatted)']), axis=1)
df['city_zip'] = df.apply(lambda x: get_city_zip(x['City, State Zip Code (Formatted)']), axis=1)
df['state'] = df.apply(lambda x: get_state(x['City, State Zip Code (Formatted)']), axis=1)
df['lat'] = df.apply(lambda x: get_lat(x['Zip Code (Formatted)']), axis=1)
df['lon'] = df.apply(lambda x: get_lon(x['Zip Code (Formatted)']), axis=1)

## --------------------------------------------------
## Adding Lat Lon for CITY

city_df = pd.read_csv('simplemaps_uscities_basicv1.7/uscities.csv')
# ca_city_df = city_df[city_df['state_id'] == 'CA']
ca_city_df = city_df.copy()
city_to_lat = dict(zip(ca_city_df['city'], ca_city_df['lat']))
city_to_lon = dict(zip(ca_city_df['city'], ca_city_df['lng']))

def get_city_lat(city, zip_code):
#     print(zip_code)
    try:
        return city_to_lat[city]
    except:
        try:
        
            new_lat = ca_city_df[ca_city_df['zips'].str.contains(zip_code,na=False)]['lat'].values[0]
            print('new_lat!', new_lat)
        except:
            print(city, type(zip_code))
            return 'no data'

def get_city_lon(city):
    try:
        return city_to_lon[city]
    except:
        return 'no data'
    
df['city_lat'] = df.apply(lambda x: get_city_lat(x['city'], x['zip_code']), axis=1)
df['city_lon'] = df.apply(lambda x: get_city_lon(x['city']), axis=1)    

## ==========================================================
## ADDING VG COLUMNS
## ==========================================================
## --- dept_binned & dept_cleaned for standardization
## --- exec leader for subsetting
## --- function for subsetting


dept_binned = pd.read_csv('P7_TIME_ALLOCATION/VG_TIME__DATA_DEPT_NAMES_AND_BINS_V3.csv')
dept_binned = dict(zip(dept_binned['currently'], dept_binned['kendra']))
df['Dept'] = df['Org Level 1'].str.strip()

def get_dept_binned(dept):    
    try:
        return dept_binned[dept]
    except:
        return 'no data'


def get_clean_dept(dept):
    return dept.replace(' - ', '-').replace('-', ': ')
    
leaders = pd.read_csv('VG_DATA_DICT_exec_leader_to_dept.csv')
dept_to_leader = {k:v for k,v in leaders.values}

def get_exec_leader(dept):
    try:
        return dept_to_leader[dept]
    except:
        print(dept)
        return 'no data'
    

function_v2 = pd.read_csv('function_v2e.csv')
dept_to_function = {k:v for k,v in function_v2.values}

def try_match(dept):
    try:
        return dept_to_function[dept]
    except:
        return 'no data'

df['dept_binned'] = df.apply(lambda x: get_dept_binned(x['Dept']), axis=1) 
df['Org Level 1 Clean'] = df.apply(lambda x: get_clean_dept(x['Org Level 1']), axis=1) 
df['executive_leader'] = df.apply(lambda x: get_exec_leader(x['Org Level 1 Clean']), axis=1)      
df['new_func'] = df.apply(lambda x: try_match(x['Org Level 1']), axis=1)

Share on

Twitter Facebook LinkedIn

Daniel Caraway

daily log 10.20.20

Share on

You may also enjoy

daily log 03-25-21

How to use Data Science Superpowers for Useless Things: Getting a Job at Amazon, Take 2

How to use Data Science Superpowers for Useless Things: Getting a Job at Amazon

How to use Data Science Superpowers for Useless Things: Adding Text to Images (aka Cats Narrate the Big Lebowski)