daily log 10.20.20
python - How to select rows from a DataFrame based on column values - Stack Overflow
python - AttributeError: Series object has no attribute value - Stack Overflow
def get_report_chain(employee):
if employee == THE_CEO:
return 'done'
else:
curr_employee = df.loc[df['first_last'] == employee]
print(curr_employee['First Name'].values[0],
curr_employee['Last Name'].values[0], ":",
curr_employee['Job'].values[0])
supervisor = df.loc[df['first_last'] == employee]['supervisor'].values[0]
return get_report_chain(supervisor)
# EXAMPLE
get_report_chain(PERSON_IN_QUETSION)
import pandas as pd
df = pd.read_csv('VG_P10_ZIPS_V3.csv', encoding='latin-1')
df = df.dropna(subset=['Zip Code (Formatted)'])
df = df[df['Employment Type'] == 'Regular']
## ==========================================================
## ADDING LOCATION COLUMNS
## ==========================================================
## --- lat and lon for zip code visuals
## --- city and state for easy subsetting
us_zips = pd.read_csv('us-zip-code-latitude-and-longitude.csv', sep=';')
zip_lat = dict(zip(us_zips['Zip'].astype(str), us_zips['Latitude']))
zip_lon = dict(zip(us_zips['Zip'].astype(str), us_zips['Longitude']))
def get_city(city_str):
try:
city = city_str.split(',')[0]
city_formatted = [x[0].upper() + x[1:].lower() for x in city.lower().split(' ')]
return ' '.join(city_formatted)
except:
return 'no data'
def get_zip(city_str):
try:
zip_code = city_str.split(' ')[-1].split('-')[0]
return zip_code
# print(state_str)
# return state_str.split(',')[1].split(' ')[1]
except:
return 'no data'
def get_city_zip(city_str):
try:
zip_code = city_str.split(' ')[-1].split('-')[0]
city = city_str.split(',')[0]
city_formatted = [x[0].upper() + x[1:].lower() for x in city.lower().split(' ')]
return ' '.join(city_formatted) + " (" + zip_code + ")"
# print(state_str)
# return state_str.split(',')[1].split(' ')[1]
except:
return 'no data'
def get_state(state_str):
try:
# print(state_str)
return state_str.split(',')[1].split(' ')[1]
except:
return 'no data'
def get_lat(zip_code):
zc = str(zip_code)
while len(zc) < 5:
zc = '0'+zc
try:
return zip_lat[zc]
except:
return 'no data'
def get_lon(zip_code):
zc = str(zip_code)
while len(zc) < 5:
zc = '0'+zc
try:
return zip_lon[zc]
except:
return 'no data'
df['city'] = df.apply(lambda x: get_city(x['City, State Zip Code (Formatted)']), axis=1)
df['zip_code'] = df.apply(lambda x: get_zip(x['City, State Zip Code (Formatted)']), axis=1)
df['city_zip'] = df.apply(lambda x: get_city_zip(x['City, State Zip Code (Formatted)']), axis=1)
df['state'] = df.apply(lambda x: get_state(x['City, State Zip Code (Formatted)']), axis=1)
df['lat'] = df.apply(lambda x: get_lat(x['Zip Code (Formatted)']), axis=1)
df['lon'] = df.apply(lambda x: get_lon(x['Zip Code (Formatted)']), axis=1)
## --------------------------------------------------
## Adding Lat Lon for CITY
city_df = pd.read_csv('simplemaps_uscities_basicv1.7/uscities.csv')
# ca_city_df = city_df[city_df['state_id'] == 'CA']
ca_city_df = city_df.copy()
city_to_lat = dict(zip(ca_city_df['city'], ca_city_df['lat']))
city_to_lon = dict(zip(ca_city_df['city'], ca_city_df['lng']))
def get_city_lat(city, zip_code):
# print(zip_code)
try:
return city_to_lat[city]
except:
try:
new_lat = ca_city_df[ca_city_df['zips'].str.contains(zip_code,na=False)]['lat'].values[0]
print('new_lat!', new_lat)
except:
print(city, type(zip_code))
return 'no data'
def get_city_lon(city):
try:
return city_to_lon[city]
except:
return 'no data'
df['city_lat'] = df.apply(lambda x: get_city_lat(x['city'], x['zip_code']), axis=1)
df['city_lon'] = df.apply(lambda x: get_city_lon(x['city']), axis=1)
## ==========================================================
## ADDING VG COLUMNS
## ==========================================================
## --- dept_binned & dept_cleaned for standardization
## --- exec leader for subsetting
## --- function for subsetting
dept_binned = pd.read_csv('P7_TIME_ALLOCATION/VG_TIME__DATA_DEPT_NAMES_AND_BINS_V3.csv')
dept_binned = dict(zip(dept_binned['currently'], dept_binned['kendra']))
df['Dept'] = df['Org Level 1'].str.strip()
def get_dept_binned(dept):
try:
return dept_binned[dept]
except:
return 'no data'
def get_clean_dept(dept):
return dept.replace(' - ', '-').replace('-', ': ')
leaders = pd.read_csv('VG_DATA_DICT_exec_leader_to_dept.csv')
dept_to_leader = {k:v for k,v in leaders.values}
def get_exec_leader(dept):
try:
return dept_to_leader[dept]
except:
print(dept)
return 'no data'
function_v2 = pd.read_csv('function_v2e.csv')
dept_to_function = {k:v for k,v in function_v2.values}
def try_match(dept):
try:
return dept_to_function[dept]
except:
return 'no data'
df['dept_binned'] = df.apply(lambda x: get_dept_binned(x['Dept']), axis=1)
df['Org Level 1 Clean'] = df.apply(lambda x: get_clean_dept(x['Org Level 1']), axis=1)
df['executive_leader'] = df.apply(lambda x: get_exec_leader(x['Org Level 1 Clean']), axis=1)
df['new_func'] = df.apply(lambda x: try_match(x['Org Level 1']), axis=1)