daily log 10.14.20

8 minute read
import pandas as pd
from tabulate import tabulate

df = pd.read_csv('VG_P9_employees.csv')

## **********************************************************
## ==========================================================
## MANUAL EDITS AND UPDATES
## ==========================================================

# Updating Andrew (Andy) Jones
df.loc[df['Employee Number'] == 2158, ['Met with CEO']] = 'Y'

## **********************************************************


## ==========================================================
## ADDING DAY COLUMNS
## ==========================================================
## --- DAYS --- Which days work for which people?

monday_ok = {'Mon-Thu,\nEvery other Fri': True,
 'Mon-Fri': True,
 'Mon-Thu': True,
 'Fri-Sun': False,
 'Fri-Sat': False,
 'Thu-Mon': False,
 'Tue-Fri': False}

tuesday_ok = {'Mon-Thu,\nEvery other Fri': True,
 'Mon-Fri': True,
 'Mon-Thu': True,
 'Fri-Sun': False,
 'Fri-Sat': False,
 'Thu-Mon': False,
 'Tue-Fri': True}

wednesday_ok = {'Mon-Thu,\nEvery other Fri': True,
 'Mon-Fri': True,
 'Mon-Thu': True,
 'Fri-Sun': False,
 'Fri-Sat': False,
 'Thu-Mon': False,
 'Tue-Fri': True}

thursday_ok = {'Mon-Thu,\nEvery other Fri': True,
 'Mon-Fri': True,
 'Mon-Thu': True,
 'Fri-Sun': False,
 'Fri-Sat': False,
 'Thu-Mon': True,
 'Tue-Fri': True}

friday_ok = {'Mon-Thu,\nEvery other Fri': True,
 'Mon-Fri': True,
 'Mon-Thu': False,
 'Fri-Sun': True,
 'Fri-Sat': True,
 'Thu-Mon': True,
 'Tue-Fri': True}

df['monday_ok'] = df.apply(lambda x: monday_ok[x['Day']], axis=1)
df['tuesday_ok'] = df.apply(lambda x: tuesday_ok[x['Day']], axis=1)
df['wednesday_ok'] = df.apply(lambda x: wednesday_ok[x['Day']], axis=1)
df['thursday_ok'] = df.apply(lambda x: thursday_ok[x['Day']], axis=1)
df['friday_ok'] = df.apply(lambda x: friday_ok[x['Day']], axis=1)


## ==========================================================
## ADDING LOCATION BIN
## ==========================================================

location_dict = dict(df['Location'].value_counts())
def get_location(x):
    if location_dict[x] > 100:
        return x
    else:
        return 'other'
df['location_binned'] = df.apply(lambda x: get_location(x['Location']), axis=1)


## ==========================================================
## ADDING PST TIME
## ==========================================================
## --- TIMES --- Which time slots work for which people?
## --- NOTE: Adjusted everything to PST


buildings = pd.read_csv('VG_P9_buildingtostate.csv')
buildings_to_state = dict(zip(buildings['building'], buildings['state']))

df['state'] = df.apply(lambda x: buildings_to_state[x['Location']], axis=1)

nm_time = {'7:00am 4:30pm': '8:00am 5:30pm',
 '8:00am 4:30pm': '9:00am 5:30pm',
 '6:00am 4:30pm': '7:00am 5:30pm',
 '5:00pm 3:30am': '6:00pm 4:30am',
 '5:00am 6:00pm': '6:00am 7:00pm',
 '3:30pm 2:00am': '4:30pm 3:00am',
 '3:30pm 1:00am': '4:30pm 2:00am',
 '2:00pm 10:30pm': '3:00pm 11:30pm'}

def get_pst_time(state, start_end):
    if state == 'CA':
        return start_end
    if state == 'NM':
        return nm_time[start_end]
    if state == 'London':
        return '4:00pm 12:30am'

df['start_end'] = df.apply(lambda x: x['Start Time'] + " " +  x['End Time'], axis=1)
df['start_end_PST'] = df.apply(lambda x: get_pst_time(x['state'], x['start_end']), axis=1)


## ==========================================================
## ADDING SHIFT TIMES
## ==========================================================

# MEETINGS FROM 12-2
# Pref_time_1A = 12-1
# Pref_time_1B = 1-2

pref_time_1A_ok = {'7:00am 4:30pm': True,
 '9:00am 5:30pm': True,
 '6:00am 4:30pm': True,
 '8:00am 4:30pm': True,
 '7:00am 5:30pm': True,
 '5:00pm 3:30am': False,
 '5:00am 6:00pm': True,
 '4:00pm 12:30am': False,
 '4:30pm 3:00am': False,
 '3:30pm 1:00am': False,
 '2:00pm 10:30pm': False}

pref_time_1B_ok = {'7:00am 4:30pm': True,
 '9:00am 5:30pm': True,
 '6:00am 4:30pm': True,
 '8:00am 4:30pm': True,
 '7:00am 5:30pm': True,
 '5:00pm 3:30am': False,
 '5:00am 6:00pm': True,
 '4:00pm 12:30am': False,
 '4:30pm 3:00am': False,
 '3:30pm 1:00am': False,
 '2:00pm 10:30pm': False}


# MEETINGS FROM 2-4:30 
# Pref_time_2A = 2:30-3:30
# Pref_time_2B = 3:30-4:30

pref_time_2A_ok = {'7:00am 4:30pm': True,
 '9:00am 5:30pm': True,
 '6:00am 4:30pm': True,
 '8:00am 4:30pm': True,
 '7:00am 5:30pm': True,
 '5:00pm 3:30am': False,
 '5:00am 6:00pm': True,
 '4:00pm 12:30am': False,
 '4:30pm 3:00am': False,
 '3:30pm 1:00am': False,
 '2:00pm 10:30pm': False}

pref_time_2B_ok = {'7:00am 4:30pm': True,
 '9:00am 5:30pm': True,
 '6:00am 4:30pm': True,
 '8:00am 4:30pm': True,
 '7:00am 5:30pm': True,
 '5:00pm 3:30am': False,
 '5:00am 6:00pm': True,
 '4:00pm 12:30am': False,
 '4:30pm 3:00am': False,
 '3:30pm 1:00am': True,
 '2:00pm 10:30pm': True}


# NOTE -- MUST CONFIRM WITH MICHAEL THAT 4:30-5:30 is OK
# NOTE -- MUST COMFIRM that these people: `fivepm = df[df['start_end_PST'] == '5:00pm 3:30am']`
# can start half an hour early
# AKA "SECOND SHIFT"
# Pref_time_2C = 4:30-5:30
pref_time_2C_ok = {'7:00am 4:30pm': False,
 '9:00am 5:30pm': True,
 '6:00am 4:30pm': False,
 '8:00am 4:30pm': False,
 '7:00am 5:30pm': True,
 '5:00pm 3:30am': True,
 '5:00am 6:00pm': True,
 '4:00pm 12:30am': True,
 '4:30pm 3:00am': True,
 '3:30pm 1:00am': True,
 '2:00pm 10:30pm': True}

df['pref_time_1A_ok'] = df.apply(lambda x: pref_time_1A_ok[x['start_end_PST']], axis=1)
df['pref_time_1B_ok'] = df.apply(lambda x: pref_time_1B_ok[x['start_end_PST']], axis=1)
df['pref_time_2A_ok'] = df.apply(lambda x: pref_time_2A_ok[x['start_end_PST']], axis=1)
df['pref_time_2B_ok'] = df.apply(lambda x: pref_time_2B_ok[x['start_end_PST']], axis=1)
df['pref_time_2C_ok'] = df.apply(lambda x: pref_time_2C_ok[x['start_end_PST']], axis=1)


def get_only_friday(x):
    if x['monday_ok'] == True:
        return False
    if x['tuesday_ok'] == True:
        return False
    if x['wednesday_ok'] == True:
        return False
    if x['thursday_ok'] == True:
        return False
    else:
        return True

def get_only_2C(x):
    if x['pref_time_1A_ok'] == True:
        return False
    else:
        return True
    
def get_only_1A(x):
    if x['pref_time_2C_ok'] == True:
        return False
    else:
        return True

df['only_friday'] = df.apply(lambda x: get_only_friday(x), axis=1)
df['only_2C'] = df.apply(lambda x: get_only_2C(x), axis=1)
df['only_1A'] = df.apply(lambda x: get_only_1A(x), axis=1)

## ==========================================================
## ADDING UNITY
## ==========================================================

unity_df = pd.read_csv('VG_P9_UNITY__FROM_VG_TIME_V15_NO_CONTRACTORS.csv')
only_unity = [col for col in unity_df.columns if 'Unity' in col]
# sm_df = df[['Employee Number', 'JUN-20-Unity', 'JUL-20-Unity', 'AUG-20-Unity']]
sm_df = unity_df[['Employee Number', 'AUG-20-Unity']]
sm_df['AUG-20-Unity'][sm_df['AUG-20-Unity'] > 0] = True
sm_df['AUG-20-Unity'][sm_df['AUG-20-Unity'] == 0] = False
unity_dict = dict(zip(sm_df['Employee Number'], sm_df['AUG-20-Unity']))
unity_dict

def get_unity(e_num):
    str_e_num = str(e_num)
    try:
        return unity_dict[str_e_num]
    except:
#         print('nope')
        return 'no record'


df['Unity'] = df.apply(lambda x: get_unity(x['Employee Number']), axis=1)

## ==========================================================
## FUNCTIONS TO CREATE PRINTABLE REPORT
## ==========================================================

def get_clean_name(first, pref_first, last):
    if type(pref_first) == str and first.lower() != pref_first.lower():
        return "{} ({}) {}".format(first, pref_first, last)
    else:
        return "{} {}".format(first, last)
df['clean_name'] = df.apply(lambda x: get_clean_name(x['First Name'], 
                                                     x['Preferred First Name'], x['Last Name']), axis=1)

def get_military_time(time):
    first_digit = time.split(':')[0]
    half = 0
    if time[-4] != 0:
        half = int(time[-4])*.10
    if time[-2] == 'p':
        return int(first_digit) + 12 + half
    else:
        return int(first_digit) + half

df['start_PST'] = df.apply(lambda x: x['start_end_PST'].split(' ')[0], axis=1)
df['end_PST'] = df.apply(lambda x: x['start_end_PST'].split(' ')[1], axis=1)
df['start_PST_military'] = df.apply(lambda x: get_military_time(x['start_PST']), axis=1)
df['end_PST_military'] = df.apply(lambda x: get_military_time(x['end_PST']), axis=1)

## ==========================================================
## FUNCTIONS TO CREATE EVEN GROUPS
## ==========================================================

import math, random

def generate_choices(number_of_groups, group_size):
    choices = []
    for i in range(1,number_of_groups + 1):
        choices += [i]*group_size  
    return choices

def assign_mtg_id(choices, group_name):
    group_num = random.choice(choices)
    choices.remove(group_num)
    return group_name + '_' + str(group_num)


## ==========================================================
## ==========================================================
## ==========================================================
## REMOVING UNITY AND MET WITH MICHAEL
## ==========================================================
## ==========================================================
## ==========================================================
## ==========================================================



og_df = df.copy()

# sm_df = df[(df['Met with CEO'] == 'N') & (df['Unity'] == False)]

sm_df = df[(df['Met with CEO'] == 'N') & (df['Unity'] == False) & (df['Org Level 1'] != "Office of CEO")]

df = sm_df.copy()




## ==========================================================
## CREATING THE GROUPS: 2C AND 1A
## ==========================================================

# --- 2C
group_id = 'GROUP_2C_FRIDAY'
sm_df = df[(df['pref_time_2C_ok'] == True) & (df['friday_ok']== True)]
number_of_groups = math.ceil(len(sm_df)/8)
choices = generate_choices(number_of_groups,8)
sm_df['mtg_id'] = sm_df.apply(lambda x: assign_mtg_id(choices, group_id),axis=1)
GROUP_2C_FRIDAY = sm_df.copy()
sm_df['mtg_id'].value_counts()

group_id = 'GROUP_2C_NO_FRIDAY'
sm_df = df[(df['pref_time_2C_ok'] == True) & (df['friday_ok']== False)]
number_of_groups = math.ceil(len(sm_df)/8)
choices = generate_choices(number_of_groups,8)
sm_df['mtg_id'] = sm_df.apply(lambda x: assign_mtg_id(choices, group_id),axis=1)
GROUP_2C_NO_FRIDAY = sm_df.copy()
sm_df['mtg_id'].value_counts()

df_2C = pd.concat([GROUP_2C_FRIDAY, GROUP_2C_NO_FRIDAY], axis="rows")

# --- 1A
group_id = 'GROUP_1A_FRIDAY'
sm_df = df[(df['pref_time_1A_ok'] == True) & (df['pref_time_2C_ok'] == False) & (df['friday_ok']== True)]
number_of_groups = math.ceil(len(sm_df)/8)
choices = generate_choices(number_of_groups,8)
sm_df['mtg_id'] = sm_df.apply(lambda x: assign_mtg_id(choices, group_id),axis=1)
GROUP_1A_FRIDAY = sm_df.copy()
sm_df['mtg_id'].value_counts()

group_id = 'GROUP_1A_NO_FRIDAY'
sm_df = df[(df['pref_time_1A_ok'] == True) & (df['pref_time_2C_ok'] == False) & (df['friday_ok']== False)]
number_of_groups = math.ceil(len(sm_df)/8)
choices = generate_choices(number_of_groups,8)
sm_df['mtg_id'] = sm_df.apply(lambda x: assign_mtg_id(choices, group_id),axis=1)
GROUP_1A_NO_FRIDAY = sm_df.copy()
sm_df['mtg_id'].value_counts()


df_2C = pd.concat([GROUP_2C_FRIDAY, GROUP_2C_NO_FRIDAY], axis="rows")
df_1A = pd.concat([GROUP_1A_FRIDAY, GROUP_1A_NO_FRIDAY], axis="rows")

df_1A_2C = pd.concat([df_1A, df_2C], axis="rows")

df = df_1A_2C.copy()
df['sub_group_num'] = df.apply(lambda x: x['mtg_id'].split('_')[-1], axis=1 )
df['group_id'] = df.apply(lambda x: '_'.join(x['mtg_id'].split('_')[:-1]), axis=1 )
df['sub_group_num'] = df['sub_group_num'].astype(int)


## ==========================================================
## MARK MEETINGS THAT MUST HAPPEN ON A FRIDAY
## ==========================================================

only_friday_mtg = {}
for g in set(df['mtg_id']):
    sm_df = df[df['mtg_id'] == g]
    only_friday_dict = dict(sm_df['only_friday'].value_counts())
    if True in only_friday_dict:
        only_friday_mtg[g] = True
    else:
        only_friday_mtg[g] = False
        
df['only_friday_mtg'] = df.apply(lambda x: only_friday_mtg[x['mtg_id']], axis=1)


## ==========================================================
## REGROUPING
## ==========================================================

dept_dict = dict(df['Org Level 1'].value_counts())

def get_dept_bin(dept):
    if "Eng" in dept:
        return 'ENG'
    if "Ops"in dept:
        return 'OPS'
    if dept in ['Finance','People','Legal & General Complianc','Office of CEO', 'Executive']:
        return 'people_finance_legal'
    if dept in ['Logistics', 'Supply Chain', 'Procurement','Project Management Office', 'Business Development']:
        return 'pm_logistics'
    if dept in ['Manufacturing', 'Machine Shop']:
        return 'manufacturing_machine_shop'
    if dept in ['Information Technology','Quality Assurance']:
        return 'QA_IT'
    if dept in ['Tooling', 'Spaceship Propulsion']:
        return 'tooling_spaceprop'
    else:
        return 'support_other'
    
df['dept_binned'] = df.apply(lambda x: get_dept_bin(x['Org Level 1']), axis=1)

group_num_dict = {}
for dept in set(df['dept_binned']):
    sm_df = df[df['dept_binned'] == dept]
    for i in range(len(sm_df)):
        
        idx = str(i)

        if idx in group_num_dict:
            group_num_dict[idx] += [sm_df.iloc[i]['Employee Number']]
        else: 
            group_num_dict[idx] = [sm_df.iloc[i]['Employee Number']]

group_num_dict_keys = [k for k,v in group_num_dict.items()]

employee_to_group = {}
for k in group_num_dict_keys:
    for employee_num in group_num_dict[k]:
        employee_to_group[employee_num] = k
df['new_group_number'] = df.apply(lambda x: employee_to_group[x['Employee Number']], axis=1)


# New group number is 0 indexed 
# changed to normal_gorup_number for Allison's readability 
# df['normal_group_number'] = df.apply(lambda x: x['new_group_number'] + 1, )

new_df = pd.DataFrame()

df['normal_group_number'] = df['new_group_number'].astype(int) + 1
df['Emails'] = df['E-mail Address']
df['ID'] = df['Employee Number']
df['Name'] = df['clean_name']
df['Department'] = df['Org Level 1']
df['Supervisor'] = df['Supervisor Name (Last Suffix, First MI)']
for i in range(1,51):  
    print("*"*70)
    print("-"*70)
    print(' '* 70)
    print("GROUP NUMBER:", i)
#     sm_df = df[df['normal_group_number'] == str(i)]
    sm_df = df[df['normal_group_number'] == i]
    monday_ok = True
    tuesday_ok = True
    wednesday_ok = True
    thursday_ok = True
    friday_ok = True
    must_be_friday = False
    must_be_1A = False
    must_be_2C = False
    min_time = 0
    max_time = 0
    for index, row in sm_df.iterrows():
        if (row['only_friday'] == True):
            must_be_friday = True
        if (row['only_1A'] == True):
            must_be_1A = True
        if (row['only_2C'] == True):
            must_be_2C = True    
        
        if (row['monday_ok'] == False):
            monday_ok = False
        if (row['tuesday_ok'] == False):
            tuesday_ok = False
        if (row['wednesday_ok'] == False):
            wednesday_ok = False
        if (row['thursday_ok'] == False):
            thursday_ok = False
        if (row['friday_ok'] == False):
            friday_ok = False

        if (row['start_PST_military'] > min_time):
            min_time = row['start_PST_military']
            
        if (row['end_PST_military'] > max_time):
            max_time = row['end_PST_military']
            
            
    
    print(' '*70)
    print("-"*70)
    print(' '*70)
    print('FOR MICHAEL')
    print(' '*70)
    
    sm_sm_df = sm_df[['ID', 'Name', 'Job', 'Dept']]
    print(tabulate(sm_sm_df, tablefmt="rst", headers=sm_sm_df.columns, showindex=False))
    
    print(' '*70)
    print("-"*70)
    print(' '*70)
    print('FOR ALLISON')
    print(' '*70)
    
    sm_sm_df2 = sm_df[['Emails']]
    print(tabulate(sm_sm_df2, tablefmt="rst", headers=sm_sm_df2.columns, showindex=False))

    
    ok_days = ""
    if must_be_friday:
        print('NOTES: Meeting must be friday')
        ok_days = "Can only meet Friday"
    else:
        ok_days = "Can meet on {} {} {} {} {}".format("M" if monday_ok else "", 
                            "Tu" if tuesday_ok else "", 
                            "W" if wednesday_ok else "", 
                            "Th" if thursday_ok else "",
                            "F" if friday_ok else "")
    
    ok_times = ""
    if must_be_2C:
        print('NOTES: Meeting must be 4:30-5:30pm')
        ok_times = "4:30pm-5:30pm"
    else:
        ok_times = "Can meet between {} and {}".format(str(round(min_time, 3)).replace('.', ':')+'0' , 
                                                       str(round(max_time-12, 3)).replace('.', ':')+'0' )

    
    sm_df['Meeting Dates'] = ok_days
    sm_df['Meeting Times']  = ok_times
    sm_df['Group Number'] = sm_df['normal_group_number']
    sm_df_sm = sm_df[['Group Number', 'ID', 'Name', 'Emails', 'Department', 'Supervisor', 'Meeting Dates', 'Meeting Times']]
    new_df = pd.concat([new_df, sm_df_sm], axis="rows")
    print(ok_days)
    print(ok_times)
    print("-"*70)
    print(' '*70)
    print(' '*70)
    print(' '*70)
    

# new_df.to_csv('VG_P9_MeetAndGreet_Groups_1_50.csv', index=False)    
Share on

Twitter Facebook LinkedIn
Daniel Caraway

daily log 10.14.20

Share on

You may also enjoy

daily log 03-25-21

How to use Data Science Superpowers for Useless Things: Getting a Job at Amazon, Take 2

How to use Data Science Superpowers for Useless Things: Getting a Job at Amazon

How to use Data Science Superpowers for Useless Things: Adding Text to Images (aka Cats Narrate the Big Lebowski)