VG_P9 Meet and Greet DATA CLEANING

Started 10/30/20

GOAL

INPUT: "Active Employees" (from Sonie)

OUTPUT: "Active Employees" + schedule + supervisor chain

In [122]:
import pandas as pd
df = pd.read_csv('VG_P9_1104_Active Employee Listing for Meet and Greet - Kendra afcb5e312.csv', encoding='latin')

# NOT USING THIS ONE BECAUSE TOO MESSY
# df_utm = pd.read_csv('VG_P9_1104_UTM Shift Report v1.csv') 

Add Schedule Columns & "Met with CEO"

In [123]:
df_shifts = pd.read_csv("VG_P9_employees.csv")

def get_shift(e_id, column):
    try:
        return df_shifts.loc[df_shifts['Employee Number'] == e_id][column].values[0]
    except:
        return 'no data'

df['Shift'] = df.apply(lambda x: get_shift(x['Employee Number'], 'Shift') , axis=1)
df['Day'] = df.apply(lambda x: get_shift(x['Employee Number'], 'Day') , axis=1)
df['Start Time'] = df.apply(lambda x: get_shift(x['Employee Number'], 'Start Time') , axis=1)
df['End Time'] = df.apply(lambda x: get_shift(x['Employee Number'], 'End Time') , axis=1)
df['Met with CEO'] = df.apply(lambda x: get_shift(x['Employee Number'], 'Met with CEO') , axis=1)

Then, Add Supervisor Chain

In [124]:
def return_report_chain(employee):
#     print("*"*100)
#     print(employee)
    all_sups = []
    def get_report_chain(employee):
        employee = int(employee)
        if employee == 3035:
            all_sups.append('Michael Colglazier')
            return 'done'
        else:
            curr_employee = df.loc[df['Employee Number'] == employee]
            next_level = "{} {}, {} ({}) reports to -- ".format(
                curr_employee['First Name'].values[0],
                curr_employee['Last Name'].values[0],
                curr_employee['Job'].values[0],
                curr_employee['Org Level 1'].values[0])

            all_sups.append(next_level)
            supervisor = df.loc[df['Employee Number'] == employee]['Supervisor Employee Number'].values[0]

            return get_report_chain(supervisor)
    try:
        get_report_chain(employee)
        return ''.join(all_sups)
    except:
        print(employee)
        return 'no data'
df['supervisor_chain'] = df.apply(lambda x: return_report_chain(x['Employee Number']), axis=1)
1822
1851
2031
1764
2069
2553
1379
2067
2859

Add "Y" to "Met with CEO" for people Allison already put into groups

In [125]:
df_al = pd.read_csv('VG_P9_Allison_already_grouped.csv')

def get_shift(e_id, column):
    try:
        status = df_al.loc[df_shifts['Employee Number'] == e_id][column].values[0] 
        if type(status) != str:
            return 'no status yet'
        else:
            return status
    except:
        return 'no status yet'
    
df['Status'] = df.apply(lambda x: get_shift(x['Employee Number'], 'Status') , axis=1)
In [126]:
df['Met with CEO'] = ['Y' if x == 'complete' else 'N' for x in df['Status']]
df['Met with CEO'].value_counts()
Out[126]:
N    798
Y     21
Name: Met with CEO, dtype: int64
In [128]:
from datetime import datetime
curr_time = datetime.now().strftime('%Y-%m-%d-%s')
df.to_csv('VG_P9_employees_CLEANED_WITH_SHEDULES_AND_SUPERVISORS_' + curr_time + '_.csv', index=False)

NOTES/ISSUES/QUESTIONS

In [103]:
# Scott Knight is not listed as an employee but is still listed as a supervisor?
test = df[df['Last Name'] == 'Knight']
test
test = df[df['Last Name'] == 'Galvan']
test
test = df2.loc[df2['Employee Number'] == 2129]
test
Out[103]:
Employment Type Company Location Employee Number First Name Preferred First Name Last Name E-mail Address Job Org Level 1 Supervisor Employee Number Supervisor Name (Last Suffix, First MI) Shift
In [ ]: