daily log 10.26.20

1 minute read

How to change colors for pandas barh plot

Just change the list of colors passed to the `color` argument of `plot.barh` — one entry per stacked segment.

# Work on a copy so og_df itself is not modified by the steps below.
df = og_df.copy()

# ===============================================================
# ===============================================================
# ADD COUNTY
# ===============================================================

# Load the city reference table and keep only the CA and NM rows.
city_df = pd.read_csv('simplemaps_uscities_basicv1.7/uscities.csv')
ca_city_df = city_df[city_df['state_id'].isin(['CA', 'NM'])]
def get_county(zip_code):
    """Return the county name for ``zip_code``, or ``'no data'`` if unknown.

    The lookup scans the module-level ``ca_city_df`` for the first row whose
    ``zips`` text contains ``zip_code`` and returns that row's
    ``county_name``.

    Args:
        zip_code: Zip code to look up. Any value is accepted; it is converted
            to text before matching.

    Returns:
        The matching county name, or the string ``'no data'`` when the zip
        code is blank or no row matches.
    """
    zip_text = str(zip_code).strip()
    # An empty pattern is contained in every string, so it would wrongly
    # "match" the first city in the table.
    if not zip_text:
        return 'no data'

    # regex=False: a zip code is literal text, not a pattern.
    # na=False: rows without zips count as "no match" instead of producing a
    # non-boolean mask that cannot be used for filtering.
    matches = ca_city_df['zips'].str.contains(zip_text, regex=False, na=False)
    counties = ca_city_df.loc[matches, 'county_name']
    if counties.empty:
        return 'no data'
    return counties.iloc[0]
    
# Look up the county for every zip code column-wise; a row-wise
# DataFrame.apply would build a full row object just to read one field.
df['county'] = df['zip_code'].map(get_county)


import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import seaborn as sns
%matplotlib inline  

# ===============================================================
# ===============================================================
# VISUALIZATION VARIABLES
# ===============================================================
# -- fonts & colors
# Font dictionaries for matplotlib text elements. 'sans-serif' is the generic
# family name; the former spelling 'san-serif' is not a generic family.
font = {'family': 'sans-serif', 'size': 14, 'weight': 'normal'}
labelfont = {'family': 'sans-serif', 'size': 18}
labelfont_2 = {'family': 'serif', 'size': 12}
titlefont = {'family': 'serif', 'size': 30}
# Italic serif font properties, then named 'Times New Roman'.
multiplot_x = FontProperties(family='serif', style='italic')
multiplot_x.set_name('Times New Roman')

# ===============================================================
# ===============================================================
# GRAPHING THE DATA
# ===============================================================

# Open a large, high-resolution figure ahead of the chart helper.
# NOTE(review): make_stacked_bar_chart plots through DataFrame.plot.barh
# without passing this figure along, so it looks unused — confirm it is needed.
plt.figure(dpi=500, figsize=(20, 20))
def make_stacked_bar_chart(sm_df, _y,
                           COLORS__OF__BARS,
                           NUMBER__OF__COLORS,
                           _title,
                           _xlabel,
                           _ylabel,
                           min_palette=False,
                           bar_colors=None):
    """Plot a horizontal stacked bar chart of row counts and show it.

    Rows of ``sm_df`` are counted for every (``_y``, ``COLORS__OF__BARS``)
    combination. Each ``_y`` value becomes one bar and each
    ``COLORS__OF__BARS`` value one coloured segment of that bar. Bars are
    sorted by their total count (ascending) and every non-empty segment is
    labelled with its count. The count table is printed before plotting.

    Relies on the module-level ``plt``, ``titlefont`` and ``labelfont``.

    Args:
        sm_df: DataFrame holding the rows to count.
        _y: Name of the column whose values become the bars.
        COLORS__OF__BARS: Name of the column whose values become the segments.
        NUMBER__OF__COLORS: Unused; kept so existing calls keep working.
        _title: Chart title.
        _xlabel: Label of the x axis.
        _ylabel: Label of the y axis.
        min_palette: Unused; kept so existing calls keep working.
        bar_colors: Segment colours. Either a list with one entry per segment
            (in the column order of the printed count table) or a dict mapping
            every segment value to its colour, e.g.
            ``{"Commercial": "#BC80BD", "Support": "#FFED6F",
            "Engineering": "#8DD3C7", "Manufacturing": "#80B1D3",
            "Spaceflight Operations": "#D6D8D9"}``.
            Defaults to ``['orange', 'yellow', 'green', 'blue']``.

    Raises:
        KeyError: If ``bar_colors`` is a dict that lacks one of the segment
            values present in the data.
    """
    if bar_colors is None:
        bar_colors = ['orange', 'yellow', 'green', 'blue']

    # One row per _y value, one column per segment value, cells = row counts.
    counts = (sm_df.groupby([_y, COLORS__OF__BARS]).size()
              .reset_index()
              .pivot(columns=COLORS__OF__BARS, index=_y, values=0)
              .fillna(0))

    # Order the bars by their total count without keeping a helper column.
    totals = counts.sum(axis=1)
    counts = counts.loc[totals.sort_values(ascending=True).index]

    print(counts)

    # Resolve a category -> colour mapping into the column order of the table.
    if isinstance(bar_colors, dict):
        bar_colors = [bar_colors[category] for category in counts.columns]

    # pandas creates its own figure here, so no separate plt.figure() call.
    ax = counts.plot.barh(stacked=True, figsize=(16, 9), color=bar_colors)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Right edge of every segment = running total across the columns.
    right_edges = counts.cumsum(axis=1)
    for category in counts.columns:
        segments = zip(counts[category], right_edges[category])
        for row, (value, edge) in enumerate(segments):
            if value > 0:
                # Right-align the count at the segment's right edge; the
                # trailing space keeps it off the border. int() avoids labels
                # such as '3.0' for float counts.
                ax.text(edge, row, str(int(round(value))) + " ",
                        color='white', va='center', ha='right',
                        fontweight='bold', fontsize='10')

    ax.set_title(_title, fontdict=titlefont, y=1.1)
    ax.set_xlabel(_xlabel, fontdict=labelfont, labelpad=20)
    ax.set_ylabel(_ylabel, fontdict=labelfont, labelpad=20)
    ax.tick_params(axis='y', labelsize=18)
    for tick in ax.get_yticklabels():
        tick.set_fontname("Georgia")
    plt.show()
    
# ===============================================================
# ===============================================================
# PREPARING THE DATA
# ===============================================================

# COLUMN = 'Zip Code (Formatted)'

# df = og_df.copy()
COLUMN = 'city'

COLORS__OF__BARS = "new_func"
NUMBER__OF__COLORS = 4
NUM = 10

# Keep only the rows whose COLUMN value occurs more than NUM times.
column_counts = dict(df[COLUMN].value_counts())
# Column-wise lookup: values absent from column_counts (such as NaN) map to
# NaN, which compares as False instead of raising a KeyError.
df['threshold'] = df[COLUMN].map(column_counts) > NUM
df = df[df['threshold']]

# NOTE(review): the chart is grouped by COLUMN ('city') but titled and
# labelled "County" — confirm which of the two is intended.
sm_df = df[(df['state'] == 'CA') & (df['county'] != 'no data')]
make_stacked_bar_chart(sm_df,
                       COLUMN,
                       COLORS__OF__BARS,
                       NUMBER__OF__COLORS,
                       'County Distribution (in CA), by Function',
                       'Total Number of Teammates',
                       'County')