Daily log, 10.26.20
How to change the colors of a pandas barh plot:
just change the list passed as `color=` to `DataFrame.plot.barh`.
# Work on a copy so og_df itself is never modified by the steps below.
df = og_df.copy()
# ===============================================================
# ===============================================================
# ADD COUNTY
# ===============================================================
# Load the city reference table, then keep only the rows whose state_id
# is CA or NM (despite its name, ca_city_df also holds the NM rows).
city_df = pd.read_csv('simplemaps_uscities_basicv1.7/uscities.csv')
ca_city_df = city_df[city_df['state_id'].isin(['CA', 'NM'])]
def get_county(zip_code):
    """Return the county name of the first city whose ZIP list contains *zip_code*.

    The lookup runs against the module-level ``ca_city_df`` table, reading its
    ``zips`` and ``county_name`` columns.

    Args:
        zip_code: ZIP code to look up. Non-string values (e.g. ints) are
            converted with ``str()`` before matching.

    Returns:
        The ``county_name`` of the first matching row, or the string
        ``'no data'`` when the ZIP is blank, nothing matches, or the table
        cannot be searched.
    """
    code = str(zip_code).strip()
    # An empty pattern is contained in every string, which would wrongly
    # return the first county in the table.
    if not code:
        return 'no data'
    try:
        # regex=False: match the ZIP as literal text, not as a pattern.
        # na=False: rows with a missing ``zips`` value count as "no match"
        # instead of putting NaN into the boolean mask.
        matches = ca_city_df['zips'].str.contains(code, regex=False, na=False)
        return ca_city_df.loc[matches, 'county_name'].values[0]
    except (IndexError, KeyError, AttributeError):
        # IndexError: no row matched. KeyError / AttributeError: the table
        # lacks the expected columns or ``zips`` is not text. The lookup stays
        # best-effort, but interrupts such as Ctrl-C are no longer swallowed.
        return 'no data'
# Look up the county for each row's ZIP code. Mapping over the single
# zip_code column avoids building a Series for every row (as
# DataFrame.apply(axis=1) does) and also works when df has no rows.
df['county'] = df['zip_code'].map(get_county)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import seaborn as sns
%matplotlib inline
# ===============================================================
# ===============================================================
# VISUALIZATION VARIABLES
# ===============================================================
# -- fonts & colors
# Font dictionaries passed as ``fontdict=`` to matplotlib text calls.
# The generic family is spelled 'sans-serif'; the previous 'san-serif'
# spelling is not a family name matplotlib knows.
font = {'family': 'sans-serif', 'size': 14, 'weight': 'normal'}
labelfont = {'family': 'sans-serif', 'size': 18}
labelfont_2 = {'family': 'serif', 'size': 12}
titlefont = {'family': 'serif', 'size': 30}
# Italic Times New Roman, with the generic serif family as a fallback.
# FontProperties.set_name is an alias of set_family, so calling both in
# sequence kept only the second value; a single prioritized list keeps both.
multiplot_x = FontProperties()
multiplot_x.set_family(['Times New Roman', 'serif'])
multiplot_x.set_style('italic')
# ===============================================================
# ===============================================================
# GRAPHING THE DATA
# ===============================================================
# Opens a new 20x20 inch figure at 500 dpi and makes it the current figure.
# NOTE(review): nothing visible in this file draws on it --
# make_stacked_bar_chart opens its own figure -- so this looks like a
# leftover that only produces an empty canvas; confirm before removing.
plt.figure(figsize=(20,20), dpi=500)
def make_stacked_bar_chart(sm_df, _y,
                           COLORS__OF__BARS,
                           NUMBER__OF__COLORS,
                           _title,
                           _xlabel,
                           _ylabel,
                           min_palette=False,):
    """Draw a horizontal stacked bar chart of row counts and show it.

    Rows of ``sm_df`` are counted per (``_y``, ``COLORS__OF__BARS``) pair.
    Each distinct ``_y`` value becomes one bar, split into one colored
    segment per distinct ``COLORS__OF__BARS`` value. Bars are ordered by
    their total count and every non-empty segment is labelled with its count.

    The count table is printed before plotting. Title and axis-label fonts
    come from the module-level ``titlefont`` and ``labelfont`` dictionaries.

    Args:
        sm_df: DataFrame containing the ``_y`` and ``COLORS__OF__BARS`` columns.
        _y: Name of the column whose values become the bars.
        COLORS__OF__BARS: Name of the column whose values become the
            stacked segments.
        NUMBER__OF__COLORS: Currently unused; kept so existing calls work.
        _title: Chart title.
        _xlabel: Label for the x axis (the counts).
        _ylabel: Label for the y axis (the bars).
        min_palette: Currently unused; kept so existing calls work.

    Returns:
        None. The chart is displayed with ``plt.show()``.
    """
    # One row per _y value, one column per segment value. After
    # size().reset_index() the count column is named 0, hence values=0.
    sm_df_g = sm_df.groupby(
        [_y, COLORS__OF__BARS]).size().reset_index().pivot(
            columns=COLORS__OF__BARS, index=_y, values=0)
    sm_df_g = sm_df_g.fillna(0)

    # Order the bars by their total count, ascending. The totals live in a
    # separate Series so that a segment value that happens to be called
    # 'Total' cannot be overwritten or dropped.
    bar_order = sm_df_g.sum(axis=1).sort_values(ascending=True).index
    sm_df_g = sm_df_g.loc[bar_order]
    print(sm_df_g)

    # Create exactly one figure and hand its axes to pandas. Calling
    # plt.figure() and then plotting without ax= opened a second figure and
    # left the first one empty.
    _, ax = plt.subplots(figsize=(16, 9))
    # NOTE(review): only four colors are listed; with more segment values
    # than colors, pandas presumably reuses them -- extend the list if so.
    sm_df_g.plot.barh(stacked=True, ax=ax,
                      color=['orange', 'yellow', 'green', 'blue'])
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Label each non-empty segment with its count. running_totals[i] is the
    # right edge of the segment currently being labelled in bar i.
    running_totals = [0] * len(sm_df_g)
    for col in sm_df_g.columns:
        for i, v in enumerate(sm_df_g[col]):
            running_totals[i] += v
            if v > 0:
                v_text = str(round(v))
                # The label is right-aligned and nudged left by one data
                # unit per character (a layout heuristic kept as-is).
                ax.text(running_totals[i] - len(v_text), i, v_text + " ",
                        color='white', va='center', ha='right',
                        fontweight='bold', fontsize=10)

    ax.set_title(_title, fontdict=titlefont, y=1.1)
    ax.set_xlabel(_xlabel, fontdict=labelfont, labelpad=20)
    ax.set_ylabel(_ylabel, fontdict=labelfont, labelpad=20)
    ax.tick_params(axis='y', labelsize=18)
    for tick in ax.get_yticklabels():
        tick.set_fontname("Georgia")
    plt.show()
# ===============================================================
# ===============================================================
# PREPARING THE DATA
# ===============================================================
# COLUMN = 'Zip Code (Formatted)'
# df = og_df.copy()
# Column whose distinct values become the bars of the chart.
COLUMN = 'city'
# Column whose distinct values become the stacked segments of each bar.
COLORS__OF__BARS = "new_func"
NUMBER__OF__COLORS = 4
# Keep only COLUMN values that occur on more than NUM rows.
NUM = 10

# Flag rows whose COLUMN value is frequent enough. value_counts() skips
# missing values, so a per-row dict lookup raised KeyError on rows with a
# missing COLUMN; Series.map yields NaN there, and NaN > NUM is False,
# which simply drops those rows.
column_counts = dict(df[COLUMN].value_counts())
df['threshold'] = df[COLUMN].map(column_counts) > NUM
df = df[df['threshold']]
# Inspection aid: only displays the remaining counts when it is the last
# expression of a notebook cell.
df[COLUMN].value_counts()

# Restrict to CA rows for which a county could be found.
sm_df = df[(df['state'] == 'CA') & (df['county'] != 'no data')]

# Derive the labels from COLUMN so the chart always names what is plotted
# (the labels used to say "County" while the bars were grouped by 'city').
axis_label = COLUMN.title()
make_stacked_bar_chart(sm_df,
                       COLUMN,
                       COLORS__OF__BARS,
                       NUMBER__OF__COLORS,
                       axis_label + ' Distribution (in CA), by Function',
                       'Total Number of Teammates',
                       axis_label)