Open In Colab

In [1]:
# import packages for analysis and modeling
import pandas as pd #data frame operations
import numpy as np #arrays and math functions
from scipy.stats import uniform #for training and test splits
import statsmodels.api as smf #R-like model specification
import matplotlib.pyplot as plt #2D plotting
In [5]:
 # read in Coaches data and create data frame
Coaches = pd.read_excel("coaches_modify.xlsx")

print(pd.DataFrame.head(Coaches))
              School      Conf              Coach  NCAAFBREV16  MedianConfSal  \
0          Air Force  Mt. West       Troy Calhoun   59577780.0       879288.0   
1              Akron       MAC       Terry Bowden   35331217.0       492413.0   
2            Alabama       SEC         Nick Saban  174307419.0      3929800.0   
3  Appalachian State  Sun Belt  Scott Satterfield   35058621.0       675000.0   
4            Arizona    Pac-12       Kevin Sumlin   90976758.0      2752232.5   

   SchoolPay   TotalPay      Bonus  BonusPaid  PayPlusBonus2016  ...  GSRank  \
0   885000.0   885000.0   247000.0        NaN          885000.0  ...      97   
1   411000.0   412500.0   225000.0    50000.0          462500.0  ...       5   
2  8307000.0  8307000.0  1100000.0   500000.0         8807000.0  ...      84   
3   712500.0   712500.0   295000.0   145000.0          857500.0  ...      13   
4  1600000.0  2000000.0  2025000.0        NaN         2000000.0  ...      60   

   Combo Rank  TrueRank     W    L      Ratio  OffenceScore  Defense Score  \
0   64.666667        67  10.0  3.0   3.333333          4.96          -0.87   
1   10.666667       129   5.0  7.0   0.714286         -0.40          -6.12   
2  111.333333         5  14.0  1.0  14.000000          8.01          17.62   
3   19.666667       122  10.0  3.0   3.333333         -0.86           9.68   
4   65.666667        62   3.0  9.0   0.333333          1.01          -5.64   

   Score  PointsPerGame  
0   4.09          78.16  
1  -6.52          72.64  
2  25.62          90.38  
3   8.83          70.76  
4  -4.63          74.42  

[5 rows x 23 columns]
In [6]:
Coaches.describe()
Out[6]:
NCAAFBREV16 MedianConfSal SchoolPay TotalPay Bonus BonusPaid PayPlusBonus2016 StadSize Graduation Rate (GSR) Seat Rank GSRank Combo Rank TrueRank W L Ratio OffenceScore Defense Score Score PointsPerGame
count 9.900000e+01 1.180000e+02 1.130000e+02 1.130000e+02 9.600000e+01 6.300000e+01 1.140000e+02 118.000000 118.000000 118.000000 118.000000 118.000000 118.000000 107.000000 107.000000 107.000000 107.000000 107.000000 107.000000 107.000000
mean 8.292182e+07 2.301919e+06 2.550025e+06 2.557438e+06 9.175975e+05 2.011909e+05 2.679926e+06 53059.228814 74.644068 67.245763 65.152542 65.666667 62.203390 6.775701 5.981308 1.865069 0.787757 0.827009 1.614579 76.364673
std 4.768607e+07 1.313944e+06 1.906396e+06 1.910683e+06 6.500860e+05 2.640723e+05 1.999123e+06 23699.435546 14.246022 37.649289 38.479127 28.235948 37.878474 3.032243 2.441703 2.396191 6.030251 6.443782 9.220991 7.547450
min 1.613242e+07 4.924130e+05 3.900000e+05 3.900000e+05 5.000000e+04 1.000000e+04 3.900000e+05 9214.000000 0.000000 1.000000 1.000000 10.666667 1.000000 1.000000 1.000000 0.090909 -13.010000 -11.120000 -18.860000 60.550000
25% 3.818882e+07 8.069122e+05 8.500000e+05 8.500000e+05 4.032500e+05 5.000000e+04 8.912500e+05 32062.000000 69.250000 36.250000 33.250000 42.916667 30.000000 4.000000 4.000000 0.500000 -3.460000 -3.370000 -5.700000 70.510000
50% 8.367264e+07 2.458032e+06 2.163000e+06 2.163000e+06 8.075000e+05 9.500000e+04 2.325603e+06 50035.500000 75.000000 66.500000 67.000000 66.333333 59.000000 7.000000 6.000000 1.166667 0.480000 0.290000 2.300000 75.160000
75% 1.146884e+08 3.775000e+06 3.703975e+06 3.703975e+06 1.263750e+06 2.770835e+05 3.946500e+06 66680.000000 83.750000 98.750000 100.000000 85.333333 95.500000 9.000000 8.000000 2.250000 4.835000 5.010000 8.170000 81.130000
max 2.148306e+08 3.929800e+06 8.307000e+06 8.307000e+06 3.100000e+06 1.350000e+06 8.807000e+06 107601.000000 100.000000 130.000000 130.000000 126.333333 129.000000 14.000000 11.000000 14.000000 14.640000 17.620000 25.620000 92.540000
In [4]:
# System Information
import platform

print('Python is ' + platform.python_version())

pd.show_versions(as_json=False)
Python is 3.7.4

INSTALLED VERSIONS
------------------
commit           : None
python           : 3.7.4.final.0
python-bits      : 64
OS               : Darwin
OS-release       : 18.6.0
machine          : x86_64
processor        : i386
byteorder        : little
LC_ALL           : None
LANG             : en_US.UTF-8
LOCALE           : en_US.UTF-8

pandas           : 0.25.1
numpy            : 1.17.2
pytz             : 2019.3
dateutil         : 2.8.0
pip              : 19.2.3
setuptools       : 41.4.0
Cython           : 0.29.13
pytest           : 5.2.1
hypothesis       : None
sphinx           : 2.2.0
blosc            : None
feather          : None
xlsxwriter       : 1.2.1
lxml.etree       : 4.4.1
html5lib         : 1.0.1
pymysql          : None
psycopg2         : None
jinja2           : 2.10.3
IPython          : 7.8.0
pandas_datareader: None
bs4              : 4.8.0
bottleneck       : 1.2.1
fastparquet      : None
gcsfs            : None
lxml.etree       : 4.4.1
matplotlib       : 3.1.1
numexpr          : 2.7.0
odfpy            : None
openpyxl         : 3.0.0
pandas_gbq       : None
pyarrow          : None
pytables         : None
s3fs             : None
scipy            : 1.3.1
sqlalchemy       : 1.3.9
tables           : 3.5.2
xarray           : None
xlrd             : 1.2.0
xlwt             : 1.3.0
xlsxwriter       : 1.2.1
In [7]:
Coaches.hist()
Out[7]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1c1d7dad90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20097790>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c200820d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20062b90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1c200b0390>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20847ed0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20885710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c208b6f10>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1c208c0a90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20901450>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c2094ef10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20980bd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1c209bef50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c209f4c10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20a34f90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20a69c50>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1c20aa9fd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20adcc90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20b1d4d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1c20b4fcd0>]],
      dtype=object)
In [12]:
Coaches['TotalPay'].hist()
plt.xticks(rotation='vertical')
Out[12]:
(array([-1000000.,        0.,  1000000.,  2000000.,  3000000.,  4000000.,
         5000000.,  6000000.,  7000000.,  8000000.,  9000000.]),
 <a list of 11 Text xticklabel objects>)
In [11]:
p = plt.hist(Coaches['TotalPay'])
plt.xticks(rotation='vertical')
plt.show()
In [14]:
import seaborn as sns  # PROVIDES TRELLIS AND SMALL MULTIPLE PLOTTING

sns.boxplot(y="TotalPay", data=Coaches)
plt.show()
In [17]:
sns.boxplot(x="Conf", y="TotalPay", data=Coaches, color = "gray");
plt.xticks(rotation="vertical")
plt.show()
In [23]:
sns.scatterplot(x="W", y="TotalPay", hue="Conf", data=Coaches)
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c220dee90>
In [24]:
sns.scatterplot(x="L", y="TotalPay", hue="Conf", data=Coaches)
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c22276790>
In [26]:
sns.regplot(x="W", y="TotalPay", data=Coaches);
In [27]:
sns.regplot(x="L", y="TotalPay", data=Coaches);
In [30]:
sns.lmplot(x="W", y="TotalPay", col="Conf", col_wrap=3, data=Coaches);
In [ ]: