In [2]:
import pandas as pd
data = pd.read_csv('coaches2019_clean.csv')
In [3]:
data.head()
Out[3]:
Unnamed: 0 SCHOOL CONF COACH SCHOOL PAY MAX BONUS BONUSES PAID (2018-19) ASST PAY TOTAL SCHOOL BUYOUT AS OF 12/1/19
0 0 Clemson ACC Dabo Swinney 9255000.0 1125000.0 1075000.0 7410000.0 50000000.0
1 1 Alabama SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0
2 2 Michigan Big Ten Jim Harbaugh 7504000.0 1325000.0 350000.0 6005000.0 11687500.0
3 3 Texas A&M SEC Jimbo Fisher 7500000.0 1500000.0 250000.0 7145215.0 60625000.0
4 4 Georgia SEC Kirby Smart 6703600.0 1150000.0 275000.0 6212935.0 24239584.0
In [4]:
data = pd.read_csv('coach_salary_df.csv')
In [5]:
data
Out[5]:
Unnamed: 0 RANK SCHOOL CONF COACH SCHOOL PAY YEAR
0 0 1.0 Alabama SEC Nick Saban $11,132,000 2017.0
1 1 2.0 Clemson ACC Dabo Swinney $8,504,600 2017.0
2 2 3.0 Michigan Big Ten Jim Harbaugh $7,004,000 2017.0
3 3 4.0 Ohio State Big Ten Urban Meyer $6,431,240 2017.0
4 4 5.0 Arizona Pac-12 Rich Rodriguez $5,631,563 2017.0
... ... ... ... ... ... ... ...
772 125 126.0 Army INDEP Jeff Monken -- 2019.0
773 126 127.0 Liberty INDEP Hugh Freeze -- 2019.0
774 127 128.0 Brigham Young INDEP Kalani Sitake -- 2019.0
775 128 129.0 Air Force MWC Troy Calhoun -- 2019.0
776 129 130.0 Southern Methodist AAC Sonny Dykes -- 2019.0

777 rows × 7 columns

In [7]:
attrs = list(data[data.columns[5:]].columns)
data[attrs] = data[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
In [26]:
data.columns = "id rank school conf coach school_pay year".split()
# data['year'] = data['year'].astype('int').astype('category')
data['year'] = data['year'].astype('int')
data
Out[26]:
id rank school conf coach school_pay year
0 0 1.0 Alabama SEC Nick Saban 11132000.0 2017
1 1 2.0 Clemson ACC Dabo Swinney 8504600.0 2017
2 2 3.0 Michigan Big Ten Jim Harbaugh 7004000.0 2017
3 3 4.0 Ohio State Big Ten Urban Meyer 6431240.0 2017
4 4 5.0 Arizona Pac-12 Rich Rodriguez 5631563.0 2017
... ... ... ... ... ... ... ...
772 125 126.0 Army INDEP Jeff Monken NaN 2019
773 126 127.0 Liberty INDEP Hugh Freeze NaN 2019
774 127 128.0 Brigham Young INDEP Kalani Sitake NaN 2019
775 128 129.0 Air Force MWC Troy Calhoun NaN 2019
776 129 130.0 Southern Methodist AAC Sonny Dykes NaN 2019

777 rows × 7 columns

In [27]:
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(data, test_size=0.2, random_state=42)
import statsmodels.formula.api as smf
my_model = str('school_pay  ~ conf + year')
train_model_fit = smf.ols(my_model, data = train_set).fit()
print(train_model_fit.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:             school_pay   R-squared:                       0.650
Model:                            OLS   Adj. R-squared:                  0.640
Method:                 Least Squares   F-statistic:                     65.35
Date:                Fri, 24 Jan 2020   Prob (F-statistic):          6.85e-117
Time:                        18:32:02   Log-Likelihood:                -8863.6
No. Observations:                 580   AIC:                         1.776e+04
Df Residuals:                     563   BIC:                         1.784e+04
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept        -3.189e+08   5.58e+07     -5.714      0.000   -4.28e+08   -2.09e+08
conf[T.ACC]       1.343e+06   2.06e+05      6.505      0.000    9.37e+05    1.75e+06
conf[T.Big 12]    1.813e+06    2.2e+05      8.255      0.000    1.38e+06    2.24e+06
conf[T.Big Ten]   1.705e+06   2.04e+05      8.365      0.000     1.3e+06    2.11e+06
conf[T.C-USA]    -1.113e+06   2.24e+05     -4.958      0.000   -1.55e+06   -6.72e+05
conf[T.CUSA]     -1.024e+06   2.76e+05     -3.713      0.000   -1.57e+06   -4.83e+05
conf[T.INDEP]    -1.306e+06   6.38e+05     -2.047      0.041   -2.56e+06   -5.28e+04
conf[T.Ind.]     -8.399e+05   3.72e+05     -2.261      0.024   -1.57e+06    -1.1e+05
conf[T.MAC]      -1.306e+06   2.14e+05     -6.116      0.000   -1.73e+06   -8.87e+05
conf[T.MWC]      -1.104e+06   3.64e+05     -3.034      0.003   -1.82e+06   -3.89e+05
conf[T.Mt. West] -7.773e+05    2.3e+05     -3.380      0.001   -1.23e+06   -3.26e+05
conf[T.PAC-12]    9.931e+05   3.78e+05      2.628      0.009    2.51e+05    1.74e+06
conf[T.Pac-12]    1.143e+06   2.24e+05      5.111      0.000    7.04e+05    1.58e+06
conf[T.S-Belt]   -1.575e+06   5.06e+05     -3.114      0.002   -2.57e+06   -5.82e+05
conf[T.SEC]       2.508e+06   2.01e+05     12.482      0.000    2.11e+06     2.9e+06
conf[T.Sun Belt]  -1.15e+06   2.33e+05     -4.944      0.000   -1.61e+06   -6.93e+05
year               1.59e+05   2.77e+04      5.747      0.000    1.05e+05    2.13e+05
==============================================================================
Omnibus:                      241.320   Durbin-Watson:                   2.007
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1611.236
Skew:                           1.699   Prob(JB):                         0.00
Kurtosis:                      10.425   Cond. No.                     2.55e+06
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.55e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
In [31]:
syr = data[data['school'] == 'Syracuse']
syr
Out[31]:
id rank school conf coach school_pay year
125 125 126.0 Syracuse ACC Dino Babers NaN 2017
252 122 123.0 Syracuse ACC Dino Babers NaN 2016
380 122 123.0 Syracuse ACC Scott Shafer NaN 2014
451 65 66.0 Syracuse ACC Scott Shafer 1302721.0 2015
571 54 55.0 Syracuse ACC Dino Babers 2401206.0 2018
711 64 65.0 Syracuse ACC Dino Babers 2268825.0 2019
In [33]:
# train_model_fit.params[0] + train_model_fit.params[1] + (train_model_fit.params[-1]*syr['2019.0'])
train_model_fit.params[0] + train_model_fit.params[1] + train_model_fit.params[-1]*syr[year]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-33-e4383103637c> in <module>
      1 # train_model_fit.params[0] + train_model_fit.params[1] + (train_model_fit.params[-1]*syr['2019.0'])
----> 2 train_model_fit.params[0] + train_model_fit.params[1] + train_model_fit.params[-1]*syr[year]

NameError: name 'year' is not defined
In [35]:
kwiki = pd.read_csv('kwiki.csv')
In [36]:
kwiki.dropna(inplace=True)
kwiki
Out[36]:
Team Nickname City State Enrollment Current
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West
1 Akron Zips Akron Ohio 19,200 MAC
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12
... ... ... ... ... ... ...
125 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12
126 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA
127 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC
128 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten
129 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West

130 rows × 6 columns

In [46]:
kwiki.columns = "school nickname city state enrollment conf".split()
In [37]:
kstatic = pd.read_csv('kstatic.csv')
In [45]:
kstatic.columns = "school conf coach stad_size gsr seat_rank gsr_rank combo_rank true_rank".split()
kstatic
Out[45]:
school conf coach stad_size gsr seat_rank gsr_rank combo_rank true_rank
0 Air Force Mt. West Troy Calhoun 46,692 83 60 97 65 67
1 Akron MAC Terry Bowden 30,000 45 20 5 11 129
2 Alabama SEC Nick Saban 101,821 79 124 84 111 5
3 Appalachian State Sun Belt Scott Satterfield 24,050 57 11 13 20 122
4 Arizona Pac-12 Kevin Sumlin 51,811 74 73 60 66 62
... ... ... ... ... ... ... ... ... ...
113 Washington State Pac-12 Mike Leach 32,248 93 37 121 83 37
114 West Virginia Big 12 Dana Holgorsen 60,000 66 82 20 66 62
115 Western Michigan MAC Tim Lester 30,200 74 26 60 39 100
116 Wisconsin Big Ten Paul Chryst 80,321 71 112 40 84 32
117 Wyoming Mt. West Craig Bohl 29,181 97 19 125 66 59

118 rows × 9 columns

In [63]:
kwiki_copy = kwiki.copy()
kstatic_copy = kstatic.copy()
# test = kstatic_copy.merge(kwiki_copy, on="school")
test = kwiki_copy.merge(kstatic_copy, on="school", how="left")
In [64]:
test
Out[64]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr seat_rank gsr_rank combo_rank true_rank
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 60.0 97.0 65.0 67.0
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 20.0 5.0 11.0 129.0
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 124.0 84.0 111.0 5.0
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 11.0 13.0 20.0 122.0
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 73.0 60.0 66.0 62.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 82.0 20.0 66.0 62.0
126 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN NaN NaN NaN NaN
127 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 26.0 60.0 39.0 100.0
128 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 112.0 40.0 84.0 32.0
129 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 19.0 125.0 66.0 59.0

130 rows × 14 columns

In [65]:
test.to_csv('kwiki_kstatic.csv', index=False)
In [72]:
kwiki_kstatic = test.copy()
In [67]:
kattend = pd.read_csv('kattend.csv')
In [70]:
kattend.columns = "rank_attend school conf total_attend avg_attend stad_size2 pct_capacity num_games_2019".split()
In [71]:
kattend
Out[71]:
rank_attend school conf total_attend avg_attend stad_size2 pct_capacity num_games_2019
0 1 Michigan Big Ten 780,215 111,459 109,901 101.42 7
1 2 Texas A&M SEC 779,673 97,459 102,733 94.87 8
2 3 Penn St. Big Ten 739,747 105,678 106,572 99.16 7
3 4 Ohio St. Big Ten 723,679 103,383 102,329 101.03 7
4 5 Alabama SEC 707,817 101,117 101,821 99.31 7
... ... ... ... ... ... ... ... ...
126 127 Charlotte C-USA 73,915 12,319 15,314 80.44 6
127 128 Kent St. MAC 64,109 12,822 25,319 50.64 5
128 129 Ball St. MAC 59,545 9,924 22,500 44.11 6
129 130 Massachusetts FBS Independent 56,987 9,498 17,000 55.87 6
130 131 Northern Ill. MAC 42,590 8,518 23,595 36.10 5

131 rows × 8 columns

In [73]:
kwiki_kstatic_copy = kwiki_kstatic.copy()
kattend_copy = kattend.copy()
# test = kstatic_copy.merge(kwiki_copy, on="school")
test = kwiki_kstatic_copy.merge(kattend_copy, on="school", how="left")
In [74]:
test
Out[74]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... gsr_rank combo_rank true_rank rank_attend conf total_attend avg_attend stad_size2 pct_capacity num_games_2019
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 97.0 65.0 67.0 80.0 Mountain West 162,505 27,084 46,692 58.01 6.0
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... 5.0 11.0 129.0 105.0 MAC 107,752 17,959 30,000 59.86 6.0
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... 84.0 111.0 5.0 5.0 SEC 707,817 101,117 101,821 99.31 7.0
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... 13.0 20.0 122.0 NaN NaN NaN NaN NaN NaN NaN
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... 60.0 66.0 62.0 60.0 Pac-12 237,194 39,532 53,646 73.69 6.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 20.0 66.0 62.0 37.0 Big 12 335,443 55,907 60,000 93.18 6.0
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... 60.0 39.0 100.0 NaN NaN NaN NaN NaN NaN NaN
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 40.0 84.0 32.0 14.0 Big Ten 535,301 76,472 80,321 95.21 7.0
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 125.0 66.0 59.0 89.0 Mountain West 138,042 23,007 29,181 78.84 6.0

131 rows × 21 columns

In [75]:
kwiki_kstatic_kattend = test.copy()
In [77]:
kwiki_kstatic_kattend.to_csv('kwiki_kstatic_kattend.csv', index=False)
In [78]:
k00 = pd.read_csv('k00.csv')
In [84]:
k00.dropna(inplace=True)
In [ ]:
 
In [85]:
import re
In [86]:
k00['school'] = k00.apply(lambda x: re.sub("^\d+\s|\s\d+\s|\s\d+$", " ", x['school']), axis=1 )
In [87]:
k00
Out[87]:
rank_00 school total_games_00 won_00 lost_00 pct_00
0 1.0 Alabama 138.0 123.0 15.0 0.891
1 2.0 Oregon 131.0 99.0 32.0 0.756
2 3.0 Florida State 132.0 96.0 36.0 0.727
3 4.0 Ohio State 68.0 56.0 12.0 0.824
4 5.0 Stanford 81.0 66.0 15.0 0.815
... ... ... ... ... ... ...
124 125.0 New Mexico State 73.0 14.0 59.0 0.192
125 126.0 Georgia State 37.0 7.0 30.0 0.189
126 127.0 Kansas 72.0 12.0 60.0 0.167
127 128.0 Massachusetts 48.0 8.0 40.0 0.167
128 129.0 Charlotte 12.0 2.0 10.0 0.167

129 rows × 6 columns

In [88]:
kwiki_kstatic_kattend_copy = kwiki_kstatic_kattend.copy()
k00_copy = k00.copy()
test = kwiki_kstatic_kattend_copy.merge(k00_copy, on="school", how="left")
In [90]:
kwiki_kstatic_kattend_k00 = test.copy()
In [91]:
kwiki_kstatic_kattend_k00.to_csv('kwiki_kstatic_kattend_k00.csv', index=False)
In [92]:
kwiki_kstatic_kattend_k00
Out[92]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... total_attend avg_attend stad_size2 pct_capacity num_games_2019 rank_00 total_games_00 won_00 lost_00 pct_00
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 162,505 27,084 46,692 58.01 6.0 63.0 78.0 42.0 36.0 0.538
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... 107,752 17,959 30,000 59.86 6.0 117.0 73.0 21.0 52.0 0.288
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... 707,817 101,117 101,821 99.31 7.0 1.0 138.0 123.0 15.0 0.891
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... 237,194 39,532 53,646 73.69 6.0 58.0 78.0 44.0 34.0 0.564
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 335,443 55,907 60,000 93.18 6.0 49.0 77.0 45.0 32.0 0.584
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 51.0 76.0 44.0 32.0 0.579
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... NaN NaN NaN NaN NaN 81.0 75.0 34.0 41.0 0.453
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 535,301 76,472 80,321 95.21 7.0 14.0 81.0 60.0 21.0 0.741
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 138,042 23,007 29,181 78.84 6.0 108.0 73.0 26.0 47.0 0.356

131 rows × 26 columns

In [93]:
kcent = pd.read_csv('kcent.csv')
In [95]:
kcent.dropna(inplace=True)
In [96]:
kcent['school'] = kcent.apply(lambda x: re.sub("^\d+\s|\s\d+\s|\s\d+$", " ", x['school']), axis=1 )
In [97]:
kwiki_kstatic_kattend_k00_copy = kwiki_kstatic_kattend_k00.copy()
kcent_copy = kcent.copy()
test = kwiki_kstatic_kattend_k00_copy.merge(kcent_copy, on="school", how="left")
In [98]:
test
Out[98]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... lost_00 pct_00 rank_cent won_cent lost_cent tied_cent ratio_cent years_active total_games_cent conf_y
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 36.0 0.538 63.0 401.0 329.0 13.0 0.548 63.0 743.0 Mountain West
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... 52.0 0.288 101.0 523.0 558.0 36.0 0.484 128.0 1117.0 MAC
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... 15.0 0.891 NaN NaN NaN NaN NaN NaN NaN NaN
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... NaN NaN 15.0 630.0 336.0 29.0 0.648 91.0 995.0 Sun Belt
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... 34.0 0.564 54.0 617.0 473.0 33.0 0.564 120.0 1123.0 Pac-12
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 32.0 0.584 27.0 755.0 504.0 45.0 0.596 128.0 1304.0 Big 12
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... 32.0 0.579 32.0 584.0 404.0 30.0 0.588 106.0 1018.0 C-USA
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... 41.0 0.453 58.0 576.0 459.0 24.0 0.555 113.0 1059.0 MAC
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 21.0 0.741 33.0 715.0 499.0 53.0 0.585 130.0 1267.0 Big Ten
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 47.0 0.356 102.0 540.0 579.0 28.0 0.483 126.0 1147.0 Mountain West

131 rows × 34 columns

In [99]:
kwiki_kstatic_kattend_k00_kcent = test.copy()
In [100]:
kwiki_kstatic_kattend_k00_kcent.to_csv('kwiki_kstatic_kattend_k00_kcent.csv', index=False)

COMPILE VEGAS STATS

In [126]:
import glob

path = r'/Users/danielcaraway/Documents/IST_718_Big_Data/WK2/ncaa_vegas/'

all_og_files = glob.glob(path + "/*.csv")


# coach_df = pd.concat((pd.read_csv(f) for f in all_coach_files))
for f in all_og_files:
    year = f.split('_')[6].split('.')[0]
    df = pd.read_csv(f)
    df['year'] = year
    df.columns = ['school', 'W', 'L', 'C', 'N', 'P', 'AVG_PTS_FOR', 'AVG_PTS_AGT',
       'AVG_MGN', 'AVE_RUN_YPG_FOR', 'AVG_PAS_YPG_FOR', 'AVG_TOT_YPG_FOR',
       'AVG_RUN_YPG_AGT', 'AVG_PAS_YPG_AGT', 'AVG_TOT_YPG_AGT',
       'AVG_NET_YPG', 'NET_TO', 'AVG_PWR_RTG', 'AVG_OPP_PWR_RTG',
       'NET_PWR_RTG_CHG', 'LINE_POWR', 'GAME_PLAY_POWR', 'COMP_POWR',
       'X_FCTR', 'vegas_year']
    df.to_csv(year + '.csv', index=False)
#     df = pd.read_csv(f)
#     df[]
In [127]:
path = r'/Users/danielcaraway/Documents/IST_718_Big_Data/WK2/ncaa_vegas_years/'

all_vegas_files = glob.glob(path + "/*.csv")
vegas_df = pd.concat((pd.read_csv(f) for f in all_vegas_files))
In [129]:
kvegas = vegas_df.copy()
In [121]:
base = pd.read_csv('ncaa_vegas_years/2019.csv')
In [123]:
base.columns.values
Out[123]:
array(['Team', 'W', 'L', 'C', 'N', 'P', 'AVG_PTS_FOR', 'AVG_PTS_AGT',
       'AVG_MGN', 'AVE_RUN_YPG_FOR', 'AVG_PAS_YPG_FOR', 'AVG_TOT_YPG_FOR',
       'AVG_RUN_YPG_AGT', 'AVG_PAS_YPG_AGT', 'AVG_TOT_YPG_AGT',
       'AVG_NET_YPG', 'NET_TO', 'AVG_PWR_RTG', 'AVG_OPP_PWR_RTG',
       'NET_PWR_RTG_CHG', 'LINE_POWR', 'GAME_PLAY_POWR', 'COMP_POWR',
       'X_FCTR', 'year'], dtype=object)
In [130]:
kwiki_kstatic_kattend_k00_kcent_copy = kwiki_kstatic_kattend_k00_kcent.copy()
kvegas_copy = kvegas.copy()
test = kwiki_kstatic_kattend_k00_kcent_copy.merge(kvegas_copy, on="school", how="left")
In [131]:
test
Out[131]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... AVG_NET_YPG NET_TO AVG_PWR_RTG AVG_OPP_PWR_RTG NET_PWR_RTG_CHG LINE_POWR GAME_PLAY_POWR COMP_POWR X_FCTR vegas_year
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 103.0 -2.0 184.3 167.7 11.0 110.8 111.4 151.7 27.0 2019.0
1 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 361.0 54.0 -1.0 177.3 167.5 4.0 104.5 106.7 146.7 2018.0
2 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 86.0 6.0 175.8 154.4 1.0 103.1 111.2 144.1 -9.0 2016.0
3 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 24.0 -10.0 177.6 171.0 -6.0 98.7 97.7 144.5 -31.0 2017.0
4 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... -171.0 -15.0 136.7 151.1 -19.0 79.7 74.7 115.8 -39.0 2019.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
464 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 153.0 5.0 202.1 182.5 5.0 113.0 115.5 163.0 21.0 2017.0
465 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... -13.0 5.0 177.0 163.8 0.0 107.1 109.4 146.9 47.0 2019.0
466 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 326.0 5.0 4.0 173.2 170.8 2.0 99.2 104.0 144.1 2018.0
467 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... -18.0 3.0 168.7 168.2 15.0 103.5 101.2 142.1 22.0 2016.0
468 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... -49.0 24.0 173.8 162.1 1.0 105.1 105.9 144.1 35.0 2017.0

469 rows × 58 columns

In [134]:
kvegas_long = kvegas.copy()
kvegas_long.to_csv('kvegas_long.csv', index=False)
In [135]:
kvegas
Out[135]:
school W L C N P AVG_PTS_FOR AVG_PTS_AGT AVG_MGN AVE_RUN_YPG_FOR ... AVG_NET_YPG NET_TO AVG_PWR_RTG AVG_OPP_PWR_RTG NET_PWR_RTG_CHG LINE_POWR GAME_PLAY_POWR COMP_POWR X_FCTR vegas_year
0 Air Force 11 2 8 5 0 34.1 19.9 14.2 299.0 ... 103 -2 184.3 167.7 11.0 110.8 111.4 151.7 27.0 2019
1 Akron 0 12 1 11 0 10.5 36.2 -25.8 48.0 ... -171 -15 136.7 151.1 -19.0 79.7 74.7 115.8 -39.0 2019
2 Alabama 11 2 7 6 0 47.2 18.6 28.6 169.0 ... 186 18 216.3 179.8 1.0 116.2 117.0 169.1 12.0 2019
3 UAB 9 5 8 5 1 23.1 21.6 1.6 151.0 ... 50 -8 134.3 136.9 -13.0 98.7 100.5 120.9 -17.0 2019
4 Appalachian State 13 1 9 5 0 38.8 20.0 18.8 231.0 ... 97 15 174.9 147.9 -1.0 113.1 116.2 145.4 36.0 2019
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 Washington State 9 4 7 6 0 30.3 25.8 4.5 67.0 ... 111 -3 194.8 183.2 -9.0 100.8 96.9 154.1 -59.0 2017
126 Washington 10 3 7 6 0 36.2 16.1 20.1 183.0 ... 107 13 208.2 181.3 -9.0 111.6 116.7 165.2 44.0 2017
127 West Virginia 7 6 5 8 0 34.5 31.5 3.1 151.0 ... 14 -7 191.2 186.3 -4.0 100.2 97.2 153.2 15.0 2017
128 Wisconsin 13 1 9 5 0 33.8 13.9 19.9 223.0 ... 153 5 202.1 182.5 5.0 113.0 115.5 163.0 21.0 2017
129 Wyoming 8 5 8 5 0 23.5 17.5 5.9 109.0 ... -49 24 173.8 162.1 1.0 105.1 105.9 144.1 35.0 2017

518 rows × 25 columns

In [144]:
kcoach2019 = pd.read_csv('kcoach_2019.csv')
In [154]:
attrs = kcoach2019.columns[4:]
kcoach2019[attrs] = kcoach2019[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
In [155]:
kwiki_kstatic_kattend_k00_kcent_copy = kwiki_kstatic_kattend_k00_kcent.copy()
kcoach2019_copy = kcoach2019.copy()
test = kwiki_kstatic_kattend_k00_kcent_copy.merge(kcoach2019_copy, on="school", how="left")
In [156]:
test
Out[156]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... total_games_cent conf_y rank_2019 conf_2019 coach_2019 school_pay_2019 max_bonus_2019 bonuses_paid_2019 asst_pay_total_2019 buyout_2019
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 743.0 Mountain West 129.0 MWC Troy Calhoun NaN NaN NaN 0.0 NaN
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... 1117.0 MAC 115.0 MAC Tom Arth 500000.0 120000.0 NaN 963000.0 1275000.0
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... NaN NaN 2.0 SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... 995.0 Sun Belt 95.0 S-Belt Eliah Drinkwitz 750000.0 315000.0 NaN 1144990.0 1735417.0
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... 1123.0 Pac-12 68.0 Pac-12 Kevin Sumlin 1600000.0 2025000.0 NaN 2990000.0 10000000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 1304.0 Big 12 46.0 Big 12 Neal Brown 3055000.0 1000000.0 NaN 3290000.0 16254167.0
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... 1018.0 C-USA 91.0 CUSA Tyson Helton 800000.0 400000.0 NaN 1068348.0 1200000.0
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... 1059.0 MAC 93.0 MAC Tim Lester 800000.0 346500.0 65250.0 1138000.0 600000.0
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 1267.0 Big Ten 25.0 Big Ten Paul Chryst 4150000.0 1250000.0 1650.0 4370811.0 14993055.0
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 1147.0 Mountain West 67.0 MWC Craig Bohl 2137000.0 450000.0 90000.0 1853286.0 6525000.0

131 rows × 42 columns

In [158]:
kcoach2018 = pd.read_csv('kcoach_2018.csv')
attrs = kcoach2018.columns[4:]
kcoach2018[attrs] = kcoach2018[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
In [159]:
kcoach2019_copy = kcoach2019.copy()
kcoach2018_copy = kcoach2018.copy()
test = kcoach2019_copy.merge(kcoach2018_copy, on="school", how="left")
In [160]:
test
Out[160]:
rank_2019 school conf_2019 coach_2019 school_pay_2019 max_bonus_2019 bonuses_paid_2019 asst_pay_total_2019 buyout_2019 rank_2018 conf_2018 coach_2018 school_pay_2018 max_bonus_2018 bonuses_paid_2018 asst_pay_total_2018 buyout_2018
0 1 Clemson ACC Dabo Swinney 9255000.0 1125000.0 1075000.0 7410000.0 50000000.0 7.0 ACC Dabo Swinney 6205000.0 1125000.0 500000.0 0.0 35000000.0
1 2 Alabama SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0 1.0 SEC Nick Saban 8307000.0 1100000.0 500000.0 0.0 33600000.0
2 3 Michigan Big Ten Jim Harbaugh 7504000.0 1325000.0 350000.0 6005000.0 11687500.0 3.0 Big Ten Jim Harbaugh 7504000.0 1325000.0 150000.0 0.0 17111110.0
3 4 Texas A&M SEC Jimbo Fisher 7500000.0 1500000.0 250000.0 7145215.0 60625000.0 4.0 SEC Jimbo Fisher 7500000.0 1350000.0 NaN 0.0 68125000.0
4 5 Georgia SEC Kirby Smart 6703600.0 1150000.0 275000.0 6212935.0 24239584.0 6.0 SEC Kirby Smart 6603600.0 1150000.0 1350000.0 0.0 27917500.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 126 Army INDEP Jeff Monken NaN NaN NaN 0.0 NaN 127.0 Ind. Jeff Monken NaN NaN NaN 0.0 NaN
126 127 Liberty INDEP Hugh Freeze NaN NaN NaN 0.0 NaN 83.0 Ind. Turner Gill 947281.0 NaN NaN 0.0 NaN
127 128 Brigham Young INDEP Kalani Sitake NaN NaN NaN 0.0 NaN 126.0 Ind. Kalani Sitake NaN NaN NaN 0.0 NaN
128 129 Air Force MWC Troy Calhoun NaN NaN NaN 0.0 NaN 125.0 Mt. West Troy Calhoun NaN NaN NaN 0.0 NaN
129 130 Southern Methodist AAC Sonny Dykes NaN NaN NaN 0.0 NaN 130.0 AAC Sonny Dykes NaN NaN NaN 0.0 NaN

130 rows × 17 columns

In [161]:
kcoach1918 = test.copy()
In [162]:
kcoach2017 = pd.read_csv('kcoach_2017.csv')
attrs = kcoach2017.columns[4:]
kcoach2017[attrs] = kcoach2017[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
kcoach1918_copy = kcoach1918.copy()
kcoach2017_copy = kcoach2017.copy()
test = kcoach1918_copy.merge(kcoach2017_copy, on="school", how="left")
test
Out[162]:
rank_2019 school conf_2019 coach_2019 school_pay_2019 max_bonus_2019 bonuses_paid_2019 asst_pay_total_2019 buyout_2019 rank_2018 ... asst_pay_total_2018 buyout_2018 rank_2017 conf_2017 coach_2017 school_pay_2017 max_bonus_2017 bonuses_paid_2017 asst_pay_total_2017 buyout_2017
0 1 Clemson ACC Dabo Swinney 9255000.0 1125000.0 1075000.0 7410000.0 50000000.0 7.0 ... 0.0 35000000.0 2.0 ACC Dabo Swinney 8504600.0 1000000.0 1525000.0 5725000.0 40000000.0
1 2 Alabama SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0 1.0 ... 0.0 33600000.0 1.0 SEC Nick Saban 11132000.0 700000.0 550000.0 5995000.0 26900000.0
2 3 Michigan Big Ten Jim Harbaugh 7504000.0 1325000.0 350000.0 6005000.0 11687500.0 3.0 ... 0.0 17111110.0 3.0 Big Ten Jim Harbaugh 7004000.0 1325000.0 200000.0 5645000.0 20555556.0
3 4 Texas A&M SEC Jimbo Fisher 7500000.0 1500000.0 250000.0 7145215.0 60625000.0 4.0 ... 0.0 68125000.0 10.0 SEC Kevin Sumlin 5000000.0 1050000.0 100000.0 4818000.0 10416667.0
4 5 Georgia SEC Kirby Smart 6703600.0 1150000.0 275000.0 6212935.0 24239584.0 6.0 ... 0.0 27917500.0 23.0 SEC Kirby Smart 3753600.0 1800000.0 75000.0 4565200.0 10800000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 126 Army INDEP Jeff Monken NaN NaN NaN 0.0 NaN 127.0 ... 0.0 NaN 125.0 Ind. Jeff Monken NaN NaN NaN 0.0 NaN
126 127 Liberty INDEP Hugh Freeze NaN NaN NaN 0.0 NaN 83.0 ... 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN
127 128 Brigham Young INDEP Kalani Sitake NaN NaN NaN 0.0 NaN 126.0 ... 0.0 NaN 127.0 Ind. Kalani Sitake NaN NaN NaN 0.0 NaN
128 129 Air Force MWC Troy Calhoun NaN NaN NaN 0.0 NaN 125.0 ... 0.0 NaN 122.0 Mt. West Troy Calhoun NaN NaN NaN 0.0 NaN
129 130 Southern Methodist AAC Sonny Dykes NaN NaN NaN 0.0 NaN 130.0 ... 0.0 NaN 55.0 AAC Chad Morris 2095793.0 NaN NaN 0.0 NaN

130 rows × 25 columns

In [163]:
kcoach191817 = test.copy()
In [164]:
kcoach2016 = pd.read_csv('kcoach_2016.csv')
attrs = kcoach2016.columns[4:]
kcoach2016[attrs] = kcoach2016[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
kcoach191817_copy = kcoach191817.copy()
kcoach2016_copy = kcoach2016.copy()
test = kcoach191817_copy.merge(kcoach2016_copy, on="school", how="left")
test
Out[164]:
rank_2019 school conf_2019 coach_2019 school_pay_2019 max_bonus_2019 bonuses_paid_2019 asst_pay_total_2019 buyout_2019 rank_2018 ... bonuses_paid_2017 asst_pay_total_2017 buyout_2017 rank_2016 conf_2016 coach_2016 school_pay_2016 max_bonus_2016 bonuses_paid_2016 asst_pay_total_2016
0 1 Clemson ACC Dabo Swinney 9255000.0 1125000.0 1075000.0 7410000.0 50000000.0 7.0 ... 1525000.0 5725000.0 40000000.0 12.0 ACC Dabo Swinney 4417500.0 1725000.0 1100000.0 5390417.0
1 2 Alabama SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0 1.0 ... 550000.0 5995000.0 26900000.0 2.0 SEC Nick Saban 6939395.0 700000.0 625000.0 5320000.0
2 3 Michigan Big Ten Jim Harbaugh 7504000.0 1325000.0 350000.0 6005000.0 11687500.0 3.0 ... 200000.0 5645000.0 20555556.0 1.0 Big Ten Jim Harbaugh 9004000.0 1325000.0 0.0 4308750.0
3 4 Texas A&M SEC Jimbo Fisher 7500000.0 1500000.0 250000.0 7145215.0 60625000.0 4.0 ... 100000.0 4818000.0 10416667.0 7.0 SEC Kevin Sumlin 5000000.0 1350000.0 50000.0 4811000.0
4 5 Georgia SEC Kirby Smart 6703600.0 1150000.0 275000.0 6212935.0 24239584.0 6.0 ... 75000.0 4565200.0 10800000.0 23.0 SEC Kirby Smart 3753600.0 1800000.0 NaN 4675000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 126 Army INDEP Jeff Monken NaN NaN NaN 0.0 NaN 127.0 ... NaN 0.0 NaN 73.0 Ind. Jeff Monken 932521.0 256000.0 NaN 2185759.0
126 127 Liberty INDEP Hugh Freeze NaN NaN NaN 0.0 NaN 83.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
127 128 Brigham Young INDEP Kalani Sitake NaN NaN NaN 0.0 NaN 126.0 ... NaN 0.0 NaN 126.0 Ind. Kalani Sitake NaN NaN NaN NaN
128 129 Air Force MWC Troy Calhoun NaN NaN NaN 0.0 NaN 125.0 ... NaN 0.0 NaN 76.0 Mt. West Troy Calhoun 885000.0 NaN 85000.0 1618000.0
129 130 Southern Methodist AAC Sonny Dykes NaN NaN NaN 0.0 NaN 130.0 ... NaN 0.0 NaN 125.0 AAC Chad Morris NaN NaN NaN NaN

130 rows × 32 columns

In [165]:
kcoach19181716 = test.copy()
In [166]:
kcoach2015 = pd.read_csv('kcoach_2015.csv')
attrs = kcoach2015.columns[4:]
kcoach2015[attrs] = kcoach2015[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
kcoach19181716_copy = kcoach19181716.copy()
kcoach2015_copy = kcoach2015.copy()
test = kcoach19181716_copy.merge(kcoach2015_copy, on="school", how="left")
test
Out[166]:
rank_2019 school conf_2019 coach_2019 school_pay_2019 max_bonus_2019 bonuses_paid_2019 asst_pay_total_2019 buyout_2019 rank_2018 ... max_bonus_2016 bonuses_paid_2016 asst_pay_total_2016 rank_2015 conf_2015 coach_2015 school_pay_2015 max_bonus_2015 bonuses_paid_2015 asst_pay_total_2015
0 1 Clemson ACC Dabo Swinney 9255000.0 1125000.0 1075000.0 7410000.0 50000000.0 7.0 ... 1725000.0 1100000.0 5390417.0 26.0 ACC Dabo Swinney 3300000.0 1125000.0 50000.0 4329606.0
1 2 Alabama SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0 1.0 ... 700000.0 625000.0 5320000.0 1.0 SEC Nick Saban 6932395.0 700000.0 425000.0 5227090.0
2 3 Michigan Big Ten Jim Harbaugh 7504000.0 1325000.0 350000.0 6005000.0 11687500.0 3.0 ... 1325000.0 0.0 4308750.0 2.0 Big Ten Jim Harbaugh 7004000.0 1175000.0 NaN 4248667.0
3 4 Texas A&M SEC Jimbo Fisher 7500000.0 1500000.0 250000.0 7145215.0 60625000.0 4.0 ... 1350000.0 50000.0 4811000.0 7.0 SEC Kevin Sumlin 5000000.0 1350000.0 50000.0 4419360.0
4 5 Georgia SEC Kirby Smart 6703600.0 1150000.0 275000.0 6212935.0 24239584.0 6.0 ... 1800000.0 NaN 4675000.0 12.0 SEC Mark Richt 4000000.0 1800000.0 75000.0 4807200.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 126 Army INDEP Jeff Monken NaN NaN NaN 0.0 NaN 127.0 ... 256000.0 NaN 2185759.0 75.0 Ind. Jeff Monken 883000.0 256000.0 0.0 2220450.0
126 127 Liberty INDEP Hugh Freeze NaN NaN NaN 0.0 NaN 83.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
127 128 Brigham Young INDEP Kalani Sitake NaN NaN NaN 0.0 NaN 126.0 ... NaN NaN NaN 123.0 Ind. Bronco Mendenhall NaN NaN NaN NaN
128 129 Air Force MWC Troy Calhoun NaN NaN NaN 0.0 NaN 125.0 ... NaN 85000.0 1618000.0 73.0 Mt. West Troy Calhoun 892500.0 255000.0 123750.0 1518000.0
129 130 Southern Methodist AAC Sonny Dykes NaN NaN NaN 0.0 NaN 130.0 ... NaN NaN NaN 128.0 AAC Chad Morris NaN NaN NaN NaN

130 rows × 39 columns

In [167]:
kcoach1918171615 = test.copy()
In [168]:
kcoach2014 = pd.read_csv('kcoach_2014.csv')
attrs = kcoach2014.columns[4:]
kcoach2014[attrs] = kcoach2014[attrs].replace({'\$':'', ',': '', '--':None}, regex=True).astype('float')
kcoach1918171615_copy = kcoach1918171615.copy()
kcoach2014_copy = kcoach2014.copy()
test = kcoach1918171615_copy.merge(kcoach2014_copy, on="school", how="left")
test
Out[168]:
rank_2019 school conf_2019 coach_2019 school_pay_2019 max_bonus_2019 bonuses_paid_2019 asst_pay_total_2019 buyout_2019 rank_2018 ... school_pay_2015 max_bonus_2015 bonuses_paid_2015 asst_pay_total_2015 rank_2014 conf_2014 coach_2014 school_pay_2014 max_bonus_2014 asst_pay_total_2014
0 1 Clemson ACC Dabo Swinney 9255000.0 1125000.0 1075000.0 7410000.0 50000000.0 7.0 ... 3300000.0 1125000.0 50000.0 4329606.0 21.0 ACC Dabo Swinney 3150000.0 1200000.0 4448225.0
1 2 Alabama SEC Nick Saban 8707000.0 1100000.0 875000.0 7541277.0 34100000.0 1.0 ... 6932395.0 700000.0 425000.0 5227090.0 1.0 SEC Nick Saban 6950203.0 700000.0 5213400.0
2 3 Michigan Big Ten Jim Harbaugh 7504000.0 1325000.0 350000.0 6005000.0 11687500.0 3.0 ... 7004000.0 1175000.0 NaN 4248667.0 30.0 Big Ten Brady Hoke 2854000.0 575000.0 3504323.0
3 4 Texas A&M SEC Jimbo Fisher 7500000.0 1500000.0 250000.0 7145215.0 60625000.0 4.0 ... 5000000.0 1350000.0 50000.0 4419360.0 4.0 SEC Kevin Sumlin 5000000.0 750000.0 3484050.0
4 5 Georgia SEC Kirby Smart 6703600.0 1150000.0 275000.0 6212935.0 24239584.0 6.0 ... 4000000.0 1800000.0 75000.0 4807200.0 17.0 SEC Mark Richt 3200000.0 1000000.0 3327800.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 126 Army INDEP Jeff Monken NaN NaN NaN 0.0 NaN 127.0 ... 883000.0 256000.0 0.0 2220450.0 77.0 Ind. Jeff Monken 834667.0 256000.0 2326450.0
126 127 Liberty INDEP Hugh Freeze NaN NaN NaN 0.0 NaN 83.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
127 128 Brigham Young INDEP Kalani Sitake NaN NaN NaN 0.0 NaN 126.0 ... NaN NaN NaN NaN 127.0 Ind. Bronco Mendenhall NaN NaN NaN
128 129 Air Force MWC Troy Calhoun NaN NaN NaN 0.0 NaN 125.0 ... 892500.0 255000.0 123750.0 1518000.0 74.0 Mt. West Troy Calhoun 866250.0 247500.0 1467500.0
129 130 Southern Methodist AAC Sonny Dykes NaN NaN NaN 0.0 NaN 130.0 ... NaN NaN NaN NaN 48.0 AAC June Jones 2019718.0 NaN NaN

130 rows × 45 columns

In [169]:
kcoach191817161514 = test.copy()
In [170]:
kcoach6years = test.copy()
In [171]:
kcoach6years.to_csv('kcoach6years.csv', index=False)
In [172]:
kwiki_kstatic_kattend_k00_kcent
Out[172]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... lost_00 pct_00 rank_cent won_cent lost_cent tied_cent ratio_cent years_active total_games_cent conf_y
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 36.0 0.538 63.0 401.0 329.0 13.0 0.548 63.0 743.0 Mountain West
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... 52.0 0.288 101.0 523.0 558.0 36.0 0.484 128.0 1117.0 MAC
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... 15.0 0.891 NaN NaN NaN NaN NaN NaN NaN NaN
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... NaN NaN 15.0 630.0 336.0 29.0 0.648 91.0 995.0 Sun Belt
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... 34.0 0.564 54.0 617.0 473.0 33.0 0.564 120.0 1123.0 Pac-12
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 32.0 0.584 27.0 755.0 504.0 45.0 0.596 128.0 1304.0 Big 12
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... 32.0 0.579 32.0 584.0 404.0 30.0 0.588 106.0 1018.0 C-USA
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... 41.0 0.453 58.0 576.0 459.0 24.0 0.555 113.0 1059.0 MAC
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 21.0 0.741 33.0 715.0 499.0 53.0 0.585 130.0 1267.0 Big Ten
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 47.0 0.356 102.0 540.0 579.0 28.0 0.483 126.0 1147.0 Mountain West

131 rows × 34 columns

In [173]:
kwiki_kstatic_kattend_k00_kcent_copy = kwiki_kstatic_kattend_k00_kcent.copy()
kcoach6years_copy = kcoach6years.copy()

test = kwiki_kstatic_kattend_k00_kcent_copy.merge(kcoach6years_copy, on="school", how="left")
test
Out[173]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... school_pay_2015 max_bonus_2015 bonuses_paid_2015 asst_pay_total_2015 rank_2014 conf_2014 coach_2014 school_pay_2014 max_bonus_2014 asst_pay_total_2014
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 892500.0 255000.0 123750.0 1518000.0 74.0 Mt. West Troy Calhoun 866250.0 247500.0 1467500.0
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... 406000.0 255000.0 10000.0 934220.0 108.0 MAC Terry Bowden 406000.0 255000.0 910520.0
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... 6932395.0 700000.0 425000.0 5227090.0 1.0 SEC Nick Saban 6950203.0 700000.0 5213400.0
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... 375000.0 165000.0 27500.0 936500.0 121.0 Sun Belt Scott Satterfield 225000.0 25000.0 823000.0
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... 2878844.0 2025000.0 650000.0 3036400.0 18.0 PAC-12 Rich Rodriguez 2898500.0 2125000.0 2498650.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 2880000.0 600000.0 35000.0 2801000.0 23.0 Big 12 Dana Holgorsen 3080000.0 600000.0 2884000.0
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... 610800.0 450000.0 100000.0 1054792.0 89.0 CUSA Jeff Brohm 600000.0 550000.0 978480.0
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... 800000.0 336000.0 78750.0 1037500.0 115.0 MAC P.J. Fleck 392500.0 236000.0 804500.0
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 2300000.0 440000.0 NaN 2632200.0 41.0 Big Ten Gary Andersen 2200000.0 440000.0 2368600.0
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... 882000.0 330000.0 120000.0 1224315.0 78.0 Mt. West Craig Bohl 832000.0 330000.0 1320726.0

131 rows × 78 columns

In [174]:
kwiki_kstatic_kattend_k00_kcent_kcoach6years = test.copy()
In [175]:
kwiki_kstatic_kattend_k00_kcent_kcoach6years.to_csv('kwiki_kstatic_kattend_k00_kcent_kcoach6years.csv', index=False)
In [176]:
kwiki_kstatic_kattend_k00_kcent_kcoach6years.isnull().sum()
Out[176]:
school                  0
nickname                0
city                    0
state                   0
enrollment              0
                       ..
conf_2014              24
coach_2014             24
school_pay_2014        29
max_bonus_2014         39
asst_pay_total_2014    39
Length: 78, dtype: int64
In [177]:
# s = pd.pivot_table(kvegas, index='school', columns=
# df_out = kvegas.groupby(['col0','col1','col2']).mean().unstack([1,2])
# df_out.columns = df_out.columns.map('_'.join)
# df_out.reset_index()
In [192]:
import glob

path = r'/Users/danielcaraway/Documents/IST_718_Big_Data/WK2/ncaa_vegas/'

all_og_files = glob.glob(path + "/*.csv")

columns = ['school', 'W', 'L', 'C', 'N', 'P', 'AVG_PTS_FOR', 'AVG_PTS_AGT',
       'AVG_MGN', 'AVE_RUN_YPG_FOR', 'AVG_PAS_YPG_FOR', 'AVG_TOT_YPG_FOR',
       'AVG_RUN_YPG_AGT', 'AVG_PAS_YPG_AGT', 'AVG_TOT_YPG_AGT',
       'AVG_NET_YPG', 'NET_TO', 'AVG_PWR_RTG', 'AVG_OPP_PWR_RTG',
       'NET_PWR_RTG_CHG', 'LINE_POWR', 'GAME_PLAY_POWR', 'COMP_POWR',
       'X_FCTR', 'vegas_year']
# coach_df = pd.concat((pd.read_csv(f) for f in all_coach_files))
for f in all_og_files:
    year = f.split('_')[6].split('.')[0]
    df = pd.read_csv(f)
    df['year'] = year
    newcolumns = ['school']
    renamed = [newcolumns.append(col + '_' + year) for col in columns[1:]]
    df.columns = newcolumns
    df.to_csv(year + '.csv', index=False)
#     df = pd.read_csv(f)
#     df[]
In [178]:
columns = ['school', 'W', 'L', 'C', 'N', 'P', 'AVG_PTS_FOR', 'AVG_PTS_AGT',
       'AVG_MGN', 'AVE_RUN_YPG_FOR', 'AVG_PAS_YPG_FOR', 'AVG_TOT_YPG_FOR',
       'AVG_RUN_YPG_AGT', 'AVG_PAS_YPG_AGT', 'AVG_TOT_YPG_AGT',
       'AVG_NET_YPG', 'NET_TO', 'AVG_PWR_RTG', 'AVG_OPP_PWR_RTG',
       'NET_PWR_RTG_CHG', 'LINE_POWR', 'GAME_PLAY_POWR', 'COMP_POWR',
       'X_FCTR', 'vegas_year']

    
In [190]:
newcolumns = ['school']
renamed = [newcolumns.append(col + '_2018') for col in columns[1:]]
In [191]:
# newcolumns = ['school']
# newcolumns.append(renamed)

newcolumns
Out[191]:
['school',
 'W_2018',
 'L_2018',
 'C_2018',
 'N_2018',
 'P_2018',
 'AVG_PTS_FOR_2018',
 'AVG_PTS_AGT_2018',
 'AVG_MGN_2018',
 'AVE_RUN_YPG_FOR_2018',
 'AVG_PAS_YPG_FOR_2018',
 'AVG_TOT_YPG_FOR_2018',
 'AVG_RUN_YPG_AGT_2018',
 'AVG_PAS_YPG_AGT_2018',
 'AVG_TOT_YPG_AGT_2018',
 'AVG_NET_YPG_2018',
 'NET_TO_2018',
 'AVG_PWR_RTG_2018',
 'AVG_OPP_PWR_RTG_2018',
 'NET_PWR_RTG_CHG_2018',
 'LINE_POWR_2018',
 'GAME_PLAY_POWR_2018',
 'COMP_POWR_2018',
 'X_FCTR_2018',
 'vegas_year_2018']
In [194]:
kvegas2019 = pd.read_csv('ncaa_vegas2/2019.csv')
In [196]:
kvegas2018 = pd.read_csv('ncaa_vegas2/2018.csv')
kvegas2019_copy = kvegas2019.copy()
kvegas2018_copy = kvegas2018.copy()
test = kvegas2019_copy.merge(kvegas2018_copy, on="school", how="left")
test
Out[196]:
school W_2019 L_2019 C_2019 N_2019 P_2019 AVG_PTS_FOR_2019 AVG_PTS_AGT_2019 AVG_MGN_2019 AVE_RUN_YPG_FOR_2019 ... AVG_NET_YPG_2018 NET_TO_2018 AVG_PWR_RTG_2018 AVG_OPP_PWR_RTG_2018 NET_PWR_RTG_CHG_2018 LINE_POWR_2018 GAME_PLAY_POWR_2018 COMP_POWR_2018 X_FCTR_2018 vegas_year_2018
0 Air Force 11 2 8 5 0 34.1 19.9 14.2 299 ... 361 54 -1 177.3 167.5 4 104.5 106.7 146.7 2018
1 Akron 0 12 1 11 0 10.5 36.2 -25.8 48 ... 391 -97 -5 152.1 162.6 -1 93.8 89.9 130.1 2018
2 Alabama 11 2 7 6 0 47.2 18.6 28.6 169 ... 320 202 6 222.4 191.5 -3 115.9 114.9 173.4 2018
3 UAB 9 5 8 5 1 23.1 21.6 1.6 151 ... 299 106 0 136.8 151.4 19 110.0 109.5 128.9 2018
4 Appalachian State 13 1 9 5 0 38.8 20.0 18.8 231 ... 289 142 6 173.9 148.6 3 115.5 115.0 145.4 2018
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 Washington State 6 7 4 9 0 37.8 31.4 6.4 68 ... 358 93 8 194.9 182.0 11 111.5 104.3 157.5 2018
126 Washington 8 5 8 5 0 32.0 19.4 12.6 152 ... 306 108 3 201.9 186.9 -4 104.3 109.0 160.8 2018
127 West Virginia 5 7 6 5 1 20.6 28.8 -8.2 73 ... 406 106 7 196.9 190.7 9 108.9 107.1 160.1 2018
128 Wisconsin 10 4 8 6 0 34.1 16.9 17.1 233 ... 344 91 1 193.3 180.3 -15 101.2 107.0 155.0 2018
129 Wyoming 8 5 8 5 0 25.4 17.8 7.6 215 ... 326 5 4 173.2 170.8 2 99.2 104.0 144.1 2018

130 rows × 49 columns

In [197]:
kvegas1918 = test.copy()
In [198]:
kvegas2017 = pd.read_csv('ncaa_vegas2/2017.csv')
kvegas1918_copy = kvegas1918.copy()
kvegas2017_copy = kvegas2017.copy()
test = kvegas1918_copy.merge(kvegas2017_copy, on="school", how="left")
test
Out[198]:
school W_2019 L_2019 C_2019 N_2019 P_2019 AVG_PTS_FOR_2019 AVG_PTS_AGT_2019 AVG_MGN_2019 AVE_RUN_YPG_FOR_2019 ... AVG_NET_YPG_2017 NET_TO_2017 AVG_PWR_RTG_2017 AVG_OPP_PWR_RTG_2017 NET_PWR_RTG_CHG_2017 LINE_POWR_2017 GAME_PLAY_POWR_2017 COMP_POWR_2017 X_FCTR_2017 vegas_year_2017
0 Air Force 11 2 8 5 0 34.1 19.9 14.2 299 ... 24.0 -10.0 177.6 171.0 -6.0 98.7 97.7 144.5 -31.0 2017.0
1 Akron 0 12 1 11 0 10.5 36.2 -25.8 48 ... -127.0 10.0 152.4 159.5 3.0 97.0 95.7 131.4 45.0 2017.0
2 Alabama 11 2 7 6 0 47.2 18.6 28.6 169 ... 184.0 14.0 220.6 188.5 -4.0 115.0 122.4 173.4 14.0 2017.0
3 UAB 9 5 8 5 1 23.1 21.6 1.6 151 ... -2.0 4.0 118.7 147.3 25.0 102.6 102.5 118.0 11.0 2017.0
4 Appalachian State 13 1 9 5 0 38.8 20.0 18.8 231 ... 97.0 12.0 161.4 148.8 4.0 108.6 112.3 138.5 9.0 2017.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 Washington State 6 7 4 9 0 37.8 31.4 6.4 68 ... 111.0 -3.0 194.8 183.2 -9.0 100.8 96.9 154.1 -59.0 2017.0
126 Washington 8 5 8 5 0 32.0 19.4 12.6 152 ... 107.0 13.0 208.2 181.3 -9.0 111.6 116.7 165.2 44.0 2017.0
127 West Virginia 5 7 6 5 1 20.6 28.8 -8.2 73 ... 14.0 -7.0 191.2 186.3 -4.0 100.2 97.2 153.2 15.0 2017.0
128 Wisconsin 10 4 8 6 0 34.1 16.9 17.1 233 ... 153.0 5.0 202.1 182.5 5.0 113.0 115.5 163.0 21.0 2017.0
129 Wyoming 8 5 8 5 0 25.4 17.8 7.6 215 ... -49.0 24.0 173.8 162.1 1.0 105.1 105.9 144.1 35.0 2017.0

130 rows × 73 columns

In [200]:
kvegas191817 = test.copy()
In [201]:
kvegas2016 = pd.read_csv('ncaa_vegas2/2016.csv')
kvegas191817_copy = kvegas191817.copy()
kvegas2016_copy = kvegas2016.copy()
test = kvegas191817_copy.merge(kvegas2016_copy, on="school", how="left")
test
Out[201]:
school W_2019 L_2019 C_2019 N_2019 P_2019 AVG_PTS_FOR_2019 AVG_PTS_AGT_2019 AVG_MGN_2019 AVE_RUN_YPG_FOR_2019 ... AVG_NET_YPG_2016 NET_TO_2016 AVG_PWR_RTG_2016 AVG_OPP_PWR_RTG_2016 NET_PWR_RTG_CHG_2016 LINE_POWR_2016 GAME_PLAY_POWR_2016 COMP_POWR_2016 X_FCTR_2016 vegas_year_2016
0 Air Force 11 2 8 5 0 34.1 19.9 14.2 299 ... 86.0 6.0 175.8 154.4 1.0 103.1 111.2 144.1 -9.0 2016.0
1 Akron 0 12 1 11 0 10.5 36.2 -25.8 48 ... -79.0 -8.0 155.0 157.8 -9.0 93.5 91.5 130.6 27.0 2016.0
2 Alabama 11 2 7 6 0 47.2 18.6 28.6 169 ... 193.0 10.0 220.9 193.1 4.0 116.6 121.2 174.5 19.0 2016.0
3 UAB 9 5 8 5 1 23.1 21.6 1.6 151 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 Appalachian State 13 1 9 5 0 38.8 20.0 18.8 231 ... 98.0 8.0 164.2 155.8 6.0 107.0 112.5 140.7 -3.0 2016.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
125 Washington State 6 7 4 9 0 37.8 31.4 6.4 68 ... 77.0 6.0 196.4 182.5 5.0 108.9 105.3 157.9 12.0 2016.0
126 Washington 8 5 8 5 0 32.0 19.4 12.6 152 ... 140.0 18.0 209.7 185.6 10.0 117.1 116.1 167.6 27.0 2016.0
127 West Virginia 5 7 6 5 1 20.6 28.8 -8.2 73 ... 60.0 4.0 195.3 189.2 -5.0 103.7 105.9 157.9 5.0 2016.0
128 Wisconsin 10 4 8 6 0 34.1 16.9 17.1 233 ... 81.0 12.0 197.9 184.2 8.0 109.4 114.1 160.7 25.0 2016.0
129 Wyoming 8 5 8 5 0 25.4 17.8 7.6 215 ... -18.0 3.0 168.7 168.2 15.0 103.5 101.2 142.1 22.0 2016.0

130 rows × 97 columns

In [202]:
kvegas19181716 = test.copy()
In [203]:
kvegas19181716.to_csv('kvegas19181716.csv', index=False)
In [204]:
kwiki_kstatic_kattend_k00_kcent_kcoach6years_copy = kwiki_kstatic_kattend_k00_kcent_kcoach6years.copy()
kvegas4years = kvegas19181716.copy()
test = kwiki_kstatic_kattend_k00_kcent_kcoach6years_copy.merge(kvegas4years, on="school", how="left")
test
Out[204]:
school nickname city state enrollment conf_x conf_y coach stad_size gsr ... AVG_NET_YPG_2016 NET_TO_2016 AVG_PWR_RTG_2016 AVG_OPP_PWR_RTG_2016 NET_PWR_RTG_CHG_2016 LINE_POWR_2016 GAME_PLAY_POWR_2016 COMP_POWR_2016 X_FCTR_2016 vegas_year_2016
0 Air Force Falcons Colorado Springs Colorado 4,237 Mountain West Mt. West Troy Calhoun 46,692 83.0 ... 86.0 6.0 175.8 154.4 1.0 103.1 111.2 144.1 -9.0 2016.0
1 Akron Zips Akron Ohio 19,200 MAC MAC Terry Bowden 30,000 45.0 ... -79.0 -8.0 155.0 157.8 -9.0 93.5 91.5 130.6 27.0 2016.0
2 Alabama Crimson Tide Tuscaloosa Alabama 38,500 SEC SEC Nick Saban 101,821 79.0 ... 193.0 10.0 220.9 193.1 4.0 116.6 121.2 174.5 19.0 2016.0
3 Appalachian State Mountaineers Boone North Carolina 19,100 Sun Belt Sun Belt Scott Satterfield 24,050 57.0 ... 98.0 8.0 164.2 155.8 6.0 107.0 112.5 140.7 -3.0 2016.0
4 Arizona Wildcats Tucson Arizona 45,200 Pac-12 Pac-12 Kevin Sumlin 51,811 74.0 ... -56.0 -7.0 179.4 182.5 -13.0 86.5 94.1 144.4 -37.0 2016.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
126 West Virginia Mountaineers Morgantown West Virginia 30,000 Big 12 Big 12 Dana Holgorsen 60,000 66.0 ... 60.0 4.0 195.3 189.2 -5.0 103.7 105.9 157.9 5.0 2016.0
127 Western Kentucky Hilltoppers Bowling Green Kentucky 20,300 C-USA NaN NaN NaN NaN ... 147.0 2.0 180.1 156.0 3.0 114.2 111.3 148.3 4.0 2016.0
128 Western Michigan Broncos Kalamazoo Michigan 22,900 MAC MAC Tim Lester 30,200 74.0 ... 128.0 18.0 179.3 160.8 6.0 116.2 116.9 150.5 21.0 2016.0
129 Wisconsin Badgers Madison Wisconsin 44,400 Big Ten Big Ten Paul Chryst 80,321 71.0 ... 81.0 12.0 197.9 184.2 8.0 109.4 114.1 160.7 25.0 2016.0
130 Wyoming Cowboys Laramie Wyoming 12,400 Mountain West Mt. West Craig Bohl 29,181 97.0 ... -18.0 3.0 168.7 168.2 15.0 103.5 101.2 142.1 22.0 2016.0

131 rows × 174 columns

In [205]:
kwiki_kstatic_kattend_k00_kcent_kcoach6years_kvegas4years = test.copy()
In [206]:
kwiki_kstatic_kattend_k00_kcent_kcoach6years_kvegas4years.to_csv('kwiki_kstatic_kattend_k00_kcent_kcoach6years_kvegas4years.csv', index=False)
In [207]:
kmega = test.copy()
kmega.to_csv('kmega.csv', index=False)
In [ ]: