NBA metric height and weight (python code)

Python 3.6 using Jupyter Notebook

# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import seaborn as sns

from sklearn.preprocessing import scale
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import statsmodels.formula.api as smf

%matplotlib inline
# Use the white seaborn look for every figure.  matplotlib >= 3.6 renamed its
# bundled seaborn styles to "seaborn-v0_8-*" (the old names were later
# removed), so prefer the new name whenever this matplotlib provides it.
plt.style.use('seaborn-v0_8-white'
              if 'seaborn-v0_8-white' in plt.style.available
              else 'seaborn-white')

# Folder that holds the four CSV files -- replace the "…" placeholder with
# the location of the files on your machine before running.
DATA_DIR = '/…'

# One DataFrame per range of years (file names as published; note that the
# last two contain a space before "nba").  Each .head() previews the first
# rows when run as its own notebook cell.
a = pd.read_csv(f'{DATA_DIR}/1913-1933nba.csv')
a.head()

b = pd.read_csv(f'{DATA_DIR}/1934-1959nba.csv')
b.head()

c = pd.read_csv(f'{DATA_DIR}/1960-1979 nba.csv')
c.head()

d = pd.read_csv(f'{DATA_DIR}/1980-1997 nba.csv')
d.head()

# Height against weight for every data set on shared axes: one straight-line
# fit (order=1) per data set, no confidence band (ci=None).  Multiple
# regression lines are drawn by calling regplot repeatedly, and the scatter
# colour is chosen with matplotlib's one-letter colour codes.
for cohort, colour in ((a, 'g'), (b, 'r'), (c, 'b'), (d, 'y')):
    # x and y are passed by keyword: seaborn >= 0.12 no longer accepts the
    # data vectors as positional arguments.
    sns.regplot(x=cohort.weight_lbs, y=cohort.height_ft, order=1, ci=None,
                scatter_kws={'color': colour, 's': 12})

plt.xlim(140, 325)
# `bottom` is the supported spelling; the old `ymin` keyword was removed
# from matplotlib.
plt.ylim(bottom=5.5);

# Simple linear regression on the 1913-1933 frame `a`:
# height_ft is modelled as intercept + slope * weight_lbs.
X = a[['weight_lbs']].values   # single-column 2-D feature matrix
y = a.height_ft

regr = skl_lm.LinearRegression()
regr.fit(X, y)

# First line printed: fitted intercept; second line: array of fitted slopes.
print(regr.intercept_, regr.coef_, sep='\n')

# Value recorded by the author for 1913-1933: 4.24
# NOTE(review): the original note calls this the "regression coefficient";
# confirm whether it is the intercept or the slope printed above.

# Summary statistics of weight and height for the first data set.
a[['weight_lbs', 'height_ft']].describe()
# 1913-1933: note 6.3 ft mean and 192.7 lbs mean

# Same summary for the last data set.
d[['weight_lbs', 'height_ft']].describe()
# 1980-1997: note 6.59 ft mean and 219.9 lbs mean
# NOTE(review): the original note here read "increase of +0.03 ft in the mean
# and +6.8 lbs in the mean", but the two means quoted above differ by
# +0.29 ft and +27.2 lbs -- confirm which data sets that note compared.

# 3-D view: weight on x, height on y, birth year on z, with a fitted plane.
#
# The plane needs one coefficient per horizontal axis, so a two-feature model
# (born ~ weight_lbs + height_ft) is fitted here on all four data sets.  The
# single-feature `regr` fitted earlier only has coef_[0]; indexing coef_[1]
# on it raises IndexError.
players = pd.concat([a, b, c, d], ignore_index=True)
# LinearRegression cannot handle missing values, so drop incomplete rows.
players = players[['weight_lbs', 'height_ft', 'born']].dropna()

plane = skl_lm.LinearRegression()
plane.fit(players[['weight_lbs', 'height_ft']].values, players.born)

# Create a coordinate grid that spans the axis limits set below, so the
# surface is actually inside the visible part of the plot.
weight_lbs = np.linspace(150, 350, 41)   # 5 lb steps
height_ft = np.linspace(5.5, 8, 26)      # 0.1 ft steps

B1, B2 = np.meshgrid(weight_lbs, height_ft, indexing='xy')
# Evaluate the fitted plane on the whole grid at once;
# Z has shape (height_ft.size, weight_lbs.size).
Z = plane.intercept_ + B1 * plane.coef_[0] + B2 * plane.coef_[1]

# Create plot
fig = plt.figure(figsize=(12, 8))
fig.suptitle('NBA players born between 1910 – 1997', fontsize=20)

# add_subplot registers the 3-D axes with the figure; recent matplotlib no
# longer attaches a bare Axes3D(fig) automatically, which leaves the figure
# empty.
ax = fig.add_subplot(111, projection='3d')

ax.plot_surface(B1, B2, Z, rstride=5, cstride=5, alpha=0.4)
# Same one-letter colours per data set as in the 2-D regression plot.
for cohort, colour in ((a, 'g'), (b, 'r'), (c, 'b'), (d, 'y')):
    ax.scatter3D(cohort.weight_lbs, cohort.height_ft, cohort.born, c=colour)

ax.set_xlabel('weight_lbs')
ax.set_xlim(350, 150)   # limits given high-to-low, which reverses the axis
ax.set_ylabel('height_ft')
ax.set_ylim(5.5, 8)
ax.set_zlabel('born')
ax.set_zlim(1910, 1997);

# 1913-1933: hex-binned joint plot of weight and height; the darkest bins
# show where most of the (weight, height) pairs fall.
sns.jointplot(x='weight_lbs', y='height_ft', data=a, kind='hex')
# Pearson r is the strength of the linear relationship between the two
# variables (the author reports 0.79).  Current seaborn no longer prints r on
# the joint plot, so compute it explicitly (Series.corr defaults to Pearson).
print(a['weight_lbs'].corr(a['height_ft']))

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.