San Francisco Police Department traffic stops data 2017 Python code

http://sanfranciscopolice.org/data#trafficstops

The data set used here is the file "Stops by Race and Ethnicity – data (2017)" available from the page linked above.

# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import seaborn as sns

from sklearn.preprocessing import scale
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import statsmodels.formula.api as smf

%matplotlib inline
plt.style.use('seaborn-white')


# The downloaded CSV was renamed to sfpd2017.csv so that it is easier to load.
df = pd.read_csv('/.../sfpd2017.csv')
df.head()

df.info()

# Locate missing data: df.isnull() is True wherever a value is absent, and the
# heatmap shows those cells in the contrasting colour.
plt.figure()
sns.heatmap(df.isnull(), yticklabels=False, cbar=False, cmap='viridis')

# Stop counts per race, split by sex, on the large titled figure.
fig = plt.figure(figsize=(15, 9))
fig.suptitle('SFPD demographic chart', fontsize=20)

sns.set_style('whitegrid')
sns.countplot(x='Race_description', hue='Sex', data=df, palette='RdBu_r')

# Each of the following plots gets its own figure; without this, consecutive
# axes-level seaborn calls draw on top of one another when run as a script.
plt.figure()
sns.distplot(df['Age'].dropna(), kde=False, color='darkred', bins=50)

plt.figure()
sns.countplot(x='Race_description', data=df)

plt.figure()
sns.countplot(x='Sex', data=df)

plt.figure(figsize=(12, 7))
sns.boxplot(x='Race_description', y='Age', data=df, palette='winter')

plt.figure()
df['Race_description'].hist(color='green', bins=40, figsize=(8, 4))

# The hexbin plot is drawn from 1000 rows of synthetic standard-normal data,
# not from the SFPD file. It is kept in its own frame so that `df` (the real
# data set) is still intact for the lmplot below.
# NOTE(review): presumably random data was used because hexbin needs two
# numeric columns -- confirm whether a real numeric pair should be plotted.
hexbin_demo = pd.DataFrame(np.random.randn(1000, 2),
                           columns=['Race_description', 'Age'])
hexbin_demo.plot.hexbin(x='Race_description', y='Age', gridsize=25, cmap='Oranges')

# Age against hour of stop, one panel per race, coloured by sex.
# `height` is the current name of lmplot's former `size` parameter.
sns.lmplot(x='Time_hour', y='Age', data=df, col='Race_description', hue='Sex',
           palette='coolwarm', aspect=0.6, height=8)

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.