Exercise 5.9

import numpy as np
import pandas as pd

from sklearn.datasets import load_boston

%matplotlib inline
# Load the Boston housing data into a DataFrame: one column per feature,
# plus the target appended as 'MEDV'.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs scikit-learn < 1.2 -- confirm the pinned
# version.
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names).assign(
    MEDV=pd.Series(boston.target)
)
# Keep a handle on the target column; every later part works on `medv`.
medv = df['MEDV']
df.head()
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33 36.2

(a)

# (a) Mean of the MEDV column; reused in (d) as the centre of the interval.
mu = medv.mean()

(b)

# (b) Analytic standard error of the mean: the standard deviation of MEDV
# divided by the square root of the number of rows in df.
medv.std() / np.sqrt(df.shape[0])
0.40886114749753505

(c)

# (c) Bootstrap the mean: draw 1000 resamples of MEDV with replacement, each
# as large as the data set, and record the mean of every resample.
means = [
    medv.sample(n=df.shape[0], replace=True).mean()
    for _ in range(1000)
]
# The spread of the resampled means is the bootstrap standard error.
# No random seed is set, so the value changes from run to run.
np.std(means)
0.4126021332293619

(d)

# (d) Interval of two bootstrap standard errors either side of the mean `mu`.
SE = np.std(means)
lower, upper = mu - 2 * SE, mu + 2 * SE
print(lower, upper)
21.7076020577 23.3580105906

(e)

# (e) Median of the MEDV column.
medv.median()
21.2

(f)

# (f) Bootstrap the median: 1000 resamples of MEDV with replacement, each as
# large as the data set, recording the median of every resample.
medians = [
    medv.sample(n=df.shape[0], replace=True).median()
    for _ in range(1000)
]
# Standard deviation of the resampled medians = bootstrap standard error.
# No random seed is set, so the value changes from run to run.
np.std(medians)
0.37716367799670181

(g)

# (g) 0.1 quantile (10th percentile) of the MEDV column.
medv.quantile(.1)
12.75

(h)

# (h) Bootstrap the 10th percentile: 1000 resamples of MEDV with replacement,
# each as large as the data set, recording the 0.1 quantile of every resample.
quantiles = [
    medv.sample(n=len(df), replace=True).quantile(0.1)
    for _ in range(1000)
]
# Standard deviation of the resampled quantiles = bootstrap standard error.
# (A stray trailing "x" on this line previously made the cell a syntax error.)
np.std(quantiles)
0.50477123531358237