14 Standard deviation of residuals or root mean square deviation (RMSD)

%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/hSP_rmNHU_U/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

khanacademy

Standard deviation of residuals or root mean square deviation (RMSD) fig 1

# Toy data set: four (x, y) observations reused by the cells below.
x = np.array([1, 2, 2, 3])
y = np.array([1, 2, 3, 6])
# Least-squares line via SciPy. NOTE: the fifth value, `stderr`, is the standard
# error of the estimated slope, not the standard deviation of the residuals.
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
# Fitted values on the SciPy regression line.
yhat = intercept + slope * x
# Column-vector views for scikit-learn. Despite the `_test` suffix these are the
# very same points the model is fitted on (there is no separate hold-out set).
x_test = x.reshape(-1, 1)
y_test = y.reshape(-1, 1)
# The same regression via scikit-learn, predicting on the data it was fitted to.
reg = LinearRegression()
reg.fit(x_test, y_test)
y_pred = reg.predict(x_test)
# Tabulate the observations, fitted values and squared residuals.
df = DataFrame({
    'x': x,
    'y': y,
    'yhat': yhat,
    '(y - yhat)**2': (y - yhat)**2
})
df
x y yhat (y - yhat)**2
0 1 1 0.5 0.25
1 2 2 3.0 1.00
2 2 3 3.0 0.00
3 3 6 5.5 0.25
# y is one-dimensional, so len(y) is the number of observations n.
y.shape
(4,)
\[ \sqrt{ \frac{ \sum (\text{residual})^{2} }{n-2} } \]
\[ \sqrt { \frac{ \sum (y-\hat{y})^{2} }{n-2} } \]
def rmsd(y, yhat, i=0):
    """Return sqrt(sum((y - yhat)**2) / (n - i)), where n = len(y).

    Parameters
    ----------
    y : array-like
        Observed values.
    yhat : array-like
        Fitted values; must broadcast against ``y``.
    i : int, optional
        Amount subtracted from ``n`` in the denominator. ``0`` (the default)
        gives the plain root mean square deviation; ``2`` gives the standard
        deviation of the residuals of a simple linear regression (``n - 2``).

    Returns
    -------
    numpy.float64
        The root of the summed squared residuals over ``n - i``.

    Raises
    ------
    ValueError
        If ``n - i`` is not positive, since the quotient would be undefined.
    """
    # Convert up front so plain Python lists work (list - list is a TypeError).
    y = np.asarray(y)
    yhat = np.asarray(yhat)
    denominator = len(y) - i
    if denominator <= 0:
        # Fail loudly instead of silently returning inf/nan.
        raise ValueError(
            f"n - i must be positive, got n={len(y)} and i={i}"
        )
    return np.sqrt(np.sum((y - yhat) ** 2) / denominator)
# Each label names the value subtracted from n in the denominator
# (n - 1, n - 2, n - 0), not the divisor itself.
for subtracted in (1, 2, 0):
    print(f'divided by {subtracted}: \t', rmsd(y, yhat, subtracted))
divided by 1: 	 0.7071067811865476
divided by 2: 	 0.8660254037844386
divided by 0: 	 0.6123724356957945
# NOTE: this is linregress's standard error of the slope, not an RMSD. It matches
# the "divided by 0" value only by coincidence: for this data
# sum((x - mean(x))**2) == 2 and n == 4, so (n - 2) * 2 == n.
stderr
0.6123724356957947
# Take the root explicitly: the `squared=False` flag of mean_squared_error was
# deprecated in scikit-learn 1.4 and removed in 1.6, whereas this form works on
# every version and yields the same RMSE.
np.sqrt(mean_squared_error(y_test, y_pred))
0.6123724356957945
# Pass x and y as keywords: seaborn >= 0.12 accepts only `data` positionally,
# and older versions emit a FutureWarning for positional x/y.
sns.scatterplot(x=x, y=y, color='k')
# Overlay the fitted regression line on the observations.
sns.lineplot(x=x, y=yhat)
plt.xlabel('Hours Studying')
plt.ylabel('Score')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
Text(0, 0.5, 'Score')
../_images/14 Standard deviation of residuals or root mean square deviation (RMSD)_18_2.png
# Same picture using plain matplotlib and the scikit-learn fit:
# observations as black points, predictions as a line.
plt.scatter(x_test, y_test, color='k')
plt.plot(x_test, y_pred)
plt.xlabel('Hours Studying')
plt.ylabel('Score')
Text(0, 0.5, 'Score')
../_images/14 Standard deviation of residuals or root mean square deviation (RMSD)_19_1.png