14 Standard deviation of residuals or root mean square deviation (RMSD)
14 Standard deviation of residuals or root mean square deviation (RMSD)
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/hSP_rmNHU_U/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Toy data set: hours studied (x) against the resulting score (y).
x = np.array([1, 2, 2, 3])
y = np.array([1, 2, 3, 6])

# Least-squares line from SciPy; the fitted values follow from its equation.
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x, y)
yhat = slope * x + intercept

# Single-column 2-D views of the same data for scikit-learn.
x_test = x[:, np.newaxis]
y_test = y[:, np.newaxis]

# The same regression fitted with scikit-learn (fit() returns the estimator),
# followed by its predictions on the data it was fitted to.
reg = LinearRegression().fit(x_test, y_test)
y_pred = reg.predict(x_test)
# Tabulate every observation next to its fitted value and squared residual.
df = pd.DataFrame(
    data={
        'x': x,
        'y': y,
        'yhat': yhat,
        '(y - yhat)**2': (y - yhat) ** 2,
    }
)
df
| | x | y | yhat | (y - yhat)**2 |
|---|---|---|---|---|
| 0 | 1 | 1 | 0.5 | 0.25 |
| 1 | 2 | 2 | 3.0 | 1.00 |
| 2 | 2 | 3 | 3.0 | 0.00 |
| 3 | 3 | 6 | 5.5 | 0.25 |
# Confirm that y is one-dimensional; its length is the n that rmsd() uses.
y.shape
(4,)
\[ \sqrt{ \frac{ \sum (residual)^{2} }{n-2} } \]
\[ \sqrt { \frac{ \sum (y-\hat{y})^{2} }{n-2} } \]
def rmsd(y, yhat, i=0):
    """Return the root mean square deviation of the residuals ``y - yhat``.

    The value computed is ``sqrt(sum((y - yhat) ** 2) / (n - i))`` with
    ``n = len(y)``.

    Parameters
    ----------
    y : array-like
        Observed values.
    yhat : array-like
        Fitted / predicted values, broadcastable against ``y``.
    i : int, optional
        Amount subtracted from ``n`` in the denominator. The default ``0``
        divides by ``n``; ``2`` gives the ``n - 2`` denominator.

    Returns
    -------
    float
        The root mean square deviation.

    Raises
    ------
    ValueError
        If ``n - i`` is not positive, since the quotient would be undefined
        (division by zero or the square root of a negative number).
    """
    # Convert to float arrays so plain lists are accepted and unsigned
    # integer inputs cannot wrap around when subtracted.
    observed = np.asarray(y, dtype=float)
    fitted = np.asarray(yhat, dtype=float)

    n = len(observed)
    denominator = n - i
    if denominator <= 0:
        raise ValueError(
            f"n - i must be positive, got n={n} and i={i}"
        )

    return np.sqrt(np.sum((observed - fitted) ** 2) / denominator)
# Show the RMSD for each denominator offset i (the divisor is n - i);
# i = 0 is also rmsd's default.
for i in (1, 2, 0):
    print(f'divided by {i}: \t', rmsd(y, yhat, i))
divided by 1: 0.7071067811865476
divided by 2: 0.8660254037844386
divided by 0: 0.6123724356957945
# NOTE(review): `stderr` is the fifth value returned by stats.linregress, which
# SciPy documents as the standard error of the estimated slope, not the
# standard deviation of the residuals. It equals the n-denominator RMSD here
# only because (n - 2) * sum((x - mean(x))**2) happens to equal n for this
# data (2 * 2 == 4).
stderr
0.6123724356957947
# RMSE of the scikit-learn fit. The `squared=False` keyword is deprecated in
# recent scikit-learn releases, so take the square root of the MSE explicitly;
# this form works on every version.
np.sqrt(mean_squared_error(y_test, y_pred))
0.6123724356957945
# Scatter the observations and overlay the fitted line. x and y are passed by
# keyword: from seaborn 0.12 the only valid positional argument is `data`, so
# the positional form raises an error or is misinterpreted.
sns.scatterplot(x=x, y=y, color='k')
sns.lineplot(x=x, y=yhat)
plt.xlabel('Hours Studying')
plt.ylabel('Score')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Text(0, 0.5, 'Score')
# The same picture drawn with matplotlib alone, using the scikit-learn
# predictions for the line; everything goes onto the current axes.
ax = plt.gca()
ax.scatter(x_test, y_test, color='k')
ax.plot(x_test, y_pred)
ax.set_xlabel('Hours Studying')
ax.set_ylabel('Score')
Text(0, 0.5, 'Score')