13 R-squared or coefficient of determination
13 R-squared or coefficient of determination
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/lng4ZgConCM/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
Reference (Wikipedia, "Coefficient of determination"): https://en.wikipedia.org/wiki/Coefficient_of_determination
# Toy data set: four (x, y) observations.
x = np.array((1, 2, 2, 3))
y = np.array((1, 2, 3, 6))

# Least-squares line through the data; rvalue is the Pearson correlation
# coefficient of x and y.
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x, y)

# Values of the fitted line at the observed x positions.
reg_line = slope * x + intercept
def rsquard(x, y, m, b):
    """Return the coefficient of determination (R-squared) of a line fit.

    The fitted line is ``b + m * x``. The score is ``1 - ss_res / ss_tot``,
    where ``ss_res`` is the sum of squared residuals and ``ss_tot`` is the
    sum of squared deviations of ``y`` from its mean.

    Args:
        x: Array-like of predictor values (list, tuple or ndarray).
        y: Array-like of observed responses, broadcastable against ``x``.
        m: Slope of the fitted line.
        b: Intercept of the fitted line.

    Returns:
        The R-squared value. The result is not finite when ``y`` is
        constant, because ``ss_tot`` is then zero.
    """
    # Coerce to float arrays so plain Python sequences work too, and so
    # squaring integer data cannot overflow an integer dtype.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    residuals = y - (b + m * x)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return 1 - ss_res / ss_tot
# R-squared of the linregress fit, computed from the sums of squares.
rsquard(x, y, m=slope, b=intercept)
0.8928571428571429
# rvalue returned by linregress is the Pearson correlation coefficient;
# squaring it gives the same R-squared as the sum-of-squares formula.
r_squard = rvalue**2
# Bare expression so the notebook cell displays the value.
r_squard
0.8928571428571428
# Reshape the 1-D samples into single-column 2-D arrays for the estimator.
x_ = x.reshape((-1, 1))
y_ = y.reshape((-1, 1))

# Fit a linear regression model to the column data.
reg = LinearRegression()
reg.fit(x_, y_)
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
# Predictions of the fitted model at the training inputs.
y_pred = reg.predict(x_)

# Print each LinearRegression quantity next to its linregress counterpart.
print('slope', reg.coef_, slope)
print('intercept', reg.intercept_, intercept)
print('R squard', reg.score(x_, y_), r_squard)
# y_pred already holds reg.predict(x_), so reuse it for the fitted line.
print('line', y_pred, reg_line)
slope [[2.5]] 2.5
intercept [-2.] -2.0
R squard 0.8928571428571429 0.8928571428571428
line [[0.5]
[3. ]
[3. ]
[5.5]] [0.5 3. 3. 5.5]
# Plot the raw data and the linregress fit. x and y are passed by keyword:
# seaborn deprecated positional data vectors and rejects them from v0.12.
sns.scatterplot(x=x, y=y, label='Original data')
sns.lineplot(x=x, y=reg_line, color='r', label='Fitted line')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:>
# Same figure drawn with plain matplotlib: observations as points, the
# LinearRegression predictions as a red line.
plt.scatter(x=x_, y=y_, label='Original data')
plt.plot(x_, y_pred, label='Fitted line', color='r')
plt.legend()
<matplotlib.legend.Legend at 0x7f1a654d8fd0>
# Plot the data and the LinearRegression predictions with seaborn. The column
# arrays are flattened back to 1-D, and x and y are passed by keyword because
# seaborn deprecated positional data vectors and rejects them from v0.12.
sns.scatterplot(x=x_.ravel(), y=y_.ravel(), label='Original data')
sns.lineplot(x=x_.ravel(), y=y_pred.ravel(), color='r', label='Fitted line')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:>