13 R-squared or coefficient of determination

%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/lng4ZgConCM/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression

khanacademy

R-squared or coefficient of determination fig 1 · R-squared or coefficient of determination fig 2 · R-squared or coefficient of determination fig 3 · R-squared or coefficient of determination fig 4

https://en.wikipedia.org/wiki/Coefficient_of_determination

# Example data: four (x, y) observations.
x = np.array([1, 2, 2, 3])
y = np.array([1, 2, 3, 6])

# Least-squares fit of y on x with SciPy; the individual statistics are
# pulled out by name so later cells can refer to them directly.
fit = stats.linregress(x, y)
slope = fit.slope
intercept = fit.intercept
rvalue = fit.rvalue
pvalue = fit.pvalue
stderr = fit.stderr

# Fitted values of the regression line at the observed x positions.
reg_line = slope * x + intercept
def rsquard(x, y, m, b):
    """Return the coefficient of determination (R-squared) of a line fit.

    Parameters
    ----------
    x, y : array_like
        Observed predictor and response values, of equal length. NumPy
        arrays as well as plain sequences (lists, tuples) are accepted.
    m, b : float
        Slope and intercept of the line ``y_hat = b + m * x``.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
        residuals about the line and ``SS_tot`` is the sum of squared
        deviations of ``y`` from its mean.

    Notes
    -----
    A constant ``y`` makes ``SS_tot`` zero; that case is not special-cased,
    so NumPy's division semantics apply (``nan`` or ``-inf``).
    """
    # Coerce to arrays so that lists/tuples support ``.mean()`` and
    # element-wise arithmetic; existing ndarrays pass through unchanged.
    x = np.asarray(x)
    y = np.asarray(y)

    residuals = y - (b + m * x)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return 1 - ss_res / ss_tot
# R-squared of the SciPy-fitted line, computed from the sums of squares.
rsquard(x, y, slope, intercept)
0.8928571428571429
# linregress's rvalue is the Pearson correlation coefficient; for a simple
# (one-predictor) least-squares fit, its square is the R-squared value.
r_squard = rvalue**2
r_squard
0.8928571428571428
# scikit-learn expects a 2-D feature matrix, so x becomes a single column;
# y is reshaped to a column as well.
x_ = x.reshape(-1, 1)
y_ = y.reshape(-1, 1)

reg = LinearRegression()
reg.fit(x_, y_)
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
y_pred = reg.predict(x_)

# Print each scikit-learn estimate next to its SciPy counterpart.
comparisons = (
    ('slope', reg.coef_, slope),
    ('intercept', reg.intercept_, intercept),
    ('R squard', reg.score(x_, y_), r_squard),
    ('line', reg.predict(x_), reg_line),
)
for label, sklearn_value, scipy_value in comparisons:
    print(label, sklearn_value, scipy_value)
slope [[2.5]] 2.5
intercept [-2.] -2.0
R squard 0.8928571428571429 0.8928571428571428
line [[0.5]
 [3. ]
 [3. ]
 [5.5]] [0.5 3.  3.  5.5]
# Pass the data vectors by keyword: seaborn 0.11 emits a FutureWarning for
# positional x/y, and from 0.12 on `data` is the only valid positional argument.
sns.scatterplot(x=x, y=y, label='Original data')
sns.lineplot(x=x, y=reg_line, color='r', label='Fitted line')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
<AxesSubplot:>
../_images/13 R-squared or coefficient of determination_18_2.png
# Same picture with plain matplotlib, drawing the scikit-learn predictions.
plt.scatter(x_, y_, label='Original data')
plt.plot(x_, y_pred, label='Fitted line', color='r')
plt.legend()
<matplotlib.legend.Legend at 0x7f1a654d8fd0>
../_images/13 R-squared or coefficient of determination_19_1.png
# Flatten the column vectors back to 1-D and pass them by keyword: seaborn
# 0.11 emits a FutureWarning for positional x/y, and from 0.12 on `data` is
# the only valid positional argument.
sns.scatterplot(x=x_.reshape(-1), y=y_.reshape(-1), label='Original data')
sns.lineplot(x=x_.reshape(-1), y=y_pred.reshape(-1), color='r', label='Fitted line')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
<AxesSubplot:>
../_images/13 R-squared or coefficient of determination_20_2.png