13 R-squared or coefficient of determination

<iframe width="700" height="400" src="https://www.youtube.com/embed/lng4ZgConCM/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression


R-squared or coefficient of determination fig 1
R-squared or coefficient of determination fig 2
R-squared or coefficient of determination fig 3
R-squared or coefficient of determination fig 4


# Four sample observations used throughout the examples below.
x = np.array((1, 2, 2, 3))
y = np.array((1, 2, 3, 6))

# Ordinary least-squares fit of y on x; rvalue is the correlation
# coefficient reported by scipy for this fit.
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x=x, y=y)

# Fitted values of the regression line at each observed x.
reg_line = slope * x + intercept
def rsquard(x, y, m, b):
    """Return the coefficient of determination (R-squared) of a fitted line.

    The line is ``y_hat = b + m * x`` and the result is
    ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``y``
    from its mean.

    Parameters
    ----------
    x, y : array-like
        Observed predictor and response values. Lists and tuples are
        accepted as well as NumPy arrays.
    m : float
        Slope of the line.
    b : float
        Intercept of the line.

    Returns
    -------
    float
        The R-squared value. When every ``y`` is identical, ``SS_tot`` is
        zero and the division is left to NumPy (nan or inf with a
        runtime warning), exactly as before.
    """
    # Coerce to arrays so plain Python sequences support .mean() and
    # element-wise arithmetic; existing ndarray inputs pass through unchanged.
    x = np.asarray(x)
    y = np.asarray(y)

    residuals = y - (b + m * x)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return 1 - ss_res / ss_tot
# R-squared from the hand-written helper, using the scipy fit parameters.
rsquard(x, y, slope, intercept)

# linregress returns the Pearson correlation coefficient as rvalue;
# squaring it gives R-squared for a simple linear regression.
r_squard = rvalue**2

# Reshape both variables into single-column 2-D arrays before fitting.
x_ = x.reshape(-1, 1)
y_ = y.reshape(-1, 1)
reg = LinearRegression().fit(x_, y_)
y_pred = reg.predict(x_)

# Print the scikit-learn results next to the scipy ones for comparison.
print('slope', reg.coef_, slope)
print('intercept', reg.intercept_, intercept)
print('R squard', reg.score(x_, y_), r_squard)
print('line', y_pred, reg_line)
slope [[2.5]] 2.5
intercept [-2.] -2.0
R squard 0.8928571428571429 0.8928571428571428
line [[0.5]
 [3. ]
 [3. ]
 [5.5]] [0.5 3.  3.  5.5]
# Plot the raw observations and the scipy regression line.
# x and y are passed by keyword: seaborn deprecated positional data
# vectors and, from version 0.12, only `data` may be given positionally.
sns.scatterplot(x=x, y=y, label='Original data')
sns.lineplot(x=x, y=reg_line, color='r', label='Fitted line')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
../_images/13 R-squared or coefficient of determination_18_2.png
# Same figure drawn with matplotlib, using the scikit-learn predictions.
plt.scatter(x_, y_, label='Original data')
plt.plot(x_, y_pred, color='r', label='Fitted line')
# matplotlib does not draw a legend automatically; without this call the
# label= values above are never displayed.
plt.legend()
../_images/13 R-squared or coefficient of determination_19_1.png
# Seaborn version of the scikit-learn fit. The column arrays are flattened
# back to 1-D, and x and y are passed by keyword because seaborn 0.12+
# accepts only `data` as a positional argument.
sns.scatterplot(x=x_.reshape(-1), y=y_.reshape(-1), label='Original data')
sns.lineplot(x=x_.reshape(-1), y=y_pred.reshape(-1), color='r', label='Fitted line')
/opt/hostedtoolcache/Python/3.9.13/x64/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
../_images/13 R-squared or coefficient of determination_20_2.png