# 08 Interquartile range (IQR)
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/qLYYHWYr8xI/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from scipy import stats
References: Khan Academy; pandas.DataFrame.quantile
# Two nine-point samples, each listed in ascending order and wrapped in a
# single-column DataFrame ('x' and 'y' respectively).
x_data = dict(x=[4, 4, 6, 7, 10, 11, 12, 14, 15])
y_data = dict(y=[7, 9, 9, 10, 10, 11, 12, 12, 14])
x_df, y_df = (DataFrame(sample) for sample in (x_data, y_data))
# Summary statistics for the x sample: count, mean, std, min, the 25%/50%/75%
# quantiles and max. The 25% and 75% rows are the quartiles the IQR is built from.
x_df.describe()
x | |
---|---|
count | 9.000000 |
mean | 9.222222 |
std | 4.146618 |
min | 4.000000 |
25% | 6.000000 |
50% | 10.000000 |
75% | 12.000000 |
max | 15.000000 |
# Summary statistics for the y sample: count, mean, std, min, the 25%/50%/75%
# quantiles and max. The 25% and 75% rows are the quartiles the IQR is built from.
y_df.describe()
y | |
---|---|
count | 9.000000 |
mean | 10.444444 |
std | 2.068279 |
min | 7.000000 |
25% | 9.000000 |
50% | 10.000000 |
75% | 12.000000 |
max | 14.000000 |
# Upper quartile (q=0.75) of each sample, printed twice per frame: first with
# the default interpolation, then with interpolation='nearest'.
for frame in (x_df, y_df):
    print(frame.quantile(q=0.75))
    print(frame.quantile(q=0.75, interpolation='nearest'))
x 12.0
Name: 0.75, dtype: float64
x 12
Name: 0.75, dtype: int64
y 12.0
Name: 0.75, dtype: float64
y 12
Name: 0.75, dtype: int64
# Interquartile range (Q3 - Q1) of each sample via SciPy.
# Pass the data column explicitly: stats.iqr reduces over the flattened input
# by default, so handing it the whole DataFrame would silently mix in any
# helper columns added to the frame elsewhere in the notebook.
print(stats.iqr(x_df['x']))
print(stats.iqr(y_df['y']))
6.0
3.0
def _empirical_quartiles(df, column):
    """Annotate *df* with rank / empirical-CDF columns and return its quartiles.

    Adds two columns to *df* in place:

    * ``Rank`` - 1-based position of each row's value in ascending order
      (ties are ranked in order of appearance).
    * ``Empirical_CDF`` - ``Rank`` divided by the number of rows.

    Each quartile is the smallest value of *column* whose empirical CDF is at
    least the requested level (0.25, 0.50, 0.75).

    Returns:
        A ``(q25, q50, q75)`` tuple.
    """
    n_obs = df.shape[0]
    # Rank by value rather than by row position so the result does not depend
    # on the frame already being sorted or carrying a default RangeIndex.
    df['Rank'] = df[column].rank(method='first').astype('int64')
    df['Empirical_CDF'] = df['Rank'] / n_obs
    return tuple(
        df.loc[df['Empirical_CDF'] >= level, column].min()
        for level in (0.25, 0.50, 0.75)
    )


x_q_25, x_q_50, x_q_75 = _empirical_quartiles(x_df, 'x')
y_q_25, y_q_50, y_q_75 = _empirical_quartiles(y_df, 'y')

# Report the quartiles and the resulting IQR (q75 - q25) for each sample.
print(f'q 25 {x_q_25} q 50 {x_q_50} q 75 {x_q_75} => iqr = {x_q_75 - x_q_25}')
print(f'q 25 {y_q_25} q 50 {y_q_50} q 75 {y_q_75} => iqr = {y_q_75 - y_q_25}')
q 25 6 q 50 10 q 75 12 => iqr = 6
q 25 9 q 50 10 q 75 12 => iqr = 3