08 Interquartile range (IQR)

%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/qLYYHWYr8xI/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from scipy import stats

References: Khan Academy · pandas.DataFrame.quantile

Interquartile range (IQR) fig 1 · Interquartile range (IQR) fig 2

# Two small samples of nine observations each, listed in ascending order.
x_data = dict(x=[4, 4, 6, 7, 10, 11, 12, 14, 15])
y_data = dict(y=[7, 9, 9, 10, 10, 11, 12, 12, 14])

# One single-column frame per sample.
x_df = pd.DataFrame(data=x_data)
y_df = pd.DataFrame(data=y_data)

# Summary statistics for x; the 25% / 50% / 75% rows are the quartiles.
x_df.describe()
x
count 9.000000
mean 9.222222
std 4.146618
min 4.000000
25% 6.000000
50% 10.000000
75% 12.000000
max 15.000000
# Summary statistics for y; the 25% / 50% / 75% rows are the quartiles.
y_df.describe()
y
count 9.000000
mean 10.444444
std 2.068279
min 7.000000
25% 9.000000
50% 10.000000
75% 12.000000
max 14.000000
# Upper quartile (q=0.75) of each sample: first with the default
# interpolation, then with interpolation='nearest', which returns an
# actual observation from the data.
for sample_df in (x_df, y_df):
    print(sample_df.quantile(q=0.75))
    print(sample_df.quantile(q=0.75, interpolation='nearest'))
x    12.0
Name: 0.75, dtype: float64
x    12
Name: 0.75, dtype: int64
y    12.0
Name: 0.75, dtype: float64
y    12
Name: 0.75, dtype: int64
# Interquartile range (Q3 - Q1) of each sample.
# The data column is selected explicitly: stats.iqr flattens its whole input
# by default, so passing the full frame is only correct while the frame has
# exactly one column and would give a wrong answer once more are added.
print(stats.iqr(x_df['x']))
print(stats.iqr(y_df['y']))
6.0
3.0
def _add_empirical_cdf(frame, column):
    """Add 'Rank' and 'Empirical_CDF' columns for *column* to *frame* in place.

    Rank is the 1-based position of each value in ascending order (ties are
    broken by row order), so the result does not rely on the rows already
    being sorted. Empirical_CDF is Rank divided by the number of rows.
    """
    frame['Rank'] = frame[column].rank(method='first').astype('int64')
    frame['Empirical_CDF'] = frame['Rank'] / frame.shape[0]


def _empirical_quantile(frame, column, q):
    """Return the smallest value of *column* whose empirical CDF is >= *q*."""
    return frame.loc[frame['Empirical_CDF'] >= q, column].min()


# Quartiles of x read directly off the empirical CDF.
_add_empirical_cdf(x_df, 'x')
x_q_25 = _empirical_quantile(x_df, 'x', 0.25)
x_q_50 = _empirical_quantile(x_df, 'x', 0.50)
x_q_75 = _empirical_quantile(x_df, 'x', 0.75)

# Same procedure for y.
_add_empirical_cdf(y_df, 'y')
y_q_25 = _empirical_quantile(y_df, 'y', 0.25)
y_q_50 = _empirical_quantile(y_df, 'y', 0.50)
y_q_75 = _empirical_quantile(y_df, 'y', 0.75)

# IQR is the distance between the upper and lower quartiles.
print(f'q 25 {x_q_25} q 50 {x_q_50} q 75 {x_q_75} => iqr = {x_q_75 - x_q_25}')
print(f'q 25 {y_q_25} q 50 {y_q_50} q 75 {y_q_75} => iqr = {y_q_75 - y_q_25}')
q 25 6 q 50 10 q 75 12 => iqr = 6
q 25 9 q 50 10 q 75 12 => iqr = 3