01 Confidence intervals for the difference between two proportions

import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats, special

khanacademy

Confidence intervals for the difference between two proportions fig 1

\[\begin{split}\begin{array}{lllll} \displaystyle z&=&\frac{\hat{p}-p}{\sqrt{\frac{p(1-p)}{n}}} &\approx& \frac{\hat{p}-p}{\sqrt{\frac{\hat{p}(1-\hat{p})}{n}}}\\ \displaystyle z&=&\frac{(\hat{p}_1-\hat{p}_2)-(p_1-p_2)}{\sqrt{\frac{p_1(1-p_1)}{n_1}+\frac{p_2(1-p_2)}{n_2}}} &\approx& \frac{(\hat{p}_1-\hat{p}_2)-(p_1-p_2)}{\sqrt{\frac{\hat{p}_1(1-\hat{p}_1)}{n_1}+\frac{\hat{p}_2(1-\hat{p}_2)}{n_2}}}\\ \end{array}\end{split}\]
\[p_1-p_2\]
\[\begin{split}\begin{array}{lllll} \displaystyle \hat{p}\pm z_*\sqrt{\frac{\hat{p}(1-\hat{p})}{n}}\\ \displaystyle (\hat{p}_1-\hat{p}_2)\pm z_*\sqrt{\frac{\hat{p}_1(1-\hat{p}_1)}{n_1}+\frac{\hat{p}_2(1-\hat{p}_2)}{n_2}}\\ \end{array}\end{split}\]
\[ \mathbb{E}(\hat{p}_1-\hat{p}_2) = \mathbb{E}(\hat{p}_1)-\mathbb{E}(\hat{p}_2) = p_1-p_2 \]
\[\begin{split}\begin{array}{lll} \sigma^2_{\hat{p}_1-\hat{p}_2} &=& \sigma^2_{\hat{p}_1}+\sigma^2_{\hat{p}_2}\\ &=& \frac{p_1(1-p_1)}{n_1}+\frac{p_2(1-p_2)}{n_2}\\ &\approx& \frac{\hat{p}_1(1-\hat{p}_1)}{n_1}+\frac{\hat{p}_2(1-\hat{p}_2)}{n_2}\\ \end{array}\end{split}\]
\[ \mathbb{E}(\bar{x}_1-\bar{x}_2) = \mathbb{E}(\bar{x}_1)-\mathbb{E}(\bar{x}_2) = \mu_1-\mu_2 \]
\[\begin{split}\begin{array}{lll} \sigma^2_{\bar{x}_1-\bar{x}_2} &=& \sigma^2_{\bar{x}_1}+\sigma^2_{\bar{x}_2}\\ &=& \frac{\sigma_1^2}{n_1}+\frac{\sigma_2^2}{n_2}\\ &\approx& \frac{s_1^2}{n_1}+\frac{s_2^2}{n_2}\\ \end{array}\end{split}\]
\[ \mathbb{E}(\bar{x}_d) = \mu_d \]
\[ \sigma^2_{\bar{x}_d} = \frac{\sigma_d^2}{n} \approx \frac{s_d^2}{n} \]