01 Calculating percentile
# 01 Calculating percentile
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/Ngyt8Q5tWkU/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from scipy import stats
Percentile vs. quantile vs. quartile
# Sample data: each term is `frequency * [value]`, giving 14 observations in
# ascending order (the value 8 never occurs).
x = np.array(1 * [1] + 2 * [2] + 1 * [3] + 1 * [4] + 2 * [5] + 1 * [6] + 5 * [7] + 1 * [9])

# Report which of the candidate percentile ranks yields a value that rounds to 6.
# np.percentile interpolates linearly between neighbouring observations by
# default, so the result need not be one of the sample values.
for i in [40, 55, 70, 85]:
    percentile = np.percentile(x, i)
    if round(percentile) == 6:
        print(i, percentile)
55 6.15
# Quick look at the distribution of x using pandas' built-in histogram plot.
Series(x).plot.hist()
<AxesSubplot:ylabel='Frequency'>
data:image/s3,"s3://crabby-images/a712e/a712e788217ce1a366a5528131bdcf5ef8bf160c" alt="../_images/01 Calculating percentile_8_1.png"
# Histogram of x with one unit-wide bin centred on each integer value.
# The default of 10 equal-width bins over [min, max] does not line up with
# integer data: some bins fall between two integers and stay empty, which
# draws a gap in the plot that is not present in the data.
bin_edges = np.arange(x.min() - 0.5, x.max() + 1.5)
plt.hist(x, bins=bin_edges)
plt.xlabel('Hours a day')
plt.ylabel('Drivers')
plt.show()
data:image/s3,"s3://crabby-images/a7f08/a7f08867f0816a33bc749c25ad56ec981b80b07b" alt="../_images/01 Calculating percentile_9_0.png"
# TODO: fix plot gap and create a dot plot instead of histogram