02 Analyzing a cumulative relative frequency graph (optional)

%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/TwGYLQ-DNdc/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from scipy import stats

khanacademy

datasciencemadesimple pandas.DataFrame.rank

# Sample data: one mathematics score per student.
data = dict(
    Name=['George', 'Andrea', 'micheal', 'maggie', 'Ravi', 'Xien', 'Jalpa'],
    Mathematics_score=[62, 47, 55, 74, 32, 77, 86],
)

# Build the table and append two derived columns in one chain:
#   Rank            - position of each score (lowest score gets rank 1.0)
#   Percentile_rank - the same rank expressed as a fraction of the row count
df = DataFrame(data).assign(
    Rank=lambda table: table['Mathematics_score'].rank(),
    Percentile_rank=lambda table: table['Mathematics_score'].rank(pct=True),
)

# Bare expression so the notebook renders the resulting table.
df
Name Mathematics_score Rank Percentile_rank
0 George 62 4.0 0.571429
1 Andrea 47 2.0 0.285714
2 micheal 55 3.0 0.428571
3 maggie 74 5.0 0.714286
4 Ravi 32 1.0 0.142857
5 Xien 77 6.0 0.857143
6 Jalpa 86 7.0 1.000000
# Order the rows by score so the connecting line runs left to right
# through increasing scores instead of jumping back and forth.
df = df.sort_values(by='Mathematics_score')

# x: raw scores, y: percentile rank of each score.
score_axis = df['Mathematics_score']
percentile_axis = df['Percentile_rank']

# '-o' draws a solid line with a circle marker at every data point.
plt.plot(score_axis, percentile_axis, '-o')
plt.xlabel('Mathematics Scores')
plt.ylabel('Percentile Rank')
plt.show()
../_images/02 Analyzing a cumulative relative frequency graph (optional)_12_0.png
# TODO: understand the empirical CDF (ECDF) and related concepts

Refs: 1 2