02 Analyzing a cumulative relative frequency graph (optional)

%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/TwGYLQ-DNdc/" frameborder="0" allowfullscreen></iframe>

import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from scipy import stats

# Toy data set: seven students and their mathematics scores.
data = dict(
    Name=['George', 'Andrea', 'micheal', 'maggie', 'Ravi', 'Xien', 'Jalpa'],
    Mathematics_score=[62, 47, 55, 74, 32, 77, 86],
)

df = pd.DataFrame(data)

# Add both ranking columns in one step:
#   Rank            - 1-based position of each score in ascending order
#   Percentile_rank - the same rank divided by the number of students, so the
#                     highest score maps to 1.0
df = df.assign(
    Rank=df['Mathematics_score'].rank(),
    Percentile_rank=df['Mathematics_score'].rank(pct=True),
)

# Bare expression so the notebook renders the resulting table.
df

	Name	Mathematics_score	Rank	Percentile_rank
0	George	62	4.0	0.571429
1	Andrea	47	2.0	0.285714
2	micheal	55	3.0	0.428571
3	maggie	74	5.0	0.714286
4	Ravi	32	1.0	0.142857
5	Xien	77	6.0	0.857143
6	Jalpa	86	7.0	1.000000

# Order the rows by score so the line is drawn left-to-right; with the
# percentile rank on the y-axis this gives the cumulative relative
# frequency curve of the scores.
df = df.sort_values(by='Mathematics_score')

plt.plot(
    df['Mathematics_score'],
    df['Percentile_rank'],
    linestyle='-',
    marker='o',
)
plt.xlabel('Mathematics Scores')
plt.ylabel('Percentile Rank')
plt.show()

# TODO: study the empirical CDF (ECDF) and how it relates to the percentile-rank plot above.

Refs: 1 2

Statistics with Python