02 Analyzing a cumulative relative frequency graph (optional)
02 Analyzing a cumulative relative frequency graph (optional)
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/TwGYLQ-DNdc/" frameborder="0" allowfullscreen></iframe>
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from scipy import stats
Reference: DataScience Made Simple; pandas documentation for `pandas.DataFrame.rank`
# Sample data: seven students and their mathematics scores.
data = {
    'Name': [
        'George', 'Andrea', 'micheal', 'maggie', 'Ravi', 'Xien', 'Jalpa',
    ],
    'Mathematics_score': [62, 47, 55, 74, 32, 77, 86],
}

df = DataFrame(data)
# Rank is each score's 1-based position in ascending order (lowest score = 1);
# with pct=True the same rank is returned as a fraction of the number of rows,
# so the highest score gets 1.0.
df = df.assign(
    Rank=df['Mathematics_score'].rank(),
    Percentile_rank=df['Mathematics_score'].rank(pct=True),
)
df
| | Name | Mathematics_score | Rank | Percentile_rank |
---|---|---|---|---|
0 | George | 62 | 4.0 | 0.571429 |
1 | Andrea | 47 | 2.0 | 0.285714 |
2 | micheal | 55 | 3.0 | 0.428571 |
3 | maggie | 74 | 5.0 | 0.714286 |
4 | Ravi | 32 | 1.0 | 0.142857 |
5 | Xien | 77 | 6.0 | 0.857143 |
6 | Jalpa | 86 | 7.0 | 1.000000 |
# Order the rows by score so the line is drawn left to right along the x axis.
df = df.sort_values(by='Mathematics_score')

# Scores on x, percentile rank on y: a solid line with a circle marker per row.
plt.plot(
    df['Mathematics_score'],
    df['Percentile_rank'],
    linestyle='-',
    marker='o',
)
plt.ylabel('Percentile Rank')
plt.xlabel('Mathematics Scores')
plt.show()
# TODO: study the empirical CDF (ECDF) and how it relates to the percentile-rank plot above