02. Creating a bar graph

%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/woUQ9LLaees/" frameborder="0" allowfullscreen></iframe>
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
import findspark

# findspark.init() has to run before the pyspark imports below: it locates the
# Spark installation so that `pyspark` can be imported like a normal package.
findspark.init()
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession

# Reuse an already-running session if there is one; otherwise start a new
# session named "statistics" against the "local" master.
spark = (
    SparkSession.builder
    .master("local")
    .appName("statistics")
    .getOrCreate()
)
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/home/runner/work/statistics/spark-3.1.3-bin-hadoop3.2/jars/spark-unsafe_2.12-3.1.3.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
22/07/21 02:30:44 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).

Khan Academy

Creating a bar graph fig 1

# Survey data: one entry per course, with the number of teachers who named it
# as their favorite. Column order matters because later cells read the keys
# and values positionally.
dataset = {
    "Favorite course": ["Language", "History", "Geometry", "Chemistry", "Physics"],
    "number of teachers": [7, 3, 9, 1, 0],
}

# Single-column frame of counts, indexed by course name.
df = pd.Series(
    dataset["number of teachers"],
    index=pd.Index(dataset["Favorite course"], name="Favorite course"),
    name="number of teachers",
).to_frame()
df
number of teachers
Favorite course
Language 7
History 3
Geometry 9
Chemistry 1
Physics 0
# zip(*columns) transposes the per-column lists into per-row tuples; the dict
# keys, in insertion order, become the Spark column names.
sdf = spark.createDataFrame(
    data=list(zip(*dataset.values())),
    schema=[*dataset],
)
sdf.show()
[Stage 0:>                                                          (0 + 1) / 1]
+---------------+------------------+
|Favorite course|number of teachers|
+---------------+------------------+
|       Language|                 7|
|        History|                 3|
|       Geometry|                 9|
|      Chemistry|                 1|
|        Physics|                 0|
+---------------+------------------+
                                                                                
# pandas' built-in plotting: one bar per index entry (course).
df.plot.bar(color="cornflowerblue")
<AxesSubplot:xlabel='Favorite course'>
../_images/02. Creating a bar graph_9_1.png
# Same chart drawn directly with matplotlib: courses on x, counts as bar height.
plt.bar(df.index, df["number of teachers"], color="cornflowerblue")
# Title and y-axis label are set on the current Axes in a single call.
plt.gca().set(title="Favorite course", ylabel="number of teachers")
plt.show()
../_images/02. Creating a bar graph_10_0.png
# Same chart with seaborn, using the long-form API: the index is moved back
# into a regular column so both axes can be referenced by column name.
sns.barplot(
    data=df.reset_index(),
    x="Favorite course",
    y="number of teachers",
    color="cornflowerblue",
).set(title="Favorite course", ylabel="number of teachers")
plt.show()
../_images/02. Creating a bar graph_11_0.png
# Same chart with plotly: a single Bar trace plus a layout carrying the
# figure title and the y-axis title.
data = [
    go.Bar(
        x=df.index,
        y=df["number of teachers"],
        marker_color="cornflowerblue",
    )
]
layout = go.Layout(
    title="Favorite course",
    yaxis_title="number of teachers",
)
fig = go.Figure(data=data, layout=layout)
fig.show()