02. Creating a bar graph
# 02. Creating a bar graph
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/woUQ9LLaees/" frameborder="0" allowfullscreen></iframe>
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
# findspark.init() has to run before the pyspark imports below.
import findspark
findspark.init()
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession

# Get the existing session, or create one named "statistics" with a local master.
spark = (
    SparkSession.builder
    .master("local")
    .appName("statistics")
    .getOrCreate()
)
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/home/runner/work/statistics/spark-3.1.3-bin-hadoop3.2/jars/spark-unsafe_2.12-3.1.3.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
22/07/21 02:30:44 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
# Sample data: each course paired with the number of teachers who favor it.
courses = ["Language", "History", "Geometry", "Chemistry", "Physics"]
teacher_counts = [7, 3, 9, 1, 0]
dataset = {"Favorite course": courses, "number of teachers": teacher_counts}

# Index the frame by course so the course names become the bar labels.
df = pd.DataFrame(dataset)
df = df.set_index("Favorite course")
df
| Favorite course | number of teachers |
|---|---|
| Language | 7 |
| History | 3 |
| Geometry | 9 |
| Chemistry | 1 |
| Physics | 0 |
# Transpose the column-oriented dict into row tuples for Spark,
# reusing the dict keys as the column names.
records = zip(*dataset.values())
column_names = list(dataset.keys())
sdf = spark.createDataFrame(records, schema=column_names)
sdf.show()
[Stage 0:> (0 + 1) / 1]
+---------------+------------------+
|Favorite course|number of teachers|
+---------------+------------------+
|       Language|                 7|
|        History|                 3|
|       Geometry|                 9|
|      Chemistry|                 1|
|        Physics|                 0|
+---------------+------------------+
# pandas' built-in plotting: one bar per index entry (course).
df.plot.bar(color="cornflowerblue")
<AxesSubplot:xlabel='Favorite course'>
# Same chart with matplotlib, drawn on the current axes.
ax = plt.gca()
ax.bar(x=df.index, height=df["number of teachers"], color="cornflowerblue")
ax.set_title("Favorite course")
ax.set_ylabel("number of teachers")
plt.show()
# Same chart with seaborn; barplot returns the Axes it drew on,
# which is then used to set the title and y-axis label.
ax = sns.barplot(x=df.index, y=df["number of teachers"], color="cornflowerblue")
ax.set_title("Favorite course")
ax.set_ylabel("number of teachers")
plt.show()
# Same chart with plotly: a single bar trace plus a layout carrying the titles.
bar_trace = go.Bar(
    x=df.index,
    y=df["number of teachers"],
    marker={"color": "cornflowerblue"},
)
data = [bar_trace]
layout = go.Layout(
    title="Favorite course",
    yaxis={"title": "number of teachers"},
)
fig = go.Figure(data=data, layout=layout)
fig.show()