10 Analyzing mosaic plots
%%html
<iframe width="700" height="400" src="https://www.youtube.com/embed/2sHkluggZp8/" frameborder="0" allowfullscreen></iframe>
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from statsmodels.graphics.mosaicplot import mosaic
import findspark
# Presumably this has to run before the pyspark imports below so that the
# local Spark installation is discoverable -- TODO confirm against findspark docs.
findspark.init()
from pyspark.context import SparkContext
from pyspark.sql import functions as F
from pyspark.sql.session import SparkSession
# Single-node ("local") Spark session used by the rest of the notebook.
spark = (
    SparkSession.builder
    .appName("statistics")
    .master("local")
    .getOrCreate()
)
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/home/runner/work/statistics/spark-3.1.3-bin-hadoop3.2/jars/spark-unsafe_2.12-3.1.3.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
22/07/21 02:33:09 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
# Survey answers per group; the three lists are aligned by position
# (first entry of each list belongs to "Students", and so on).
dataset = dict(
    surveyees=["Students", "Staff", "Parents"],
    No=[800, 60, 150],
    Yes=[200, 240, 150],
)

# Contingency table: one row per surveyed group, one column per answer.
df = pd.DataFrame(data=dataset).set_index(keys="surveyees")
df
| surveyees | No | Yes |
|---|---|---|
| Students | 800 | 200 |
| Staff | 60 | 240 |
| Parents | 150 | 150 |
# Same table as a Spark DataFrame: zip(*) transposes the per-column lists
# into row tuples, and the dict keys supply the column names.
sdf = spark.createDataFrame(
    zip(*dataset.values()),
    schema=list(dataset),
)
sdf.show()
[Stage 0:> (0 + 1) / 1]
+---------+---+---+
|surveyees| No|Yes|
+---------+---+---+
| Students|800|200|
|    Staff| 60|240|
|  Parents|150|150|
+---------+---+---+
# Stacking turns the table into a Series keyed by (surveyee, answer) pairs,
# which is the shape the mosaic plot is drawn from here.
mosaic(data=df.stack())
(<Figure size 432x288 with 3 Axes>,
{('Students', 'No'): (0.0, 0.0, 0.6188118811881188, 0.7973421926910299),
('Students', 'Yes'): (0.0,
0.8006644518272424,
0.6188118811881188,
0.19933554817275742),
('Staff', 'No'): (0.6237623762376238,
0.0,
0.18564356435643564,
0.19933554817275748),
('Staff', 'Yes'): (0.6237623762376238,
0.2026578073089701,
0.18564356435643564,
0.7973421926910299),
('Parents', 'No'): (0.8143564356435643,
0.0,
0.18564356435643564,
0.4983388704318937),
('Parents', 'Yes'): (0.8143564356435643,
0.5016611295681063,
0.18564356435643564,
0.4983388704318937)})