Tuesday, June 28, 2022

Plotting with matplotlib and pandas in a PySpark environment

 (.env) boris@boris-All-Series:~/VOTING/PYSPARK$ cat PySparkDataFrame2.py

import pyspark

from pyspark.sql import SparkSession

import matplotlib.pyplot as plt

import pandas as pd

# Build a small Spark DataFrame, convert it to pandas, and plot salaries per
# person as a red line overlaid with a bar chart on the same axes.
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

# Sample rows; each tuple follows the field order given in `columns`.
data = [
    ("James", "", "Smith", "36636", "M", 60000),
    ("Michael", "Rose", "", "40288", "M", 70000),
    ("Robert", "", "Williams", "42114", "", 400000),
    ("Maria", "Anne", "Jones", "39192", "F", 500000),
    ("Jen", "Mary", "Brown", "", "F", 0),
]
columns = ["first_name", "middle_name", "last_name", "dob", "gender", "salary"]

try:
    pysparkDF = spark.createDataFrame(data=data, schema=columns)
    pysparkDF.printSchema()
    pysparkDF.show(truncate=False)

    # Converting dataframe to pandas
    pandasDF = pysparkDF.toPandas()
finally:
    # Everything below works on the pandas copy only, so release the Spark
    # session here (also on failure) instead of keeping it alive while the
    # plot window is open.
    spark.stop()

print(pandasDF)

# plotting pandas frames
# Both plots share one Axes so the line and the bars are drawn together.
ax = plt.gca()
pandasDF.plot(kind='line', x='first_name', y='salary', ax=ax, color='red')
pandasDF.plot(kind='bar', x='first_name', y='salary', ax=ax)
plt.show()




























































(.env) boris@boris-All-Series:~/VOTING/PYSPARK$ cat PySparkDataFrame3.py
import pyspark
from pyspark.sql import SparkSession
import matplotlib.pyplot as plt
import pandas as pd

# Build a small Spark DataFrame, convert it to pandas, and plot salary and age
# together, with age on a secondary (right-hand) y-axis.
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

# Sample rows; each tuple follows the field order given in `columns`.
data = [
    ("James", "", "Smith", 36, "M", 60000),
    ("Michael", "Rose", "", 40, "M", 70000),
    ("Robert", "", "Williams", 47, "M", 400000),
    ("Maria", "Anne", "Jones", 39, "F", 500000),
    ("Jen", "Mary", "Brown", 54, "F", 250000),
]
columns = ["first_name", "middle_name", "last_name", "age", "gender", "salary"]

try:
    pysparkDF = spark.createDataFrame(data=data, schema=columns)
    pysparkDF.printSchema()
    pysparkDF.show(truncate=False)

    # Converting dataframe to pandas
    pandasDF = pysparkDF.toPandas()
finally:
    # Everything below works on the pandas copy only, so release the Spark
    # session here (also on failure) instead of keeping it alive while the
    # plot window is open.
    spark.stop()

print(pandasDF)

# plotting pandas frames
# Salary and age differ by orders of magnitude, so age gets its own y-axis;
# mark_right=False keeps the "(right)" suffix out of the legend.
pandasDF[['salary', 'age']].plot(
    secondary_y=['age'],
    mark_right=False,
    figsize=(20, 5),
    grid=True,
)
plt.show()
































No comments:

Post a Comment