from pyspark.sql import SparkSession
# Tutorial 2: load ./cars.csv with Spark, explore it with filter / select /
# groupBy, then hand the data to pandas for summary statistics.
spark = SparkSession.builder.appName("tutorial-2").getOrCreate()

# try/finally guarantees the session is stopped even if a step below fails.
try:
    # First pass: read with only the header option, to see the schema Spark
    # assigns when no type inference is requested.
    df = spark.read.csv("./cars.csv", header=True)
    df.printSchema()

    # Second pass: ask Spark to infer column types from the data so the
    # printed schema can be compared with the one above.
    df = spark.read.csv("./cars.csv", header=True, inferSchema=True)
    df.printSchema()
    df.show(10)

    # Row filtering: cars from model year 2015.
    df.filter(df['YEAR'] == 2015).show(10)

    # Row filtering: cars made by Tesla.
    # TODO(review): the exact spelling/case of the Tesla value in the "Make"
    # column is not visible here, so match it case-insensitively.
    df_tesla = df.filter(df['Make'].rlike('(?i)^tesla$'))
    df_tesla.show(10)

    # Column projection.
    df.select(df['Make'], df['Model'], df['Size']).show(10)

    # Number of rows per manufacturer, then the same table ordered from the
    # most to the least frequent manufacturer.
    df_manufacturer = df.groupBy("Make").count()
    df_manufacturer.show()
    df_manufacturer.sort("count", ascending=False).show()

    # Number of rows per model year.
    df_year = df.groupBy("YEAR").count()
    df_year.show()

    # Collect the whole DataFrame to the driver as a pandas DataFrame.
    # This is only appropriate when the data fits in driver memory.
    df_pd = df.toPandas()
    # Outside a notebook a bare expression is discarded, so print explicitly.
    print(df_pd.head(10))
    print(df_pd.describe())
finally:
    spark.stop()