Spark

Apache Spark is a fast cluster computing engine written in Scala. This notebook shows how you can load and use Spark just like any other library. There is also a Spark magic that enables deeper integration.

In [ ]:

%classpath add mvn org.apache.spark spark-sql_2.11 2.2.1
// Raise the root log4j logger's threshold to ERROR for the rest of the notebook.
import org.apache.log4j.{Level, Logger}
Logger.getRootLogger.setLevel(Level.ERROR)

In [ ]:

import org.apache.spark.sql.SparkSession

// Session shared by the following cells: master "local[4]", application name
// "Simple Application", and "spark.ui.enabled" set to "false".
val spark = {
  val sessionBuilder = SparkSession.builder()
    .master("local[4]")
    .appName("Simple Application")
    .config("spark.ui.enabled", "false")
  // getOrCreate hands back an already-running session if there is one.
  sessionBuilder.getOrCreate()
}

In [ ]:

// Monte Carlo estimate of Pi: sample points uniformly in the unit square and
// count how many fall inside the quarter circle of radius 1 (area Pi / 4).
val NUM_SAMPLES = 10000000
val count = spark.sparkContext.parallelize(1 to NUM_SAMPLES).map { _ =>
  // Draw from the executing thread's own generator. A Random created on the
  // driver and captured by this closure would be serialized into every task,
  // so each partition would start from the same generator state and replay
  // the same sequence of points instead of contributing independent samples.
  val rng = java.util.concurrent.ThreadLocalRandom.current()
  val x = rng.nextDouble()
  val y = rng.nextDouble()
  if (x * x + y * y < 1) 1 else 0
}.reduce(_ + _)

// The 4.0 literal makes the whole expression floating-point, so the ratio of
// hits to samples is not truncated by integer division.
println(s"Pi is roughly ${4.0 * count / NUM_SAMPLES}")

In [ ]:

// Stop the session created earlier in this notebook now that the job is done.
spark.stop()

In [ ]: