This page is available as an executable or viewable Jupyter Notebook:



In [1]:
%useLatestDescriptors
%use lets-plot
import java.util.Random
In [2]:
// Synthetic data set modelled on the example found at:
// www.cookbook-r.com/Graphs/Scatterplots_(ggplot2)

// Seeded generator, so the noise added to `yvar` is reproducible between runs.
val rand = java.util.Random(123)
val n = 20
val data: Map<String, List<*>> = mapOf(
    // n / 2 rows labelled "A" followed by n / 2 rows labelled "B".
    "cond" to listOf("A", "B").flatMap { label -> List(n / 2) { label } },
    // The row index itself: 0, 1, ..., n - 1 (Int values).
    "xvar" to (0 until n).toList(),
    // The row index plus Gaussian noise scaled by 3 (Double values).
    "yvar" to (0 until n).map { it + rand.nextGaussian() * 3 }
)

Basic scatter plot

In [3]:
// Base plot reused by the following cells: `xvar` on the x axis, `yvar` on the y axis,
// with the plot size set to 300 x 250.
val p = ggplot(data) {
    x = "xvar"
    y = "yvar"
} + ggsize(300, 250)

// Draw the observations with point shape 1 (hollow circle).
p + geom_point(shape = 1)
Out[3]:

Add regression line

In [4]:
// Scatter of hollow points (shape = 1) plus a smoothing layer; geom_smooth() is called
// with no arguments, so all of its settings are the library defaults.
p + geom_point(shape = 1) +
    geom_smooth()
Out[4]:
In [5]:
// Same scatter + smoothing layer, but without the standard error band:
// `se = false` is passed to geom_smooth.
p + geom_point(shape = 1) +
    geom_smooth(se = false)
Out[5]:

Split dataset by the cond variable

In [6]:
// Second base plot: same x/y mapping as before, with `color` additionally mapped to the
// `cond` column in the plot-level aesthetics; plot size is 500 x 250.
val p1 = ggplot(data) {
    x = "xvar"
    y = "yvar"
    color = "cond"
} + ggsize(500, 250)

// Hollow points plus a smoothing layer without the standard error band.
p1 + geom_point(shape = 1) + geom_smooth(se = false)
Out[6]:
In [7]:
// Map `shape` to the `cond` variable (in addition to the `color` mapping already
// defined on `p1`), drawing the points with size 5.
p1 + geom_point(size = 5) { shape = "cond" }
Out[7]:
In [8]:
// Choose different shapes using `scale_shape_manual`; the `values` list supplies
// the shape codes to use instead of the default ones:
// 1 - hollow circle 
// 2 - hollow triangle
p1 + geom_point(size = 5) { shape = "cond" } + 
     scale_shape_manual(values = listOf(1,2))
Out[8]:

Handling overplotting

In [9]:
// Create data with overlapping points: every coordinate is divided by 5, truncated
// to an Int and multiplied by 5 again, so many rows end up on the same (x, y) position.
//
// The columns of `data` are typed List<*> and do not share an element type ("xvar"
// holds Ints, "yvar" holds Doubles), so each element is read as a Number and
// converted with toDouble() rather than force-casting a whole column to List<Double>
// (an unchecked cast that misstates the element type of "xvar").
val data1 = listOf("xvar", "yvar").associateWith { column ->
    data.getValue(column).map { value -> ((value as Number).toDouble() / 5).toInt() * 5 }
}
In [10]:
// Base plot for the overplotting examples, built from the snapped data set `data1`.
// Plot size is 500 x 250 and the x axis breaks are fixed at 0, 5, 10 and 15.
val p2 = ggplot(data1) {
    x = "xvar"
    y = "yvar"
} + ggsize(500, 250) + scale_x_continuous(breaks = (0..15 step 5).toList())

// Use `alpha` to show overplotting: large (size 7) points drawn with alpha 0.3.
p2 + geom_point(alpha = 0.3, size = 7)
Out[10]:
In [11]:
// `jitter` points to show overplotting in another way: hollow points (shape = 1) are
// drawn with `position_jitter`, using width = .1 and height = .1.
p2 + geom_point(shape = 1, position = position_jitter(width=.1, height=.1))
Out[11]: