Draws a Correlation Chart or Heatmap on the Titanic data set

NuGet package installation

In [1]:
#r "nuget:MathNet.Numerics, 4.9.0"
#r "nuget:Microsoft.ML, 1.4.0"
#r "nuget:XPlot.Plotly, 3.0.1"
Installing package MathNet.Numerics, version 4.9.0.....................................done!
Successfully added reference to package MathNet.Numerics, version 4.9.0
Installing package Microsoft.ML, version 1.4.0.........done!
Successfully added reference to package Microsoft.ML, version 1.4.0
Installing package XPlot.Plotly, version 3.0.1........done!
Successfully added reference to package XPlot.Plotly, version 3.0.1

Namespaces

In [2]:
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;
using MathNet.Numerics.Statistics;

Simple heatmap sample

Warming up ...

In [3]:
// Warm-up: render a small hand-written 3x3 matrix as a heatmap, with the
// colour scale pinned to [-1, 1] like a correlation coefficient.
var graph = new Graph.Heatmap();
graph.x = new [] { "one", "two", "three" };
graph.y = new [] { "three", "two", "one" };
graph.z = new List<List<double>>
{
    new List<double> { 0, -.75, 1 },
    new List<double> { .75, 1, -.75 },
    new List<double> { 1, .75, 0 }
};
graph.zmin = -1;
graph.zmax = 1;

// The layout only carries the chart title.
var layout = new Layout.Layout { title = "Sample Correlation Chart" };

var chart = Chart.Plot(graph);
chart.WithLayout(layout);

// Emit the chart as the cell's output.
display(chart);

And now for the real thing

Read the data

In [4]:
// ML.NET entry point; no fixed seed is supplied.
var mlContext = new MLContext(seed: null);

// Comma-separated file with a header row and quoted fields. Only the six
// numeric columns used for the correlation chart are mapped, each as Single,
// identified by its zero-based position in the file.
var readerOptions = new TextLoader.Options();
readerOptions.Separators = new char[] { ',' };
readerOptions.HasHeader = true;
readerOptions.AllowQuoting = true;
readerOptions.Columns =
    new[]
    {
        (Name: "Survived", Index: 1),
        (Name: "PClass", Index: 2),
        (Name: "Age", Index: 5),
        (Name: "SibSp", Index: 6),
        (Name: "Parch", Index: 7),
        (Name: "Fare", Index: 9)
    }
    .Select(c => new TextLoader.Column(c.Name, DataKind.Single, c.Index))
    .ToArray();

var dataView = mlContext.Data.LoadFromTextFile("./Titanic.csv", readerOptions);

Enter the Matrix

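ML.NET loads the columns as singles (float), while Math.NET works with doubles, so each column is converted to a list of doubles before the pairwise correlations are computed.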

In [5]:
// Pull every column of the data view into memory. The columns are read as
// float and widened to double, which is what Correlation.Pearson consumes.
var matrix = new List<List<double>>();
foreach (var column in dataView.Schema)
{
    matrix.Add(dataView.GetColumn<float>(column).Select(f => (double)f).ToList());
}

// Size the result from the number of loaded columns instead of hard-coding it.
var size = matrix.Count;
var last = size - 1;

// data[row, col] holds the Pearson correlation between source column `row`
// and source column `last - col`: the second index runs over the columns in
// reverse order, so the self-correlations sit on the anti-diagonal.
var data = new double[size, size];
for (int x = 0; x < size; ++x)
{
    for (int y = 0; y < last - x; ++y)
    {
        var seriesA = matrix[x];
        var seriesB = matrix[last - y];

        // Keep only the rows where both values are present. A single NaN
        // would otherwise turn the whole coefficient into NaN.
        // NOTE(review): assumes missing numeric fields are loaded as NaN
        // (e.g. gaps in Age) - confirm against the actual CSV.
        var complete = seriesA
            .Zip(seriesB, (a, b) => (a, b))
            .Where(p => !double.IsNaN(p.a) && !double.IsNaN(p.b))
            .ToList();

        var value = Correlation.Pearson(
            complete.Select(p => p.a),
            complete.Select(p => p.b));

        // Correlation is symmetric: fill the cell and its mirror image
        // across the anti-diagonal in one go.
        data[x, y] = value;
        data[last - y, last - x] = value;
    }

    // A column always correlates perfectly with itself.
    data[x, last - x] = 1;
}

Draw

In [6]:
// Heatmap of the correlation matrix held in `data`, with the colour scale
// pinned to the full range of a correlation coefficient.
var graph = new Graph.Heatmap();

// The y captions follow the order in which the columns were loaded; the
// x captions list the same variables in reverse order, matching the reversed
// second index of `data`.
graph.y = new [] { "Survived", "Class", "Age", "Sib / Sp", "Par / Chi", "Fare" };
graph.x = new [] { "Fare", "Parents / Children", "Siblings / Spouses", "Age", "Class", "Survived" };
graph.z = data;
graph.zmin = -1;
graph.zmax = 1;

// The layout only carries the chart title.
var layout = new Layout.Layout { title = "Titanic Survival Correlation Chart" };

var chart = Chart.Plot(graph);
chart.WithLayout(layout);

// Emit the chart as the cell's output.
display(chart);