Draws box plots to visualize data distributions

NuGet package installation

In [1]:
#r "nuget:Microsoft.ML, 1.4.0"
#r "nuget:XPlot.Plotly, 3.0.1"
Installing package Microsoft.ML, version 1.4.0.............done!
Successfully added reference to package Microsoft.ML, version 1.4.0
Installing package XPlot.Plotly, version 3.0.1.....done!
Successfully added reference to package XPlot.Plotly, version 3.0.1

Namespaces

In [2]:
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;

A simple start: well-prepared data

Read the raw data

In [3]:
// Shared ML.NET context for the notebook; no fixed random seed is supplied.
var mlContext = new MLContext(seed: null);

// Only three numeric fields are loaded, taken from source columns 2, 3 and 4.
// The order here defines the order of the columns in the resulting schema.
var mallColumns = new[]
{
    new TextLoader.Column("Age", DataKind.Single, 2),
    new TextLoader.Column("AnnualIncome", DataKind.Single, 3),
    new TextLoader.Column("SpendingScore", DataKind.Single, 4),
};

// The input is a comma-separated file whose first row is a header.
var mallLoaderOptions = new TextLoader.Options
{
    Columns = mallColumns,
    HasHeader = true,
    Separators = new[] { ',' },
};

var reader = mlContext.Data.CreateTextLoader(mallLoaderOptions);

var dataView = reader.Load("./Mall_Customers.csv");

Visualize the data

In [4]:
// One box trace per customer attribute. Each column is looked up by the name
// declared in the text loader instead of by schema position, so a trace stays
// paired with its label even if the loader's column list is reordered.
var graph = new Graph.Box()
{
    y = dataView.GetColumn<float>("Age"),
    name = "Age"
};

var graph2 = new Graph.Box()
{
    y = dataView.GetColumn<float>("AnnualIncome"),
    name = "Annual Income"
};

var graph3 = new Graph.Box()
{
    y = dataView.GetColumn<float>("SpendingScore"),
    name = "Spending Score"
};

// Plot the three traces side by side in a single chart.
var chart = Chart.Plot(new List<Graph.Box> { graph, graph2, graph3 });

// The trace names already label each box, so the legend is switched off.
var layout = new Layout.Layout(){ title="Shopping Mall Customers Data Distribution", showlegend = false };
chart.WithLayout(layout);

display(chart);

A more interesting sample: dispersed data

Read the raw data

In [5]:
// Numeric fields loaded from the NBA file, each mapped to its source column index.
// The order here defines the order of the columns in the resulting schema
// ("Age" comes last even though its source index is the lowest).
var nbaColumns = new[]
{
    new TextLoader.Column("Ts", DataKind.Single, 9),
    new TextLoader.Column("Orb", DataKind.Single, 12),
    new TextLoader.Column("Drb", DataKind.Single, 13),
    new TextLoader.Column("Trb", DataKind.Single, 14),
    new TextLoader.Column("Ast", DataKind.Single, 15),
    new TextLoader.Column("Stl", DataKind.Single, 16),
    new TextLoader.Column("Blk", DataKind.Single, 17),
    new TextLoader.Column("Tov", DataKind.Single, 18),
    new TextLoader.Column("Usg", DataKind.Single, 19),
    new TextLoader.Column("Age", DataKind.Single, 4),
};

// The input is a comma-separated file whose first row is a header.
var nbaLoaderOptions = new TextLoader.Options
{
    Columns = nbaColumns,
    HasHeader = true,
    Separators = new[] { ',' },
};

var reader2 = mlContext.Data.CreateTextLoader(nbaLoaderOptions);

var dataView2 = reader2.Load("./2017-18_NBA_salary.csv");

Visualize the data

In [6]:
// Loader column name -> label shown under the corresponding box.
// Columns are addressed by name rather than by schema position, so each label
// stays attached to the right data even if the loader's column list changes.
// NOTE(review): "Trb" may stand for total rebounds; confirm that the
// "Team Rebounds" label is the intended wording.
var nbaBoxSpecs = new[]
{
    (Column: "Ts", Label: "True Shootings"),
    (Column: "Orb", Label: "Offensive Rebounds"),
    (Column: "Drb", Label: "Defensive Rebounds"),
    (Column: "Trb", Label: "Team Rebounds"),
    (Column: "Ast", Label: "Assists"),
    (Column: "Stl", Label: "Steals"),
    (Column: "Blk", Label: "Blocks"),
    (Column: "Tov", Label: "Turnover"),
    (Column: "Usg", Label: "Usage"),
    (Column: "Age", Label: "Age"),
};

// Build one box trace per entry, in the order listed above.
var nbaBoxes = new List<Graph.Box>();
foreach (var spec in nbaBoxSpecs)
{
    nbaBoxes.Add(new Graph.Box()
    {
        y = dataView2.GetColumn<float>(spec.Column),
        name = spec.Label
    });
}

var chart2 = Chart.Plot(nbaBoxes);

// The trace names already label each box, so the legend is switched off.
var layout2 = new Layout.Layout(){ title="NBA Statistics Data Distribution", showlegend = false };
chart2.WithLayout(layout2);

display(chart2);