// .NET Interactive (C# script) notebook: downloads a housing CSV, explores it with
// XPlot charts, trains a regression model with ML.NET AutoML and plots its predictions.
//
// NOTE(review): the "---- Cell ----" markers are reconstructed from the code (positions of
// `using` directives, semicolon-less trailing expressions, repeated `var chart`, `#!time`).
// Each cell is meant to be submitted on its own, which is why `using` directives recur,
// some cells end in a bare expression (its value is the cell output), and `chart` is
// declared more than once. Confirm the boundaries against the original notebook.

// ---- Cell: package sources and package references ----
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json"
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"
#r "nuget:Microsoft.ML, 1.5.1"
#r "nuget:Microsoft.ML.AutoML, 0.17.1"
#r "nuget:Microsoft.Data.Analysis, 0.4.0"
#r "nuget: XPlot.Plotly.Interactive, 4.0.2"

// ---- Cell: common namespaces ----
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;
using XPlot.Plotly;

// ---- Cell: HTML formatter for DataFrame ----
using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;

// Renders a DataFrame as an HTML table: a header row ("index" followed by the column
// names) and at most the first 20 data rows, each prefixed with its row number.
Formatter<DataFrame>.Register((df, writer) =>
{
    var headers = new List<IHtmlContent>();
    headers.Add(th(i("index")));
    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));

    var rows = new List<List<IHtmlContent>>();
    var take = 20;

    // The loop variable is deliberately not called `i`, so it cannot be confused with
    // the PocketView `i(...)` (italic) tag used for the header above.
    for (var rowIndex = 0; rowIndex < Math.Min(take, df.Rows.Count); rowIndex++)
    {
        var cells = new List<IHtmlContent>();
        cells.Add(td(rowIndex));
        foreach (var obj in df.Rows[rowIndex])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }

    var t = table(
        thead(
            headers),
        tbody(
            rows.Select(
                r => tr(r))));

    writer.Write(t);
}, "text/html");

// ---- Cell: download the dataset once ----
using System.IO;
using System.Net.Http;

string housingPath = "housing.csv";

// Only hit the network when the file is not already cached next to the notebook.
if (!File.Exists(housingPath))
{
    // Dispose the client as soon as the single download is done.
    using var client = new HttpClient();
    var contents = await client
        .GetStringAsync("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv");

    // Write to the same path that was checked above (previously a duplicated literal).
    File.WriteAllText(housingPath, contents);
}

// ---- Cell: load the CSV and show it ----
var housingData = DataFrame.LoadCsv(housingPath);
housingData

// ---- Cell: summary statistics ----
housingData.Description()

// ---- Cell: histogram of the label column ----
Chart.Plot(
    new Histogram()
    {
        x = housingData.Columns["median_house_value"],
        nbinsx = 20
    }
)

// ---- Cell: longitude/latitude scatter coloured by house value ----
var chart = Chart.Plot(
    new Scattergl()
    {
        x = housingData.Columns["longitude"],
        y = housingData.Columns["latitude"],
        mode = "markers",
        marker = new Marker()
        {
            color = housingData.Columns["median_house_value"],
            colorscale = "Jet"
        }
    }
);

chart.Width = 600;
chart.Height = 600;
chart.Display();

// ---- Cell: random train/test split ----

/// <summary>
/// Shuffles <paramref name="array"/> in place with a Fisher-Yates pass: for each position
/// <c>i</c>, an element is picked at random from <c>[i, array.Length)</c> and swapped into
/// position <c>i</c>.
/// </summary>
/// <param name="array">The array to shuffle; it is modified.</param>
/// <returns>The same array instance, now shuffled.</returns>
static T[] Shuffle<T>(T[] array)
{
    Random rand = new Random();
    for (int i = 0; i < array.Length; i++)
    {
        int r = i + rand.Next(array.Length - i);
        T temp = array[r];
        array[r] = array[i];
        array[i] = temp;
    }
    return array;
}

int[] randomIndices = Shuffle(Enumerable.Range(0, (int)housingData.Rows.Count).ToArray());

// The first 10% of the shuffled row indices form the test set, the rest the training set.
int testSize = (int)(housingData.Rows.Count * .1);
int[] trainRows = randomIndices[testSize..];
int[] testRows = randomIndices[..testSize];

DataFrame housing_train = housingData[trainRows];
DataFrame housing_test = housingData[testRows];

housing_train.Rows.Count.Display();
housing_test.Rows.Count.Display();

// ---- Cell: ML.NET namespaces ----
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;

// ---- Cell: run the AutoML regression experiment (timed) ----
#!time
var mlContext = new MLContext();

// AutoML tries trainers for at most 15 seconds, predicting "median_house_value".
var experiment = mlContext.Auto().CreateRegressionExperiment(maxExperimentTimeInSeconds: 15);
var result = experiment.Execute(housing_train, labelColumnName:"median_house_value");

// ---- Cell: training time vs. error, one series per trainer ----
// Runs without validation metrics are skipped so the y values can be read safely.
var scatters = result.RunDetails.Where(d => d.ValidationMetrics != null).GroupBy(
    r => r.TrainerName,
    (name, details) => new Scattergl()
    {
        name = name,
        x = details.Select(r => r.RuntimeInSeconds),
        y = details.Select(r => r.ValidationMetrics.MeanAbsoluteError),
        mode = "markers",
        marker = new Marker() { size = 12 }
    });

var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Error");
chart.Display();

// ---- Cell: report the best trainer ----
Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");

// ---- Cell: predicted vs. true values on the held-out rows ----
var testResults = result.BestRun.Model.Transform(housing_test);

// Materialize both columns once: they are read several times below (Max() and plotting).
// NOTE(review): element type assumed to be float for both columns - confirm.
var trueValues = testResults.GetColumn<float>("median_house_value").ToArray();
var predictedValues = testResults.GetColumn<float>("Score").ToArray();

var predictedVsTrue = new Scattergl()
{
    x = trueValues,
    y = predictedValues,
    mode = "markers",
};

// Diagonal reference line from the origin to the largest value on either axis.
var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());

var perfectLine = new Scattergl()
{
    x = new[] {0, maximumValue},
    y = new[] {0, maximumValue},
    mode = "lines",
};

var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });
chart.WithXTitle("True Values");
chart.WithYTitle("Predicted Values");
chart.WithLegend(false);
chart.Width = 600;
chart.Height = 600;
chart.Display();