%%classpath add mvn
tech.tablesaw tablesaw-plot 0.11.4
tech.tablesaw tablesaw-smile 0.11.4
tech.tablesaw tablesaw-beakerx 0.11.4

%import tech.tablesaw.aggregate.*
%import tech.tablesaw.api.*
%import tech.tablesaw.api.ml.clustering.*
%import tech.tablesaw.api.ml.regression.*
%import tech.tablesaw.columns.*

// display Tablesaw tables with BeakerX table display widget
tech.tablesaw.beakerx.TablesawDisplayer.register()

// ---------------------------------------------------------------------------
// Tornadoes: load a CSV and explore it
// ---------------------------------------------------------------------------
tornadoes = Table.read().csv("../resources/data/tornadoes_2014.csv")

// print dataset structure
tornadoes.structure()

// get header names
tornadoes.columnNames()

// display the row and column counts
tornadoes.shape()

// display the first n rows
tornadoes.first(10)

import static tech.tablesaw.api.QueryHelper.column

// keep only the rows of the structure table whose "Column Type" is FLOAT
tornadoes.structure().selectWhere(column("Column Type").isEqualTo("FLOAT"))

// summarize the data in each column
tornadoes.summary()

// Mapping operations: derive a month column from "Date" and add it to the table
def month = tornadoes.dateColumn("Date").month()
tornadoes.addColumn(month)
tornadoes.columnNames()

// Sorting by column (the leading "-" is part of the sort key passed to sortOn)
tornadoes.sortOn("-Fatalities")

// Descriptive statistics
tornadoes.column("Fatalities").summary()

// Performing totals and sub-totals
def injuriesByScale = tornadoes.median("Injuries").by("Scale")
injuriesByScale.setName("Median injuries by Tornado Scale")
injuriesByScale

// Cross Tabs
CrossTab.xCount(tornadoes, tornadoes.categoryColumn("State"), tornadoes.shortColumn("Scale"))

// ---------------------------------------------------------------------------
// Whiskey: K-means clustering
// ---------------------------------------------------------------------------
t = Table.read().csv("../resources/data/whiskey.csv")
t.structure()

// Columns 2..13 are the inputs to every K-means model below. They are collected
// once and kept as a binding variable (no "def"), like "t", so later statements
// can reuse the same list.
featureColumns = (2..13).collect { t.nCol(it) }

// fit a model with k = 5; the spread operator passes the list as the varargs columns
model = new Kmeans(5, *featureColumns)

// print cluster formation
model.clustered(t.column("Distillery"))

// print centroids for each cluster
model.labeledCentroids()

// get the distortion for our model
model.distortion()

// Fit one model for every k in [2, rowCount) and record its distortion so the
// two arrays can be plotted against each other.
def n = t.rowCount()
def kValues = new double[n - 2]
def distortions = new double[n - 2]
for (int k = 2; k < n; k++) {
    kValues[k - 2] = k
    def kmeans = new Kmeans(k, *featureColumns)
    distortions[k - 2] = kmeans.distortion()
}

def linearYPlot = new Plot(title: "K-means clustering demo", xLabel: "K", yLabel: "distortion")
linearYPlot << new Line(x: kValues, y: distortions)

// ---------------------------------------------------------------------------
// Baseball: least-squares regression
// ---------------------------------------------------------------------------
import static tech.tablesaw.api.QueryHelper.column

baseball = Table.read().csv("../resources/data/baseball.csv")

// filter to the data available at the start of the 2002 season
moneyball = baseball.selectWhere(column("year").isLessThan(2002))

wins = moneyball.nCol("W")
year = moneyball.nCol("Year")
playoffs = moneyball.column("Playoffs")

// run difference = runs scored (RS) - runs allowed (RA), stored as column "RD"
runDifference = moneyball.shortColumn("RS").subtract(moneyball.shortColumn("RA"))
moneyball.addColumn(runDifference)
runDifference.setName("RD")

// scatter plot of RD against W; the variable is deliberately not named "Plot"
// so it does not shadow the Plot class
def rdWinsPlot = new Plot(title: "RD x Wins", xLabel: "RD", yLabel: "W")
rdWinsPlot << new Points(x: moneyball.numericColumn("RD").toDoubleArray(), y: moneyball.numericColumn("W").toDoubleArray())

// model wins from run difference, then predict for a run difference of 135
winsModel = LeastSquares.train(wins, runDifference)
def runDiff = new double[1]
runDiff[0] = 135
def expectedWins = winsModel.predict(runDiff)

// model runs scored from OBP and SLG and plot a histogram of the residuals
runsScored2 = LeastSquares.train(moneyball.nCol("RS"), moneyball.nCol("OBP"), moneyball.nCol("SLG"))
new Histogram(xLabel: "X", yLabel: "Proportion", data: Arrays.asList(runsScored2.residuals()), binCount: 25)

// ---------------------------------------------------------------------------
// Quandl: yearly summary of the WIKI/AAPL data set
// ---------------------------------------------------------------------------
%classpath add mvn com.jimmoores quandl-tablesaw 2.0.0
%import com.jimmoores.quandl.*
%import com.jimmoores.quandl.tablesaw.*

TableSawQuandlSession session = TableSawQuandlSession.create()
Table table = session.getDataSet(DataSetRequest.Builder.of("WIKI/AAPL").build())

// Create a new column containing the year
ShortColumn yearColumn = table.dateColumn("Date").year()
yearColumn.setName("Year")
table.addColumn(yearColumn)

// Create max, min and total volume tables aggregated by year
Table summaryMax = table.groupBy("year").max("Adj. Close")
Table summaryMin = table.groupBy("year").min("Adj. Close")
Table summaryVolume = table.groupBy("year").sum("Volume")

// Create a new table from each of these
summary = Table.create("Summary", summaryMax.column(0), summaryMax.column(1), summaryMin.column(1), summaryVolume.column(1))

// Add back a DateColumn to the summary...will be used for plotting.
// Each year is mapped to January 1st of that year.
DateColumn yearDates = new DateColumn("YearDate")
for (yearValue in summary.column('Year')) {
    yearDates.append(java.time.LocalDate.of(yearValue, 1, 1))
}
summary.addColumn(yearDates)
summary

// Plot max adjusted close (points + line) and summed volume (stems on a second Y axis)
years = summary.column('YearDate').collect()
plot = new TimePlot(title: 'Price Chart for AAPL', xLabel: 'Time', yLabel: 'Max [Adj. Close]')
plot << new YAxis(label: 'Volume')
plot << new Points(x: years, y: summary.column('Max [Adj. Close]').collect())
plot << new Line(x: years, y: summary.column('Max [Adj. Close]').collect(), color: Color.blue)
plot << new Stems(x: years, y: summary.column('Sum [Volume]').collect(), yAxis: 'Volume')