%install-location $cwd/swift-install
%install '.package(path: "$cwd/FastaiNotebook_05b_early_stopping")' FastaiNotebook_05b_early_stopping

//export
import Path
import TensorFlow
import Python

import FastaiNotebook_05b_early_stopping

%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

// Load MNIST as 2-D images (flat: false) with batch size 512.
let data = mnistDataBunch(flat: false, bs: 512)

// Grab one batch to sanity-check shapes: xb is [batchSize, side, side].
let firstBatch = data.train.ds.first(where: { _ in true })!
let batchShape = firstBatch.xb.shape
let batchSize = batchShape.dimensions[0]
let exampleSideSize = batchShape.dimensions[1]
assert(exampleSideSize == batchShape.dimensions[2])
print("Batch size: \(batchSize)")
print("Example side size: \(exampleSideSize)")

// NOTE(review): yb's leading dimension is the batch dimension, so this prints
// the batch size, not the number of label classes — confirm the intent.
let classCount = firstBatch.yb.shape.dimensions[0]
print("Class count: \(classCount)")

firstBatch.xb.shape

// export
extension Learner {
    /// Delegate that appends a trailing channel axis to each input batch,
    /// turning [bs, h, w] images into the [bs, h, w, 1] layout conv layers expect.
    public class AddChannel: Delegate {
        public override func batchWillStart(learner: Learner) {
            learner.currentInput = learner.currentInput!.expandingShape(at: -1)
        }
    }

    /// Factory for the channel-adding delegate.
    public func makeAddChannel() -> AddChannel { return AddChannel() }
}

//export
/// A small CNN: a stack of stride-2 convolutions, global average pooling,
/// and a final dense classifier mapping `filters.last!` features to `nOut` logits.
///
/// NOTE(review): every `<...>` generic-argument span was stripped from the
/// extracted source (the text between `<` and the next `>` was eaten);
/// the `<Float>` arguments and the `0..<filters.count` loop below are
/// reconstructed from the surrounding residue — verify against the original.
public struct CnnModel: Layer {
    public var convs: [FAConv2D<Float>]
    public var pool = FAGlobalAvgPool2D<Float>()
    public var linear: FADense<Float>

    public init(channelIn: Int, nOut: Int, filters: [Int]) {
        // Channel sizes for each conv: input channels followed by the filter counts.
        let allFilters = [channelIn] + filters
        convs = Array(0..<filters.count).map { i in
            FAConv2D(allFilters[i], allFilters[i + 1], ks: 3, stride: 2)
        }
        linear = FADense<Float>(filters.last!, nOut)
    }

    @differentiable
    public func callAsFunction(_ input: TF) -> TF {
        return linear(pool(convs(input)))
    }
}

let model = CnnModel(channelIn: 1, nOut: 10, filters: [8, 16, 32, 32])

// Test that data goes through the model as expected.
// Run one batch through the untrained model as a shape/smoke check.
let predictions = model(firstBatch.xb.expandingShape(at: -1))
print(predictions.shape)
print(predictions[0])

// NOTE(review): the `<CnnModel>` generic argument on the return type was
// stripped from the extracted source (`-> SGD`) and is reconstructed here.
func optFunc(_ model: CnnModel) -> SGD<CnnModel> { return SGD(for: model, learningRate: 0.4) }
func modelInit() -> CnnModel { return CnnModel(channelIn: 1, nOut: 10, filters: [8, 16, 32, 32]) }

let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.addDelegates([learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),
                      learner.makeAddChannel()])

// This happens on the GPU (if you have one and it's configured correctly).
// I tried this on a GCE 8vCPU 30GB + Tesla P100:
// - time: ~4.3s
// - nvidia-smi shows ~10% GPU-Util while this is running
time { try! learner.fit(1) }

// This happens on the CPU.
// I tried this on a GCE 8vCPU 30GB + Tesla P100:
// - time: ~6.3s
// - nvidia-smi shows 0% GPU-Util while this is running
time {
    withDevice(.cpu) { try! learner.fit(1) }
}

/// Accumulates the mean and standard deviation of every activation tensor
/// passed to `update`, for inspecting activation statistics over training.
class ActivationStatsHook {
    var means: [Float] = []
    var stds: [Float] = []
    func update(_ act: TF) {
        means.append(act.mean().scalarized())
        stds.append (act.std() .scalarized())
    }
}

// Fresh learner for the hooked run. (Notebook cells redeclare `learner` and
// `recorder`; that is fine cell-by-cell but would be a redeclaration error
// if this export were compiled as a single Swift file.)
let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.addDelegates([learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std),
                      learner.makeAddChannel()])

// NOTE(review): the extracted source was truncated mid-expression here
// (`... = (0..` — everything from the `<` of `0..<` to end of file was eaten).
// Reconstructed minimally as one hook per conv layer; the hooks most likely
// still need to be attached to the model's layers in the (missing) remainder —
// confirm against the original notebook.
var statHooks: [ActivationStatsHook] = (0..<learner.model.convs.count).map { _ in
    ActivationStatsHook()
}