%install-location $cwd/swift-install
%install '.package(path: "$cwd/FastaiNotebook_08a_heterogeneous_dictionary")' FastaiNotebook_08a_heterogeneous_dictionary

// export
import Path
import TensorFlow
import FastaiNotebook_08a_heterogeneous_dictionary

%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

let path = downloadImagenette()
let il = ItemList(fromFolder: path, extensions: ["jpeg", "jpg"])
let sd = SplitData(il, fromFunc: { grandParentSplitter(fName: $0, valid: "val") })
var procLabel = CategoryProcessor()
let sld = makeLabeledData(sd, fromFunc: parentLabeler, procLabel: &procLabel)
let rawData = sld.toDataBunch(itemToTensor: pathsToTensor, labelToTensor: intsToTensor)
let data = transformData(rawData, tfmItem: { openAndResize(fname: $0, size: 128) })

func modelInit() -> CNNModel { return CNNModel(channelIn: 3, nOut: 10, filters: [64, 64, 128, 256]) }

//export
public struct HyperParams {
    public static let lr = "learningRate"
}

//export
public protocol StatDelegate {
    var name: String { get }
    var defaultHPs: [String: Float] { get }
    func update(_ state: inout [String: TF], p: TF, 𝛁p: TF, hps: inout [String: Float])
}

public protocol StepDelegate {
    var defaultHPs: [String: Float] { get }
    func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float])
}

//export
public func mergeDicts(_ dicts: inout [[String: Float]], with newDict: [String: Float]) {
    for i in dicts.indices { dicts[i].merge(newDict) { (_, new) in new } }
}

public func mergeDicts(_ dicts: inout [[String: Float]], with newDicts: [[String: Float]]) {
    for i in dicts.indices { dicts[i].merge(newDicts[i]) { (_, new) in new } }
}

//export
extension Dictionary where Value == Int {
    // Maps each element of the arrays to the index of the array it belongs to.
    public init(mapFromArrays arrays: [[Key]]) {
        self.init(uniqueKeysWithValues: arrays.enumerated().flatMap { i, arr in arr.map { ($0, i) } })
    }
}

extension Dictionary {
    public init(constant: Value, keys: [Key]) {
        self.init(uniqueKeysWithValues: keys.map { ($0, constant) })
    }
}

//export
public func initState<Model: Layer>(for model: Model, names: [String])
    -> [WritableKeyPath<Model.AllDifferentiableVariables, TF>: [String: TF]] {
    return [WritableKeyPath<Model.AllDifferentiableVariables, TF>: [String: TF]](
        constant: [String: TF](constant: TF(0), keys: names),
        keys: model.variables.keyPaths)
}
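// A quick sanity check of the helpers above (this demo cell is mine, not from the
// original notebook): `mapFromArrays` inverts groups into an element-to-group-index
// lookup, and `mergeDicts` overlays new values on top of defaults (the new value wins).
let groupIndex: [String: Int] = Dictionary(mapFromArrays: [["a", "b"], ["c"]])
// groupIndex == ["a": 0, "b": 0, "c": 1]
var hpDefaults: [[String: Float]] = [[HyperParams.lr: 3e-3]]
mergeDicts(&hpDefaults, with: [HyperParams.lr: 0.1])
// hpDefaults == [[HyperParams.lr: 0.1]]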
//export
public class StatefulOptimizer<Model: Layer>
    where Model.AllDifferentiableVariables == Model.TangentVector {
    public typealias ModelKeyPath = WritableKeyPath<Model.AllDifferentiableVariables, TF>
    public typealias SplitDict = [ModelKeyPath: Int]
    public var hpGroups: [[String: Float]]
    public var splitDict: SplitDict
    public var states: [ModelKeyPath: [String: TF]]
    public var stats: [StatDelegate]
    public var steppers: [StepDelegate]
    public init(
        for model: __shared Model,
        steppers: [StepDelegate],
        stats: [StatDelegate],
        hpGroups: [[String: Float]],
        splitArray: [[ModelKeyPath]]
    ) {
        self.hpGroups = Array(repeating: [:], count: hpGroups.count)
        (self.steppers, self.stats) = (steppers, stats)
        self.splitDict = SplitDict(mapFromArrays: splitArray)
        states = [:]
        steppers.forEach { mergeDicts(&self.hpGroups, with: $0.defaultHPs) }
        stats.forEach { mergeDicts(&self.hpGroups, with: $0.defaultHPs) }
        states = initState(for: model, names: stats.map { $0.name })
        mergeDicts(&self.hpGroups, with: hpGroups)
    }
    public func update(
        _ variables: inout Model.AllDifferentiableVariables,
        along direction: Model.TangentVector
    ) {
        for kp in variables.keyPaths {
            var 𝛁p = direction[keyPath: kp]
            var hps = hpGroups[splitDict[kp]!]
            // Stats update their running state from the gradient, then steppers use
            // that state to modify the parameter in place.
            stats.forEach { $0.update(&states[kp]!, p: variables[keyPath: kp], 𝛁p: 𝛁p, hps: &hps) }
            steppers.forEach { $0.update(&variables[keyPath: kp], 𝛁p: &𝛁p, state: states[kp]!, hps: &hps) }
            hpGroups[splitDict[kp]!] = hps
        }
    }
}

//export
extension StatefulOptimizer: Optimizer {
    public var learningRate: Float {
        get { return hpGroups.last![HyperParams.lr]! }
        set { for i in hpGroups.indices { self.hpGroups[i][HyperParams.lr] = newValue } }
    }
    // For discriminative learning rates
    public var learningRates: [Float] {
        get { return hpGroups.map { $0[HyperParams.lr]! } }
        set { for i in hpGroups.indices { self.hpGroups[i][HyperParams.lr] = newValue[i] } }
    }
}

//export
extension StatefulOptimizer {
    public convenience init(for model: __shared Model,
                            steppers: [StepDelegate],
                            stats: [StatDelegate],
                            hps: [String: Float]) {
        self.init(for: model,
                  steppers: steppers,
                  stats: stats,
                  hpGroups: [hps],
                  splitArray: [model.variables.keyPaths])
    }
}

//export
public struct SGDStep: StepDelegate {
    public var defaultHPs: [String: Float] { return [HyperParams.lr: 3e-3] }
    public init() {}
    public func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float]) {
        p -= 𝛁p * hps[HyperParams.lr]!
    }
}

var hps: [String: Float] = [HyperParams.lr: 0.01]
func optFunc(_ model: CNNModel) -> StatefulOptimizer<CNNModel> {
    return StatefulOptimizer(for: model, steppers: [SGDStep()], stats: [], hps: hps)
}
var learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
var recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(1)

//export
public extension HyperParams {
    static let wd = "weightDecay"
}

public struct WeightDecay: StepDelegate {
    public var defaultHPs: [String: Float] { return [HyperParams.wd: 0] }
    public init() {}
    public func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float]) {
        p *= 1 - hps[HyperParams.lr]! * hps[HyperParams.wd]!
    }
}

//export
public struct L2Regularization: StepDelegate {
    public var defaultHPs: [String: Float] { return [HyperParams.wd: 0] }
    public init() {}
    public func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float]) {
        𝛁p += hps[HyperParams.wd]! * p
    }
}

//export
// Expandable struct to get tab completion and typo-proofing for state variable names.
public struct StateKeys {
    public static let avgGrad = "averageGrad"
}

//export
public extension HyperParams {
    static let mom = "momentum"
    static let momDamp = "dampening"
}

public struct AverageGrad: StatDelegate {
    public var defaultHPs: [String: Float] { return [HyperParams.mom: 0.9] }
    public let dampened: Bool
    public init(dampened: Bool = false) { self.dampened = dampened }
    public var name: String { return StateKeys.avgGrad }
    public func update(_ state: inout [String: TF], p: TF, 𝛁p: TF, hps: inout [String: Float]) {
        state[StateKeys.avgGrad]! *= hps[HyperParams.mom]!
        hps[HyperParams.momDamp] = 1.0 - (dampened ? hps[HyperParams.mom]! : 0.0)
        state[StateKeys.avgGrad]! += hps[HyperParams.momDamp]! * 𝛁p
    }
}

//export
public struct MomentumStep: StepDelegate {
    public var defaultHPs: [String: Float] = [:]
    public init() {}
    public func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float]) {
        p -= state[StateKeys.avgGrad]! * hps[HyperParams.lr]!
    }
}
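// Putting the delegates together (this demo cell is mine, not from the original
// notebook): AverageGrad followed by MomentumStep implements the classic rule
//     avgGrad ← mom·avgGrad + damp·𝛁p   (damp = 1-mom if dampened, else 1)
//     p       ← p - lr·avgGrad
// on a toy parameter, without any Learner machinery:
var toyP = TF([1.0, 2.0])
var toyG = TF([0.5, 0.5])
var toyState: [String: TF] = [StateKeys.avgGrad: TF(0)]
var toyHPs: [String: Float] = [HyperParams.lr: 0.1, HyperParams.mom: 0.9]
AverageGrad().update(&toyState, p: toyP, 𝛁p: toyG, hps: &toyHPs)
MomentumStep().update(&toyP, 𝛁p: &toyG, state: toyState, hps: &toyHPs)
// avgGrad started at zero, so avgGrad == 0.5 and toyP == [0.95, 1.95].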
let hps: [String: Float] = [HyperParams.lr: 0.01]
func optFunc(_ model: CNNModel) -> StatefulOptimizer<CNNModel> {
    return StatefulOptimizer(for: model, steppers: [MomentumStep()], stats: [AverageGrad()], hps: hps)
}
var learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
var recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(1)

learner.opt.hpGroups[0]

//export
public extension HyperParams {
    static let ²mom = "momentumSquares"
    static let ²momDamp = "dampeningSquares"
}

public extension StateKeys {
    static let avgSqr = "averageSquaredGrad"
}

public struct AverageSquaredGrad: StatDelegate {
    let dampened: Bool
    public init(dampened: Bool = true) { self.dampened = dampened }
    public var name: String { return StateKeys.avgSqr }
    public var defaultHPs: [String: Float] { return [HyperParams.²mom: 0.99] }
    public func update(_ state: inout [String: TF], p: TF, 𝛁p: TF, hps: inout [String: Float]) {
        state[StateKeys.avgSqr]! *= hps[HyperParams.²mom]!
        hps[HyperParams.²momDamp] = 1.0 - (dampened ? hps[HyperParams.²mom]! : 0.0)
        state[StateKeys.avgSqr]! += hps[HyperParams.²momDamp]! * 𝛁p.squared()
    }
}

//export
public extension StateKeys {
    static let step = "stepCount"
}

public struct StepCount: StatDelegate {
    public var name: String { return StateKeys.step }
    public var defaultHPs: [String: Float] = [:]
    public init() {}
    public func update(_ state: inout [String: TF], p: TF, 𝛁p: TF, hps: inout [String: Float]) {
        state[StateKeys.step]! += 1.0
    }
}

//export
//public struct Epsilon: HetDictKey { public static var defaultValue: Float = 1e-5 }
public extension HyperParams {
    static let eps = "epsilon"
}

//export
public struct AdamStep: StepDelegate {
    public var defaultHPs: [String: Float] { return [HyperParams.eps: 1e-5] }
    public init() {}
    public func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float]) {
        let step = state[StateKeys.step]!
        let (mom, damp) = (hps[HyperParams.mom]!, hps[HyperParams.momDamp]!)
        let debias1 = damp * (1 - pow(mom, step)) / (1 - mom)
        let num = debias1 * state[StateKeys.avgGrad]!
        let (²mom, ²damp) = (hps[HyperParams.²mom]!, hps[HyperParams.²momDamp]!)
        let debias2 = ²damp * (1 - pow(²mom, step)) / (1 - ²mom)
        let denom = sqrt(state[StateKeys.avgSqr]! / debias2) + hps[HyperParams.eps]!
        p -= hps[HyperParams.lr]! * num / denom
    }
}

func optFunc(_ model: CNNModel) -> StatefulOptimizer<CNNModel> {
    return StatefulOptimizer(
        for: model,
        steppers: [AdamStep()],
        stats: [AverageGrad(dampened: true), AverageSquaredGrad(), StepCount()],
        hps: [HyperParams.lr: 1e-3])
}
let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.fit(1)

learner.opt.hpGroups[0]

public struct LambStep: StepDelegate {
    public var defaultHPs: [String: Float] { return [HyperParams.eps: 1e-6, HyperParams.wd: 0.0] }
    public func update(_ p: inout TF, 𝛁p: inout TF, state: [String: TF], hps: inout [String: Float]) {
        let stepCount = state[StateKeys.step]!
        let (mom, damp) = (hps[HyperParams.mom]!, hps[HyperParams.momDamp]!)
        let debias1 = damp * (1 - pow(mom, stepCount)) / (1 - mom)
        let num = debias1 * state[StateKeys.avgGrad]!
        let (²mom, ²damp) = (hps[HyperParams.²mom]!, hps[HyperParams.²momDamp]!)
        let debias2 = ²damp * (1 - pow(²mom, stepCount)) / (1 - ²mom)
        let denom = sqrt(state[StateKeys.avgSqr]! / debias2) + hps[HyperParams.eps]!
        let step = num / denom + hps[HyperParams.wd]! * p
        let r1 = sqrt((p * p).mean())
        let r2 = sqrt((step * step).mean())
        let factor = min(r1 / r2, Float(10.0))
        p -= hps[HyperParams.lr]! * factor * step
    }
}
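// LAMB rescales the Adam-style step by a per-tensor "trust ratio" (parameter RMS
// over step RMS), so layers with larger weights can take proportionally larger
// steps. A hedged usage sketch (the name `lambOptFunc` is mine); it reuses the
// same stats as Adam:
func lambOptFunc(_ model: CNNModel) -> StatefulOptimizer<CNNModel> {
    return StatefulOptimizer(
        for: model,
        steppers: [LambStep()],
        stats: [AverageGrad(dampened: true), AverageSquaredGrad(), StepCount()],
        hps: [HyperParams.lr: 1e-3])
}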
// export
public func sgdOpt<Model: Layer>(lr: Float, mom: Float = 0.9, wd: Float = 0.0, dampening: Bool = false
) -> ((Model) -> StatefulOptimizer<Model>)
    where Model.AllDifferentiableVariables == Model.TangentVector {
    var steppers: [StepDelegate] = (mom != 0) ? [MomentumStep()] : [SGDStep()]
    if wd != 0 { steppers.append(WeightDecay()) }
    let stats: [StatDelegate] = (mom != 0) ? [AverageGrad(dampened: dampening)] : []
    var hps: [String: Float] = [HyperParams.lr: lr]
    if mom != 0 { hps[HyperParams.mom] = mom }
    if wd != 0 { hps[HyperParams.wd] = wd }
    return { model in
        return StatefulOptimizer(for: model, steppers: steppers, stats: stats, hps: hps)
    }
}

// export
public func adamOpt<Model: Layer>(lr: Float, mom: Float = 0.9, beta: Float = 0.99, wd: Float = 0.0, eps: Float = 1e-5
) -> ((Model) -> StatefulOptimizer<Model>)
    where Model.AllDifferentiableVariables == Model.TangentVector {
    var steppers: [StepDelegate] = [AdamStep()]
    if wd != 0 { steppers.append(WeightDecay()) }
    let stats: [StatDelegate] = [AverageGrad(dampened: true), AverageSquaredGrad(), StepCount()]
    var hps: [String: Float] = [HyperParams.lr: lr]
    hps[HyperParams.mom] = mom
    hps[HyperParams.²mom] = beta
    hps[HyperParams.eps] = eps
    if wd != 0 { hps[HyperParams.wd] = wd }
    return { model in
        return StatefulOptimizer(for: model, steppers: steppers, stats: stats, hps: hps)
    }
}

// export
public extension StatefulOptimizer {
    func setParam(_ hp: String, _ val: Float) {
        for i in 0..<hpGroups.count { hpGroups[i][hp] = val }
    }
}

// export
extension Learner where Opt.Scalar: BinaryFloatingPoint,
                        Opt.Model.AllDifferentiableVariables == Opt.Model.TangentVector {
    public class ParamScheduler: Delegate {
        public typealias ScheduleFunc = (Float) -> Float

        // A learning rate schedule from step to float.
        public var scheduler: ScheduleFunc
        public let hp: String

        public init(scheduler: @escaping (Float) -> Float, hp: String) {
            (self.scheduler, self.hp) = (scheduler, hp)
        }

        override public func batchWillStart(learner: Learner) {
            let val = scheduler(learner.pctEpochs / Float(learner.epochCount))
            (learner.opt as! StatefulOptimizer<Opt.Model>).setParam(hp, val)
        }
    }

    public func makeParamScheduler(_ scheduler: @escaping (Float) -> Float, hp: String) -> ParamScheduler {
        return ParamScheduler(scheduler: scheduler, hp: hp)
    }
}
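// A hedged example (this cell and the constant 1e-3 are mine, not the notebook's):
// build a delegate that linearly decays the learning rate to zero over training.
// The argument passed to the schedule is the fraction of training completed, as
// computed in `batchWillStart` above.
let lrDecay = learner.makeParamScheduler({ pct in 1e-3 * (1 - pct) }, hp: HyperParams.lr)
// Attach it with `learner.delegates.append(lrDecay)` before calling `learner.fit`.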
// export
public func oneCycleSchedulers(_ lrMax: Float, pctStart: Float = 0.25, divStart: Float = 10,
                               divEnd: Float = 1e5, moms: (Float, Float, Float) = (0.95, 0.85, 0.95)
) -> ((Float) -> Float, (Float) -> Float) {
    let lrSched = combineSchedules(
        pcts: [pctStart, 1 - pctStart],
        schedules: [makeAnnealer(start: lrMax / divStart, end: lrMax, schedule: cosineSchedule),
                    makeAnnealer(start: lrMax, end: lrMax / divEnd, schedule: cosineSchedule)])
    let momSched = combineSchedules(
        pcts: [pctStart, 1 - pctStart],
        schedules: [makeAnnealer(start: moms.0, end: moms.1, schedule: cosineSchedule),
                    makeAnnealer(start: moms.1, end: moms.2, schedule: cosineSchedule)])
    return (lrSched, momSched)
}

// export
extension Learner where Opt.Scalar: BinaryFloatingPoint,
                        Opt.Model.AllDifferentiableVariables == Opt.Model.TangentVector {
    public func addOneCycleDelegates(_ lrMax: Float, pctStart: Float = 0.25, divStart: Float = 10,
                                     divEnd: Float = 1e5, moms: (Float, Float, Float) = (0.95, 0.85, 0.95)) {
        let scheds = oneCycleSchedulers(lrMax, pctStart: pctStart, divStart: divStart, divEnd: divEnd, moms: moms)
        addDelegates([makeParamScheduler(scheds.0, hp: HyperParams.lr),
                      makeParamScheduler(scheds.1, hp: HyperParams.mom)])
    }
}

let optFunc: (CNNModel) -> StatefulOptimizer<CNNModel> = adamOpt(lr: 1e-3, mom: 0.9, beta: 0.99, wd: 1e-2, eps: 1e-6)
let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))
learner.addOneCycleDelegates(1e-3)
learner.fit(1)

recorder.plotLRs()

func modelInit() -> CNNModel { return CNNModel(channelIn: 3, nOut: 10, filters: [64, 64, 128, 256]) }

var model = modelInit()
let splitArray = [Array(model.variables.keyPaths[0..<9]), Array(model.variables.keyPaths[9...])]
let hpGroups: [[String: Float]] = [[HyperParams.lr: 0], [HyperParams.lr: 0.1]]
func optFunc(_ model: CNNModel) -> StatefulOptimizer<CNNModel> {
    return StatefulOptimizer(for: model, steppers: [SGDStep()], stats: [],
                             hpGroups: hpGroups, splitArray: splitArray)
}
let learner = Learner(data: data, lossFunc: softmaxCrossEntropy, optFunc: optFunc, modelInit: modelInit)
let recorder = learner.makeDefaultDelegates(metrics: [accuracy])
learner.delegates.append(learner.makeNormalize(mean: mnistStats.mean, std: mnistStats.std))

learner.model.convs[0].norm.scale
learner.model.convs[3].norm.scale

learner.fit(1)

learner.model.convs[0].norm.scale
learner.model.convs[3].norm.scale

public func parameterKeyPaths<M1, M2>(
    _ model: M1,
    _ kp: WritableKeyPath<M1.AllDifferentiableVariables, M2.AllDifferentiableVariables>,
    _ layer: M2
) -> [WritableKeyPath<M1.AllDifferentiableVariables, TF>] where M1: Layer, M2: Layer {
    return model.variables[keyPath: kp].keyPaths.map { kp.appending(path: $0) }
}

let kp = \CNNModel.AllDifferentiableVariables.convs
let conv = model.convs
let bodyKeyPaths = parameterKeyPaths(model, kp, conv)
let splitArray = [bodyKeyPaths, model.variables.keyPaths.filter { !bodyKeyPaths.contains($0) }]
splitArray.map { $0.count }

let x = [1, 2, 3]
let y = [4, 5, 6]
zip(x, y).map { print($0, $1) }

let deepBody = (0..<4).map { parameterKeyPaths(
    model, \CNNModel.AllDifferentiableVariables.convs.base[$0], model.convs[$0]
) }.reduce([], +)
let upperBody = (4..<7).map { parameterKeyPaths(
    model, \CNNModel.AllDifferentiableVariables.convs.base[$0], model.convs[$0]
) }.reduce([], +)
let splitArray = [deepBody, upperBody, model.variables.keyPaths.filter { !bodyKeyPaths.contains($0) }]
splitArray.map { $0.count }
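// Hedged illustration (the names `discriminativeHPs` and `discLROptFunc` are mine):
// with the three-way split above, passing one hyperparameter dictionary per group
// freezes the deep body and trains the upper body and head at different rates.
let discriminativeHPs: [[String: Float]] = [
    [HyperParams.lr: 0],     // deep body: frozen
    [HyperParams.lr: 1e-3],  // upper body: small steps
    [HyperParams.lr: 1e-2],  // head (everything else): larger steps
]
func discLROptFunc(_ model: CNNModel) -> StatefulOptimizer<CNNModel> {
    return StatefulOptimizer(for: model, steppers: [SGDStep()], stats: [],
                             hpGroups: discriminativeHPs, splitArray: splitArray)
}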
let bns = model.recursivelyAllWritableKeyPaths(to: FABatchNorm<Float>.self).map { model[keyPath: $0] }
let bnKeyPaths = model.variables.recursivelyAllWritableKeyPaths(
    to: FABatchNorm<Float>.AllDifferentiableVariables.self)
let bnParameters = zip(bnKeyPaths, bns).map { parameterKeyPaths(model, $0, $1) }.reduce([], +)
bnParameters.count

import NotebookExport
let exporter = NotebookExport(Path.cwd/"09_optimizer.ipynb")
print(exporter.export(usingPrefix: "FastaiNotebook_"))