%install-location $cwd/swift-install
%install '.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")' Path
%install '.package(url: "https://github.com/saeta/Just", from: "0.7.2")' Just
%install '.package(url: "https://github.com/latenitesoft/NotebookExport", from: "0.5.0")' NotebookExport
Installing packages: .package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1") Path .package(url: "https://github.com/saeta/Just", from: "0.7.2") Just .package(url: "https://github.com/latenitesoft/NotebookExport", from: "0.5.0") NotebookExport With SwiftPM flags: [] Working in: /tmp/tmpqyuwt4mg/swift-install warning: /home/jupyter/swift/usr/bin/swiftc: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swiftc) /home/jupyter/swift/usr/bin/swift: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swift) warning: /home/jupyter/swift/usr/bin/swiftc: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swiftc) /home/jupyter/swift/usr/bin/swift: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swift) warning: /home/jupyter/swift/usr/bin/swiftc: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swiftc) /home/jupyter/swift/usr/bin/swift: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swift) warning: /home/jupyter/swift/usr/bin/swiftc: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swiftc) /home/jupyter/swift/usr/bin/swift: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swift) /home/jupyter/swift/usr/bin/swiftc: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swiftc) /home/jupyter/swift/usr/bin/swiftc: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swiftc)[1/2] Compiling jupyterInstalledPackages 
jupyterInstalledPackages.swift /home/jupyter/swift/usr/bin/swift: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swift) [2/3] Merging module jupyterInstalledPackages /home/jupyter/swift/usr/bin/swift: /home/jupyter/anaconda3/lib/libuuid.so.1: no version information available (required by /home/jupyter/swift/usr/bin/swift) Initializing Swift... Installation complete!
Currently there's a bug in swift-jupyter that requires us to define any custom operators here:
// export
// swift-jupyter can't pick up custom operators declared in imported packages yet,
// so the operators used throughout the notebooks are declared up front here.
// `**`: exponentiation — binds tighter than `*` and groups to the right,
// matching the usual mathematical convention (a ** b ** c == a ** (b ** c)).
precedencegroup ExponentiationPrecedence {
associativity: right
higherThan: MultiplicationPrecedence
}
infix operator ** : ExponentiationPrecedence
// `>|`: custom composition/chaining operator, left-associative.
// NOTE(review): its implementation isn't visible in this file — semantics defined elsewhere.
precedencegroup CompositionPrecedence { associativity: left }
infix operator >| : CompositionPrecedence
//export
import Foundation
import Just
import Path
We will need to gunzip, untar or unzip files we download, so instead of grabbing one library for each, we implement a function that can execute any shell command.
//export
public extension String {
    /// Runs the receiver as an executable path with the given arguments and
    /// returns whatever the process wrote to standard output (decoded as UTF-8,
    /// "" on decoding failure or launch failure).
    /// e.g. `"/bin/ls".shell("-lh")`
    @discardableResult
    func shell(_ args: String...) -> String
    {
        let (task, pipe) = (Process(), Pipe())
        task.executableURL = URL(fileURLWithPath: self)
        (task.arguments, task.standardOutput) = (args, pipe)
        do { try task.run() }
        catch {
            // Bail out here: if the launch failed, nothing will ever close the
            // pipe's write end, so reading to EOF below could block forever.
            print("Unexpected error: \(error).")
            return ""
        }
        // Drain stdout to EOF *before* waiting, so a child producing more than a
        // pipe buffer's worth of output can't deadlock; then reap the child.
        let data = pipe.fileHandleForReading.readDataToEndOfFile()
        task.waitUntilExit()
        return String(data: data, encoding: .utf8) ?? ""
    }
}
print("/bin/ls".shell("-lh"))
total 15M -rw-rw-r-- 1 jupyter jupyter 27K Jul 17 22:52 00_load_data.ipynb -rw-rw-r-- 1 jupyter jupyter 43K Jul 17 22:52 00a_intro_and_float.ipynb -rw-rw-r-- 1 jupyter jupyter 43K Jul 17 22:52 01_matmul.ipynb -rw-rw-r-- 1 jupyter jupyter 30K Jul 17 22:52 01a_fastai_layers.ipynb -rw-rw-r-- 1 jupyter jupyter 50K Jul 17 22:52 02_fully_connected.ipynb -rw-rw-r-- 1 jupyter jupyter 23K Jul 17 22:52 02a_why_sqrt5.ipynb -rw-rw-r-- 1 jupyter jupyter 21K Jul 17 22:52 02b_initializing.ipynb -rw-rw-r-- 1 jupyter jupyter 16K Jul 17 22:52 02c_autodiff.ipynb -rw-rw-r-- 1 jupyter jupyter 29K Jul 17 22:52 03_minibatch_training.ipynb -rw-rw-r-- 1 jupyter jupyter 32K Jul 17 22:52 04_callbacks.ipynb -rw-rw-r-- 1 jupyter jupyter 79K Jul 17 22:52 05_anneal.ipynb -rw-rw-r-- 1 jupyter jupyter 57K Jul 17 22:52 05b_early_stopping.ipynb -rw-rw-r-- 1 jupyter jupyter 132K Jul 17 22:52 06_cuda.ipynb -rw-rw-r-- 1 jupyter jupyter 39K Jul 17 22:52 07_batchnorm.ipynb -rw-rw-r-- 1 jupyter jupyter 15K Jul 17 22:52 07b_batchnorm_lesson.ipynb -rw-rw-r-- 1 jupyter jupyter 830K Jul 17 22:52 08_data_block.ipynb -rw-rw-r-- 1 jupyter jupyter 5.2K Jul 17 22:52 08a_heterogeneous_dictionary.ipynb -rw-rw-r-- 1 jupyter jupyter 772K Jul 17 22:52 08b_data_block_opencv.ipynb -rw-rw-r-- 1 jupyter jupyter 526K Jul 17 22:52 08c_data_block-lightlyfunctional.ipynb -rw-rw-r-- 1 jupyter jupyter 438K Jul 17 22:52 08c_data_block_generic.ipynb -rw-rw-r-- 1 jupyter jupyter 42K Jul 17 22:52 09_optimizer.ipynb -rw-rw-r-- 1 jupyter jupyter 27K Jul 17 22:52 10_mixup_ls.ipynb -rw-rw-r-- 1 jupyter jupyter 433K Jul 17 22:52 11_imagenette.ipynb drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_00_load_data drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_01_matmul drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_01a_fastai_layers drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_02_fully_connected drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_02a_why_sqrt5 drwxrwxr-x 3 
jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_03_minibatch_training drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_04_callbacks drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_05_anneal drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_05b_early_stopping drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_06_cuda drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_07_batchnorm drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_08_data_block drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_08a_heterogeneous_dictionary drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_08c_data_block_generic drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_09_optimizer drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_10_mixup_ls drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 FastaiNotebook_11_imagenette drwxrwxr-x 3 jupyter jupyter 4.0K Jul 17 22:52 Imagenette -rw-rw-r-- 1 jupyter jupyter 7.3K Jul 16 23:28 Memory leak.ipynb drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 Runnable11 drwxrwxr-x 6 jupyter jupyter 4.0K Jul 16 20:46 SwiftCV drwxrwxr-x 5 jupyter jupyter 4.0K Jul 16 20:46 SwiftSox drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 SwiftVips -rw-rw-r-- 1 jupyter jupyter 105K Jul 17 22:52 audio.ipynb -rw-rw-r-- 1 jupyter jupyter 303K Jul 17 22:52 c_interop_examples.ipynb drwxrwxr-x 3 jupyter jupyter 4.0K Jul 16 20:46 datablock -rw-rw-r-- 1 jupyter jupyter 5.1K Jul 16 23:17 memory_leak.swift -rw-rw-r-- 1 jupyter jupyter 780K Jul 17 22:52 opencv_integration_example.ipynb drwxrwxr-x 4 jupyter jupyter 4.0K Jul 16 20:47 swift-install -rw------- 1 jupyter jupyter 9.5M Jul 17 22:50 train-images-idx3-ubyte.gz
To download a file, we use the `Just` library.
//export
public func downloadFile(_ url: String, dest: String? = nil, force: Bool = false) {
    // Default destination: the URL's last path component, in the current directory.
    // (Computed once — the original built the same path twice.)
    let destName = dest ?? (Path.cwd/url.split(separator: "/").last!).string
    let destURL = URL(fileURLWithPath: destName)
    // Skip the download if the file is already present, unless forced.
    // FileManager avoids the force-unwrapped `Path(destName)!` of the original.
    if !force && FileManager.default.fileExists(atPath: destName) { return }
    print("Downloading \(url)...")
    if let cts = Just.get(url).content {
        do { try cts.write(to: destURL) }
        catch { print("Can't write to \(destURL).\n\(error)") }
    } else {
        print("Can't reach \(url)")
    }
}
downloadFile("https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz")
Then we will need to read our data and convert it into a `Tensor`:
//export
import TensorFlow
The following is generic over the element type on the return value. We could define two functions like this:
func loadMNIST(training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<Float> {
func loadMNIST(training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<Int32> {
but that would be boring. So we make loadMNIST take a "type parameter" T
which indicates what sort of element type to load a tensor into.
// NOTE(review): this version is deliberately broken — it motivates the
// ConvertibleFromByte protocol defined below. The unconstrained `T` gives the
// compiler no way to build a `T` from a `UInt8`, so `data.map(T.init)` fails to
// type-check (see the error output that follows). Do not "fix" it here.
func loadMNIST<T>(training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<T> {
// MNIST file naming: {train|t10k}-{images|labels}-idx{1|3}-ubyte
let split = training ? "train" : "t10k"
let kind = labels ? "labels" : "images"
let batch = training ? 60000 : 10000
let shape: TensorShape = labels ? [batch] : (flat ? [batch, 784] : [batch, 28, 28])
// Label files carry an 8-byte header, image files a 16-byte header.
let dropK = labels ? 8 : 16
let baseUrl = "https://storage.googleapis.com/cvdf-datasets/mnist/"
let fname = split + "-" + kind + "-idx\(labels ? 1 : 3)-ubyte"
let file = path/fname
if !file.exists {
downloadFile("\(baseUrl)\(fname).gz", dest:(path/"\(fname).gz").string)
"/bin/gunzip".shell("-fq", (path/"\(fname).gz").string)
}
let data = try! Data(contentsOf: URL(fileURLWithPath: file.string)).dropFirst(dropK)
// These two lines are where compilation fails: T.init(UInt8) doesn't exist for an arbitrary T.
if labels { return Tensor(data.map(T.init)) }
else { return Tensor(data.map(T.init)).reshaped(to: shape)}
}
error: <Cell 9>:15:36: error: cannot invoke 'map' with an argument list of type '(@escaping (Tensor<T>) -> T?)' if labels { return Tensor(data.map(T.init)) } ^ <Cell 9>:15:36: note: expected an argument list of type '((Self.Element) throws -> T)' if labels { return Tensor(data.map(T.init)) } ^ error: <Cell 9>:16:36: error: cannot invoke 'map' with an argument list of type '(@escaping (Tensor<T>) -> T?)' else { return Tensor(data.map(T.init)).reshaped(to: shape)} ^ <Cell 9>:16:36: note: expected an argument list of type '((Self.Element) throws -> T)' else { return Tensor(data.map(T.init)).reshaped(to: shape)} ^
But this doesn't work because S4TF can't put just any type of data inside a `Tensor`. We have to tell it two things about this type: that it is a valid tensor scalar type, and that it can be constructed from a raw byte.
We do this by defining a protocol called ConvertibleFromByte
that inherits from TensorFlowScalar
. That takes care of the first requirement. The second requirement is dealt with by asking for an init
method that takes UInt8
:
//export
// Element types that can live in a Tensor (TensorFlowScalar) AND be built
// directly from a raw MNIST byte (init from UInt8).
protocol ConvertibleFromByte: TensorFlowScalar {
init(_ d:UInt8)
}
Then we need to say that Float
and Int32
conform to that protocol. They already have the right initializer so we don't have to code anything.
//export
// Float and Int32 already provide init(_: UInt8), so conformance needs no code.
extension Float : ConvertibleFromByte {}
extension Int32 : ConvertibleFromByte {}
Lastly, we write a convenience method for all types that conform to the ConvertibleFromByte
protocol, that will convert some raw data to a Tensor
of that type.
//export
extension Data {
    // Interpret the raw bytes as a flat (1-D) tensor of the requested
    // ConvertibleFromByte element type.
    func asTensor<T: ConvertibleFromByte>() -> Tensor<T> {
        let scalars = self.map { T($0) }
        return Tensor(scalars)
    }
}
And now we can write a generic `loadMNIST` function that can return tensors of `Float` or `Int32`.
//export
func loadMNIST<T: ConvertibleFromByte>
            (training: Bool, labels: Bool, path: Path, flat: Bool) -> Tensor<T> {
    // MNIST file naming convention: {train|t10k}-{images|labels}-idx{1|3}-ubyte
    let prefix = training ? "train" : "t10k"
    let suffix = labels ? "labels" : "images"
    let itemCount = training ? 60000 : 10000
    // Label files carry an 8-byte header, image files a 16-byte one.
    let headerSize = labels ? 8 : 16
    let tensorShape: TensorShape = labels ? [itemCount] : (flat ? [itemCount, 784] : [itemCount, 28, 28])
    let fname = "\(prefix)-\(suffix)-idx\(labels ? 1 : 3)-ubyte"
    let file = path/fname
    // Fetch and decompress on first use only.
    if !file.exists {
        let gzPath = (path/"\(fname).gz").string
        downloadFile("https://storage.googleapis.com/cvdf-datasets/mnist/\(fname).gz", dest: gzPath)
        "/bin/gunzip".shell("-fq", gzPath)
    }
    let raw = try! Data(contentsOf: URL(fileURLWithPath: file.string)).dropFirst(headerSize)
    let flatTensor: Tensor<T> = raw.asTensor()
    // Labels stay 1-D; images are reshaped to [N, 784] or [N, 28, 28].
    return labels ? flatTensor : flatTensor.reshaped(to: tensorShape)
}
public func loadMNIST(path:Path, flat:Bool = false)
        -> (Tensor<Float>, Tensor<Int32>, Tensor<Float>, Tensor<Int32>) {
    // Make sure the cache directory exists (`.p` == mkdir -p).
    try! path.mkdir(.p)
    // Images are scaled from bytes to [0, 1]; labels stay as integers.
    let xTrain: Tensor<Float> = loadMNIST(training: true,  labels: false, path: path, flat: flat) / 255.0
    let yTrain: Tensor<Int32> = loadMNIST(training: true,  labels: true,  path: path, flat: flat)
    let xValid: Tensor<Float> = loadMNIST(training: false, labels: false, path: path, flat: flat) / 255.0
    let yValid: Tensor<Int32> = loadMNIST(training: false, labels: true,  path: path, flat: flat)
    return (xTrain, yTrain, xValid, yValid)
}
We will store mnist in this folder so that we don't download it each time we run a notebook:
//export
// Shared on-disk cache for MNIST so notebooks don't re-download it every run.
public let mnistPath = Path.home/".fastai"/"data"/"mnist_tst"
The default returns mnist in the image format:
let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath)
xTrain.shape
▿ [60000, 28, 28] ▿ dimensions : 3 elements - 0 : 60000 - 1 : 28 - 2 : 28
We can also ask for it in its flattened form:
let (xTrain, yTrain, xValid, yValid) = loadMNIST(path: mnistPath, flat: true)
xTrain.shape
▿ [60000, 784] ▿ dimensions : 2 elements - 0 : 60000 - 1 : 784
Here is our time function:
//export
import Dispatch
// ⏰ Time how long the given closure takes to run, optionally averaging
// across several repetitions. Prints average/min/max in milliseconds.
public func time(repeating: Int = 1, _ f: () -> ()) {
    guard repeating > 0 else { return }
    // One unmeasured warmup call when averaging over multiple runs.
    if repeating > 1 { f() }
    var samples = [Double]()
    for _ in 0..<repeating {
        let begin = DispatchTime.now()
        f()
        let elapsedNs = Double(DispatchTime.now().uptimeNanoseconds - begin.uptimeNanoseconds)
        samples.append(elapsedNs / 1e6)  // ns -> ms
    }
    let mean = samples.reduce(0.0, +) / Double(samples.count)
    print("average: \(mean) ms, " +
          "min: \(samples.reduce(samples[0], min)) ms, " +
          "max: \(samples.reduce(samples[0], max)) ms")
}
time(repeating: 10) {
_ = loadMNIST(training: false, labels: false, path: mnistPath, flat: false) as Tensor<Float>
}
average: 485.15768909999997 ms, min: 474.723727 ms, max: 501.671377 ms
Searching for a specific pattern with a regular expression isn't easy in swift. The good thing is that with an extension, we can make it easy for us!
// export
public extension String {
    // Range of the first match of the regular expression `pat`, or nil if none.
    func findFirst(pat: String) -> Range<String.Index>? {
        return self.range(of: pat, options: .regularExpression)
    }
    // True when the receiver contains at least one match of `pat`.
    func hasMatch(pat: String) -> Bool {
        return self.findFirst(pat: pat) != nil
    }
}
The foundation library isn't always the most convenient to use... This is how the first line of the following cell is written in it.
// NOTE(review): illustration only — assumes `fname` is a path string defined in
// an earlier cell. It shows how verbose pure-Foundation path manipulation is,
// compared with the Path-based version in notebookToScript below.
let url_fname = URL(fileURLWithPath: fname)
let last = fname.lastPathComponent
let out_fname = (url_fname.deletingLastPathComponent().appendingPathComponent("FastaiNotebooks", isDirectory: true)
.appendingPathComponent("Sources", isDirectory: true)
.appendingPathComponent("FastaiNotebooks", isDirectory: true).appendingPathComponent(last)
.deletingPathExtension().appendingPathExtension("swift"))
This function parses the underlying json behind a notebook to keep the code in the cells marked with //export
.
//export
/// Parses the notebook's underlying JSON and writes the source of every cell
/// whose first line is a `//export` marker into
/// `<notebook dir>/FastaiNotebooks/Sources/FastaiNotebooks/<name>.swift`.
public func notebookToScript(fname: Path){
    let newname = fname.basename(dropExtension: true)+".swift"
    let url = fname.parent/"FastaiNotebooks/Sources/FastaiNotebooks"/newname
    do {
        let data = try Data(contentsOf: fname.url)
        let jsonData = try JSONSerialization.jsonObject(with: data, options: .allowFragments) as! [String: Any]
        let cells = jsonData["cells"] as! [[String:Any]]
        var module = """
/*
THIS FILE WAS AUTOGENERATED! DO NOT EDIT!
file to edit: \(fname.lastPathComponent)
*/
"""
        for cell in cells {
            // Keep only cells whose first source line is exactly "//export"
            // (optional surrounding whitespace), dropping that marker line.
            if let source = cell["source"] as? [String], !source.isEmpty,
               source[0].hasMatch(pat: #"^\s*//\s*export\s*$"#) {
                module.append("\n" + source[1...].joined() + "\n")
            }
        }
        try module.write(to: url, encoding: .utf8)
    } catch {
        // The original blamed "can't read" for ANY failure (parse and write
        // included) and discarded the error; report what actually went wrong.
        print("Failed to export \(fname): \(error)")
    }
}
And this will do all the notebooks in a given folder.
// export
/// Runs notebookToScript on every notebook in `path` named like "NN_*.ipynb".
public func exportNotebooks(_ path: Path) {
    // do/catch instead of the original `try!`, so a missing or unreadable
    // directory reports an error rather than crashing the process.
    do {
        for entry in try path.ls()
        where entry.kind == Entry.Kind.file &&
              entry.path.basename().hasMatch(pat: #"^\d*_.*ipynb$"#) {
            print("Converting \(entry)")
            notebookToScript(fname: entry.path)
        }
    } catch {
        print("Can't list the contents of \(path): \(error)")
    }
}
notebookToScript(fname: Path.cwd/"00_load_data.ipynb")
Can't read the content of /home/jupyter/notebooks/swift/00_load_data.ipynb
But now that we implemented it from scratch we're allowed to use it as a package ;). NotebookExport has been written by pcuenq and will make our lives easier.
import NotebookExport
// Use pcuenq's NotebookExport package to export this notebook as the
// "FastaiNotebook_00_load_data" package (prints "success" on completion).
let exporter = NotebookExport(Path.cwd/"00_load_data.ipynb")
print(exporter.export(usingPrefix: "FastaiNotebook_"))
success