BentoML makes moving trained ML models to production easy:
BentoML is a framework for serving, managing, and deploying machine learning models. It aims to bridge the gap between Data Science and DevOps, and enables teams to deliver prediction services in a fast, repeatable, and scalable way.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
!pip install -q bentoml tensorflow matplotlib
from __future__ import absolute_import, division, print_function, unicode_literals
import io
# TensorFlow
import tensorflow as tf
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
print(tf.__version__)
# Load the Fashion-MNIST train/test splits through the Keras dataset helper.
fashion_mnist = tf.keras.datasets.fashion_mnist
_train_split, _test_split = fashion_mnist.load_data()
_train_images, train_labels = _train_split
_test_images, test_labels = _test_split

# Human-readable name for each of the ten label indices (index == label id).
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Scale the pixel values by 1/255 before they are fed to the model; the raw
# arrays stay available under the underscore-prefixed names.
test_images = _test_images / 255.0
train_images = _train_images / 255.0
class FashionMnist(tf.keras.Model):
    """Small dense classifier for 28x28 images with a PNG-bytes entry point.

    The network flattens a 28x28 input, applies a 128-unit ReLU layer and
    ends in a 10-way softmax. ``predict_image`` additionally accepts a batch
    of encoded PNG byte strings and decodes them before running the network.
    """

    def __init__(self):
        super().__init__()
        layers = tf.keras.layers
        # The attribute name is kept as ``cnn`` even though the stack is
        # purely dense; renaming it would change the tracked variable paths.
        self.cnn = tf.keras.Sequential([
            layers.Flatten(input_shape=(28, 28)),
            layers.Dense(128, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ])

    @staticmethod
    def image_bytes2tensor(inputs):
        """Decode a 1-D tensor of PNG byte strings into model-ready images.

        Each PNG is decoded as a single-channel uint8 image, cast to float32,
        inverted and scaled via ``(255 - x) / 255`` and reshaped to
        ``[-1, 28, 28]``.
        """
        def _decode_single(png_bytes):
            return tf.io.decode_png(png_bytes, channels=1)

        # map_fn has issues on GPU https://github.com/tensorflow/tensorflow/issues/28007
        with tf.device("cpu:0"):
            decoded = tf.map_fn(_decode_single, inputs, dtype=tf.uint8)

        as_float = tf.cast(decoded, tf.float32)
        inverted = (255.0 - as_float) / 255.0
        return tf.reshape(inverted, [-1, 28, 28])

    @tf.function(input_signature=[tf.TensorSpec(shape=(None,), dtype=tf.string)])
    def predict_image(self, inputs):
        """Run the model on a 1-D batch of PNG byte strings."""
        return self(self.image_bytes2tensor(inputs))

    def call(self, inputs):
        """Forward pass: feed ``inputs`` through the dense stack."""
        return self.cnn(inputs)
# Pick one raw (0-255) test image and print its ground-truth class name.
d_test_img = _test_images[0]
print(class_names[test_labels[0]])

# Show and save the inverted image (255 - pixel). image_bytes2tensor applies
# the matching (255 - x) / 255 inversion when it decodes the PNG again.
plt.imshow(255.0 - d_test_img, cmap='gray')
# Pin vmin/vmax so the saved grey levels map 1:1 to the 0-255 pixel values
# instead of being autoscaled to this particular image's min/max.
plt.imsave("test.png", 255.0 - d_test_img, cmap='gray', vmin=0, vmax=255)

# Read the encoded PNG back as raw bytes; later cells reuse img_bytes.
with open("test.png", "rb") as f:
    img_bytes = f.read()

# Verify the save -> decode round trip. The decoded tensor is scaled to
# [0, 1], so the reference image must be scaled the same way, and the error
# has to be measured as an absolute difference: a signed mean against the raw
# 0-255 pixels is always strongly negative and can never fail the check.
decoded_img = FashionMnist.image_bytes2tensor(tf.constant([img_bytes]))
expected_img = tf.cast(d_test_img, tf.float32) / 255.0
assert tf.reduce_mean(tf.abs(decoded_img - expected_img)) < 0.01
# Build the classifier and configure training: Adam optimizer, sparse
# categorical cross-entropy loss, accuracy reported as the metric.
model = FashionMnist()
compile_options = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

# Train on the scaled training images for 50 epochs.
model.fit(train_images, train_labels, epochs=50)

# Sanity check: run the PNG-bytes entry point on the saved test image and map
# the arg-max class index of each row back to its class name.
predict = model.predict_image(tf.constant([img_bytes]))
klass = tf.argmax(predict, axis=1)
[class_names[c] for c in klass]
And the model predicts a label as expected.
%%writefile tensorflow_fashion_mnist.py
import bentoml
import tensorflow as tf
from bentoml.artifact import TensorflowSavedModelArtifact
from bentoml.adapters import TfTensorInput
# Class name for each of the ten output indices of the packed model.
FASHION_MNIST_CLASSES = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]


# BentoService wrapping the TensorFlow SavedModel artifact named 'model'.
# The env decorator lists the pip packages declared for the service.
@bentoml.env(pip_dependencies=['tensorflow', 'numpy', 'pillow'])
@bentoml.artifacts([TensorflowSavedModelArtifact('model')])
class FashionMnistTensorflow(bentoml.BentoService):

    # Batch API: forwards the input tensor to the artifact's predict_image
    # function and returns one class name per row of its output.
    @bentoml.api(input=TfTensorInput(), batch=True)
    def predict(self, inputs):
        probabilities = self.artifacts.model.predict_image(inputs)
        class_ids = tf.math.argmax(probabilities, axis=1)
        return [FASHION_MNIST_CLASSES[class_id] for class_id in class_ids]
from tensorflow_fashion_mnist import FashionMnistTensorflow
# Create the service instance defined in tensorflow_fashion_mnist.py.
bento_svc = FashionMnistTensorflow()
# Attach the trained Keras model under the artifact name "model", the name
# the service's predict API reads via self.artifacts.model.
bento_svc.pack("model", model)
# Save the packed service; the returned path is reused by the later
# `pip install` and `docker build` cells.
saved_path = bento_svc.save()
bentoml get <BentoService Name>
Lists all saved versions of the given BentoService.
!bentoml get FashionMnistTensorflow
bentoml get <BentoService name>:<bentoService version>
display detailed information of the specific BentoService version
!bentoml get FashionMnistTensorflow:latest
Serve bentoml REST server locally
!bentoml serve FashionMnistTensorflow:latest
import base64
import json
import requests
# Read the PNG saved earlier and base64-encode it so it can travel in JSON.
with open("test.png", "rb") as f:
    img_bytes = f.read()
img_b64 = base64.b64encode(img_bytes).decode()

# Request body: one instance whose bytes are wrapped in a {"b64": ...} object.
headers = {"content-type": "application/json"}
data = json.dumps(
    {"instances": [{"b64": img_b64}]}
)
# Print only the head and tail of the payload; the base64 body is long.
print('Data: {} ... {}'.format(data[:50], data[-52:]))

# POST to the locally served /predict endpoint. The timeout keeps the cell
# from hanging indefinitely when the server is not running or not responding.
json_response = requests.post(
    'http://localhost:5000/predict',
    data=data,
    headers=headers,
    timeout=30,
)
print(json_response)
print(json_response.text)
pip install $SAVED_PATH
also installs a CLI tool for accessing the BentoML service
!pip install -q {saved_path}
!FashionMnistTensorflow --help
!echo '{\"instances\":[{\"b64\":\"'$(base64 test.png)'\"}]}' > test.json
!cat test.json | xargs -I {} FashionMnistTensorflow run predict --input {}
!docker build --quiet -t tensorflow2-fashion-mnist {saved_path}
!docker run -p 5000:5000 tensorflow2-fashion-mnist --workers 1 --enable-microbatch
BentoML supports deployment to multiple cloud provider services, such as AWS Lambda, AWS SageMaker, and Google Cloud Run. You can find the full list and guides on the documentation site at https://docs.bentoml.org/en/latest/deployment/index.html
For this demo, we are going to deploy to AWS Sagemaker
# Build the "<name>:<version>" tag of the saved service; it is passed to
# `bentoml sagemaker deploy` via the -b option in the next cell.
bento_service_tag = f'{bento_svc.name}:{bento_svc.version}'
print(bento_service_tag)
!bentoml sagemaker deploy first-tf-fashion -b {bento_service_tag} --api-name predict --verbose
!bentoml sagemaker get first-tf-fashion
!aws sagemaker-runtime invoke-endpoint --endpoint-name dev-first-tf-fashion --content-type 'application/json' \
--body "{\"instances\":[{\"b64\":\"iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAYAAAByDd+UAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAA2dJREFUSIntlk9L60oYh59Mp43axJoiVj2KboSuXPoJXLkQ/Fx+Ahe6ceFG3CtdWlFcFf9RKlhFatpSbWuaNLHJnIW3OR7ugcu9BzxccGAgZCbv887v/b1JNKWU4hOH+EzYF/AL+OeBYRjysct83wegUqn8HlApRRRFADw+PrK/v4/jOCQSCTRNi/fpug7AwcHB7wEBhHh/9Pj4mGKxyM7Ozt/2NBoNdnd3MU0zvif/CywMQ6SUnJ+fc3NzQy6Xo1KpsLGxgWVZ9Pt9FhYWaLVadLtd5ubmfiT6b2FRFCGlxHEc9vf3UUrR7/d5fX1FKRXPq6srpJRYlsVgMPhn4LD4URTF12EYxlJubW2Ry+WYnp7G8zz6/T65XC6uYzqdRtd1fN+n2+3iOM6vgcPgw+ILIdA0jTAMSSQSAOzt7WHbNtPT0xiGQbvdJpvNMjU1RTKZJAxD3t7e4ni9Xo/b29tfA4egKIoYDAZxAkPYzs4OpVKJ+fl5Wq0W7XYbz/PIZDK8vr6iaRpjY2Mkk0mUUnG8w8ND4INphjbXNA2lFEKIWD6AWq3GwcEBnuextLSE4zj4vk+r1SKVSqFpGq7rxsnpuo4QgnQ6jRCCYrH4DhxK9TH4MKtms0m1WqVcLvP09EQqlWJ8fJx2u0232+Xt7Q3f9xFCUK1WGQwGTExMkEwmEUKglGJ0dJQwDDEMg8vLS+RQqnq9zv39Pb1ej16vh+d53N3d4bouUkpM0ySKIjqdDp7nIaXEdV1GR0fRdZ0gCJidnaXT6eC6LpZl4TgOLy8vpNNpbNvm+fn5XdJCoUCtVkNKSbPZjA0yBDmOg23bKKXwfR/LsoiiCMdxCMOQdDqNYRhkMhkajUaslGVZCCHwPI8gCJBSIo+Ojtje3iafzzMzMxOfJJVKxe9G0zQJggAhRNxvnuehaRpRFGHbNvV6nevra4IgIAxDAAzDwHVddF3HMAympqaQKysrnJ6ecnFx8aOwf50sm82SzWbJZDIEQYBSilarRblcxnVdut0umqZRKpVYXl5mcXGRQqGA7/uxD6SUfPv2DdM031388SfKcRzOzs4ol8ucnJzQbDZ/atqhA7PZLPl8ntXVVdbW1hgZGYnX19fXeXh4YHJyEtM0MU0TKSW6rrO5ufkz8DPG//sD/AX8I8DvdgnOxdB4B1wAAAAASUVORK5CYII=\"}]}" \
output.json && cat output.json
!bentoml sagemaker delete first-tf-fashion --force