Schema Library Showdown

A simple benchmark on the performance of various schema libraries. This exercises all aspects of the library (as applicable):

  1. instantiating the object equivalent of a dictionary of primitive objects
  2. validating the fields
  3. converting the object to the dictionary + primitive equivalent

As with all benchmarks, it is a decent indictator for your use case, but not a definite one. Benchmark your use case (fork this repo if you want!) and verify yourself before acting on these results.

Here's the most recent results:

library        execution_time (seconds)    iterations
-----------  --------------------------  ------------
class_raw                    0.00889444         10000
class                        0.0183623          10000
attrs                        0.082592           10000
cattrs                       0.150875           10000
pydantic                     0.164629           10000
marshmallow                  0.627408           10000
schematics                   2.65854            10000
In [1]:
import time
import timeit

target = {"name": "Mario Luigi", "age": 32}

TRIALS = 10000


def benchmark(name, func):
    elapsed_time = timeit.timeit(func, number=TRIALS)
    return [name, elapsed_time, TRIALS]

headers = ["library", "execution_time (seconds)", "iterations"]
data = []
In [2]:
import attr
from cattr import structure, unstructure

@attr.s
class PersonAttrs:
    name = attr.ib(validator=[
        attr.validators.instance_of(str)
    ])
    age = attr.ib(validator=[
        attr.validators.instance_of(int)
    ])
    
def attr_benchmark():
    obj = PersonAttrs(**target)
    result = attr.asdict(obj)

def cattr_benchmark():
    obj = structure(target, PersonAttrs)
    result = unstructure(obj)
    
    
data.append(benchmark("attrs",  attr_benchmark))
data.append(benchmark("cattrs",  cattr_benchmark))
In [3]:
class PersonClass:
    def __init__(self, name, age):
        if not isinstance(name, str):
            raise ValueError("name is not a string")
        if not isinstance(age, int):
            raise ValueError("age is not an integer")
        self.name = name
        self.age = age 
        
        
class PersonClassRaw:
    def __init__(self, name, age):
        self.name = name
        self.age = age
        
def to_dict(obj):
    return {
        "name": obj.name,
        "age": obj.age
    }


def class_benchmark():
    obj = PersonClass(**target)
    result = to_dict(obj)
    
    
def class_raw_benchmark():
    obj = PersonClassRaw(**target)
    result = to_dict(obj)

data.append(benchmark("class",  class_benchmark))
data.append(benchmark("class_raw",  class_raw_benchmark))
In [4]:
from schematics.models import Model
from schematics.types import StringType, IntType

class PersonSchematics(Model):
    name = StringType()
    age = IntType()
    



def schematics_benchmark():
    obj = PersonSchematics(target)
    obj.validate()
    result = obj.to_primitive()
    
data.append(benchmark("schematics", schematics_benchmark))
In [5]:
import toastedmarshmallow
from marshmallow import Schema, fields


class PersonMarshmallow(Schema):
    name = fields.Str()
    age = fields.Int()
    
    
marshmallow_schema = PersonMarshmallow()
toasted_marshmallow_schema = PersonMarshmallow()
toasted_marshmallow_schema.jit = toastedmarshmallow.Jit
toasted_marshmallow_cython_schema = PersonMarshmallow()
toasted_marshmallow_cython_schema.jit = toastedmarshmallow.CythonJit
    
def marshmallow_benchmark():
    obj = marshmallow_schema.load(target)
    result = marshmallow_schema.dump(obj.data)
    
data.append(benchmark("marshmallow", marshmallow_benchmark))

def toasted_marshmallow_benchmark():
    obj = marshmallow_schema.load(target)
    result = marshmallow_schema.dump(obj.data)
    
data.append(benchmark("toastedmarshmallow", toasted_marshmallow_benchmark))

def toasted_marshmallow_cython_benchmark():
    obj = marshmallow_schema.load(target)
    result = marshmallow_schema.dump(obj.data)
    
data.append(benchmark("toastedmarshmallow (cython)", toasted_marshmallow_cython_benchmark))
In [6]:
import pydantic

from pydantic import BaseModel

class PersonPydantic(BaseModel):
    name: str = ...
    age: int = ...
        
        
def benchmark_pydantic():
    obj = PersonPydantic(**target)
    reuslt = obj.dict()
        
data.append(benchmark("pydantic", benchmark_pydantic))
In [7]:
import pprint

from tabulate import tabulate
table_data = [headers]
table_data += sorted(data, key=lambda row: row[1])
print(tabulate(table_data, headers="firstrow"))
library                        execution_time (seconds)    iterations
---------------------------  --------------------------  ------------
class_raw                                    0.0058964          10000
class                                        0.00783895         10000
cattrs                                       0.035864           10000
attrs                                        0.0568484          10000
pydantic                                     0.130491           10000
toastedmarshmallow                           0.319275           10000
toastedmarshmallow (cython)                  0.321963           10000
marshmallow                                  0.327428           10000
schematics                                   1.19172            10000
In [ ]: