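A simple benchmark of the performance of various schema libraries. Each benchmark exercises the same aspects of a library (as applicable): building an object from a plain dictionary, validating its fields, and serializing the object back to a dictionary.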
As with all benchmarks, it is a decent indicator for your use case, but not a definitive one. Benchmark your own use case (fork this repo if you want!) and verify the results yourself before acting on them.
Here are the most recent results:
library execution_time (seconds) iterations
----------- -------------------------- ------------
class_raw 0.00889444 10000
class 0.0183623 10000
attrs 0.082592 10000
cattrs 0.150875 10000
pydantic 0.164629 10000
marshmallow 0.627408 10000
schematics 2.65854 10000
import time
import timeit
# Input payload that every benchmark loads into its schema/model type.
target = dict(name="Mario Luigi", age=32)

# Number of times each benchmark callable is executed by ``benchmark``.
TRIALS = 10_000
def benchmark(name, func, trials=None):
    """Time ``func`` with ``timeit`` and return one row for the results table.

    Args:
        name: Label stored in the first column of the returned row.
        func: Zero-argument callable to time.
        trials: How many times ``func`` is called. When omitted (``None``)
            the module-level ``TRIALS`` constant is used, which keeps
            existing two-argument calls working unchanged.

    Returns:
        A list ``[name, elapsed_time, trials]`` where ``elapsed_time`` is the
        total number of seconds ``timeit.timeit`` reported for all calls.
    """
    if trials is None:
        # Looked up at call time so the default follows the module constant.
        trials = TRIALS
    elapsed_time = timeit.timeit(func, number=trials)
    return [name, elapsed_time, trials]
# Column titles for the printed results table; they match the layout of the
# rows returned by ``benchmark``.
headers = [
    "library",
    "execution_time (seconds)",
    "iterations",
]

# Accumulates one row per benchmarked library.
data = list()
import attr
from cattr import structure, unstructure
@attr.s
class PersonAttrs:
    """attrs model with a ``str`` name and an ``int`` age.

    Each attribute carries a one-element validator list holding an
    ``instance_of`` check for its expected type.
    """

    name = attr.ib(validator=[attr.validators.instance_of(str)])
    age = attr.ib(validator=[attr.validators.instance_of(int)])
def attr_benchmark():
    """Build a ``PersonAttrs`` from ``target`` and convert it with ``attr.asdict``.

    The converted value is discarded; only the work performed is of interest.
    """
    person = PersonAttrs(**target)
    as_dict = attr.asdict(person)
def cattr_benchmark():
    """Round-trip ``target`` through cattrs ``structure`` and ``unstructure``.

    ``target`` is structured into a ``PersonAttrs`` and then unstructured
    again; the final value is discarded.
    """
    person = structure(target, PersonAttrs)
    as_dict = unstructure(person)
# Run the attrs and cattrs benchmarks (in that order) and record their rows.
data.append(benchmark(name="attrs", func=attr_benchmark))
data.append(benchmark(name="cattrs", func=cattr_benchmark))
class PersonClass:
    """Hand-written person class that type-checks its constructor arguments."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` after checking their types.

        Raises:
            ValueError: If ``name`` is not a ``str`` (checked first) or
                ``age`` is not an ``int``.
        """
        if not isinstance(name, str):
            raise ValueError("name is not a string")
        elif not isinstance(age, int):
            raise ValueError("age is not an integer")
        else:
            self.name, self.age = name, age
class PersonClassRaw:
    """Hand-written person class that stores its arguments without any checks."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` exactly as given."""
        self.name, self.age = name, age
def to_dict(obj):
    """Return a ``{"name": ..., "age": ...}`` dict read from ``obj``'s attributes."""
    return {"name": obj.name, "age": obj.age}
def class_benchmark():
    """Build a ``PersonClass`` from ``target`` and convert it with ``to_dict``.

    The resulting dict is discarded.
    """
    person = PersonClass(**target)
    as_dict = to_dict(person)
def class_raw_benchmark():
    """Build a ``PersonClassRaw`` from ``target`` and convert it with ``to_dict``.

    The resulting dict is discarded.
    """
    person = PersonClassRaw(**target)
    as_dict = to_dict(person)
# Run the two hand-written class benchmarks (checked first, then raw) and
# record their rows.
data.append(benchmark(name="class", func=class_benchmark))
data.append(benchmark(name="class_raw", func=class_raw_benchmark))
from schematics.models import Model
from schematics.types import StringType, IntType
class PersonSchematics(Model):
    """schematics model declaring the two fields present in ``target``."""

    # Declared with schematics' StringType.
    name = StringType()
    # Declared with schematics' IntType.
    age = IntType()
def schematics_benchmark():
    """Load ``target`` into ``PersonSchematics``, validate it, and export it.

    The model is built from ``target``, ``validate()`` is called on it, and
    the ``to_primitive()`` output is discarded.
    """
    person = PersonSchematics(target)
    person.validate()
    primitive = person.to_primitive()


# Run the schematics benchmark and record its row.
data.append(benchmark(name="schematics", func=schematics_benchmark))
import toastedmarshmallow
from marshmallow import Schema, fields
class PersonMarshmallow(Schema):
    """marshmallow schema declaring the two fields present in ``target``."""

    # ``fields.String`` / ``fields.Integer`` are the full names behind the
    # ``fields.Str`` / ``fields.Int`` aliases.
    name = fields.String()
    age = fields.Integer()
# Three separate instances of the same schema class: one left as created,
# and two that get a toastedmarshmallow class assigned to ``jit`` below.
marshmallow_schema = PersonMarshmallow()
toasted_marshmallow_schema = PersonMarshmallow()
toasted_marshmallow_cython_schema = PersonMarshmallow()

# Assign the ``Jit`` and ``CythonJit`` classes to their respective instances.
toasted_marshmallow_schema.jit = toastedmarshmallow.Jit
toasted_marshmallow_cython_schema.jit = toastedmarshmallow.CythonJit
def marshmallow_benchmark():
    """Load ``target`` with ``marshmallow_schema`` and dump the loaded data.

    ``load`` is called on ``target`` and the ``.data`` attribute of its
    result is passed to ``dump``; the dumped value is discarded.
    """
    loaded = marshmallow_schema.load(target)
    dumped = marshmallow_schema.dump(loaded.data)


# Run the plain marshmallow benchmark and record its row.
data.append(benchmark(name="marshmallow", func=marshmallow_benchmark))
def toasted_marshmallow_benchmark():
    """Load and dump ``target`` using the schema whose ``jit`` is ``Jit``.

    Uses ``toasted_marshmallow_schema`` (not the plain ``marshmallow_schema``)
    so that the toastedmarshmallow-enabled instance is what gets timed.
    ``load`` is called on ``target`` and the ``.data`` attribute of its
    result is passed to ``dump``; the dumped value is discarded.
    """
    obj = toasted_marshmallow_schema.load(target)
    result = toasted_marshmallow_schema.dump(obj.data)


# Run the toastedmarshmallow benchmark and record its row.
data.append(benchmark("toastedmarshmallow", toasted_marshmallow_benchmark))
def toasted_marshmallow_cython_benchmark():
    """Load and dump ``target`` using the schema whose ``jit`` is ``CythonJit``.

    Uses ``toasted_marshmallow_cython_schema`` (not the plain
    ``marshmallow_schema``) so that the CythonJit-enabled instance is what
    gets timed. ``load`` is called on ``target`` and the ``.data`` attribute
    of its result is passed to ``dump``; the dumped value is discarded.
    """
    obj = toasted_marshmallow_cython_schema.load(target)
    result = toasted_marshmallow_cython_schema.dump(obj.data)


# Run the toastedmarshmallow (cython) benchmark and record its row.
data.append(benchmark("toastedmarshmallow (cython)", toasted_marshmallow_cython_benchmark))
import pydantic
from pydantic import BaseModel
class PersonPydantic(BaseModel):
    """pydantic model with a required ``str`` name and a required ``int`` age."""

    # Annotation-only declarations: neither field has a default value.
    name: str
    age: int
def benchmark_pydantic():
    """Build a ``PersonPydantic`` from ``target`` and export it with ``dict()``.

    The exported dict is discarded.
    """
    obj = PersonPydantic(**target)
    # Named ``result`` like the other benchmark functions in this file.
    result = obj.dict()


# Run the pydantic benchmark and record its row.
data.append(benchmark("pydantic", benchmark_pydantic))
import pprint
from tabulate import tabulate
# Header row first, then the benchmark rows ordered by elapsed time
# (column 1), fastest first.
table_data = [headers] + sorted(data, key=lambda entry: entry[1])
rendered_table = tabulate(table_data, headers="firstrow")
print(rendered_table)
library execution_time (seconds) iterations
--------------------------- -------------------------- ------------
class_raw 0.0058964 10000
class 0.00783895 10000
cattrs 0.035864 10000
attrs 0.0568484 10000
pydantic 0.130491 10000
toastedmarshmallow 0.319275 10000
toastedmarshmallow (cython) 0.321963 10000
marshmallow 0.327428 10000
schematics 1.19172 10000