Skip to content

Commit

Permalink
Performance fix: WOWZA! Dataclass Wizard is now faster than ever at benchmarks!
Browse files Browse the repository at this point in the history

* Use/save `__call__()` method of Parser rather than Parser itself
  • Loading branch information
rnag committed Nov 27, 2024
1 parent ce895df commit 9b9a256
Show file tree
Hide file tree
Showing 17 changed files with 207 additions and 182 deletions.
29 changes: 17 additions & 12 deletions benchmarks/complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,28 @@ def parse_iso_format(data):
}


def test_load(data, n):
def test_load(request, data, n):
g = globals().copy()
g.update(locals())

# Result: 1.753
# Result: 0.790
log.info('dataclass-wizard %f',
timeit('MyClassWizard.from_dict(data)', globals=g, number=n))

# Result: 1.349
# Result: 0.774
log.info('dataclass-factory %f',
timeit('factory.load(data, MyClass)', globals=g, number=n))

# Result: 28.776
# Result: 23.40
# NOTE: This likely is not an entirely fair comparison, since the other
# libraries load `Person.name` as a `Name` (which is a NamedTuple subclass),
# whereas in this case we just load it as an `Any` type.
log.info('dataclasses-json %f',
timeit('MyClassDJ.from_dict(data)', globals=g, number=n))

if not request.config.getoption("--all"):
pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")

# these ones took a long time xD
# Result: 70.752
log.info('jsons %f',
Expand All @@ -193,7 +196,7 @@ def test_load(data, n):
# assert c1.__dict__ == c2.__dict__ == c3.__dict__ == c4.__dict__


def test_dump(data, n):
def test_dump(request, data, n):

c1 = MyClassWizard.from_dict(data)
c2 = factory.load(data, MyClass)
Expand All @@ -203,23 +206,25 @@ def test_dump(data, n):
g = globals().copy()
g.update(locals())

# Result: 2.445
# Result: 1.394
log.info('dataclass-wizard %f',
timeit('c1.to_dict()', globals=g, number=n))

# actually, `dataclasses.asdict` call seems to fail for some reason
# (possibly due to a `defaultdict` being used? would be a bug if so :o)
# log.info('asdict (dataclasses) %f',
# timeit('asdict(c1)', globals=g, number=n))
# Result: 1.804
log.info('asdict (dataclasses) %f',
timeit('asdict(c1)', globals=g, number=n))

# Result: 3.468
# Result: 0.862
log.info('dataclass-factory %f',
timeit('factory.dump(c2, MyClass)', globals=g, number=n))

# Result: 15.214
# Result: 9.872
log.info('dataclasses-json %f',
timeit('c3.to_dict()', globals=g, number=n))

if not request.config.getoption("--all"):
pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")

# Result: 53.686
log.info('jsons %f',
timeit('c4.dump()', globals=g, number=n))
Expand Down
10 changes: 10 additions & 0 deletions benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,13 @@
@pytest.fixture(scope='session')
def n():
    """Return the iteration count shared by the benchmark tests.

    The fixture is session-scoped, so every benchmark in a run receives the
    same value; the benchmark tests forward it to ``timeit`` as ``number=n``.
    """
    return 100_000


def pytest_addoption(parser):
    """Register the benchmark suite's custom command-line flag.

    Adds a boolean ``--all`` option (short alias ``-A``) that is off by
    default. The benchmark tests read it through
    ``request.config.getoption("--all")`` and skip the slower libraries
    unless it is set.

    :param parser: Object exposing ``addoption()``; pytest supplies its
        option parser when it invokes this hook.
    """
    parser.addoption(
        "--all",  # long option
        "-A",  # short alias
        action="store_true",
        default=False,
        help="Run benchmarks for *all* libraries, including *slower* ones like `jsons`",
    )
32 changes: 19 additions & 13 deletions benchmarks/nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,27 +189,30 @@ def data():
}


def test_load(data, n):
def test_load(request, data, n):
g = globals().copy()
g.update(locals())

# Result: 0.811
# Result: 0.404
log.info('dataclass-wizard %f',
timeit('MyClassWizard.from_dict(data)', globals=g, number=n))

# Result: 0.795
# Result: 0.427
log.info('dataclass-factory %f',
timeit('factory.load(data, Data1)', globals=g, number=n))

# Result: 20.571
# Result: 15.304
log.info('dataclasses-json %f',
timeit('MyClassDJ.from_dict(data)', globals=g, number=n))

# Result: 45.352
if not request.config.getoption("--all"):
pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")

# Result: 26.490
log.info('jsons %f',
timeit('MyClassJsons.load(data)', globals=g, number=n))

# Result: 62.501
# Result: 30.343
log.info('jsons (strict) %f',
timeit('MyClassJsons.load(data, strict=True)', globals=g, number=n))

Expand All @@ -226,7 +229,7 @@ def test_load(data, n):
assert c1.__dict__ == c2.__dict__ == c4.__dict__


def test_dump(data, n):
def test_dump(request, data, n):
c1 = MyClassWizard.from_dict(data)
c2 = factory.load(data, Data1)
c3 = MyClassDJ.from_dict(data)
Expand All @@ -235,27 +238,30 @@ def test_dump(data, n):
g = globals().copy()
g.update(locals())

# Result: 1.096
# Result: 0.431
log.info('dataclass-wizard %f',
timeit('c1.to_dict()', globals=g, number=n))

# Result: 1.754
# Result: 0.628
log.info('asdict (dataclasses) %f',
timeit('asdict(c1)', globals=g, number=n))

# Result: 0.597
# Result: 0.217
log.info('dataclass-factory %f',
timeit('factory.dump(c2, Data1)', globals=g, number=n))

# Result: 7.514
# Result: 6.332
log.info('dataclasses-json %f',
timeit('c3.to_dict()', globals=g, number=n))

# Result: 54.996
if not request.config.getoption("--all"):
pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")

# Result: 41.752
log.info('jsons %f',
timeit('c4.dump()', globals=g, number=n))

# Result: 51.893
# Result: 38.744
log.info('jsons (strict) %f',
timeit('c4.dump(strict=True)', globals=g, number=n))

Expand Down
24 changes: 12 additions & 12 deletions benchmarks/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,23 @@ def test_load(data, n):
g = globals().copy()
g.update(locals())

# Result: 0.170
# Result: 0.076
log.info('dataclass-wizard %f',
timeit('MyClassWizard.from_dict(data)', globals=g, number=n))

# Result: 0.314
# Result: 0.104
log.info('dataclass-factory %f',
timeit('factory.load(data, MyClass)', globals=g, number=n))

# Result: 4.953
# Result: 3.614
log.info('dataclasses-json %f',
timeit('MyClassDJ.from_dict(data)', globals=g, number=n))

# Result: 9.543
# Result: 4.702
log.info('jsons %f',
timeit('MyClassJsons.load(data)', globals=g, number=n))

# Result: 12.825
# Result: 5.708
log.info('jsons (strict) %f',
timeit('MyClassJsons.load(data, strict=True)', globals=g, number=n))

Expand All @@ -91,27 +91,27 @@ def test_dump(data, n):
g = globals().copy()
g.update(locals())

# Result: 0.237
# Result: 0.067
log.info('dataclass-wizard %f',
timeit('c1.to_dict()', globals=g, number=n))

# Result: 0.238
# Result: 0.090
log.info('asdict (dataclasses) %f',
timeit('c1.to_dict()', globals=g, number=n))
timeit('asdict(c1)', globals=g, number=n))

# Result: 0.513
# Result: 0.075
log.info('dataclass-factory %f',
timeit('factory.dump(c2, MyClass)', globals=g, number=n))

# Result: 1.497
# Result: 1.318
log.info('dataclasses-json %f',
timeit('c3.to_dict()', globals=g, number=n))

# Result: 10.177
# Result: 6.207
log.info('jsons %f',
timeit('c4.dump()', globals=g, number=n))

# Result: 10.099
# Result: 6.280
log.info('jsons (strict) %f',
timeit('c4.dump(strict=True)', globals=g, number=n))

Expand Down
7 changes: 5 additions & 2 deletions dataclass_wizard/class_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,12 @@ def _setup_load_config_for_cls(cls_loader,

# Look up the Parser (dispatcher) for each field based on its annotated
# type, and then cache it so we don't need to look it up each time.
name_to_parser[f.name] = cls_loader.get_parser_for_annotation(
#
# Changed in v0.31.0: Get the __call__() method as defined
# on `AbstractParser`, if it exists
name_to_parser[f.name] = getattr(p := cls_loader.get_parser_for_annotation(
field_type, cls, field_extras
)
), '__call__', p)

parser_dict = DictWithLowerStore(name_to_parser)
# only cache the load parser for the class if `save` is enabled
Expand Down
16 changes: 8 additions & 8 deletions dataclass_wizard/environ/wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,25 +129,25 @@ def _create_methods(cls):

# parameters to the `__init__()` method.
init_params = ['self',
'env_file:EnvFileType=None',
'reload_env:bool=False']
'_env_file:EnvFileType=None',
'_reload:bool=False']

fn_gen = FunctionBuilder()

with fn_gen.function('__init__', init_params, None):
# reload cached var names from `os.environ` as needed.
with fn_gen.if_('reload_env'):
with fn_gen.if_('_reload'):
fn_gen.add_line('Env.reload()')
# update environment with values in the "dot env" files as needed.
if _meta_env_file:
fn = fn_gen.elif_
_globals['_dotenv_values'] = Env.dotenv_values(_meta_env_file)
with fn_gen.if_('env_file is None'):
with fn_gen.if_('_env_file is None'):
fn_gen.add_line('Env.update_with_dotenv(dotenv_values=_dotenv_values)')
else:
fn = fn_gen.if_
with fn('env_file'):
fn_gen.add_line('Env.update_with_dotenv(env_file)')
with fn('_env_file'):
fn_gen.add_line('Env.update_with_dotenv(_env_file)')

# iterate over the dataclass fields and (attempt to) resolve
# each one.
Expand All @@ -169,8 +169,8 @@ def _create_methods(cls):

with fn_gen.if_(f'{name} is not MISSING or {part} is not MISSING'):
parser_name = f'_parser_{name}'
_globals[parser_name] = cls_loader.get_parser_for_annotation(
tp, cls, extras)
_globals[parser_name] = getattr(p := cls_loader.get_parser_for_annotation(
tp, cls, extras), '__call__', p)
with fn_gen.try_():
fn_gen.add_line(f'self.{name} = {parser_name}({name})')
with fn_gen.except_(ParseError, 'e'):
Expand Down
4 changes: 2 additions & 2 deletions dataclass_wizard/environ/wizard.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ class EnvWizard(AbstractEnvWizard):

# stub for type hinting purposes.
def __init__(self, *,
env_file: EnvFileType = None,
reload_env: bool = False,
_env_file: EnvFileType = None,
_reload: bool = False,
**init_kwargs) -> None:
...

Expand Down
3 changes: 1 addition & 2 deletions dataclass_wizard/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,8 @@ def get_parser_for_annotation(cls, ann_type: Type[T],
base_type: 'type[T]'
# return a dynamically generated `fromdict`
# for the `cls` (base_type)
return load_func_for_dataclass(
return cls.load_func_for_dataclass(
base_type,
is_main_class=False,
config=extras['config']
)

Expand Down
10 changes: 4 additions & 6 deletions dataclass_wizard/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,8 @@ def __init__(self, keys, all: bool, dump: bool,

if isinstance(keys, str):
keys = split_object_path(keys) if path else (keys,)
# keys = (keys, )
# elif keys is Ellipsis:
# keys = ()
elif keys is ...:
keys = ()

self.json = JSON(*keys, all=all, dump=dump, path=path)

Expand All @@ -113,9 +112,8 @@ def __init__(self, keys, all: bool, dump: bool,

if isinstance(keys, str):
keys = split_object_path(keys) if path else (keys,)
# keys = (keys, )
# elif keys is Ellipsis:
# keys = ()
elif keys is ...:
keys = ()

self.json = JSON(*keys, all=all, dump=dump, path=path)

Expand Down
Loading

0 comments on commit 9b9a256

Please sign in to comment.