Performance fix: WOWZA! Dataclass Wizard is now faster than ever at benchmarks!

* Use/save `__call__()` method of Parser rather than Parser itself
rnag · Nov 27, 2024 · 9b9a256 · 9b9a256
1 parent ce895df
commit 9b9a256
Show file tree

Hide file tree

Showing 17 changed files with 207 additions and 182 deletions.
diff --git a/benchmarks/complex.py b/benchmarks/complex.py
@@ -151,25 +151,28 @@ def parse_iso_format(data):
 }
 
 
-def test_load(data, n):
+def test_load(request, data, n):
     g = globals().copy()
     g.update(locals())
 
-    # Result: 1.753
+    # Result: 0.790
     log.info('dataclass-wizard     %f',
              timeit('MyClassWizard.from_dict(data)', globals=g, number=n))
 
-    # Result: 1.349
+    # Result: 0.774
     log.info('dataclass-factory    %f',
              timeit('factory.load(data, MyClass)', globals=g, number=n))
 
-    # Result: 28.776
+    # Result: 23.40
     #   NOTE: This likely is not an entirely fair comparison, since the
     #   rest load `Person.name` as a `Name` (which is a NamedTuple sub-class),
     #   but in this case we just load it as an `Any` type.
     log.info('dataclasses-json     %f',
              timeit('MyClassDJ.from_dict(data)', globals=g, number=n))
 
+    if not request.config.getoption("--all"):
+        pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")
+
     # these ones took a long time xD
     # Result: 70.752
     log.info('jsons                %f',
@@ -193,7 +196,7 @@ def test_load(data, n):
     # assert c1.__dict__ == c2.__dict__  == c3.__dict__ == c4.__dict__
 
 
-def test_dump(data, n):
+def test_dump(request, data, n):
 
     c1 = MyClassWizard.from_dict(data)
     c2 = factory.load(data, MyClass)
@@ -203,23 +206,25 @@ def test_dump(data, n):
     g = globals().copy()
     g.update(locals())
 
-    # Result: 2.445
+    # Result: 1.394
     log.info('dataclass-wizard     %f',
              timeit('c1.to_dict()', globals=g, number=n))
 
-    # actually, `dataclasses.asdict` call seems to fail for some reason
-    # (possibly due to a `defaultdict` being used? would be a bug if so :o)
-    # log.info('asdict (dataclasses) %f',
-    #          timeit('asdict(c1)', globals=g, number=n))
+    # Result: 1.804
+    log.info('asdict (dataclasses) %f',
+             timeit('asdict(c1)', globals=g, number=n))
 
-    # Result: 3.468
+    # Result: 0.862
     log.info('dataclass-factory    %f',
              timeit('factory.dump(c2, MyClass)', globals=g, number=n))
 
-    # Result: 15.214
+    # Result: 9.872
     log.info('dataclasses-json     %f',
              timeit('c3.to_dict()', globals=g, number=n))
 
+    if not request.config.getoption("--all"):
+        pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")
+
     # Result: 53.686
     log.info('jsons                %f',
              timeit('c4.dump()', globals=g, number=n))

diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py
@@ -4,3 +4,13 @@
 @pytest.fixture(scope='session')
 def n():
     return 100_000
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--all",  # long option
+        "-A",
+        action="store_true",
+        default=False,
+        help="Run benchmarks for *all* libraries, including *slower* ones like `jsons`",
+    )
diff --git a/benchmarks/nested.py b/benchmarks/nested.py
@@ -189,27 +189,30 @@ def data():
 }
 
 
-def test_load(data, n):
+def test_load(request, data, n):
     g = globals().copy()
     g.update(locals())
 
-    # Result: 0.811
+    # Result: 0.404
     log.info('dataclass-wizard     %f',
              timeit('MyClassWizard.from_dict(data)', globals=g, number=n))
 
-    # Result: 0.795
+    # Result: 0.427
     log.info('dataclass-factory    %f',
             timeit('factory.load(data, Data1)', globals=g, number=n))
 
-    # Result: 20.571
+    # Result: 15.304
     log.info('dataclasses-json     %f',
              timeit('MyClassDJ.from_dict(data)', globals=g, number=n))
 
-    # Result: 45.352
+    if not request.config.getoption("--all"):
+        pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")
+
+    # Result: 26.490
     log.info('jsons                %f',
              timeit('MyClassJsons.load(data)', globals=g, number=n))
 
-    # Result: 62.501
+    # Result: 30.343
     log.info('jsons (strict)       %f',
              timeit('MyClassJsons.load(data, strict=True)', globals=g, number=n))
 
@@ -226,7 +229,7 @@ def test_load(data, n):
     assert c1.__dict__ == c2.__dict__ == c4.__dict__
 
 
-def test_dump(data, n):
+def test_dump(request, data, n):
     c1 = MyClassWizard.from_dict(data)
     c2 = factory.load(data, Data1)
     c3 = MyClassDJ.from_dict(data)
@@ -235,27 +238,30 @@ def test_dump(data, n):
     g = globals().copy()
     g.update(locals())
 
-    # Result: 1.096
+    # Result: 0.431
     log.info('dataclass-wizard     %f',
              timeit('c1.to_dict()', globals=g, number=n))
 
-    # Result: 1.754
+    # Result: 0.628
     log.info('asdict (dataclasses) %f',
              timeit('asdict(c1)', globals=g, number=n))
 
-    # Result: 0.597
+    # Result: 0.217
     log.info('dataclass-factory    %f',
              timeit('factory.dump(c2, Data1)', globals=g, number=n))
 
-    # Result: 7.514
+    # Result: 6.332
     log.info('dataclasses-json     %f',
              timeit('c3.to_dict()', globals=g, number=n))
 
-    # Result: 54.996
+    if not request.config.getoption("--all"):
+        pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.")
+
+    # Result: 41.752
     log.info('jsons                %f',
              timeit('c4.dump()', globals=g, number=n))
 
-    # Result: 51.893
+    # Result: 38.744
     log.info('jsons (strict)       %f',
              timeit('c4.dump(strict=True)', globals=g, number=n))
 

diff --git a/benchmarks/simple.py b/benchmarks/simple.py
@@ -52,23 +52,23 @@ def test_load(data, n):
     g = globals().copy()
     g.update(locals())
 
-    # Result: 0.170
+    # Result: 0.076
     log.info('dataclass-wizard     %f',
              timeit('MyClassWizard.from_dict(data)', globals=g, number=n))
 
-    # Result: 0.314
+    # Result: 0.104
     log.info('dataclass-factory    %f',
              timeit('factory.load(data, MyClass)', globals=g, number=n))
 
-    # Result: 4.953
+    # Result: 3.614
     log.info('dataclasses-json     %f',
              timeit('MyClassDJ.from_dict(data)', globals=g, number=n))
 
-    # Result: 9.543
+    # Result: 4.702
     log.info('jsons                %f',
              timeit('MyClassJsons.load(data)', globals=g, number=n))
 
-    # Result: 12.825
+    # Result: 5.708
     log.info('jsons (strict)       %f',
              timeit('MyClassJsons.load(data, strict=True)', globals=g, number=n))
 
@@ -91,27 +91,27 @@ def test_dump(data, n):
     g = globals().copy()
     g.update(locals())
 
-    # Result: 0.237
+    # Result: 0.067
     log.info('dataclass-wizard     %f',
              timeit('c1.to_dict()', globals=g, number=n))
 
-    # Result: 0.238
+    # Result: 0.090
     log.info('asdict (dataclasses) %f',
-             timeit('c1.to_dict()', globals=g, number=n))
+             timeit('asdict(c1)', globals=g, number=n))
 
-    # Result: 0.513
+    # Result: 0.075
     log.info('dataclass-factory    %f',
              timeit('factory.dump(c2, MyClass)', globals=g, number=n))
 
-    # Result: 1.497
+    # Result: 1.318
     log.info('dataclasses-json     %f',
              timeit('c3.to_dict()', globals=g, number=n))
 
-    # Result: 10.177
+    # Result: 6.207
     log.info('jsons                %f',
              timeit('c4.dump()', globals=g, number=n))
 
-    # Result: 10.099
+    # Result: 6.280
     log.info('jsons (strict)       %f',
              timeit('c4.dump(strict=True)', globals=g, number=n))
 

diff --git a/dataclass_wizard/class_helper.py b/dataclass_wizard/class_helper.py
@@ -205,9 +205,12 @@ def _setup_load_config_for_cls(cls_loader,
 
         # Lookup the Parser (dispatcher) for each field based on its annotated
         # type, and then cache it so we don't need to lookup each time.
-        name_to_parser[f.name] = cls_loader.get_parser_for_annotation(
+        #
+        # Changed in v0.31.0: Get the __call__() method as defined
+        # on `AbstractParser`, if it exists
+        name_to_parser[f.name] = getattr(p := cls_loader.get_parser_for_annotation(
             field_type, cls, field_extras
-        )
+        ), '__call__', p)
 
     parser_dict = DictWithLowerStore(name_to_parser)
     # only cache the load parser for the class if `save` is enabled

diff --git a/dataclass_wizard/environ/wizard.py b/dataclass_wizard/environ/wizard.py
@@ -129,25 +129,25 @@ def _create_methods(cls):
 
         # parameters to the `__init__()` method.
         init_params = ['self',
-                       'env_file:EnvFileType=None',
-                       'reload_env:bool=False']
+                       '_env_file:EnvFileType=None',
+                       '_reload:bool=False']
 
         fn_gen = FunctionBuilder()
 
         with fn_gen.function('__init__', init_params, None):
             # reload cached var names from `os.environ` as needed.
-            with fn_gen.if_('reload_env'):
+            with fn_gen.if_('_reload'):
                 fn_gen.add_line('Env.reload()')
             # update environment with values in the "dot env" files as needed.
             if _meta_env_file:
                 fn = fn_gen.elif_
                 _globals['_dotenv_values'] = Env.dotenv_values(_meta_env_file)
-                with fn_gen.if_('env_file is None'):
+                with fn_gen.if_('_env_file is None'):
                     fn_gen.add_line('Env.update_with_dotenv(dotenv_values=_dotenv_values)')
             else:
                 fn = fn_gen.if_
-            with fn('env_file'):
-                fn_gen.add_line('Env.update_with_dotenv(env_file)')
+            with fn('_env_file'):
+                fn_gen.add_line('Env.update_with_dotenv(_env_file)')
 
             # iterate over the dataclass fields and (attempt to) resolve
             # each one.
@@ -169,8 +169,8 @@ def _create_methods(cls):
 
                 with fn_gen.if_(f'{name} is not MISSING or {part} is not MISSING'):
                     parser_name = f'_parser_{name}'
-                    _globals[parser_name] = cls_loader.get_parser_for_annotation(
-                        tp, cls, extras)
+                    _globals[parser_name] = getattr(p := cls_loader.get_parser_for_annotation(
+                        tp, cls, extras), '__call__', p)
                     with fn_gen.try_():
                         fn_gen.add_line(f'self.{name} = {parser_name}({name})')
                     with fn_gen.except_(ParseError, 'e'):

diff --git a/dataclass_wizard/environ/wizard.pyi b/dataclass_wizard/environ/wizard.pyi
@@ -67,8 +67,8 @@ class EnvWizard(AbstractEnvWizard):
 
     # stub for type hinting purposes.
     def __init__(self, *,
-                 env_file: EnvFileType = None,
-                 reload_env: bool = False,
+                 _env_file: EnvFileType = None,
+                 _reload: bool = False,
                  **init_kwargs) -> None:
         ...
 

diff --git a/dataclass_wizard/loaders.py b/dataclass_wizard/loaders.py
@@ -320,9 +320,8 @@ def get_parser_for_annotation(cls, ann_type: Type[T],
                             base_type: 'type[T]'
                             # return a dynamically generated `fromdict`
                             # for the `cls` (base_type)
-                            return load_func_for_dataclass(
+                            return cls.load_func_for_dataclass(
                                 base_type,
-                                is_main_class=False,
                                 config=extras['config']
                             )
 

diff --git a/dataclass_wizard/models.py b/dataclass_wizard/models.py
@@ -96,9 +96,8 @@ def __init__(self, keys, all: bool, dump: bool,
 
             if isinstance(keys, str):
                 keys = split_object_path(keys) if path else (keys,)
-            #     keys = (keys, )
-            # elif keys is Ellipsis:
-            #     keys = ()
+            elif keys is ...:
+                keys = ()
 
             self.json = JSON(*keys, all=all, dump=dump, path=path)
 
@@ -113,9 +112,8 @@ def __init__(self, keys, all: bool, dump: bool,
 
             if isinstance(keys, str):
                 keys = split_object_path(keys) if path else (keys,)
-            #     keys = (keys, )
-            # elif keys is Ellipsis:
-            #     keys = ()
+            elif keys is ...:
+                keys = ()
 
             self.json = JSON(*keys, all=all, dump=dump, path=path)