From 9d0be229ed71ceb2cb47461c6891bc94829ae552 Mon Sep 17 00:00:00 2001 From: Ritvik Nag Date: Wed, 27 Nov 2024 13:07:41 -0500 Subject: [PATCH] updates benchmark code for other libs --- benchmarks/complex.py | 114 ++++++++++++++++++++---- benchmarks/nested.py | 201 +++++++++++++++++++++++++----------------- benchmarks/simple.py | 38 +++++--- 3 files changed, 240 insertions(+), 113 deletions(-) diff --git a/benchmarks/complex.py b/benchmarks/complex.py index e733b7d0..199823a7 100644 --- a/benchmarks/complex.py +++ b/benchmarks/complex.py @@ -3,8 +3,9 @@ from dataclasses import dataclass, field, asdict from datetime import datetime from timeit import timeit -from typing import Optional, TypeVar, Dict, Any, List, Union, NamedTuple, Tuple +from typing import Optional, TypeVar, Dict, Any, List, Union, NamedTuple, Tuple, Type +import dacite import dataclass_factory import marshmallow import pytest @@ -39,15 +40,38 @@ class MyClassDJ(DataClassJsonMixin): is_enabled: bool = True -# New Mashumaro Model @dataclass -class MyClassMashumaro(mashumaro.DataClassDictMixin): +class MyClassDacite: my_ledger: Dict[str, Any] the_answer_to_life: Optional[int] - people: List['Person'] + people: List['PersonDJ'] + is_enabled: bool = True + + +# New Pydantic Models +class MyClassPydantic(BaseModel): + my_ledger: Dict[str, Any] + the_answer_to_life: Optional[int] + people: List['PersonPydantic'] is_enabled: bool = True +# New Pydantic Models +class PersonPydantic(BaseModel): + name: 'NamePydantic' + age: int + birthdate: datetime + gender: str + occupation: Union[str, List[str]] + hobbies: Dict[str, List[str]] = defaultdict(list) + + +class NamePydantic(BaseModel): + first: str + last: str + salutation: Optional[str] = 'Mr.' 
+ + @dataclass class Person: name: 'Name' @@ -111,11 +135,6 @@ class PersonDJ: attr_dict=vars(MyClass).copy()) - -def custom_name_decoder(value): - return Name(**value) - - @pytest.fixture(scope='session') def data(): return { @@ -159,6 +178,18 @@ def data_2(data): return d +@pytest.fixture(scope='session') +def data_dacite(data_2): + """data for `dacite`, which has a *TON* of issues.""" + + # It's official, I hate this library ;-( + d = data_2.copy() + d['the_answer_to_life'] = int(d['the_answer_to_life']) + d['people'][0]['hobbies'] = data_2['people'][0]['hobbies'].copy() + d['people'][0]['hobbies']['M-F'] = list(d['people'][0]['hobbies']['M-F']) + + return d + def parse_iso_format(data): return as_datetime(data) @@ -172,7 +203,26 @@ def parse_iso_format(data): datetime: iso_format_schema } -def test_load(request, data, data_2, n): +def parse_datetime(value: str) -> datetime: + return datetime.fromisoformat(value.rstrip('Z')) # Remove 'Z' if it's present + +dacite_cfg = dacite.Config( + type_hooks={datetime: parse_datetime}) + + +def test_load(request, data, data_2, data_dacite, n): + """ + [ RESULTS ON MAC OS X ] + + benchmarks.complex.complex - [INFO] dataclass-wizard 0.800521 + benchmarks.complex.complex - [INFO] dataclass-factory 0.827150 + benchmarks.complex.complex - [INFO] dataclasses-json 37.087781 + benchmarks.complex.complex - [INFO] dacite 9.421210 + benchmarks.complex.complex - [INFO] mashumaro 0.608496 + benchmarks.complex.complex - [INFO] pydantic 1.039472 + benchmarks.complex.complex - [INFO] jsons 39.677698 + benchmarks.complex.complex - [INFO] jsons (strict) 41.592585 + """ g = globals().copy() g.update(locals()) @@ -185,9 +235,28 @@ def test_load(request, data, data_2, n): log.info('dataclasses-json %f', timeit('MyClassDJ.from_dict(data_2)', globals=g, number=n)) + log.info('dacite %f', + timeit('dacite_from_dict(MyClassDacite, data_dacite, config=dacite_cfg)', + globals=g, number=n)) + log.info('mashumaro %f', 
timeit('MyClassMashumaro.from_dict(data)', globals=g, number=n)) + log.info('pydantic %f', + timeit('MyClassPydantic(**data_2)', globals=g, number=n)) + + # Assert the dataclass instances have the same values for all fields. + c1 = MyClassWizard.from_dict(data) + c2 = factory.load(data_2, MyClass) + c3 = MyClassDJ.from_dict(data_2) + c4 = MyClassJsons.load(data) + c5 = MyClassMashumaro.from_dict(data) + c6 = dacite_from_dict(MyClassDacite, data_dacite, config=dacite_cfg) + c7 = MyClassPydantic(**data_2) + + # Since these models might differ slightly, we can skip exact equality checks + # assert c1.__dict__ == c2.__dict__ == c3.__dict__ == c4.__dict__ == c5.__dict__ + if not request.config.getoption("--all"): pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.") @@ -197,22 +266,26 @@ def test_load(request, data, data_2, n): log.info('jsons (strict) %f', timeit('MyClassJsons.load(data, strict=True)', globals=g, number=n)) - # Assert the dataclass instances have the same values for all fields. 
- c1 = MyClassWizard.from_dict(data) - c2 = factory.load(data_2, MyClass) - c3 = MyClassDJ.from_dict(data) - c4 = MyClassJsons.load(data) - c5 = MyClassMashumaro.from_dict(data) - # Since these models might differ slightly, we can skip exact equality checks - # assert c1.__dict__ == c2.__dict__ == c3.__dict__ == c4.__dict__ == c5.__dict__ +def test_dump(request, data, data_2, data_dacite, n): + """ + [ RESULTS ON MAC OS X ] -def test_dump(request, data, data_2, n): + benchmarks.complex.complex - [INFO] dataclass-wizard 1.606120 + benchmarks.complex.complex - [INFO] asdict (dataclasses) 2.006917 + benchmarks.complex.complex - [INFO] dataclass-factory 0.979412 + benchmarks.complex.complex - [INFO] dataclasses-json 13.740522 + benchmarks.complex.complex - [INFO] mashumaro 0.289991 + benchmarks.complex.complex - [INFO] pydantic 0.384267 + benchmarks.complex.complex - [INFO] jsons 41.673240 + benchmarks.complex.complex - [INFO] jsons (strict) 45.934885 + """ c1 = MyClassWizard.from_dict(data) c2 = factory.load(data_2, MyClass) c3 = MyClassDJ.from_dict(data_2) c4 = MyClassJsons.load(data) c5 = MyClassMashumaro.from_dict(data) + c6 = MyClassPydantic(**data_2) g = globals().copy() g.update(locals()) @@ -232,6 +305,9 @@ def test_dump(request, data, data_2, n): log.info('mashumaro %f', timeit('c5.to_dict()', globals=g, number=n)) + log.info('pydantic %f', + timeit('c6.model_dump()', globals=g, number=n)) + if not request.config.getoption("--all"): pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.") diff --git a/benchmarks/nested.py b/benchmarks/nested.py index 6a9496d9..42dfefea 100644 --- a/benchmarks/nested.py +++ b/benchmarks/nested.py @@ -9,6 +9,9 @@ import pytest from dataclasses_json import DataClassJsonMixin, config from jsons import JsonSerializable +from dacite import from_dict as dacite_from_dict +from pydantic import BaseModel +import mashumaro from dataclass_wizard import JSONWizard from dataclass_wizard.class_helper import 
create_new_class @@ -18,114 +21,117 @@ log = logging.getLogger(__name__) - +# Dataclass Definitions (Same as before, no changes needed) @dataclass class Data1: - """ - Top-level dataclass for the majority of the cases. - - """ instance: 'Instance' result: 'Result' @dataclass class Instance: - """ - Instance dataclass - - """ name: str data: 'Data2' @dataclass class Data2: - """ - Data dataclass - - """ date: date owner: str @dataclass class Result: - """ - Result dataclass - - """ status: str iteration_results: 'IterationResults' @dataclass class IterationResults: - """ - IterationResults dataclass - - """ iterations: List['Iteration'] @dataclass class Iteration: - """ - Iteration dataclass - - """ name: str data: 'Data3' @dataclass class Data3: - """ - Data dataclass - - """ question1: str question2: str +# New Model Class Definitions for Libraries + +class MyClassPydantic(BaseModel): + instance: 'InstancePydantic' + result: 'ResultPydantic' + + +class InstancePydantic(BaseModel): + name: str + data: 'Data2Pydantic' + + +class Data2Pydantic(BaseModel): + date: date + owner: str + + +class ResultPydantic(BaseModel): + status: str + iteration_results: 'IterationResultsPydantic' + + @dataclass -class MyClassDJ(DataClassJsonMixin): - """ - Top level dataclass for testing with `dataclasses-json`. Just a note - this nested definition is a bit painful, but necessary as there seems - no way to decode `date` fields automatically by default. 
+class IterationResultsPydantic: + iterations: List['IterationPydantic'] - """ - instance: 'InstanceJD' - result: 'Result' + +class IterationPydantic(BaseModel): + name: str + data: 'Data3Pydantic' + + +class Data3Pydantic(BaseModel): + question1: str + question2: str @dataclass -class InstanceJD: - """ - Instance dataclass for `dataclasses-json` +class MyClassMashumaro(mashumaro.DataClassDictMixin): + instance: 'InstanceMashumaro' + result: 'Result' - """ + +@dataclass +class InstanceMashumaro: name: str - data: 'Data2JD' + data: 'Data2Mashumaro' @dataclass -class Data2JD: - """ - Data dataclass for `dataclasses-json`. Note this is needed because - otherwise we de-serialize strings as strings, instead of `date` type. - So we need to tell `dataclasses-json` to de-serialize our field as - a `date` type. +class Data2Mashumaro: + date: date + owner: str - """ - date: date = field( - metadata=config( - encoder=date.isoformat, - decoder=as_date, - mm_field=marshmallow.fields.Date(format='iso') - ) - ) + +# Corrected Definition for `MyClassDJ` +@dataclass +class MyClassDJ(DataClassJsonMixin): + instance: 'InstanceDJ' + result: 'Result' + + +class InstanceDJ: + name: str + data: 'Data2DJ' + + +class Data2DJ: + date: date owner: str @@ -145,6 +151,12 @@ class Data2JD: Data1, (Data1, JsonSerializable), 'Jsons', attr_dict=vars(Data1).copy()) +# Pydantic Model for Benchmarking +MyClassPydanticModel = MyClassPydantic + +# Mashumaro Model for Benchmarking +MyClassMashumaroModel = MyClassMashumaro + @pytest.fixture(scope='session') def data(): @@ -190,88 +202,111 @@ def data(): def test_load(request, data, n): + """ + [ RESULTS ON MAC OS X ] + + benchmarks.nested.nested - [INFO] dataclass-wizard 0.397123 + benchmarks.nested.nested - [INFO] dataclass-factory 0.418530 + benchmarks.nested.nested - [INFO] dataclasses-json 11.443072 + benchmarks.nested.nested - [INFO] mashumaro 0.158189 + benchmarks.nested.nested - [INFO] pydantic 0.346031 + benchmarks.nested.nested - [INFO] jsons 
28.124958 + benchmarks.nested.nested - [INFO] jsons (strict) 28.816675 + + """ g = globals().copy() g.update(locals()) - # Result: 0.404 log.info('dataclass-wizard %f', timeit('MyClassWizard.from_dict(data)', globals=g, number=n)) - # Result: 0.427 log.info('dataclass-factory %f', - timeit('factory.load(data, Data1)', globals=g, number=n)) + timeit('factory.load(data, Data1)', globals=g, number=n)) - # Result: 15.304 log.info('dataclasses-json %f', timeit('MyClassDJ.from_dict(data)', globals=g, number=n)) + # JUST SKIPPING IN THE INTEREST OF TIME + # log.info('dacite %f', + # timeit('dacite_from_dict(MyClass, data)', globals=g, number=n)) + + log.info('mashumaro %f', + timeit('MyClassMashumaro.from_dict(data)', globals=g, number=n)) + + log.info('pydantic %f', + timeit('MyClassPydantic(**data)', globals=g, number=n)) + if not request.config.getoption("--all"): pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.") - # Result: 26.490 log.info('jsons %f', timeit('MyClassJsons.load(data)', globals=g, number=n)) - # Result: 30.343 log.info('jsons (strict) %f', timeit('MyClassJsons.load(data, strict=True)', globals=g, number=n)) - # Assert the dataclass instances have the same values for all fields. - c1 = MyClassWizard.from_dict(data) c2 = factory.load(data, Data1) - c3 = MyClassDJ.from_dict(data) # TODO unused in comparison + c3 = MyClassDJ.from_dict(data) c4 = MyClassJsons.load(data) + c5 = MyClassMashumaro.from_dict(data) + # c6 = dacite_from_dict(MyClass, data) + c7 = MyClassPydantic(**data) - # Note: we can't do direct comparison with `dataclasses-json`, because - # that uses different model dataclasses (for ex. 
`InstanceJD` instead - # of `Instance`) - assert c1.__dict__ == c2.__dict__ == c4.__dict__ + assert c1.__dict__ == c2.__dict__ == c3.__dict__ == c4.__dict__ == c5.__dict__ == c7.__dict__ # == c6.__dict__ def test_dump(request, data, n): + """ + [ RESULTS ON MAC OS X ] + + INFO benchmarks.nested:nested.py:258 dataclass-wizard 0.460812 + INFO benchmarks.nested:nested.py:261 asdict (dataclasses) 0.674034 + INFO benchmarks.nested:nested.py:264 dataclass-factory 0.233023 + INFO benchmarks.nested:nested.py:267 dataclasses-json 5.717344 + INFO benchmarks.nested:nested.py:270 mashumaro 0.086356 + INFO benchmarks.nested:nested.py:273 pydantic 0.209953 + INFO benchmarks.nested:nested.py:279 jsons 49.321013 + INFO benchmarks.nested:nested.py:282 jsons (strict) 44.051063 + """ c1 = MyClassWizard.from_dict(data) c2 = factory.load(data, Data1) c3 = MyClassDJ.from_dict(data) c4 = MyClassJsons.load(data) + c5 = MyClassMashumaro.from_dict(data) + c6 = MyClassPydantic(**data) g = globals().copy() g.update(locals()) - # Result: 0.431 log.info('dataclass-wizard %f', timeit('c1.to_dict()', globals=g, number=n)) - # Result: 0.628 log.info('asdict (dataclasses) %f', timeit('asdict(c1)', globals=g, number=n)) - # Result: 0.217 log.info('dataclass-factory %f', timeit('factory.dump(c2, Data1)', globals=g, number=n)) - # Result: 6.332 log.info('dataclasses-json %f', timeit('c3.to_dict()', globals=g, number=n)) + log.info('mashumaro %f', + timeit('c5.to_dict()', globals=g, number=n)) + + log.info('pydantic %f', + timeit('c6.model_dump()', globals=g, number=n)) + if not request.config.getoption("--all"): pytest.skip("Skipping benchmarks for the rest by default, unless --all is specified.") - # Result: 41.752 log.info('jsons %f', timeit('c4.dump()', globals=g, number=n)) - # Result: 38.744 log.info('jsons (strict) %f', timeit('c4.dump(strict=True)', globals=g, number=n)) # Assert the dict objects which are the result of `to_dict` are all equal. 
+ c1_dict = {to_snake_case(f): fval for f, fval in c1.to_dict().items()} - # Need this step because our lib converts field names to camel-case - # by default. - # c1_dict = {to_snake_case(f): fval for f, fval in c1.to_dict().items()} - - # I tried to do an assertion but it failed. Even if I remove our result - # e.g. `c1_dict`, results are still unequal between the others. I'll - # need to dedicate some time to look into this a bit more in depth. - # assert c1_dict == factory.dump(c2, Data1) == c3.to_dict() == c4.dump() + # assert c1_dict == factory.dump(c2, Data1) == c3.to_dict() == c4.dump() == c5.to_dict() diff --git a/benchmarks/simple.py b/benchmarks/simple.py index c2b76cac..62409a32 100644 --- a/benchmarks/simple.py +++ b/benchmarks/simple.py @@ -74,20 +74,22 @@ def data(): } def test_load(data, n): + """ + [ RESULTS ON MAC OS X ] + + benchmarks.simple.simple - [INFO] dataclass-wizard 0.076336 + benchmarks.simple.simple - [INFO] dataclass-factory 0.103837 + benchmarks.simple.simple - [INFO] dataclasses-json 3.941902 + benchmarks.simple.simple - [INFO] jsons 5.636863 + benchmarks.simple.simple - [INFO] dacite 0.572661 + benchmarks.simple.simple - [INFO] pydantic 0.081108 + benchmarks.simple.simple - [INFO] marshmallow 2.550217 + benchmarks.simple.simple - [INFO] attrs 0.022822 + benchmarks.simple.simple - [INFO] mashumaro 0.046641 + """ g = globals().copy() g.update(locals()) - # [ RESULTS ] - # benchmarks.simple.simple - [INFO] dataclass-wizard 0.075491 - # benchmarks.simple.simple - [INFO] dataclass-factory 0.105838 - # benchmarks.simple.simple - [INFO] dataclasses-json 3.684969 - # benchmarks.simple.simple - [INFO] jsons 4.713889 - # benchmarks.simple.simple - [INFO] dacite 0.480481 - # benchmarks.simple.simple - [INFO] pydantic 0.073991 - # benchmarks.simple.simple - [INFO] marshmallow 2.219145 - # benchmarks.simple.simple - [INFO] attrs 0.020691 - # benchmarks.simple.simple - [INFO] mashumaro 0.042289 - # Add dacite and pydantic benchmarks 
log.info("dataclass-wizard %f", timeit("MyClassWizard.from_dict(data)", globals=g, number=n)) @@ -122,6 +124,20 @@ def test_load(data, n): assert c1.__dict__ == c2.__dict__ == c3.__dict__ == c4.__dict__ == c5.__dict__ == c6.model_dump() == c7 == c8.__dict__ == c9.to_dict() def test_dump(data, n): + """ + [ RESULTS ON MAC OS X ] + + benchmarks.simple.simple - [INFO] dataclass-wizard 0.072549 + benchmarks.simple.simple - [INFO] asdict (dataclasses) 0.101621 + benchmarks.simple.simple - [INFO] dataclass-factory 0.087357 + benchmarks.simple.simple - [INFO] dataclasses-json 1.488334 + benchmarks.simple.simple - [INFO] jsons 8.550752 + benchmarks.simple.simple - [INFO] dacite (not applicable) -- skipped + benchmarks.simple.simple - [INFO] pydantic 0.080157 + benchmarks.simple.simple - [INFO] marshmallow 0.000578 + benchmarks.simple.simple - [INFO] attrs 0.146561 + benchmarks.simple.simple - [INFO] mashumaro 0.010199 + """ # [ RESULTS ] # benchmarks.simple.simple - [INFO] dataclass-wizard 0.065604