Add a more complete ecore parsing script #476

Wuestengecko · 2024-11-18T13:27:49Z

This new script is based roughly off of the old generate_enums.py. However, instead of aggregating all defined classes and enums into a single module, the new script keeps them separated by package.

Note that the script's output still needs to be adjusted manually. However, it does help greatly in finding and covering new parts of the metamodel, as well as keeping the definitions in sync.

The generated output makes use of the new classes introduced in #456. As such, this PR is stacked on top of it.

ewuerger

I don't know if you intend to document any of this code. If so, please add doc-strings to the functions that you find useful and meaningful to share in the docs.
Additionally some tests for this script would be beneficial, especially if this is a cornerstone for a complete metamodel coverage.

ewuerger · 2024-12-17T12:49:09Z

scripts/ecore2py.py

+
+@dataclasses.dataclass(frozen=True)
+class EnumPODMember(PODMember):
+    pass


Instead of pass just provide a doc-string?

What is there to say in that docstring though, other than the class name? 🤔

ewuerger · 2024-12-17T12:56:38Z

scripts/ecore2py.py

+def write_classes(
+    f: t.IO[str],
+    location: str,
+    classes: collections.deque[ClassDef],
+    docstrings: bool,
+    nsmodule: bool,
+) -> None:
+    written_classes: set[str] = set()
+    while classes:
+        cls = classes.popleft()
+        for base in cls.bases:
+            if "." in base or base in written_classes:
+                continue
+            classes.appendleft(cls)
+            try:
+                index, basedef = next(
+                    (i, c) for i, c in enumerate(classes) if c.name == base
+                )
+            except StopIteration:
+                if location:
+                    isource = "." * (1 + location.count("."))
+                    f.write(f"\n\nfrom {isource} import {base}")
+                else:
+                    raise ValueError(
+                        f"Reference to undefined base {base}"
+                    ) from None
+                written_classes.add(base)
+            else:
+                del classes[index]
+                classes.appendleft(basedef)
+                break
+        else:
+            f.write(f"\n\nclass {cls.name}(")
+            f.write(", ".join(cls.bases) or "m.ModelElement")
+            if cls.abstract:
+                f.write(", abstract=True")
+            f.write("):\n")
+            if docstrings and cls.docstring:
+                writedoc(f, cls.docstring, 1)
+                if cls.members:
+                    f.write("\n")
+            for member in cls.members:
+                if isinstance(member, EnumPODMember):
+                    f.write(
+                        f"    {member.name} = m.EnumPOD"
+                        f"({member.xmlname!r}, {member.pod_type})\n"
+                    )
+                elif isinstance(member, PODMember):
+                    f.write(
+                        f"    {member.name} = m.{member.pod_type}POD"
+                        f"({member.xmlname!r})\n"
+                    )
+                elif isinstance(member, ContainmentMember):
+                    f.write(f"    {member.name} = ")
+                    if member.single:
+                        f.write(f"m.Single[{member.cls!r}](m.Containment(")
+                    else:
+                        f.write(f"m.Containment[{member.cls!r}](")
+                    f.write(
+                        f"{member.tag!r},"
+                        f" {clsname2tuplestring(member.cls, nsmodule)}"
+                    )
+                    if member.single:
+                        f.write(")")
+                    f.write(")\n")
+                elif isinstance(member, AssociationMember):
+                    f.write(f"    {member.name} = ")
+                    if member.single:
+                        f.write(f"m.Single[{member.cls!r}](m.Association(")
+                    else:
+                        f.write(f"m.Association[{member.cls!r}](")
+                    f.write(
+                        f"{clsname2tuplestring(member.cls, nsmodule)},"
+                        f" {member.attr!r}"
+                    )
+                    if member.single:
+                        f.write(")")
+                    f.write(")\n")
+                else:
+                    raise AssertionError(
+                        f"Unhandled member type {type(member).__name__}"
+                    )
+                if docstrings and member.docstring:
+                    writedoc(f, member.docstring, 1)
+            if (not docstrings or not cls.docstring) and not cls.members:
+                f.write("    pass\n")
+            written_classes.add(cls.name)


This function is massive in its length and nestedness. That makes it hard to read, understand and maintain. I think it has 4 responseabilities:

Resolves base classes (cls.bases).

Write imports for unresolved base classes.

Write class definitions and their members.

Write different member types (e.g., EnumPODMember, PODMember, ContainmentMember, AssociationMember).

Suggested change

def write_classes(

f: t.IO[str],

location: str,

classes: collections.deque[ClassDef],

docstrings: bool,

nsmodule: bool,

) -> None:

written_classes: set[str] = set()

while classes:

cls = classes.popleft()

for base in cls.bases:

if "." in base or base in written_classes:

continue

classes.appendleft(cls)

try:

index, basedef = next(

(i, c) for i, c in enumerate(classes) if c.name == base

)

except StopIteration:

if location:

isource = "." * (1 + location.count("."))

f.write(f"\n\nfrom {isource} import {base}")

else:

raise ValueError(

f"Reference to undefined base {base}"

) from None

written_classes.add(base)

else:

del classes[index]

classes.appendleft(basedef)

break

else:

f.write(f"\n\nclass {cls.name}(")

f.write(", ".join(cls.bases) or "m.ModelElement")

if cls.abstract:

f.write(", abstract=True")

f.write("):\n")

if docstrings and cls.docstring:

writedoc(f, cls.docstring, 1)

if cls.members:

f.write("\n")

for member in cls.members:

if isinstance(member, EnumPODMember):

f.write(

f" {member.name} = m.EnumPOD"

f"({member.xmlname!r}, {member.pod_type})\n"

)

elif isinstance(member, PODMember):

f.write(

f" {member.name} = m.{member.pod_type}POD"

f"({member.xmlname!r})\n"

)

elif isinstance(member, ContainmentMember):

f.write(f" {member.name} = ")

if member.single:

f.write(f"m.Single[{member.cls!r}](m.Containment(")

else:

f.write(f"m.Containment[{member.cls!r}](")

f.write(

f"{member.tag!r},"

f" {clsname2tuplestring(member.cls, nsmodule)}"

)

if member.single:

f.write(")")

f.write(")\n")

elif isinstance(member, AssociationMember):

f.write(f" {member.name} = ")

if member.single:

f.write(f"m.Single[{member.cls!r}](m.Association(")

else:

f.write(f"m.Association[{member.cls!r}](")

f.write(

f"{clsname2tuplestring(member.cls, nsmodule)},"

f" {member.attr!r}"

)

if member.single:

f.write(")")

f.write(")\n")

else:

raise AssertionError(

f"Unhandled member type {type(member).__name__}"

)

if docstrings and member.docstring:

writedoc(f, member.docstring, 1)

if (not docstrings or not cls.docstring) and not cls.members:

f.write(" pass\n")

written_classes.add(cls.name)

def write_classes(

f: t.IO[str],

location: str,

classes: collections.deque[ClassDef],

docstrings: bool,

nsmodule: bool,

) -> None:

"""Write class definitions to the given file-like object."""

written_classes: set[str] = set()

def resolve_base_classes(cls: ClassDef) -> None:

for base in cls.bases:

if "." in base or base in written_classes:

continue

classes.appendleft(cls)

try:

index, basedef = next(

(i, c) for i, c in enumerate(classes) if c.name == base

)

except StopIteration:

write_import_for_base(base)

else:

del classes[index]

classes.appendleft(basedef)

break

else:

write_class_definition(cls)

def write_import_for_base(base: str) -> None:

if location:

isource = "." * (1 + location.count("."))

f.write(f"\n\nfrom {isource} import {base}")

else:

raise ValueError(f"Reference to undefined base {base}") from None

written_classes.add(base)

def write_class_definition(cls: ClassDef) -> None:

f.write(f"\n\nclass {cls.name}(")

f.write(", ".join(cls.bases) or "m.ModelElement")

if cls.abstract:

f.write(", abstract=True")

f.write("):\n")

write_docstring_and_members(cls)

def write_docstring_and_members(cls: ClassDef) -> None:

if docstrings and cls.docstring:

writedoc(f, cls.docstring, 1)

if cls.members:

f.write("\n")

for member in cls.members:

write_member(member)

if docstrings and member.docstring:

writedoc(f, member.docstring, 1)

if not cls.members and not (docstrings and cls.docstring):

f.write(" pass\n")

written_classes.add(cls.name)

def write_member(member: t.Any) -> None:

if isinstance(member, EnumPODMember):

f.write(

f" {member.name} = m.EnumPOD({member.xmlname!r}, {member.pod_type})\n"

)

elif isinstance(member, PODMember):

f.write(f" {member.name} = m.{member.pod_type}POD({member.xmlname!r})\n")

elif isinstance(member, ContainmentMember):

write_containment_or_association(member, "Containment")

elif isinstance(member, AssociationMember):

write_containment_or_association(member, "Association")

else:

raise AssertionError(f"Unhandled member type {type(member).__name__}")

def write_containment_or_association(member: t.Any, member_type: str) -> None:

f.write(f" {member.name} = ")

if member.single:

f.write(f"m.Single[{member.cls!r}](m.{member_type}(")

else:

f.write(f"m.{member_type}[{member.cls!r}](")

f.write(f"{clsname2tuplestring(member.cls, nsmodule)}, {member.attr!r}")

if member.single:

f.write(")")

f.write(")\n")

while classes:

cls = classes.popleft()

resolve_base_classes(cls)

I think it has 4 responsibilities

When really it only has one, which is to write out all the classes passed to it. 🙃

def write_classes(...): def resolve_base_classes(cls: ClassDef) -> None: def write_import_for_base(base: str) -> None: def write_class_definition(cls: ClassDef) -> None: def write_docstring_and_members(cls: ClassDef) -> None: def write_member(member: t.Any) -> None: def write_containment_or_association(member: t.Any, member_type: str) -> None:

Interesting restructuring. I definitely don't agree with all parts of this, for example:

try: index, basedef = next( (i, c) for i, c in enumerate(classes) if c.name == base ) except StopIteration: write_import_for_base(base) # <-- the only call site

This just muddies the fact that we are in fact in an except handler during the entire write_import_for_base call. Without that in mind, I'd wonder why it uses raise ... from None instead of just a simple raise .... At the same time, it's only 6 lines long, which hardly warrants extracting it as its own function.

I also don't really like the fact that these functions are all happily writing to shared state (the classes deque and the written_classes set), without any obvious markers that they even use these variables at all (since they all also take arguments). That makes it much harder to see that (and where) things are actually being modified.

I do agree however that this function could be broken up in a few smaller chunks (just not that many). But when doing such a refactor, I'd prefer to completely pull these parts out as separate functions, which only communicate via arguments and returns, instead of establishing these side-channels.

def write_containment_or_association(member: t.Any, member_type: str) -> None:

This one actually isn't quite as straight-forward to factor out, because there's a very subtle difference. ContainmentMember has a tag attribute, which goes before the ClassName tuple, whereas AssociationMember has an attr attribute, which goes after.

Now of course one could argue that this makes no sense and should be the same order, which I actually agree with, but doing that in a backwards-compatible way and keeping mypy happy in the process was ~~too much work that I didn't want to deal with~~ out of scope for this PR stack, so I guess it is what it is.

Here's my take on the refactor: https://github.com/DSD-DBS/py-capellambse/compare/25f89424e96b28fe30eef85d6f534534ae920930..8a82beaf9e516b6222f1715058fed65784bb4030

I ended up only extracting two functions, write_class and write_relationship. The former replaces most of the outermost while-loop, basically everything related to actually writing out the class. The latter is essentially a rewrite of write_containment_or_association, accompanied with some changes to the data model to make that approach less of a mess. Instead of the sibling classes ContainmentMember and AssociationMember, there is now a single RelationshipMember class with a RelationshipType discriminant.

I've also made sure to carefully place a small number of empty lines, to better guide the reader's (and reviewer's) weary eyes through the thought process.

Wuestengecko · 2024-12-17T14:44:35Z

I don't know if you intend to document any of this code. If so, please add doc-strings to the functions that you find useful and meaningful to share in the docs.

It's a script, which isn't even distributed in the wheel. As such it's not really intended to provide any reusable functions or be in any way generic - that was just not in scope for what I tried to achieve. Therefore I don't see any value in documenting the individual functions beyond what the function/parameter names and type system already tell you anyways.

This new script is based roughly off of the old `generate_enums.py`. However, instead of aggregating all defined classes and enums into a single module, the new script keeps them separated by package. Note that the script's output still needs to be adjusted manually. However, it does help greatly in finding and covering new parts of the metamodel, as well as keeping the definitions in sync.

Wuestengecko requested a review from vik378 as a code owner November 18, 2024 13:27

This was referenced Nov 18, 2024

Extend the metamodel #477

Draft

Cleanup after the namespace discovery rework #478

Draft

Wuestengecko force-pushed the ecore-parsing branch from bb4a2bd to 714e751 Compare November 19, 2024 15:05

Wuestengecko force-pushed the class-discovery branch from bb63451 to 2b0b8c2 Compare November 19, 2024 15:05

Wuestengecko force-pushed the ecore-parsing branch from 714e751 to bf2b40d Compare November 27, 2024 11:07

Wuestengecko force-pushed the class-discovery branch from 2b0b8c2 to 09892e6 Compare November 27, 2024 11:07

Wuestengecko force-pushed the ecore-parsing branch from bf2b40d to bf291c3 Compare November 27, 2024 13:53

Wuestengecko force-pushed the class-discovery branch 2 times, most recently from 58ce90d to c1cf3c0 Compare November 27, 2024 14:12

Wuestengecko force-pushed the ecore-parsing branch from bf291c3 to bf71dae Compare November 27, 2024 14:12

Wuestengecko force-pushed the ecore-parsing branch from bf71dae to eff3410 Compare December 10, 2024 16:21

Wuestengecko force-pushed the class-discovery branch from c1cf3c0 to 36233ac Compare December 10, 2024 16:21

Wuestengecko force-pushed the ecore-parsing branch from eff3410 to ebfb3bf Compare December 11, 2024 16:04

Wuestengecko force-pushed the class-discovery branch 2 times, most recently from cb42e52 to c8be001 Compare December 11, 2024 16:08

Wuestengecko force-pushed the ecore-parsing branch from ebfb3bf to 48492e8 Compare December 11, 2024 16:08

Wuestengecko force-pushed the class-discovery branch from c8be001 to ed81db4 Compare December 12, 2024 16:42

Wuestengecko force-pushed the ecore-parsing branch from 48492e8 to 19748bd Compare December 12, 2024 16:42

ewuerger self-requested a review December 12, 2024 18:27

Wuestengecko force-pushed the class-discovery branch from ed81db4 to c3f5125 Compare December 13, 2024 11:00

Wuestengecko force-pushed the ecore-parsing branch 2 times, most recently from dfdf9de to 6a67687 Compare December 13, 2024 13:41

Wuestengecko force-pushed the class-discovery branch 2 times, most recently from 43f0f85 to ee2c6de Compare December 13, 2024 14:20

Wuestengecko force-pushed the ecore-parsing branch from 6a67687 to 69b684b Compare December 13, 2024 14:20

Wuestengecko force-pushed the class-discovery branch from ee2c6de to 6695bb4 Compare December 13, 2024 15:52

Wuestengecko force-pushed the ecore-parsing branch from 69b684b to f43c0da Compare December 13, 2024 15:52

Wuestengecko force-pushed the class-discovery branch from 6695bb4 to dbcb916 Compare December 13, 2024 15:59

Wuestengecko force-pushed the ecore-parsing branch from f43c0da to a1817d8 Compare December 13, 2024 15:59

Wuestengecko force-pushed the ecore-parsing branch from f50f893 to c4be98d Compare December 16, 2024 17:40

Wuestengecko force-pushed the class-discovery branch from e701888 to 9394970 Compare December 16, 2024 17:40

Wuestengecko force-pushed the ecore-parsing branch from c4be98d to 25f8942 Compare December 16, 2024 18:35

Wuestengecko force-pushed the class-discovery branch from 9394970 to 4cfea11 Compare December 16, 2024 18:35

ewuerger requested changes Dec 17, 2024

View reviewed changes

Wuestengecko force-pushed the ecore-parsing branch 3 times, most recently from f08a1b9 to 9013b5f Compare December 17, 2024 16:21

Wuestengecko force-pushed the class-discovery branch from 4cfea11 to bcd22a4 Compare December 17, 2024 17:18

Wuestengecko force-pushed the ecore-parsing branch 2 times, most recently from 27fb02f to 8a82bea Compare December 17, 2024 17:25

Wuestengecko requested a review from ewuerger December 17, 2024 17:39

Wuestengecko force-pushed the class-discovery branch from bcd22a4 to deb8e14 Compare December 18, 2024 09:04

Wuestengecko force-pushed the ecore-parsing branch from 8a82bea to 615d0c8 Compare December 18, 2024 09:04

Wuestengecko force-pushed the class-discovery branch from deb8e14 to f4b51b0 Compare December 18, 2024 10:14

Wuestengecko force-pushed the ecore-parsing branch 2 times, most recently from dc85bcf to 9ac75fd Compare December 18, 2024 10:17

Wuestengecko force-pushed the class-discovery branch 2 times, most recently from 2a340d5 to 9159ad5 Compare December 19, 2024 08:37

Wuestengecko force-pushed the ecore-parsing branch 2 times, most recently from 8204196 to 6b5b499 Compare December 19, 2024 08:56

Wuestengecko force-pushed the class-discovery branch from 9159ad5 to 250660d Compare December 19, 2024 08:56

Wuestengecko force-pushed the ecore-parsing branch from 6b5b499 to 3a354f9 Compare December 19, 2024 16:53

Wuestengecko force-pushed the class-discovery branch 2 times, most recently from 8b37632 to c3d890c Compare December 20, 2024 09:20

Wuestengecko force-pushed the ecore-parsing branch from 3a354f9 to d54066e Compare December 20, 2024 09:20

Wuestengecko force-pushed the class-discovery branch from c3d890c to ed5d538 Compare December 20, 2024 15:55

Wuestengecko force-pushed the ecore-parsing branch from d54066e to d1e4627 Compare December 20, 2024 15:55

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add a more complete ecore parsing script #476

Add a more complete ecore parsing script #476

Wuestengecko commented Nov 18, 2024 •

edited

Loading

ewuerger left a comment

ewuerger Dec 17, 2024

Wuestengecko Dec 17, 2024

ewuerger Dec 17, 2024

Wuestengecko Dec 17, 2024

Wuestengecko Dec 17, 2024

Wuestengecko Dec 17, 2024

Wuestengecko Dec 17, 2024

Wuestengecko commented Dec 17, 2024

Add a more complete ecore parsing script #476

Are you sure you want to change the base?

Add a more complete ecore parsing script #476

Conversation

Wuestengecko commented Nov 18, 2024 • edited Loading

ewuerger left a comment

Choose a reason for hiding this comment

ewuerger Dec 17, 2024

Choose a reason for hiding this comment

Wuestengecko Dec 17, 2024

Choose a reason for hiding this comment

ewuerger Dec 17, 2024

Choose a reason for hiding this comment

Wuestengecko Dec 17, 2024

Choose a reason for hiding this comment

Wuestengecko Dec 17, 2024

Choose a reason for hiding this comment

Wuestengecko Dec 17, 2024

Choose a reason for hiding this comment

Wuestengecko Dec 17, 2024

Choose a reason for hiding this comment

Wuestengecko commented Dec 17, 2024

Wuestengecko commented Nov 18, 2024 •

edited

Loading