From 8fa7386aa5ae650b5a27265f93960f0c230019e3 Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 6 Jul 2020 14:25:02 +0800 Subject: [PATCH 1/6] Don't check POP for object identity This causes issues in multiprocessing as the 'singleton' object gets a unique instance per process. Fixes #85 --- CHANGELOG.md | 12 ++++++++++-- docs/api/penman.layout.rst | 10 ++++++++++ penman/layout.py | 18 +++++++++--------- penman/transform.py | 10 ++++++++-- tests/test_layout.py | 16 +++++++++++++++- 5 files changed, 52 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06f7b0c..7a06f8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,15 @@ # Change Log -## [Unreleased][unreleased] +## [v1.1.0] + +### Added + +* `penman.layout.Pop` class is now part of the public API ([#85]) + +### Fixed + +* `penman.layout` no longer checks `POP` for object identity ([#85]) -(no unreleased changes) ## [v1.0.0] @@ -728,3 +735,4 @@ First release with very basic functionality. [#79]: https://github.com/goodmami/penman/issues/79 [#80]: https://github.com/goodmami/penman/issues/80 [#81]: https://github.com/goodmami/penman/issues/81 +[#85]: https://github.com/goodmami/penman/issues/85 diff --git a/docs/api/penman.layout.rst b/docs/api/penman.layout.rst index 492d7fd..af0696e 100644 --- a/docs/api/penman.layout.rst +++ b/docs/api/penman.layout.rst @@ -13,8 +13,18 @@ Epigraphical Markers .. autoclass:: Push :show-inheritance: +.. autoclass:: Pop + :show-inheritance: + .. autodata:: POP + Using the :data:`POP` singleton can help reduce memory usage and + processing time when working with many graphs, but it should + **not** be checked for object identity, such as ``if x is POP``, + when working with multiple processes because each process gets its + own instance. Instead, use a type check such as ``isinstance(x, + Pop)``. + Tree Functions -------------- diff --git a/penman/layout.py b/penman/layout.py index f961bbf..14a20af 100644 --- a/penman/layout.py +++ b/penman/layout.py @@ -88,7 +88,7 @@ def __repr__(self): return f'Push({self.variable})' -class _Pop(LayoutMarker): +class Pop(LayoutMarker): """Epigraph marker to indicate the end of a node context.""" __slots__ = () @@ -97,8 +97,8 @@ def __repr__(self): return 'POP' -#: Epigraphical marker to indicate the end of a node context. -POP = _Pop() +#: A singleton instance of :class:`Pop`. +POP = Pop() # Tree to graph interpretation ################################################ @@ -261,7 +261,7 @@ def configure(g: Graph, model = _default_model node, data, nodemap = _configure(g, top, model) # remove any superfluous POPs at the end (maybe from dereification) - while data and data[-1] is POP: + while data and isinstance(data[-1], Pop): data.pop() # if any data remain, the graph was not properly annotated for a tree while data: @@ -274,7 +274,7 @@ def configure(g: Graph, raise LayoutError('possible cycle in configuration') data = skipped + data # remove any superfluous POPs - while data and data[-1] is POP: + while data and isinstance(data[-1], Pop): data.pop() tree = Tree(node, metadata=g.metadata) logger.debug('Configured: %s', tree) @@ -337,7 +337,7 @@ def _preconfigure_triple(triple, pushed, epidata): continue pushed.add(pvar) push = epi - elif epi is POP: + elif isinstance(epi, Pop): pops.append(epi) elif epi.mode == 1: # role epidata role = f'{role!s}{epi!s}' @@ -365,7 +365,7 @@ def _configure_node(var, data, nodemap, model): while data: datum = data.pop() - if datum is POP: + if isinstance(datum, Pop): break triple, push = datum @@ -407,7 +407,7 @@ def _find_next(data, nodemap): var = None for i in range(len(data) - 1, -1, -1): datum = data[i] - if datum is POP: + if isinstance(datum, Pop): continue source, _, target = datum[0] if source in nodemap and _get_or_establish_site(source, nodemap): @@ -632,7 +632,7 @@ def node_contexts(g: Graph) -> List[Union[Variable, None]]: try: for epi in g.epidata[triple]: - if epi is POP: + if isinstance(epi, Pop): stack.pop() except IndexError: break # more POPs than contexts in stack diff --git a/penman/transform.py b/penman/transform.py index 52529db..2f4c9c0 100644 --- a/penman/transform.py +++ b/penman/transform.py @@ -13,7 +13,13 @@ from penman.tree import (Tree, is_atomic) from penman.graph import (Graph, CONCEPT_ROLE) from penman.model import Model -from penman.layout import (Push, POP, appears_inverted, get_pushed_variable) +from penman.layout import ( + Push, + Pop, + POP, + appears_inverted, + get_pushed_variable, +) logger = logging.getLogger(__name__) @@ -296,7 +302,7 @@ def _reified_markers(epidata: Epidata) -> _SplitMarkers: for epi in epidata: if isinstance(epi, Push): push = epi - elif epi is POP: + elif isinstance(epi, Pop): pops.append(epi) elif epi.mode == 1: role_epis.append(epi) diff --git a/tests/test_layout.py b/tests/test_layout.py index 28715d2..3d0e600 100644 --- a/tests/test_layout.py +++ b/tests/test_layout.py @@ -1,12 +1,15 @@ -import pytest import random +import logging + +import pytest from penman.exceptions import LayoutError from penman.model import Model from penman.tree import Tree from penman.graph import Graph from penman.codec import PENMANCodec +from penman import layout from penman.layout import ( interpret, rearrange, @@ -128,6 +131,17 @@ def test_issue_34(): (':polarity', '-')]))]))])) +def test_issue_85(monkeypatch, caplog): + # https://github.com/goodmami/penman/issues/85 + # Emulate multiprocessing by reassigning POP + with monkeypatch.context() as m: + m.setattr(layout, 'POP', layout.Pop()) + g = codec.decode('(a / alpha :ARG0 (b / beta))') + caplog.set_level(logging.WARNING) + codec.encode(g, indent=None) + assert 'epigraphical marker ignored: POP' not in caplog.text + + def test_reconfigure(): g = codec.decode(''' (a / alpha From f315b370088a1e558464701c70ec63af6000cd9f Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 6 Jul 2020 14:26:12 +0800 Subject: [PATCH 2/6] Use the correct logger in penman.layout --- penman/layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/penman/layout.py b/penman/layout.py index 14a20af..d7fde24 100644 --- a/penman/layout.py +++ b/penman/layout.py @@ -344,7 +344,7 @@ def _preconfigure_triple(triple, pushed, epidata): elif target and epi.mode == 2: # target epidata target = f'{target!s}{epi!s}' else: - logging.warning('epigraphical marker ignored: %r', epi) + logger.warning('epigraphical marker ignored: %r', epi) if push and pops: logger.warning( From 9e20e91e023cf46c46dd8ee64bac0936cc605434 Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 6 Jul 2020 14:27:43 +0800 Subject: [PATCH 3/6] flake8 and mypy fixes --- penman/model.py | 8 ++++---- penman/transform.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/penman/model.py b/penman/model.py index fa23210..531592c 100644 --- a/penman/model.py +++ b/penman/model.py @@ -330,20 +330,20 @@ def errors(self, graph: Graph) -> Dict[Optional[BasicTriple], List[str]]: for triple in graph.triples: var, role, tgt = triple if not self.has_role(role): - err[triple].append(f'invalid role') + err[triple].append('invalid role') if var not in g: g[var] = [] g[var].append(triple) if not graph.top: - err[None].append(f'top is not set') + err[None].append('top is not set') elif graph.top not in g: - err[None].append(f'top is not a variable in the graph') + err[None].append('top is not a variable in the graph') else: reachable = _dfs(g, graph.top) unreachable = set(g).difference(reachable) for uvar in sorted(unreachable): for triple in g[uvar]: - err[triple].append(f'unreachable') + err[triple].append('unreachable') return dict(err) diff --git a/penman/transform.py b/penman/transform.py index 2f4c9c0..dc7da33 100644 --- a/penman/transform.py +++ b/penman/transform.py @@ -3,7 +3,7 @@ Tree and graph transformations. """ -from typing import Union, Dict, Set, List, Tuple +from typing import Optional, Dict, Set, List, Tuple import logging from penman.types import (Variable, Target, BasicTriple, Node) @@ -274,7 +274,7 @@ def indicate_branches(g: Graph, model: Model) -> Graph: return g -_SplitMarkers = Tuple[Union[Epidatum, None], Epidata, Epidata, Epidata] +_SplitMarkers = Tuple[Optional[Push], List[Pop], Epidata, Epidata] def _reified_markers(epidata: Epidata) -> _SplitMarkers: From 5fa909d29973070b799a4099a06158382a0e2c95 Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 6 Jul 2020 17:10:39 +0800 Subject: [PATCH 4/6] Add noop model Resolves #84 Also related to #83 --- CHANGELOG.md | 2 ++ docs/api/penman.models.noop.rst | 13 +++++++++++++ docs/api/penman.models.rst | 4 ++-- docs/command.rst | 7 +++++-- penman/__main__.py | 9 +++++++-- penman/models/noop.py | 22 ++++++++++++++++++++++ 6 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 docs/api/penman.models.noop.rst create mode 100644 penman/models/noop.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a06f8a..8ab7d38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Added * `penman.layout.Pop` class is now part of the public API ([#85]) +* `penman.models.noop` model for tree-like graph usage ([#84]) ### Fixed @@ -735,4 +736,5 @@ First release with very basic functionality. [#79]: https://github.com/goodmami/penman/issues/79 [#80]: https://github.com/goodmami/penman/issues/80 [#81]: https://github.com/goodmami/penman/issues/81 +[#84]: https://github.com/goodmami/penman/issues/84 [#85]: https://github.com/goodmami/penman/issues/85 diff --git a/docs/api/penman.models.noop.rst b/docs/api/penman.models.noop.rst new file mode 100644 index 0000000..ce4e9f9 --- /dev/null +++ b/docs/api/penman.models.noop.rst @@ -0,0 +1,13 @@ + +penman.models.noop +================== + +.. automodule:: penman.models.noop + +.. autoclass:: NoOpModel + :show-inheritance: + :members: + +.. data:: model + + An instance of the :class:`NoOpModel` class. diff --git a/docs/api/penman.models.rst b/docs/api/penman.models.rst index 6afe61f..7e95c48 100644 --- a/docs/api/penman.models.rst +++ b/docs/api/penman.models.rst @@ -5,8 +5,7 @@ penman.models .. automodule:: penman.models This sub-package contains specified instances of the - :class:`penman.model.Model` class, although currently there is only - one instance. + :class:`penman.model.Model` class. Available Models ---------------- @@ -15,3 +14,4 @@ penman.models :maxdepth: 1 penman.models.amr + penman.models.noop diff --git a/docs/command.rst b/docs/command.rst index 25bd14c..fd41a48 100644 --- a/docs/command.rst +++ b/docs/command.rst @@ -30,7 +30,7 @@ any input content that is not a graph or a metadata comment will be discarded. To see what features are available for the current version and how to call the command, run :command:`penman --help`:: - usage: penman [-h] [-V] [-v] [-q] [--model FILE | --amr] [--check] + usage: penman [-h] [-V] [-v] [-q] [--model FILE | --amr | --noop] [--check] [--indent N] [--compact] [--triples] [--make-variables FMT] [--rearrange KEY] [--reconfigure KEY] [--canonicalize-roles] [--reify-edges] [--dereify-edges] [--reify-attributes] @@ -49,6 +49,7 @@ and how to call the command, run :command:`penman --help`:: -q, --quiet suppress output on and --model FILE JSON model file describing the semantic model --amr use the AMR model + --noop use the no-op model --check check graphs for compliance with the model formatting options: @@ -159,7 +160,9 @@ require it. For Abstract Meaning Representation (AMR) graphs, the This model contains information about AMR's valid roles, canonical role inversions (such as ``:domain`` to ``:mod``), and relation -reifications. +reifications. Also available is the no-op model via :command:`--noop`, +which does not deinvert tree edges when interpreting the graph so that +a role like ``:ARG0-of`` is the role used in the graph triples. Other models can be given by using the :command:`--model` option with a path to a JSON file containing the model information:: diff --git a/penman/__main__.py b/penman/__main__.py index 3f43ce4..4a072c2 100644 --- a/penman/__main__.py +++ b/penman/__main__.py @@ -178,6 +178,9 @@ def main(): model_group.add_argument( '--amr', action='store_true', help='use the AMR model') + model_group.add_argument( + '--noop', action='store_true', + help='use the no-op model') parser.add_argument( '--check', action='store_true', help='check graphs for compliance with the model') @@ -231,7 +234,7 @@ def main(): logger = logging.getLogger('penman') logger.setLevel(logging.ERROR - (args.verbosity * 10)) - model = _get_model(args.amr, args.model) + model = _get_model(args.amr, args.noop, args.model) if args.rearrange: args.rearrange = _make_sort_key( @@ -272,9 +275,11 @@ def main(): sys.exit(exitcode) -def _get_model(amr, model_file): +def _get_model(amr, noop, model_file): if amr: from penman.models.amr import model + elif noop: + from penman.models.noop import model elif model_file: model = Model(**json.load(model_file)) else: diff --git a/penman/models/noop.py b/penman/models/noop.py new file mode 100644 index 0000000..ec14a93 --- /dev/null +++ b/penman/models/noop.py @@ -0,0 +1,22 @@ +""" +No-op semantic model definition. +""" + +from penman.types import BasicTriple +from penman.model import Model + + +class NoOpModel(Model): + """ + A no-operation model that mostly leaves things alone. + + This model is like the default :class:`~penman.model.Model` except + that :meth:`NoOpModel.deinvert` always returns the original + triple, even if it was inverted. + """ + def deinvert(self, triple: BasicTriple) -> BasicTriple: + """Return *triple* (does not deinvert).""" + return triple + + +model = NoOpModel() From 20bdf9b98216b0e2daf8cd5a2f6e237703551ba1 Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 6 Jul 2020 17:13:50 +0800 Subject: [PATCH 5/6] Prepare for v1.1.0 --- CHANGELOG.md | 7 +++++++ penman/__about__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ab7d38..105acb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## [v1.1.0] +**Release date: 2020-07-06** + +A no-op model is added to allow inspection of graphs without +deinverting triples, and Penman is now a bit more threadsafe for +multiprocessing applications. + ### Added * `penman.layout.Pop` class is now part of the public API ([#85]) @@ -680,6 +686,7 @@ First release with very basic functionality. [v0.11.1]: ../../releases/tag/v0.11.1 [v0.12.0]: ../../releases/tag/v0.12.0 [v1.0.0]: ../../releases/tag/v1.0.0 +[v1.1.0]: ../../releases/tag/v1.1.0 [README]: README.md [#4]: https://github.com/goodmami/penman/issues/4 diff --git a/penman/__about__.py b/penman/__about__.py index f33a1de..557010a 100644 --- a/penman/__about__.py +++ b/penman/__about__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__version__ = '1.0.0' +__version__ = '1.1.0' __version_info__ = tuple( int(x) if x.isdigit() else x for x in __version__.replace('.', ' ').replace('-', ' ').split() From 7a442b069641237f34b3ad7c9334326a552ea790 Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Mon, 6 Jul 2020 17:32:05 +0800 Subject: [PATCH 6/6] Update citation in README --- README.md | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c8a0c28..38d7a2e 100644 --- a/README.md +++ b/README.md @@ -108,12 +108,28 @@ surface alignments. ### Citation -The canonical citation for the Penman library will soon be a system -demo paper for [ACL 2020]. Until the proceedings are published, simply -putting https://github.com/goodmami/penman in a footnote is -sufficient. If you are referring to the graph -transformation/normalization work or prefer an academic citation, -please use the following: +If you make use of Penman in your work, please cite [Goodman, 2020]. +The BibTeX is below: + +[Goodman, 2020]: https://www.aclweb.org/anthology/2020.acl-demos.35/ + +```bibtex +@inproceedings{goodman-2020-penman, + title = "{P}enman: An Open-Source Library and Tool for {AMR} Graphs", + author = "Goodman, Michael Wayne", + booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations", + month = jul, + year = "2020", + address = "Online", + publisher = "Association for Computational Linguistics", + url = "https://www.aclweb.org/anthology/2020.acl-demos.35", + pages = "312--319", + abstract = "Abstract Meaning Representation (AMR) (Banarescu et al., 2013) is a framework for semantic dependencies that encodes its rooted and directed acyclic graphs in a format called PENMAN notation. The format is simple enough that users of AMR data often write small scripts or libraries for parsing it into an internal graph representation, but there is enough complexity that these users could benefit from a more sophisticated and well-tested solution. The open-source Python library Penman provides a robust parser, functions for graph inspection and manipulation, and functions for formatting graphs into PENMAN notation. Many functions are also available in a command-line tool, thus extending its utility to non-Python setups.", +} +``` + +For the graph transformation/normalization work, please use the +following: ``` bibtex @inproceedings{Goodman:2019, @@ -126,7 +142,6 @@ please use the following: } ``` -[ACL 2020]: https://acl2020.org/ ### Disclaimer