diff --git a/CHANGELOG.md b/CHANGELOG.md index 06f7b0c..105acb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,22 @@ # Change Log -## [Unreleased][unreleased] +## [v1.1.0] + +**Release date: 2020-07-06** + +A no-op model is added to allow inspection of graphs without +deinverting triples, and Penman is now a bit more threadsafe for +multiprocessing applications. + +### Added + +* `penman.layout.Pop` class is now part of the public API ([#85]) +* `penman.models.noop` model for tree-like graph usage ([#84]) + +### Fixed + +* `penman.layout` no longer checks `POP` for object identity ([#85]) -(no unreleased changes) ## [v1.0.0] @@ -672,6 +686,7 @@ First release with very basic functionality. [v0.11.1]: ../../releases/tag/v0.11.1 [v0.12.0]: ../../releases/tag/v0.12.0 [v1.0.0]: ../../releases/tag/v1.0.0 +[v1.1.0]: ../../releases/tag/v1.1.0 [README]: README.md [#4]: https://github.com/goodmami/penman/issues/4 @@ -728,3 +743,5 @@ First release with very basic functionality. [#79]: https://github.com/goodmami/penman/issues/79 [#80]: https://github.com/goodmami/penman/issues/80 [#81]: https://github.com/goodmami/penman/issues/81 +[#84]: https://github.com/goodmami/penman/issues/84 +[#85]: https://github.com/goodmami/penman/issues/85 diff --git a/README.md b/README.md index c8a0c28..38d7a2e 100644 --- a/README.md +++ b/README.md @@ -108,12 +108,28 @@ surface alignments. ### Citation -The canonical citation for the Penman library will soon be a system -demo paper for [ACL 2020]. Until the proceedings are published, simply -putting https://github.com/goodmami/penman in a footnote is -sufficient. If you are referring to the graph -transformation/normalization work or prefer an academic citation, -please use the following: +If you make use of Penman in your work, please cite [Goodman, 2020]. +The BibTeX is below: + +[Goodman, 2020]: https://www.aclweb.org/anthology/2020.acl-demos.35/ + +```bibtex +@inproceedings{goodman-2020-penman, + title = "{P}enman: An Open-Source Library and Tool for {AMR} Graphs", + author = "Goodman, Michael Wayne", + booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations", + month = jul, + year = "2020", + address = "Online", + publisher = "Association for Computational Linguistics", + url = "https://www.aclweb.org/anthology/2020.acl-demos.35", + pages = "312--319", + abstract = "Abstract Meaning Representation (AMR) (Banarescu et al., 2013) is a framework for semantic dependencies that encodes its rooted and directed acyclic graphs in a format called PENMAN notation. The format is simple enough that users of AMR data often write small scripts or libraries for parsing it into an internal graph representation, but there is enough complexity that these users could benefit from a more sophisticated and well-tested solution. The open-source Python library Penman provides a robust parser, functions for graph inspection and manipulation, and functions for formatting graphs into PENMAN notation. Many functions are also available in a command-line tool, thus extending its utility to non-Python setups.", +} +``` + +For the graph transformation/normalization work, please use the +following: ``` bibtex @inproceedings{Goodman:2019, @@ -126,7 +142,6 @@ please use the following: } ``` -[ACL 2020]: https://acl2020.org/ ### Disclaimer diff --git a/docs/api/penman.layout.rst b/docs/api/penman.layout.rst index 492d7fd..af0696e 100644 --- a/docs/api/penman.layout.rst +++ b/docs/api/penman.layout.rst @@ -13,8 +13,18 @@ Epigraphical Markers .. autoclass:: Push :show-inheritance: +.. autoclass:: Pop + :show-inheritance: + .. autodata:: POP + Using the :data:`POP` singleton can help reduce memory usage and + processing time when working with many graphs, but it should + **not** be checked for object identity, such as ``if x is POP``, + when working with multiple processes because each process gets its + own instance. Instead, use a type check such as ``isinstance(x, + Pop)``. + Tree Functions -------------- diff --git a/docs/api/penman.models.noop.rst b/docs/api/penman.models.noop.rst new file mode 100644 index 0000000..ce4e9f9 --- /dev/null +++ b/docs/api/penman.models.noop.rst @@ -0,0 +1,13 @@ + +penman.models.noop +================== + +.. automodule:: penman.models.noop + +.. autoclass:: NoOpModel + :show-inheritance: + :members: + +.. data:: model + + An instance of the :class:`NoOpModel` class. diff --git a/docs/api/penman.models.rst b/docs/api/penman.models.rst index 6afe61f..7e95c48 100644 --- a/docs/api/penman.models.rst +++ b/docs/api/penman.models.rst @@ -5,8 +5,7 @@ penman.models .. automodule:: penman.models This sub-package contains specified instances of the - :class:`penman.model.Model` class, although currently there is only - one instance. + :class:`penman.model.Model` class. Available Models ---------------- @@ -15,3 +14,4 @@ penman.models :maxdepth: 1 penman.models.amr + penman.models.noop diff --git a/docs/command.rst b/docs/command.rst index 25bd14c..fd41a48 100644 --- a/docs/command.rst +++ b/docs/command.rst @@ -30,7 +30,7 @@ any input content that is not a graph or a metadata comment will be discarded. To see what features are available for the current version and how to call the command, run :command:`penman --help`:: - usage: penman [-h] [-V] [-v] [-q] [--model FILE | --amr] [--check] + usage: penman [-h] [-V] [-v] [-q] [--model FILE | --amr | --noop] [--check] [--indent N] [--compact] [--triples] [--make-variables FMT] [--rearrange KEY] [--reconfigure KEY] [--canonicalize-roles] [--reify-edges] [--dereify-edges] [--reify-attributes] @@ -49,6 +49,7 @@ and how to call the command, run :command:`penman --help`:: -q, --quiet suppress output on and --model FILE JSON model file describing the semantic model --amr use the AMR model + --noop use the no-op model --check check graphs for compliance with the model formatting options: @@ -159,7 +160,9 @@ require it. For Abstract Meaning Representation (AMR) graphs, the This model contains information about AMR's valid roles, canonical role inversions (such as ``:domain`` to ``:mod``), and relation -reifications. +reifications. Also available is the no-op model via :command:`--noop`, +which does not deinvert tree edges when interpreting the graph so that +a role like ``:ARG0-of`` is the role used in the graph triples. Other models can be given by using the :command:`--model` option with a path to a JSON file containing the model information:: diff --git a/penman/__about__.py b/penman/__about__.py index f33a1de..557010a 100644 --- a/penman/__about__.py +++ b/penman/__about__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -__version__ = '1.0.0' +__version__ = '1.1.0' __version_info__ = tuple( int(x) if x.isdigit() else x for x in __version__.replace('.', ' ').replace('-', ' ').split() diff --git a/penman/__main__.py b/penman/__main__.py index 3f43ce4..4a072c2 100644 --- a/penman/__main__.py +++ b/penman/__main__.py @@ -178,6 +178,9 @@ def main(): model_group.add_argument( '--amr', action='store_true', help='use the AMR model') + model_group.add_argument( + '--noop', action='store_true', + help='use the no-op model') parser.add_argument( '--check', action='store_true', help='check graphs for compliance with the model') @@ -231,7 +234,7 @@ def main(): logger = logging.getLogger('penman') logger.setLevel(logging.ERROR - (args.verbosity * 10)) - model = _get_model(args.amr, args.model) + model = _get_model(args.amr, args.noop, args.model) if args.rearrange: args.rearrange = _make_sort_key( @@ -272,9 +275,11 @@ def main(): sys.exit(exitcode) -def _get_model(amr, model_file): +def _get_model(amr, noop, model_file): if amr: from penman.models.amr import model + elif noop: + from penman.models.noop import model elif model_file: model = Model(**json.load(model_file)) else: diff --git a/penman/layout.py b/penman/layout.py index f961bbf..d7fde24 100644 --- a/penman/layout.py +++ b/penman/layout.py @@ -88,7 +88,7 @@ def __repr__(self): return f'Push({self.variable})' -class _Pop(LayoutMarker): +class Pop(LayoutMarker): """Epigraph marker to indicate the end of a node context.""" __slots__ = () @@ -97,8 +97,8 @@ def __repr__(self): return 'POP' -#: Epigraphical marker to indicate the end of a node context. -POP = _Pop() +#: A singleton instance of :class:`Pop`. +POP = Pop() # Tree to graph interpretation ################################################ @@ -261,7 +261,7 @@ def configure(g: Graph, model = _default_model node, data, nodemap = _configure(g, top, model) # remove any superfluous POPs at the end (maybe from dereification) - while data and data[-1] is POP: + while data and isinstance(data[-1], Pop): data.pop() # if any data remain, the graph was not properly annotated for a tree while data: @@ -274,7 +274,7 @@ def configure(g: Graph, raise LayoutError('possible cycle in configuration') data = skipped + data # remove any superfluous POPs - while data and data[-1] is POP: + while data and isinstance(data[-1], Pop): data.pop() tree = Tree(node, metadata=g.metadata) logger.debug('Configured: %s', tree) @@ -337,14 +337,14 @@ def _preconfigure_triple(triple, pushed, epidata): continue pushed.add(pvar) push = epi - elif epi is POP: + elif isinstance(epi, Pop): pops.append(epi) elif epi.mode == 1: # role epidata role = f'{role!s}{epi!s}' elif target and epi.mode == 2: # target epidata target = f'{target!s}{epi!s}' else: - logging.warning('epigraphical marker ignored: %r', epi) + logger.warning('epigraphical marker ignored: %r', epi) if push and pops: logger.warning( @@ -365,7 +365,7 @@ def _configure_node(var, data, nodemap, model): while data: datum = data.pop() - if datum is POP: + if isinstance(datum, Pop): break triple, push = datum @@ -407,7 +407,7 @@ def _find_next(data, nodemap): var = None for i in range(len(data) - 1, -1, -1): datum = data[i] - if datum is POP: + if isinstance(datum, Pop): continue source, _, target = datum[0] if source in nodemap and _get_or_establish_site(source, nodemap): @@ -632,7 +632,7 @@ def node_contexts(g: Graph) -> List[Union[Variable, None]]: try: for epi in g.epidata[triple]: - if epi is POP: + if isinstance(epi, Pop): stack.pop() except IndexError: break # more POPs than contexts in stack diff --git a/penman/model.py b/penman/model.py index fa23210..531592c 100644 --- a/penman/model.py +++ b/penman/model.py @@ -330,20 +330,20 @@ def errors(self, graph: Graph) -> Dict[Optional[BasicTriple], List[str]]: for triple in graph.triples: var, role, tgt = triple if not self.has_role(role): - err[triple].append(f'invalid role') + err[triple].append('invalid role') if var not in g: g[var] = [] g[var].append(triple) if not graph.top: - err[None].append(f'top is not set') + err[None].append('top is not set') elif graph.top not in g: - err[None].append(f'top is not a variable in the graph') + err[None].append('top is not a variable in the graph') else: reachable = _dfs(g, graph.top) unreachable = set(g).difference(reachable) for uvar in sorted(unreachable): for triple in g[uvar]: - err[triple].append(f'unreachable') + err[triple].append('unreachable') return dict(err) diff --git a/penman/models/noop.py b/penman/models/noop.py new file mode 100644 index 0000000..ec14a93 --- /dev/null +++ b/penman/models/noop.py @@ -0,0 +1,22 @@ +""" +No-op semantic model definition. +""" + +from penman.types import BasicTriple +from penman.model import Model + + +class NoOpModel(Model): + """ + A no-operation model that mostly leaves things alone. + + This model is like the default :class:`~penman.model.Model` except + that :meth:`NoOpModel.deinvert` always returns the original + triple, even if it was inverted. + """ + def deinvert(self, triple: BasicTriple) -> BasicTriple: + """Return *triple* (does not deinvert).""" + return triple + + +model = NoOpModel() diff --git a/penman/transform.py b/penman/transform.py index 52529db..dc7da33 100644 --- a/penman/transform.py +++ b/penman/transform.py @@ -3,7 +3,7 @@ Tree and graph transformations. """ -from typing import Union, Dict, Set, List, Tuple +from typing import Optional, Dict, Set, List, Tuple import logging from penman.types import (Variable, Target, BasicTriple, Node) @@ -13,7 +13,13 @@ from penman.tree import (Tree, is_atomic) from penman.graph import (Graph, CONCEPT_ROLE) from penman.model import Model -from penman.layout import (Push, POP, appears_inverted, get_pushed_variable) +from penman.layout import ( + Push, + Pop, + POP, + appears_inverted, + get_pushed_variable, +) logger = logging.getLogger(__name__) @@ -268,7 +274,7 @@ def indicate_branches(g: Graph, model: Model) -> Graph: return g -_SplitMarkers = Tuple[Union[Epidatum, None], Epidata, Epidata, Epidata] +_SplitMarkers = Tuple[Optional[Push], List[Pop], Epidata, Epidata] def _reified_markers(epidata: Epidata) -> _SplitMarkers: @@ -296,7 +302,7 @@ def _reified_markers(epidata: Epidata) -> _SplitMarkers: for epi in epidata: if isinstance(epi, Push): push = epi - elif epi is POP: + elif isinstance(epi, Pop): pops.append(epi) elif epi.mode == 1: role_epis.append(epi) diff --git a/tests/test_layout.py b/tests/test_layout.py index 28715d2..3d0e600 100644 --- a/tests/test_layout.py +++ b/tests/test_layout.py @@ -1,12 +1,15 @@ -import pytest import random +import logging + +import pytest from penman.exceptions import LayoutError from penman.model import Model from penman.tree import Tree from penman.graph import Graph from penman.codec import PENMANCodec +from penman import layout from penman.layout import ( interpret, rearrange, @@ -128,6 +131,17 @@ def test_issue_34(): (':polarity', '-')]))]))])) +def test_issue_85(monkeypatch, caplog): + # https://github.com/goodmami/penman/issues/85 + # Emulate multiprocessing by reassigning POP + with monkeypatch.context() as m: + m.setattr(layout, 'POP', layout.Pop()) + g = codec.decode('(a / alpha :ARG0 (b / beta))') + caplog.set_level(logging.WARNING) + codec.encode(g, indent=None) + assert 'epigraphical marker ignored: POP' not in caplog.text + + def test_reconfigure(): g = codec.decode(''' (a / alpha