goodmami · goodmami · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Change Log
 
+## [v1.3.1]
+
+**Release date: 2024-08-06**
+
+### Fixed
+
+* Double-quote characters are no longer parsed as roles or symbols ([#143])
+
+
 ## [v1.3.0]
 
 **Release date: 2023-11-14**

diff --git a/docs/notation.rst b/docs/notation.rst
@@ -90,8 +90,10 @@ grammar to allow for surface alignments.
    Symbol    <- NameChar+
    Role      <- ':' NameChar*
    Alignment <- '~' ([a-zA-Z] '.'?)? Digit+ (',' Digit+)*
-   String    <- '"' (!'"' ('\\' . / .))* '"'
-   NameChar  <- ![ \n\t\r\f\v()/:~] .
+   String    <- '"' (!'"' (StrEscape / StrChar))* '"'
+   StrEscape <- '\\' StrChar
+   StrChar   <- ![\n\r\f\v] .
+   NameChar  <- ![ \n\t\r\f\v"()/:~] .
    Digit     <- [0-9]
 
 This grammar has some seemingly unnecessary ambiguity in that both the

diff --git a/penman/__about__.py b/penman/__about__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-__version__ = '1.3.0'
+__version__ = '1.3.1'
 __version_info__ = tuple(
     int(x) if x.isdigit() else x
     for x in __version__.replace('.', ' ').replace('-', ' ').split()

diff --git a/penman/_lexer.py b/penman/_lexer.py
@@ -22,8 +22,8 @@
     'ALIGNMENT': r'~(?:[a-z]\.?)?[0-9]+(?:,[0-9]+)*',
     # ROLE cannot be made up of COLON + SYMBOL because it then becomes
     # difficult to detect anonymous roles: (a : b) vs (a :b c)
-    'ROLE': r':[^ \t\r\n\v\f()\/:~]*',
-    'SYMBOL': r'[^ \t\r\n\v\f()\/:~]+',
+    'ROLE': r':[^ \t\r\n\v\f"()\/:~]*',
+    'SYMBOL': r'[^ \t\r\n\v\f"()\/:~]+',
     'LPAREN': r'\(',
     'RPAREN': r'\)',
     'SLASH': r'\/',  # concept (node label) role

diff --git a/penman/codec.py b/penman/codec.py
@@ -3,6 +3,7 @@
 """
 Serialization of PENMAN graphs.
 """
+
 from pathlib import Path
 from typing import IO, Iterable, Iterator, List, Optional, Union
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -57,7 +57,7 @@ dependencies = [
 ]
 [tool.hatch.envs.dev.scripts]
 test = "pytest {args:--doctest-glob=*.rst --doctest-modules --ignore-glob=penman/interface.py}"
-lint = "ruff {args} penman/"
+lint = "ruff check {args} penman/"
 typecheck = "mypy penman/"
 format = "ruff format {args} penman/"
 
@@ -75,6 +75,8 @@ clean = "make -C docs clean"
 [tool.ruff]
 target-version = "py38"
 line-length = 79
+
+[tool.ruff.lint]
 select = [
   "B",    # flake8-bugbear
   "C90",  # McCabe cyclomatic complexity
@@ -84,10 +86,8 @@ select = [
   "N",    # PEP-8 naming
   "W",    # pycodestyle warnings
 ]
-
-[tool.ruff.lint.isort]
-combine-as-imports = true
-force-wrap-aliases = true
+isort.combine-as-imports = true
+isort.force-wrap-aliases = true
 
 [tool.ruff.format]
 quote-style = "single"
diff --git a/tests/test_codec.py b/tests/test_codec.py
@@ -229,6 +229,16 @@ def test_decode_recursion_limit(self):
         assert len(g.triples) == (n         # n :instance triples
                                   + n - 1)  # n - 1 :ARG0 triples
 
+    def test_decode_issue_143(self):
+        # https://github.com/goodmami/penman/issues/143
+        with pytest.raises(penman.DecodeError):
+            decode('(a :op ")')  # lone quote where a target is expected
+        with pytest.raises(penman.DecodeError):
+            decode('(a :op1 " :op2 "foo")')  # odd number of quotes
+        with pytest.raises(penman.DecodeError):
+            decode('(a :" foo)')  # quote immediately after the role colon
+
+
     def test_encode(self, x1):
         # empty graph
         g = penman.Graph([])

diff --git a/tests/test_lexer.py b/tests/test_lexer.py
@@ -74,3 +74,18 @@ def test_nonbreaking_space_issue_99():
     assert [tok.type for tok in lexer.lex('1\r2')] == ['SYMBOL', 'SYMBOL']
     assert [tok.type for tok in lexer.lex('1\u00a02')] == ['SYMBOL']
     assert [tok.type for tok in lexer.lex('あ　い')] == ['SYMBOL']
+
+
+def test_unterminated_string_issue_143():
+    # https://github.com/goodmami/penman/issues/143
+    # an unmatched double-quote lexes as UNEXPECTED rather than as a SYMBOL
+    assert [tok.type for tok in lexer.lex('(a :op ")')] == [
+        'LPAREN', 'SYMBOL', 'ROLE', 'UNEXPECTED', 'RPAREN'
+    ]
+    assert [tok.type for tok in lexer.lex('(a :op1 " :op2 "foo")')] == [
+        'LPAREN', 'SYMBOL', 'ROLE', 'STRING', 'SYMBOL', 'UNEXPECTED', 'RPAREN'
+    ]
+    # a quote cannot be part of a role name: ':"' lexes as ROLE, UNEXPECTED
+    assert [tok.type for tok in lexer.lex('(a :" b)')] == [
+        'LPAREN', 'SYMBOL', 'ROLE', 'UNEXPECTED', 'SYMBOL', 'RPAREN'
+    ]
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,6 +3,7 @@ @@
     """
     Serialization of PENMAN graphs.
     """
     from pathlib import Path
     from typing import IO, Iterable, Iterator, List, Optional, Union
@@ Expand Down @@