diff --git a/aas_core_codegen/rdf_shacl/shacl.py b/aas_core_codegen/rdf_shacl/shacl.py index f97fbe788..2835691e0 100644 --- a/aas_core_codegen/rdf_shacl/shacl.py +++ b/aas_core_codegen/rdf_shacl/shacl.py @@ -7,6 +7,7 @@ from aas_core_codegen import intermediate, specific_implementations, infer_for_schema from aas_core_codegen.common import Stripped, Error, assert_never, Identifier +from aas_core_codegen.parse import retree as parse_retree from aas_core_codegen.rdf_shacl import ( naming as rdf_shacl_naming, common as rdf_shacl_common, @@ -214,7 +215,33 @@ def _define_property_shape( # region Define patterns for pattern_constraint in pattern_constraints: - pattern_literal = rdf_shacl_common.string_literal(pattern_constraint.pattern) + # NOTE (mristin): + # We need to render the regular expression so that the pattern appears in + # the canonical form. The original pattern in the specification might be written + # in Python dialect, which does not translate directly to many regex engines. + # + # For example, repetition bounds can be given with 0 omitted (*e.g.*, ``{,4}``), + # while SHACL and Java need an explicit zero (``{0,4}``). Our standard renderer + # puts an explicit zero. 
+ + regex, parse_error = parse_retree.parse([pattern_constraint.pattern]) + if parse_error is not None: + return None, Error( + prop.parsed.node, + f"(mristin): " + f"The pattern could not be parsed: {pattern_constraint.pattern}", + ) + assert regex is not None + + rendered_pattern = parse_retree.render(regex) + + assert len(rendered_pattern) == 1 and isinstance(rendered_pattern[0], str), ( + "Expected strictly only a string output from rendering " + f"a string pattern (*e.g.*, no FormattedValues), " + f"but got: {rendered_pattern}" + ) + + pattern_literal = rdf_shacl_common.string_literal(rendered_pattern[0]) stmts.append(Stripped(f"sh:pattern {pattern_literal} ;")) diff --git a/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl b/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl index 4b734c5c0..9ba839ee8 100644 --- a/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl +++ b/test_data/rdf_shacl/test_main/expected/aas_core_meta.v3/expected_output/shacl-schema.ttl @@ -36,7 +36,7 @@ aas:AbstractLangStringShape a sh:NodeShape ; sh:datatype xs:string ; sh:minCount 1 ; sh:maxCount 1 ; - sh:pattern "^(([a-zA-Z]{2,3}(-[a-zA-Z]{3}(-[a-zA-Z]{3}){,2})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})(-[a-zA-Z]{4})?(-([a-zA-Z]{2}|[0-9]{3}))?(-(([a-zA-Z0-9]){5,8}|[0-9]([a-zA-Z0-9]){3}))*(-[0-9A-WY-Za-wy-z](-([a-zA-Z0-9]){2,8})+)*(-[xX](-([a-zA-Z0-9]){1,8})+)?|[xX](-([a-zA-Z0-9]){1,8})+|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" ; + sh:pattern 
"^(([a-zA-Z]{2,3}(-[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2})?|[a-zA-Z]{4}|[a-zA-Z]{5,8})(-[a-zA-Z]{4})?(-([a-zA-Z]{2}|[0-9]{3}))?(-(([a-zA-Z0-9]){5,8}|[0-9]([a-zA-Z0-9]){3}))*(-[0-9A-WY-Za-wy-z](-([a-zA-Z0-9]){2,8})+)*(-[xX](-([a-zA-Z0-9]){1,8})+)?|[xX](-([a-zA-Z0-9]){1,8})+|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -45,7 +45,7 @@ aas:AbstractLangStringShape a sh:NodeShape ; sh:minCount 1 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; . @@ -60,7 +60,7 @@ aas:AdministrativeInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 4 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; sh:pattern "^(0|[1-9][0-9]*)$" ; ] ; sh:property [ @@ -71,7 +71,7 @@ aas:AdministrativeInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 4 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; sh:pattern "^(0|[1-9][0-9]*)$" ; ] ; sh:property [ @@ -89,7 +89,7 @@ aas:AdministrativeInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; . 
@@ -147,7 +147,7 @@ aas:AssetInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -163,7 +163,7 @@ aas:AssetInformationShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -206,7 +206,7 @@ aas:BasicEventElementShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 255 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -259,8 +259,8 @@ aas:BlobShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 100 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; - sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ ]*;[ ]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([ !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([ !-~]|[\\x80-\\xff]))*\"))*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; + sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ \\t]*;[ \\t]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([\\t !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([\\t !-~]|[\\x80-\\xff]))*\"))*$" ; ] ; . 
@@ -336,7 +336,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -352,7 +352,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -361,7 +361,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -383,7 +383,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:minCount 0 ; sh:maxCount 1 ; sh:minLength 1 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -400,7 +400,7 @@ aas:DataSpecificationIec61360Shape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -453,7 +453,7 @@ aas:EntityShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a 
sh:PropertyShape ; @@ -540,7 +540,7 @@ aas:EventPayloadShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 255 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -577,7 +577,7 @@ aas:ExtensionShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -612,7 +612,7 @@ aas:FileShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -622,8 +622,8 @@ aas:FileShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 100 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; - sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ ]*;[ ]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([ !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([ !-~]|[\\x80-\\xff]))*\"))*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; + sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ \\t]*;[ \\t]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([\\t !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([\\t !-~]|[\\x80-\\xff]))*\"))*$" ; ] ; . 
@@ -753,7 +753,7 @@ aas:IdentifiableShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; . @@ -774,7 +774,7 @@ aas:KeyShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; . @@ -993,7 +993,7 @@ aas:QualifierShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1067,7 +1067,7 @@ aas:ReferableShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1077,7 +1077,7 @@ aas:ReferableShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 128 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; sh:pattern "^[a-zA-Z][a-zA-Z0-9_]*$" ; ] ; sh:property [ @@ -1159,7 +1159,7 @@ aas:ResourceShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1169,8 +1169,8 @@ aas:ResourceShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; 
sh:maxLength 100 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; - sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ ]*;[ ]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([ !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([ !-~]|[\\x80-\\xff]))*\"))*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; + sh:pattern "^([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+/([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+([ \\t]*;[ \\t]*([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+=(([!#$%&'*+\\-.^_`|~0-9a-zA-Z])+|\"(([\\t !#-\\[\\]-~]|[\\x80-\\xff])|\\\\([\\t !-~]|[\\x80-\\xff]))*\"))*$" ; ] ; . @@ -1185,7 +1185,7 @@ aas:SpecificAssetIdShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 64 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1195,7 +1195,7 @@ aas:SpecificAssetIdShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ; @@ -1311,7 +1311,7 @@ aas:ValueReferencePairShape a sh:NodeShape ; sh:maxCount 1 ; sh:minLength 1 ; sh:maxLength 2000 ; - sh:pattern "^[\\x09\\x0A\\x0D\\x20-\\uD7FF\\uE000-\\uFFFD\\U00010000-\\U0010FFFF]*$" ; + sh:pattern "^[\\x09\\x0a\\x0d\\x20-\\ud7ff\\ue000-\\ufffd\\U00010000-\\U0010ffff]*$" ; ] ; sh:property [ a sh:PropertyShape ;