From 642cbdb410480766b9ec5025a17be09685fb8b1f Mon Sep 17 00:00:00 2001 From: Simon Johnston Date: Mon, 16 Sep 2024 19:30:20 -0700 Subject: [PATCH] refactor: new traits for read/write --- rdftk_io/Cargo.toml | 5 +- rdftk_io/mkgrammars.sh | 8 - rdftk_io/src/common/common.pest | 504 ++++++++++++++---- rdftk_io/src/common/mod.rs | 42 -- rdftk_io/src/common/parser/mod.rs | 149 ++++++ .../src/common/{parser.rs => parser/n3.rs} | 14 +- rdftk_io/src/common/parser/nquads.rs | 96 ++++ rdftk_io/src/common/parser/ntriples.rs | 290 ++++++++++ rdftk_io/src/common/parser/trig.rs | 44 ++ rdftk_io/src/common/parser/turtle.rs | 44 ++ rdftk_io/src/common/parser_error.rs | 152 ------ rdftk_io/src/dot/mod.rs | 8 +- rdftk_io/src/dot/writer.rs | 4 +- rdftk_io/src/json/mod.rs | 29 +- rdftk_io/src/json/reader.rs | 150 ++++-- rdftk_io/src/json/writer.rs | 31 +- rdftk_io/src/json_ld/mod.rs | 20 - rdftk_io/src/lib.rs | 9 +- rdftk_io/src/n3/mod.rs | 28 +- rdftk_io/src/n3/n3.g4 | 325 ----------- rdftk_io/src/n3/reader.rs | 48 +- rdftk_io/src/nq/mod.rs | 12 +- rdftk_io/src/nq/nq-in.pest | 35 -- rdftk_io/src/nq/nq.pest | 297 ----------- rdftk_io/src/nq/reader.rs | 82 +-- rdftk_io/src/nt/mod.rs | 6 +- rdftk_io/src/nt/nt-in.pest | 30 -- rdftk_io/src/nt/nt.pest | 292 ---------- rdftk_io/src/nt/reader.rs | 380 +------------ rdftk_io/src/trig/mod.rs | 23 +- rdftk_io/src/trig/reader.rs | 48 +- rdftk_io/src/trig/writer.rs | 63 ++- rdftk_io/src/turtle/mod.rs | 9 +- rdftk_io/src/turtle/parser/mod.rs | 102 ---- rdftk_io/src/turtle/reader.rs | 46 +- rdftk_io/src/turtle/turtle-in.pest | 114 ---- rdftk_io/src/turtle/turtle.pest | 376 ------------- rdftk_io/src/turtle/turtlestar.g4 | 304 ----------- rdftk_io/src/turtle/writer/mod.rs | 23 +- rdftk_io/src/turtle/writer/options.rs | 7 + rdftk_io/src/xml/mod.rs | 14 +- rdftk_io/src/xml/reader.rs | 38 +- rdftk_io/src/xml/writer.rs | 13 +- rdftk_io/tests/read_json.rs | 2 +- rdftk_io/tests/read_nq.rs | 27 + rdftk_io/tests/read_nt.rs | 54 ++ rdftk_io/tests/read_turtle.rs | 27 + rdftk_io/tests/read_xml.rs | 5 +- rdftk_io/tests/write_turtle.rs | 27 +- 49 files changed, 1559 insertions(+), 2897 deletions(-) delete mode 100755 rdftk_io/mkgrammars.sh create mode 100644 rdftk_io/src/common/parser/mod.rs rename rdftk_io/src/common/{parser.rs => parser/n3.rs} (83%) create mode 100644 rdftk_io/src/common/parser/nquads.rs create mode 100644 rdftk_io/src/common/parser/ntriples.rs create mode 100644 rdftk_io/src/common/parser/trig.rs create mode 100644 rdftk_io/src/common/parser/turtle.rs delete mode 100644 rdftk_io/src/common/parser_error.rs delete mode 100644 rdftk_io/src/n3/n3.g4 delete mode 100644 rdftk_io/src/nq/nq-in.pest delete mode 100644 rdftk_io/src/nq/nq.pest delete mode 100644 rdftk_io/src/nt/nt-in.pest delete mode 100644 rdftk_io/src/nt/nt.pest delete mode 100644 rdftk_io/src/turtle/parser/mod.rs delete mode 100644 rdftk_io/src/turtle/turtle-in.pest delete mode 100644 rdftk_io/src/turtle/turtle.pest delete mode 100644 rdftk_io/src/turtle/turtlestar.g4 create mode 100644 rdftk_io/tests/read_nq.rs create mode 100644 rdftk_io/tests/read_turtle.rs diff --git a/rdftk_io/Cargo.toml b/rdftk_io/Cargo.toml index 14463fa..3ca7032 100644 --- a/rdftk_io/Cargo.toml +++ b/rdftk_io/Cargo.toml @@ -26,13 +26,13 @@ turtle = ["pest", "pest_derive"] xml = ["xml-rs", "rdftk_names"] [dependencies] +itertools = "0.13" lazy_static = "1.4" -log = "0.4" objio = "0.1.1" rdftk_core = { version = "0.4.2", path = "../rdftk_core" } rdftk_iri = { version = "0.2.2", path = "../rdftk_iri" } regex = "1.5" -itertools = "0.13" +tracing = "0.1.40" # feature-dependencies pest = { version = "2.7", optional = true } @@ -43,5 +43,6 @@ xml-rs = { version = "0.8", optional = true } [dev-dependencies] indoc = "2.0" +log = "0.4.22" pretty_assertions = "1.4" pretty_env_logger = "0.5" diff --git a/rdftk_io/mkgrammars.sh b/rdftk_io/mkgrammars.sh deleted file mode 100755 index ad19c69..0000000 --- a/rdftk_io/mkgrammars.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -LANGUAGES=("nq" "nt" "turtle") - -for grammar in ${LANGUAGES[@]}; do - echo cat "src/${grammar}/${grammar}-in.pest" "src/common/common.pest" ">" "src/${grammar}/${grammar}.pest" - cat "src/${grammar}/${grammar}-in.pest" "src/common/common.pest" > "src/${grammar}/${grammar}.pest" -done diff --git a/rdftk_io/src/common/common.pest b/rdftk_io/src/common/common.pest index e9ab21c..8c04b42 100644 --- a/rdftk_io/src/common/common.pest +++ b/rdftk_io/src/common/common.pest @@ -1,11 +1,312 @@ +// ------------------------------------------------------------------------------------------------ +// N3 +// ------------------------------------------------------------------------------------------------ + +n3Doc = { + SOI ~ (n3Statement ~ END_OF_STATEMENT | sparqlDirective)* ~ EOI +} + +n3Statement = { + n3Directive | n3Triples +} + +n3Directive = { + prefixID | base +} + +sparqlDirective = { + sparqlBase + | sparqlPrefix +} + +n3Triples = { + n3Subject ~ n3PredicateObjectList? +} + +n3PredicateObjectList = { + n3Verb ~ n3ObjectList ~ (";" ~ (n3Verb ~ n3ObjectList)?)* +} + +n3ObjectList = { + n3Object ~ ("," ~ n3Object)* +} + +n3Verb = { + n3Predicate + | "a" + | "has" ~ expression + | "is" ~ expression ~ "of" + | "=" + | "<=" + | "=>" +} + +n3Subject = { + expression +} + +n3Predicate = { + expression | "<-" ~ expression +} + +n3Object = { + expression +} + +expression = { + path +} + +path = { + pathItem ~ ("!" ~ path | "^" ~ path)? +} + +pathItem = { + iri + | blankNode + | quickVar + | collection + | blankNodePropertyList + | turtleLiteral + | formula +} + +formula = { + "{" ~ formulaContent? ~ "}" +} + +formulaContent = { + n3Statement ~ ("." ~ formulaContent?)? + | (sparqlPrefix | sparqlBase) ~ formulaContent? +} + +// only made this a parser rule for consistency +// (all other path-items are also parser rules) +quickVar = { + QUICK_VAR_NAME +} + +// ------------------------------------------------------------------------------------------------ +// TRiG +// ------------------------------------------------------------------------------------------------ + +trigDoc = { + SOI ~ (directive | block)* ~ EOI +} + +block = _{ + triplesOrGraph + | wrappedGraph + | triples2 + | namedGraph +} + +namedGraph = { + "GRAPH" ~ labelOrSubject ~ wrappedGraph +} + +triplesOrGraph = { + labelOrSubject ~ (wrappedGraph | predicateObjectList ~ END_OF_STATEMENT) +} + +triples2 = { + blankNodePropertyList ~ predicateObjectList? ~ END_OF_STATEMENT + | collection ~ predicateObjectList ~ END_OF_STATEMENT +} + +wrappedGraph = { + "{" ~ triplesBlock? ~ "}" +} + +triplesBlock = { + triples ~ (END_OF_STATEMENT ~ triplesBlock?)? +} + +labelOrSubject = { + iri + | blankNode +} + +// ------------------------------------------------------------------------------------------------ +// Turtle with RDF-* +// ------------------------------------------------------------------------------------------------ + +turtleStarDoc = { + SOI ~ statement* ~ EOI +} + +statement = { + directive + | triples ~ END_OF_STATEMENT +} + +directive = { + prefixID + | base + | sparqlPrefix + | sparqlBase +} + +prefixID = { + "@prefix" ~ PNAME_NS ~ IRIREF ~ END_OF_STATEMENT +} + +base = { + "@base" ~ IRIREF ~ END_OF_STATEMENT +} + +sparqlBase = { + ^"BASE" ~ IRIREF +} + +sparqlPrefix = { + ^"PREFIX" ~ PNAME_NS ~ IRIREF +} + +triples = { + turtleSubject ~ predicateObjectList + | blankNodePropertyList ~ predicateObjectList? +} + +predicateObjectList = { + verbObjectList ~ (";" ~ verbObjectList?)* +} + +verbObjectList = { + verb ~ objectList +} + +objectList = { + turtleObject ~ ("," ~ turtleObject)* +} + +verb = { + turtlePredicate + | "a" +} + +turtleSubject = { + iri + | blankNode + | collection + | tripleX +} + +turtlePredicate = { + iri +} + +turtleObject = { + iri + | blankNode + | turtleLiteral + | collection + | blankNodePropertyList + | tripleX +} + +tripleX = { + "<<" ~ subjectX ~ turtlePredicate ~ objectX ~ ">>" +} + +subjectX = { + iri + | blankNode + | tripleX +} + +objectX = { + iri + | blankNode + | turtleLiteral + | tripleX +} + +turtleLiteral = { + turtleRdfLiteral + | NumericLiteral + | BooleanLiteral +} + + +turtleRdfLiteral = { + turtleString ~ (LANGTAG | "^^" ~ iri)? +} + +turtleString = { + STRING_LITERAL_LONG_SINGLE_QUOTE + | STRING_LITERAL_LONG_QUOTE + | STRING_LITERAL_QUOTE + | STRING_LITERAL_SINGLE_QUOTE +} + +blankNodePropertyList = { + "[" ~ predicateObjectList ~ "]" +} + +collection = { + "(" ~ turtleObject* ~ ")" +} + +// ------------------------------------------------------------------------------------------------ +// NQuads +// ------------------------------------------------------------------------------------------------ + +nquadDoc = { + SOI ~ nquad* ~ EOI +} + +// replace nquadGraphLabel with ntripleSubject +nquad = { + ntripleSubject ~ ntriplePredicate ~ ntripleObject ~ ntripleSubject? ~ END_OF_STATEMENT +} + +// ------------------------------------------------------------------------------------------------ +// NTriples +// ------------------------------------------------------------------------------------------------ + +ntripleDoc = { + SOI ~ ntriple* ~ EOI +} + +ntriple = { + ntripleSubject ~ ntriplePredicate ~ ntripleObject ~ END_OF_STATEMENT +} + +ntripleSubject = { + IRIREF + | blankNode +} + +ntriplePredicate = { + IRIREF +} + +ntripleObject = { + IRIREF + | blankNode + | ntripleLiteral +} + +ntripleLiteral = { + ntripleRdfLiteral +} + +ntripleRdfLiteral = { + ntripleString ~ (LANGTAG | "^^" ~ IRIREF)? +} + +ntripleString = { + STRING_LITERAL_QUOTE +} // ------------------------------------------------------------------------------------------------ // IRIs // ------------------------------------------------------------------------------------------------ iri = { - IRIREF - | prefixedName + IRIREF + | prefixedName } IRIREF = ${ @@ -18,42 +319,38 @@ IRIREF_INNER = @{ IRIREF_CHAR = { /* #x00=NULL #01-#x1F=control codes #x20=space */ - !(">" | '\u{0000}'..'\u{0020}' | "\"" | "{" | "}" | "|" | "^" | "`" | "\\") ~ ANY - | UCHAR + !(">" | '\u{0000}'..'\u{0020}' | "\"" | "{" | "}" | "|" | "^" | "`" | "\\") ~ ANY + | UCHAR } // ------------------------------------------------------------------------------------------------ // Blank Nodes // ------------------------------------------------------------------------------------------------ -BlankNode = { +blankNode = { BLANK_NODE_LABEL - | ANON + | ANON } BLANK_NODE_LABEL = @{ - "_:" ~ (PN_CHARS_U | ASCII_DIGIT) ~ BLANK_NODE_LABEL_TAIL* + "_:" ~ (PN_CHARS_U | ASCII_DIGIT) ~ BLANK_NODE_LABEL_TAIL* } BLANK_NODE_LABEL_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." + PN_CHARS + | &("." ~ PN_CHARS) ~ "." } ANON = { - "[" ~ "]" + "[" ~ "]" } // ------------------------------------------------------------------------------------------------ // RDF (string-like) Literals // ------------------------------------------------------------------------------------------------ -rdfLiteral = { - String ~ (LANGTAG | "^^" ~ iri)? -} - -LANGTAG = { - "@" ~ ASCII_ALPHA+ ~ ("-" ~ ASCII_ALPHANUMERIC+)* +LANGTAG = @{ + "@" ~ ASCII_ALPHA+ ~ ("-" ~ ASCII_ALPHANUMERIC+)* } // ------------------------------------------------------------------------------------------------ @@ -61,30 +358,28 @@ LANGTAG = { // ------------------------------------------------------------------------------------------------ NumericLiteral = { - SIGN? ~ - ( DOUBLE - | DECIMAL - | INTEGER ) + Double | Decimal | Integer } -INTEGER = { - ASCII_DIGIT+ +SIGN = { + ("+" | "-") } - -DECIMAL = { - ASCII_DIGIT* ~ "." ~ ASCII_DIGIT+ + +Integer = @{ + SIGN? ~ ASCII_DIGIT+ } - -DOUBLE = { - (ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT* ~ EXPONENT | "." ~ ASCII_DIGIT+ ~EXPONENT | ASCII_DIGIT+ ~ EXPONENT) + +Decimal = @{ + SIGN? ~ ASCII_DIGIT* ~ "." ~ ASCII_DIGIT+ } -SIGN = { - ("+" | "-") +Double = @{ + SIGN? ~ (ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT* ~ EXPONENT + | "." ~ ASCII_DIGIT+ ~ EXPONENT | ASCII_DIGIT+ ~ EXPONENT) } EXPONENT = { - ^"e" ~ SIGN? ~ ASCII_DIGIT+ + ^"e" ~ SIGN? ~ ASCII_DIGIT+ } // ------------------------------------------------------------------------------------------------ @@ -92,16 +387,22 @@ EXPONENT = { // ------------------------------------------------------------------------------------------------ BooleanLiteral = { - "true" - | "false" + "true" + | "false" } +// ------------------------------------------------------------------------------------------------ +// Special Punctuation +// ------------------------------------------------------------------------------------------------ + +END_OF_STATEMENT = { "." } + // ------------------------------------------------------------------------------------------------ // String Literals // ------------------------------------------------------------------------------------------------ STRING_LITERAL_LONG_SINGLE_QUOTE = ${ - "'''" ~ LONG_SINGLE_QUOTE_INNER ~ "'''" + "'''" ~ LONG_SINGLE_QUOTE_INNER ~ "'''" } LONG_SINGLE_QUOTE_INNER = @{ @@ -109,13 +410,13 @@ LONG_SINGLE_QUOTE_INNER = @{ } LONG_SINGLE_QUOTE_CHAR = { - !("'''" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR + !("'''" | "\\" | "\r" | "\n") ~ ANY + | ECHAR + | UCHAR } STRING_LITERAL_LONG_QUOTE = ${ - "\"\"\"" ~ LONG_QUOTE_INNER ~ "\"\"\"" + "\"\"\"" ~ LONG_QUOTE_INNER ~ "\"\"\"" } LONG_QUOTE_INNER = @{ @@ -123,13 +424,13 @@ LONG_QUOTE_INNER = @{ } LONG_QUOTE_CHAR = { - !("\"\"\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR + !("\"\"\"" | "\\" | "\r" | "\n") ~ ANY + | UCHAR + | ECHAR } - + STRING_LITERAL_QUOTE = ${ - "\"" ~ QUOTE_INNER ~ "\"" + "\"" ~ QUOTE_INNER ~ "\"" } QUOTE_INNER = @{ @@ -137,13 +438,13 @@ QUOTE_INNER = @{ } QUOTE_CHAR = { - !("\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR + !("\"" | "\\" | "\r" | "\n") ~ ANY + | UCHAR + | ECHAR } STRING_LITERAL_SINGLE_QUOTE = ${ - "'" ~ SINGLE_QUOTE_INNER ~ "'" + "'" ~ SINGLE_QUOTE_INNER ~ "'" } SINGLE_QUOTE_INNER = @{ @@ -151,17 +452,27 @@ SINGLE_QUOTE_INNER = @{ } SINGLE_QUOTE_CHAR = { - !( "'" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR + !("'" | "\\" | "\r" | "\n") ~ ANY + | ECHAR + | UCHAR } UCHAR = @{ - "\\U" ~ ASCII_HEX_DIGIT{8} | "\\u" ~ ASCII_HEX_DIGIT{4} + "\\U" ~ ASCII_HEX_DIGIT{8} + | "\\u" ~ ASCII_HEX_DIGIT{4} } - + ECHAR = @{ - "\\" ~ ("t" | "b" | "n" | "r" | "f" | "\"" | "'" | "\\") + "\\" ~ ("t" | "b" | "n" | "r" | "f" | "\"" | "'" | "\\") +} + +// ------------------------------------------------------------------------------------------------ +// Special Names +// ------------------------------------------------------------------------------------------------ + +// approximating "barename" with PN_CHARS - they seem similar enough +QUICK_VAR_NAME = @{ + "?" ~ PN_CHARS_U ~ PN_CHARS* } // ------------------------------------------------------------------------------------------------ @@ -169,94 +480,97 @@ ECHAR = @{ // ------------------------------------------------------------------------------------------------ prefixedName = { - PNAME_LN - | PNAME_NS + PNAME_LN + | PNAME_NS } PNAME_NS = { - PN_PREFIX? ~ ":" + PN_PREFIX? ~ ":" } PNAME_LN = { - PNAME_NS ~ PN_LOCAL + PNAME_NS ~ PN_LOCAL } PN_CHARS_BASE = { - ASCII_ALPHA - | '\u{00C0}'..'\u{00D6}' - | '\u{00D8}'..'\u{00F6}' - | '\u{00F8}'..'\u{02FF}' - | '\u{0370}'..'\u{037D}' - | '\u{037F}'..'\u{1FFF}' - | '\u{200C}'..'\u{200D}' - | '\u{2070}'..'\u{218F}' - | '\u{2C00}'..'\u{2FEF}' - | '\u{3001}'..'\u{D7FF}' - | '\u{F900}'..'\u{FDCF}' - | '\u{FDF0}'..'\u{FFFD}' - | '\u{10000}'..'\u{EFFFF}' + ASCII_ALPHA + | '\u{00C0}'..'\u{00D6}' + | '\u{00D8}'..'\u{00F6}' + | '\u{00F8}'..'\u{02FF}' + | '\u{0370}'..'\u{037D}' + | '\u{037F}'..'\u{1FFF}' + | '\u{200C}'..'\u{200D}' + | '\u{2070}'..'\u{218F}' + | '\u{2C00}'..'\u{2FEF}' + | '\u{3001}'..'\u{D7FF}' + | '\u{F900}'..'\u{FDCF}' + | '\u{FDF0}'..'\u{FFFD}' + | '\u{10000}'..'\u{EFFFF}' } PN_CHARS_U = { - PN_CHARS_BASE - | "_" + PN_CHARS_BASE + | "_" } - + PN_CHARS = { - PN_CHARS_U - | "-" - | ASCII_DIGIT - | '\u{00B7}'..'\u{00B7}' - | '\u{0300}'..'\u{036F}' - | '\u{203F}'..'\u{2040}' + PN_CHARS_U + | "-" + | ASCII_DIGIT + | '\u{00B7}'..'\u{00B7}' + | '\u{0300}'..'\u{036F}' + | '\u{203F}'..'\u{2040}' } PN_PREFIX = @{ - PN_CHARS_BASE ~ PN_PREFIX_TAIL* + PN_CHARS_BASE ~ PN_PREFIX_TAIL* } PN_PREFIX_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." + PN_CHARS + | &("." ~ PN_CHARS) ~ "." } PN_LOCAL = @{ - (PN_CHARS_U | ":" | ASCII_DIGIT | PLX) ~ PN_LOCAL_TAIL* + (PN_CHARS_U | ":" | ASCII_DIGIT | PLX) ~ PN_LOCAL_TAIL* } PN_LOCAL_TAIL = { PN_LOCAL_CHARS - | &("." ~ PN_LOCAL_CHARS) ~ "." + | &("." ~ PN_LOCAL_CHARS) ~ "." } PN_LOCAL_CHARS = { - PN_CHARS | ":" | PLX + PN_CHARS + | ":" + | PLX } - + PLX = { - PERCENT - | PN_LOCAL_ESC + PERCENT + | PN_LOCAL_ESC } - + PERCENT = { - "%" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT + "%" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT } PN_LOCAL_ESC = { - "\\" ~ ("_" | "~" | "." | "-" | "!" | "$" | "&" | "'\''" | "(" | ")" - | "*" | "+" | "," | ";" | "=" | "/" | "?" | "#" | "@" | "%") + "\\" ~ ("_" | "~" | "." | "-" | "!" | "$" | "&" | "'\''" | "(" | ")" | "*" | "+" | "," | ";" | "=" | "/" | "?" | "#" | "@" | "%") } // ------------------------------------------------------------------------------------------------ // Implicit Whitespace // ------------------------------------------------------------------------------------------------ -newline = _{ "\n" | "\r\n" } +newline = _{ "\r" | "\n" } COMMENT = _{ - "#" ~ (!newline ~ ANY)* + "#" ~ (!newline ~ ANY)* } WHITESPACE = _{ - " " | "\t" | newline + " " + | " " + | newline } diff --git a/rdftk_io/src/common/mod.rs b/rdftk_io/src/common/mod.rs index 1e77693..3c6b168 100644 --- a/rdftk_io/src/common/mod.rs +++ b/rdftk_io/src/common/mod.rs @@ -7,52 +7,10 @@ More detailed description, with */ -use rdftk_core::{model::graph::GraphFactoryRef, simple::graph_factory}; - -// ------------------------------------------------------------------------------------------------ -// Public Types -// ------------------------------------------------------------------------------------------------ - -#[derive(Debug)] -pub struct ReaderOptions { - factory: GraphFactoryRef, -} - -// ------------------------------------------------------------------------------------------------ -// Implementations -// ------------------------------------------------------------------------------------------------ - -impl Default for ReaderOptions { - fn default() -> Self { - Self { - factory: graph_factory(), - } - } -} - -impl ReaderOptions { - pub fn with_factory(self, factory: GraphFactoryRef) -> Self { - let mut self_mut = self; - self_mut.factory = factory; - self_mut - } - - pub fn set_factory(&mut self, factory: GraphFactoryRef) { - self.factory = factory; - } - - pub fn factory(&self) -> &GraphFactoryRef { - &self.factory - } -} - // ------------------------------------------------------------------------------------------------ // Modules // ------------------------------------------------------------------------------------------------ pub(crate) mod indenter; -#[macro_use] -pub(crate) mod parser_error; - pub(crate) mod parser; diff --git a/rdftk_io/src/common/parser/mod.rs b/rdftk_io/src/common/parser/mod.rs new file mode 100644 index 0000000..7830caf --- /dev/null +++ b/rdftk_io/src/common/parser/mod.rs @@ -0,0 +1,149 @@ +#![allow(clippy::upper_case_acronyms)] // << generated by pest. + +use pest::Parser as _; +use pest_derive::Parser; +use rdftk_core::error::Error; +use rdftk_core::model::data_set::{DataSetFactoryRef, DataSetRef}; +use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; +use tracing::{span, Level}; + +// ------------------------------------------------------------------------------------------------ +// Public Macros +// ------------------------------------------------------------------------------------------------ + +#[macro_export] +macro_rules! make_factory_options { + ($opt_name:ident, $factory_type:ty, $default_fn:ident) => { + #[derive(Debug)] + pub struct $opt_name { + factory: $factory_type, + } + + impl Default for $opt_name { + fn default() -> Self { + Self { + factory: $default_fn(), + } + } + } + + impl $opt_name { + pub fn with_factory(self, factory: $factory_type) -> Self { + let mut self_mut = self; + self_mut.factory = factory; + self_mut + } + + pub fn set_factory(&mut self, factory: $factory_type) { + self.factory = factory; + } + + pub fn factory(&self) -> &$factory_type { + &self.factory + } + } + }; +} + +macro_rules! parse_rule { + ($rule_fn:literal entry $pair:expr) => { + const RULE_FN: &'static str = $rule_fn; + ::tracing::trace!("{}({:?}, ...)", $rule_fn, &$pair.as_rule()); + }; +} + +macro_rules! pest_error { + (unexpected $rule_fn:expr, $given:expr, $expecting: expr ) => { + ::rdftk_core::error::Error::ParserUnexpected { + rule_fn: $rule_fn.into(), + given: format!("{:?}", $given.as_rule()), + expecting: $expecting.iter().map(|r| format!("{r:?}")).collect(), + } + }; +} + +// ------------------------------------------------------------------------------------------------ +// Public Functions +// ------------------------------------------------------------------------------------------------ + +pub(crate) fn parse_ntriple_doc(source: S, factory: GraphFactoryRef) -> Result +where + S: AsRef, +{ + let span = span!(Level::TRACE, "NTriples"); + let _guard = span.enter(); + let mut parsed = + CommonParser::parse(Rule::ntripleDoc, source.as_ref()).map_err(|e| Error::Tokenizer { + representation: "NTriples".into(), + source: Box::new(e), + })?; + let top_node = parsed.next().unwrap(); + ntriples::parse_doc(top_node, factory) +} + +pub(crate) fn parse_nquad_doc(source: S, factory: DataSetFactoryRef) -> Result +where + S: AsRef, +{ + let span = span!(Level::TRACE, "NQuads"); + let _guard = span.enter(); + let mut parsed = + CommonParser::parse(Rule::nquadDoc, source.as_ref()).map_err(|e| Error::Tokenizer { + representation: "NQuads".into(), + source: Box::new(e), + })?; + let top_node = parsed.next().unwrap(); + nquads::parse_doc(top_node, factory) +} + +pub(crate) fn parse_turtle_doc(_source: S, _factory: GraphFactoryRef) -> Result +where + S: AsRef, +{ + let span = span!(Level::TRACE, "Turtle"); + let _guard = span.enter(); + todo!() +} + +pub(crate) fn parse_trig_doc( + _source: S, + _factory: DataSetFactoryRef, +) -> Result +where + S: AsRef, +{ + let span = span!(Level::TRACE, "TRiG"); + let _guard = span.enter(); + todo!() +} + +pub(crate) fn parse_n3_doc(_source: S, _factory: GraphFactoryRef) -> Result +where + S: AsRef, +{ + let span = span!(Level::TRACE, "N3"); + let _guard = span.enter(); + todo!() +} + +// ------------------------------------------------------------------------------------------------ +// Private Types +// ------------------------------------------------------------------------------------------------ + +#[derive(Parser)] +#[grammar = "common/common.pest"] +struct CommonParser; + +// ------------------------------------------------------------------------------------------------ +// Modules +// ------------------------------------------------------------------------------------------------ + +mod ntriples; + +mod nquads; + +mod turtle; + +mod trig; + +mod n3; diff --git a/rdftk_io/src/common/parser.rs b/rdftk_io/src/common/parser/n3.rs similarity index 83% rename from rdftk_io/src/common/parser.rs rename to rdftk_io/src/common/parser/n3.rs index 4270939..3536722 100644 --- a/rdftk_io/src/common/parser.rs +++ b/rdftk_io/src/common/parser/n3.rs @@ -5,24 +5,30 @@ More detailed description, with # Example -*/ +End of file during parsingSymbol’s value as variable is void: rustEnd of file during parsing + + */ // use ... +// ------------------------------------------------------------------------------------------------ +// Public Macros +// ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ // Public Types // ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------ -// Public Macros +// Public Functions // ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------ -// Private Types +// Private Macros // ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------ -// Public Functions +// Private Types // ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/common/parser/nquads.rs b/rdftk_io/src/common/parser/nquads.rs new file mode 100644 index 0000000..5aafe68 --- /dev/null +++ b/rdftk_io/src/common/parser/nquads.rs @@ -0,0 +1,96 @@ +#![allow(clippy::upper_case_acronyms)] // << generated by pest. + +use super::ntriples::{object as nt_object, predicate as nt_predicate, subject as nt_subject}; +use super::Rule; +use pest::iterators::Pair; +use rdftk_core::model::data_set::DataSet; +use rdftk_core::model::graph::named::{GraphName, GraphNameRef}; +use rdftk_core::model::graph::NamedGraphRef; +use rdftk_core::model::statement::{ObjectNodeRef, StatementRef, SubjectNodeRef}; +use rdftk_core::{ + error::Error, + model::data_set::{DataSetFactoryRef, DataSetRef}, +}; +use rdftk_iri::IriRef; +use std::cell::RefMut; + +// ------------------------------------------------------------------------------------------------ +// Public Functions +// ------------------------------------------------------------------------------------------------ + +pub(super) fn parse_doc( + input_pair: Pair<'_, Rule>, + factory: DataSetFactoryRef, +) -> Result { + parse_rule!("nquadDoc" entry input_pair); + + let data_set = factory.data_set(); + + if input_pair.as_rule() == Rule::nquadDoc { + for inner_pair in input_pair.into_inner() { + match inner_pair.as_rule() { + Rule::nquad => { + nquad(inner_pair, data_set.borrow_mut())?; + } + Rule::EOI => {} + _ => { + return Err(pest_error!( + unexpected + RULE_FN, + &inner_pair, + [Rule::nquad, Rule::EOI] + )); + } + } + } + Ok(data_set) + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::nquadDoc])) + } +} + +// ------------------------------------------------------------------------------------------------ +// Private Functions +// ------------------------------------------------------------------------------------------------ + +fn subject_to_name(subject: SubjectNodeRef) -> GraphNameRef { + let name: GraphName = subject.into(); + name.into() +} + +fn nquad(input_pair: Pair<'_, Rule>, data_set: RefMut<'_, dyn DataSet>) -> Result<(), Error> { + parse_rule!("nquad" entry input_pair); + + let mut data_set = data_set; + + let graphs = data_set.graph_factory(); + let statements = graphs.statement_factory(); + let literals = statements.literal_factory(); + + if input_pair.as_rule() == Rule::nquad { + let mut inner_pairs = input_pair.into_inner(); + let subject: SubjectNodeRef = nt_subject(inner_pairs.next().unwrap(), &statements)?; + let predicate: IriRef = nt_predicate(inner_pairs.next().unwrap())?; + let object: ObjectNodeRef = nt_object(inner_pairs.next().unwrap(), &statements, &literals)?; + let statement: StatementRef = statements.statement(subject, predicate, object)?; + let graph: &mut NamedGraphRef = if let Some(new_inner_pair) = inner_pairs.next() { + let graph_name = subject_to_name(nt_subject(new_inner_pair, &statements)?); + if let Some(graph) = data_set.graph_mut(&Some(graph_name.clone())) { + graph + } else { + data_set.insert(graphs.named_graph(Some(graph_name.clone()))); + data_set.graph_mut(&Some(graph_name)).unwrap() + } + } else if let Some(graph) = data_set.graph_mut(&None) { + graph + } else { + data_set.insert(graphs.named_graph(None)); + data_set.graph_mut(&None).unwrap() + }; + let mut graph_mut = graph.borrow_mut(); + graph_mut.insert(statement); + Ok(()) + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::nquad])) + } +} diff --git a/rdftk_io/src/common/parser/ntriples.rs b/rdftk_io/src/common/parser/ntriples.rs new file mode 100644 index 0000000..cfe0260 --- /dev/null +++ b/rdftk_io/src/common/parser/ntriples.rs @@ -0,0 +1,290 @@ +#![allow(clippy::upper_case_acronyms)] // << generated by pest. + +use super::Rule; +use pest::iterators::Pair; +use rdftk_core::error::Error; +use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; +use rdftk_core::model::literal::{DataType, LanguageTag, LiteralFactoryRef, LiteralRef}; +use rdftk_core::model::statement::{ + ObjectNodeRef, StatementFactoryRef, StatementRef, SubjectNodeRef, +}; +use rdftk_iri::{Iri, IriRef}; +use regex::Regex; +use std::str::FromStr; + +// ------------------------------------------------------------------------------------------------ +// Public Functions +// ------------------------------------------------------------------------------------------------ + +pub(super) fn parse_doc( + input_pair: Pair<'_, Rule>, + factory: GraphFactoryRef, +) -> Result { + parse_rule!("parse_doc" entry input_pair); + + let graph = factory.graph(); + + if input_pair.as_rule() == Rule::ntripleDoc { + for inner_pair in input_pair.into_inner() { + match inner_pair.as_rule() { + Rule::ntriple => { + let mut graph = graph.borrow_mut(); + let st = triple( + inner_pair, + &graph.statement_factory(), + &graph.literal_factory(), + )?; + graph.insert(st); + } + Rule::EOI => { + return Ok(graph); + } + _ => { + return Err(pest_error!( + unexpected + RULE_FN, + &inner_pair, + [Rule::ntriple, Rule::EOI] + )); + } + } + } + } else { + return Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntripleDoc])); + } + + unreachable!() +} + +fn triple( + input_pair: Pair<'_, Rule>, + statements: &StatementFactoryRef, + literals: &LiteralFactoryRef, +) -> Result { + parse_rule!("triple" entry input_pair); + + if input_pair.as_rule() == Rule::ntriple { + let mut inner_pairs = input_pair.into_inner(); + let subject = subject(inner_pairs.next().unwrap(), statements)?; + let predicate = predicate(inner_pairs.next().unwrap())?; + let object = object(inner_pairs.next().unwrap(), statements, literals)?; + statements.statement(subject, predicate, object) + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntriple])) + } +} + +pub(crate) fn subject( + input_pair: Pair<'_, Rule>, + factory: &StatementFactoryRef, +) -> Result { + parse_rule!("nt_subject" entry input_pair); + + if input_pair.as_rule() == Rule::ntripleSubject { + let inner_pair = input_pair.into_inner().next().unwrap(); + match inner_pair.as_rule() { + Rule::IRIREF => Ok(factory.named_subject(iri_ref(inner_pair)?)), + Rule::blankNode => { + let node = inner_pair.as_str().to_string(); + // strip the leading '_:' + let node = &node[2..]; + factory.blank_subject_named(node) + } + _ => Err(pest_error!( + unexpected + RULE_FN, + &inner_pair, + [Rule::IRIREF, Rule::blankNode] + )), + } + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntripleSubject])) + } +} + +pub(crate) fn predicate(input_pair: Pair<'_, Rule>) -> Result { + parse_rule!("predicate" entry input_pair); + + if input_pair.as_rule() == Rule::ntriplePredicate { + let inner_pair = input_pair.into_inner().next().unwrap(); + if inner_pair.as_rule() == Rule::IRIREF { + Ok(iri_ref(inner_pair)?) + } else { + Err(pest_error!(unexpected RULE_FN, &inner_pair, [Rule::IRIREF])) + } + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntriplePredicate])) + } +} + +pub(crate) fn object( + input_pair: Pair<'_, Rule>, + factory: &StatementFactoryRef, + literals: &LiteralFactoryRef, +) -> Result { + parse_rule!("object" entry input_pair); + + if input_pair.as_rule() == Rule::ntripleObject { + let inner_pair = input_pair.into_inner().next().unwrap(); + match inner_pair.as_rule() { + Rule::IRIREF => Ok(factory.named_object(iri_ref(inner_pair)?)), + Rule::blankNode => { + let node = inner_pair.as_str().to_string(); + // strip the leading '_:' + let node = &node[2..]; + Ok(factory.blank_object_named(node)?) + } + Rule::ntripleLiteral => { + let literal = literal(inner_pair, literals)?; + Ok(factory.literal_object(literal)) + } + _ => Err(pest_error!( + unexpected + RULE_FN, + &inner_pair, + [Rule::IRIREF, Rule::blankNode, Rule::ntripleLiteral] + )), + } + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntripleObject])) + } +} + +fn literal(input_pair: Pair<'_, Rule>, literals: &LiteralFactoryRef) -> Result { + parse_rule!("literal" entry input_pair); + + if input_pair.as_rule() == Rule::ntripleLiteral { + let inner_pair = input_pair.into_inner().next().unwrap(); + rdf_literal(inner_pair, literals) + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntripleObject])) + } +} + +fn rdf_literal( + input_pair: Pair<'_, Rule>, + literals: &LiteralFactoryRef, +) -> Result { + parse_rule!("rdf_literal" entry input_pair); + + if input_pair.as_rule() == Rule::ntripleRdfLiteral { + let mut inner_pair = input_pair.into_inner(); + let lexical_form = string(inner_pair.next().unwrap())?; + + if let Some(other) = inner_pair.next() { + match other.as_rule() { + Rule::IRIREF => { + let data_type = DataType::Other(iri_ref(other)?); + Ok(literals.with_data_type(&lexical_form, data_type)) + } + Rule::LANGTAG => { + let lang_tag = lang_tag(other)?; + Ok(literals.with_language(&lexical_form, lang_tag)) + } + _ => Err(pest_error!( + unexpected + RULE_FN, + &other, + [Rule::IRIREF, Rule::LANGTAG] + )), + } + } else { + Ok(literals.literal(&lexical_form)) + } + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntripleRdfLiteral])) + } +} + +fn string(input_pair: Pair<'_, Rule>) -> Result { + parse_rule!("string" entry input_pair); + + if input_pair.as_rule() == Rule::ntripleString { + let inner_pair = input_pair.into_inner().next().unwrap(); + match inner_pair.as_rule() { + Rule::STRING_LITERAL_QUOTE => { + let inner_pair = inner_pair.into_inner().next().unwrap(); + if inner_pair.as_rule() == Rule::QUOTE_INNER { + Ok(inner_pair.as_str().to_string()) + } else { + Err(pest_error!( + unexpected + RULE_FN, + &inner_pair, + [Rule::QUOTE_INNER] + )) + } + } + _ => Err(pest_error!( + unexpected + RULE_FN, + &inner_pair, + [Rule::STRING_LITERAL_QUOTE] + )), + } + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::ntripleString])) + } +} + +fn iri_ref(input_pair: Pair<'_, Rule>) -> Result { + parse_rule!("iri_ref" entry input_pair); + + if input_pair.as_rule() == Rule::IRIREF { + let iri = input_pair.as_str().to_string(); + // strip the '<' and '>' characters. + let iri_str = unescape_iri(&iri[1..iri.len() - 1]); + Ok(IriRef::new(Iri::from_str(&iri_str)?)) + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::IRIREF])) + } +} + +fn lang_tag(input_pair: Pair<'_, Rule>) -> Result { + parse_rule!("lang_tag" entry input_pair); + + if input_pair.as_rule() == Rule::LANGTAG { + let tag = input_pair.as_str().to_string(); + println!("**{tag}**"); + // strip the leading '@' + let tag = &tag[1..]; + println!("**{tag}**"); + Ok(LanguageTag::from_str(tag)?) + } else { + Err(pest_error!(unexpected RULE_FN, &input_pair, [Rule::LANGTAG])) + } +} + +// ------------------------------------------------------------------------------------------------ + +lazy_static::lazy_static! { + static ref UNICODE_ESC: Regex = + Regex::new(r"(\\U[[:xdigit:]]{8})|(\\u[[:xdigit:]]{4})").unwrap(); +} + +fn unescape_iri(iri: &str) -> String { + let (new_iri, end) = + UNICODE_ESC + .captures_iter(iri) + .fold((String::new(), 0), |(so_far, start), cap| { + let cap = cap.get(0).unwrap(); + ( + format!( + "{}{}{}", + so_far, + &iri[start..cap.start()], + unescape_uchar(cap.as_str()) + ), + cap.end(), + ) + }); + + format!("{}{}", new_iri, &iri[end..]) +} + +fn unescape_uchar(uchar: &str) -> char { + use std::char; + let uchar = &uchar[2..]; + let uchar_u32 = u32::from_str_radix(uchar, 16).unwrap(); + char::from_u32(uchar_u32).unwrap() +} diff --git a/rdftk_io/src/common/parser/trig.rs b/rdftk_io/src/common/parser/trig.rs new file mode 100644 index 0000000..3536722 --- /dev/null +++ b/rdftk_io/src/common/parser/trig.rs @@ -0,0 +1,44 @@ +/*! +One-line description. + +More detailed description, with + +# Example + +End of file during parsingSymbol’s value as variable is void: rustEnd of file during parsing + + */ + +// use ... + +// ------------------------------------------------------------------------------------------------ +// Public Macros +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Public Types +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Public Functions +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Private Macros +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Private Types +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Implementations +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Private Functions +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Modules +// ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/common/parser/turtle.rs b/rdftk_io/src/common/parser/turtle.rs new file mode 100644 index 0000000..3536722 --- /dev/null +++ b/rdftk_io/src/common/parser/turtle.rs @@ -0,0 +1,44 @@ +/*! +One-line description. + +More detailed description, with + +# Example + +End of file during parsingSymbol’s value as variable is void: rustEnd of file during parsing + + */ + +// use ... + +// ------------------------------------------------------------------------------------------------ +// Public Macros +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Public Types +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Public Functions +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Private Macros +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Private Types +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Implementations +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Private Functions +// ------------------------------------------------------------------------------------------------ + +// ------------------------------------------------------------------------------------------------ +// Modules +// ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/common/parser_error.rs b/rdftk_io/src/common/parser_error.rs deleted file mode 100644 index b86894e..0000000 --- a/rdftk_io/src/common/parser_error.rs +++ /dev/null @@ -1,152 +0,0 @@ -use pest::iterators::Pair; -use pest::RuleType; -use rdftk_core::error::{read_write_error_with, Error as CoreError}; -use std::fmt::{Debug, Display, Formatter}; -use std::hash::Hash; - -// ------------------------------------------------------------------------------------------------ -// Public Types -// ------------------------------------------------------------------------------------------------ - -#[derive(Debug, Clone)] -pub(crate) struct ParserErrorFactory { - pub(crate) repr: &'static str, -} - -#[derive(Debug, Clone)] -pub(crate) struct ParserError { - repr: String, - fn_name: String, - rule: Option, - expecting: Option, - unreachable: bool, - context: Option, -} - -// ------------------------------------------------------------------------------------------------ -// Public Macros -// ------------------------------------------------------------------------------------------------ - -macro_rules! unexpected { - ($fn_name:expr, $pair:expr) => {{ - ::log::error!("ParserError::unexpected({}, {:?})", $fn_name, $pair); - return Err(ERROR.error($fn_name).unexpected(&$pair).clone().into()); - }}; -} - -#[allow(unused_macros)] -macro_rules! unreachable { - ($fn_name:expr) => {{ - ::log::error!("ParserError::unreachable({)", $fn_name); - return ERROR.error($fn_name).unreachable().into(); - }}; -} - -#[allow(unused_macros)] -macro_rules! expecting { - ($fn_name:expr, $rule:expr) => {{ - ::log::error!("ParserError::new({}, {:?})", $fn_name, $rule); - return ERROR - .error($fn_name) - .expecting(stringify!($rule.to_string())) - .into(); - }}; -} - -// ------------------------------------------------------------------------------------------------ -// Implementations -// ------------------------------------------------------------------------------------------------ - -impl ParserErrorFactory { - pub(crate) fn error(&self, fn_name: &str) -> ParserError { - ParserError { - repr: self.repr.to_string(), - fn_name: fn_name.to_string(), - rule: None, - expecting: None, - unreachable: false, - context: None, - } - } - pub(crate) fn parser( - &self, - e: ::pest::error::Error, - ) -> CoreError { - read_write_error_with(self.repr, e) - } -} - -// ------------------------------------------------------------------------------------------------ - -impl Display for ParserError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}{}{}{}{}", - &self.fn_name, - match &self.rule { - None => String::new(), - Some(s) => format!(", rule: {}", s), - }, - match &self.expecting { - None => String::new(), - Some(s) => format!(", expecting: {}", s), - }, - if self.unreachable { - ", should have been unreachable".to_string() - } else { - String::new() - }, - match &self.context { - None => String::new(), - Some(s) => format!(", context: '{}'", s), - } - ) - } -} - -impl std::error::Error for ParserError {} - -impl From for CoreError { - fn from(e: ParserError) -> Self { - read_write_error_with(e.repr.clone(), e) - } -} - -#[allow(dead_code)] -impl ParserError { - pub(crate) fn unexpected(&mut self, pair: &Pair<'_, T>) -> &mut Self { - self.context = Some(format!("{:?}: {:?}", pair.as_rule(), pair.as_str())); - self - } - - pub(crate) fn unreachable(&mut self) -> &mut Self { - self.unreachable = true; - self - } - - pub(crate) fn in_rule(&mut self, rule: &str) -> &mut Self { - self.rule = Some(rule.to_string()); - self - } - - pub(crate) fn expecting(&mut self, expecting: &str) -> &mut Self { - self.expecting = Some(expecting.to_string()); - self - } - - pub(crate) fn unreachable_rule(&mut self) -> &mut Self { - self.unreachable = true; - self - } - - pub(crate) fn context(&mut self, context: &dyn Display) -> &mut Self { - self.context = Some(format!("{}", context)); - self - } - - pub(crate) fn debug_context(&mut self, context: &dyn Debug) -> &mut Self { - self.context = Some(format!("{:?}", context)); - self - } -} diff --git a/rdftk_io/src/dot/mod.rs b/rdftk_io/src/dot/mod.rs index ceb8766..85890c0 100644 --- a/rdftk_io/src/dot/mod.rs +++ b/rdftk_io/src/dot/mod.rs @@ -6,16 +6,16 @@ format. # Example ```rust -use rdftk_io::{HasOptions, ObjectWriter}; -use rdftk_io::dot::writer::{DotOptions, DotWriter}; +use rdftk_io::dot::{DotOptions, DotWriter}; +# use objio::{HasOptions, ObjectWriter}; # use rdftk_core::model::graph::GraphRef; # fn make_graph() -> GraphRef { rdftk_core::simple::graph::graph_factory().graph() } let mut options = DotOptions::default().with_blank_labels(true); -let writer = DotWriter::default.with_options(options); +let writer = DotWriter::default().with_options(options); -let result = writer.write_graph_to_string(&make_graph()); +let result = writer.write_to_string(&make_graph()); ``` */ diff --git a/rdftk_io/src/dot/writer.rs b/rdftk_io/src/dot/writer.rs index 44964c7..e8ec5d1 100644 --- a/rdftk_io/src/dot/writer.rs +++ b/rdftk_io/src/dot/writer.rs @@ -373,7 +373,7 @@ impl DotWriter { Node { id: id.clone(), kind: NodeKind::Blank, - label: node.as_blank().unwrap().into(), + label: node.as_blank().unwrap().as_ref().into(), }, ); } else if node.is_iri() { @@ -402,7 +402,7 @@ impl DotWriter { Node { id: id.clone(), kind: NodeKind::Blank, - label: node.as_blank().unwrap().into(), + label: node.as_blank().unwrap().as_ref().into(), }, ); } else if node.is_iri() { diff --git a/rdftk_io/src/json/mod.rs b/rdftk_io/src/json/mod.rs index 298a685..6d79b22 100644 --- a/rdftk_io/src/json/mod.rs +++ b/rdftk_io/src/json/mod.rs @@ -1,7 +1,11 @@ /*! Provides for writing a `Graph` instance in the W3C [RDF 1.1 JSON Alternate Serialization (RDF/JSON)](https://www.w3.org/TR/rdf-json/) format. -*/ + */ + +// ------------------------------------------------------------------------------------------------ +// Public Values +// ------------------------------------------------------------------------------------------------ /// The display name of this serialization format. pub const NAME: &str = "JSON"; @@ -12,33 +16,14 @@ pub const FILE_EXTENSION: &str = "json"; /// The MIME type used for this serialization format. pub const MIME_TYPE: &str = "application/rdf+json"; -// ------------------------------------------------------------------------------------------------ -// Public Types -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Implementations -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Modules // ------------------------------------------------------------------------------------------------ mod syntax; -pub mod reader; +mod reader; +pub use reader::{JsonReader, JsonReaderOptions}; mod writer; pub use writer::{JsonOptions, JsonWriter}; diff --git a/rdftk_io/src/json/reader.rs b/rdftk_io/src/json/reader.rs index fad3c88..aab2c59 100644 --- a/rdftk_io/src/json/reader.rs +++ b/rdftk_io/src/json/reader.rs @@ -5,47 +5,45 @@ Provides the `JsonReader` implementation of the `GraphReader` trait. */ -use crate::common::ReaderOptions; use crate::json::syntax::{ BNODE_PREFIX, OBJ_KEY_DATATYPE, OBJ_KEY_LANG, OBJ_KEY_TYPE, OBJ_KEY_VALUE, OBJ_TYPE_BNODE, OBJ_TYPE_LITERAL, OBJ_TYPE_URI, }; +use crate::make_factory_options; use objio::{impl_has_options, HasOptions, ObjectReader}; -use rdftk_core::error::{read_write_error, read_write_error_with, Error}; +use rdftk_core::error::Error; use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; use rdftk_core::model::literal::{DataType, LanguageTag}; use rdftk_core::model::statement::SubjectNodeRef; +use rdftk_core::simple::graph_factory; use rdftk_core::simple::statement::statement_factory; use rdftk_iri::{Iri, IriRef}; use serde_json::{Map, Value}; use std::io::Read; use std::str::FromStr; +use tracing::error; + +use super::NAME; // ------------------------------------------------------------------------------------------------ // Public Types // ------------------------------------------------------------------------------------------------ +make_factory_options!(JsonReaderOptions, GraphFactoryRef, graph_factory); + /// /// An implementation of the GraphReader trait to read resources in the JSON representation. /// #[derive(Debug, Default)] pub struct JsonReader { - options: ReaderOptions, + options: JsonReaderOptions, } -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ -impl_has_options!(JsonReader, ReaderOptions); +impl_has_options!(JsonReader, JsonReaderOptions); impl ObjectReader for JsonReader { type Error = Error; @@ -54,8 +52,7 @@ impl ObjectReader for JsonReader { where R: Read, { - let value: Value = - serde_json::from_reader(r).map_err(|e| read_write_error_with(super::NAME, e))?; + let value: Value = serde_json::from_reader(r).map_err(json_error)?; parse_graph(value, self.options().factory().clone()) } } @@ -64,7 +61,35 @@ impl ObjectReader for JsonReader { // Private Functions // ------------------------------------------------------------------------------------------------ +macro_rules! parse_rule { + ($rule_fn:literal entry) => { + const RULE_FN: &'static str = $rule_fn; + ::tracing::trace!("{}(...)", $rule_fn); + }; +} + +fn json_error(err: serde_json::Error) -> Error { + Error::Tokenizer { + representation: NAME.into(), + source: Box::new(err), + } +} + +fn value_variant(value: &Value) -> String { + match value { + Value::Null => "Null", + Value::Bool(_) => "Bool", + Value::Number(_) => "Number", + Value::String(_) => "String", + Value::Array(_) => "Array", + Value::Object(_) => "Object", + } + .to_string() +} + fn parse_graph(value: Value, factory: GraphFactoryRef) -> Result { + parse_rule!("parse_graph" entry); + if let Value::Object(json) = value { let graph = factory.graph(); for (subject, predicate_objects) in json.iter() { @@ -72,8 +97,12 @@ fn parse_graph(value: Value, factory: GraphFactoryRef) -> Result Result<(), Error> { + parse_rule!("parse_statements" entry); + if let Value::Object(json) = predicate_objects { let subject = if let Some(subject) = subject.strip_prefix(BNODE_PREFIX) { graph @@ -99,8 +130,12 @@ fn parse_statements( } Ok(()) } else { - log::error!("parse_statements() - expecting Value::Object"); - read_write_error(super::NAME).into() + error!("rule {RULE_FN} expecting `Object` variant"); + Err(Error::ParserUnexpected { + rule_fn: "parse_statements".into(), + given: value_variant(predicate_objects), + expecting: vec!["Object".into()], + }) } } @@ -110,6 +145,8 @@ fn parse_predicates( objects: &Value, graph: &GraphRef, ) -> Result<(), Error> { + parse_rule!("parse_predicates" entry); + if let Value::Array(json) = objects { let predicate = IriRef::new(Iri::from_str(predicate)?); for object in json { @@ -117,8 +154,12 @@ fn parse_predicates( } Ok(()) } else { - log::error!("parse_predicates() - expecting Value::Array"); - read_write_error(super::NAME).into() + error!("rule {RULE_FN} expecting `Array` variant"); + Err(Error::ParserUnexpected { + rule_fn: RULE_FN.into(), + given: value_variant(objects), + expecting: vec!["Array".into()], + }) } } @@ -128,6 +169,8 @@ fn parse_object( object: &Value, graph: &GraphRef, ) -> Result<(), Error> { + parse_rule!("parse_object" entry); + if let Value::Object(json) = object { match json.get(OBJ_KEY_TYPE) { Some(Value::String(s)) => { @@ -138,18 +181,36 @@ fn parse_object( } else if s == OBJ_TYPE_URI { parse_uri_object(subject, predicate, json, graph) } else { - log::error!("parse_object() - unknown 'type' key value: {}", s); - read_write_error(super::NAME).into() + error!("parse_object() - unknown 'type' key value: {}", s); + Err(Error::ParserUnexpected { + rule_fn: RULE_FN.into(), + given: s.into(), + expecting: vec![ + OBJ_TYPE_LITERAL.into(), + OBJ_TYPE_BNODE.into(), + OBJ_TYPE_URI.into(), + ], + }) } } _ => { - log::error!("parse_object() - no 'type' key in object"); - read_write_error(super::NAME).into() + error!( + "rule {RULE_FN} expecting object to have key {}", + OBJ_KEY_TYPE + ); + Err(Error::ParserExpected { + rule_fn: RULE_FN.into(), + expecting: OBJ_KEY_TYPE.into(), + }) } } } else { - log::error!("parse_object() - expecting Value::Object"); - read_write_error(super::NAME).into() + error!("rule {RULE_FN} expecting `Object` variant"); + Err(Error::ParserUnexpected { + rule_fn: RULE_FN.into(), + given: value_variant(object), + expecting: vec!["Object".into()], + }) } } @@ -159,6 +220,8 @@ fn parse_literal_object( object: &Map, graph: &GraphRef, ) -> Result<(), Error> { + parse_rule!("parse_literal_object" entry); + let mut graph = graph.borrow_mut(); let value = object.get(OBJ_KEY_VALUE); let language = object.get(OBJ_KEY_LANG); @@ -176,8 +239,11 @@ fn parse_literal_object( .with_data_type(v, DataType::from(data_type)) } _ => { - log::error!("parse_literal_object() - bad value/data type/language combination"); - return read_write_error(super::NAME).into(); + error!("parse_literal_object() - bad value/data type/language combination"); + return Err(Error::ParserUnreachable { + rule_fn: RULE_FN.into(), + given: "bad value/data type/language combination".into(), + }); } }); let st = graph @@ -193,6 +259,8 @@ fn parse_bnode_object( object: &Map, graph: &GraphRef, ) -> Result<(), Error> { + parse_rule!("parse_bnode_object" entry); + let mut graph = graph.borrow_mut(); if let Some(Value::String(s)) = object.get(OBJ_KEY_VALUE) { let object = graph.statement_factory().blank_object_named(&s[2..])?; @@ -202,8 +270,14 @@ fn parse_bnode_object( graph.insert(st); Ok(()) } else { - log::error!("parse_bnode_object() - expecting Value::String"); - read_write_error(super::NAME).into() + error!( + "rule {RULE_FN} expecting object to have key {}", + OBJ_KEY_VALUE + ); + Err(Error::ParserExpected { + rule_fn: RULE_FN.into(), + expecting: OBJ_KEY_VALUE.into(), + }) } } @@ -213,6 +287,8 @@ fn parse_uri_object( object: &Map, graph: &GraphRef, ) -> Result<(), Error> { + parse_rule!("parse_uri_object" entry); + let mut graph = graph.borrow_mut(); if let Some(Value::String(s)) = object.get(OBJ_KEY_VALUE) { let uri = IriRef::new(Iri::from_str(s)?); @@ -223,11 +299,13 @@ fn parse_uri_object( graph.insert(st); Ok(()) } else { - log::error!("parse_uri_object() - expecting Value::String"); - read_write_error(super::NAME).into() + error!( + "rule {RULE_FN} expecting object to have key {}", + OBJ_KEY_VALUE + ); + Err(Error::ParserExpected { + rule_fn: RULE_FN.into(), + expecting: OBJ_KEY_VALUE.into(), + }) } } - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/json/writer.rs b/rdftk_io/src/json/writer.rs index 18830e7..6134c6c 100644 --- a/rdftk_io/src/json/writer.rs +++ b/rdftk_io/src/json/writer.rs @@ -2,14 +2,15 @@ Provides the `JsonWriter` implementation of the `GraphWriter` trait. ```rust -use rdftk_io::json::writer::{JsonWriter}; -use rdftk_io::write_graph_to_string; +use rdftk_io::json::{JsonWriter, JsonOptions}; +# use objio::{HasOptions, ObjectWriter}; # use rdftk_core::model::graph::GraphRef; # fn make_graph() -> GraphRef { rdftk_core::simple::graph::graph_factory().graph() } -let writer = JsonWriter::pretty(); +let writer = JsonWriter::default() + .with_options(JsonOptions::default().with_pretty_print(true)); -let result = write_graph_to_string(&writer, &make_graph()); +let result = writer.write_to_string(&make_graph()); ``` @@ -21,10 +22,11 @@ use crate::json::syntax::{ }; use crate::json::NAME; use objio::{impl_has_options, ObjectWriter}; -use rdftk_core::error::{rdf_star_not_supported_error, read_write_error_with, Error}; +use rdftk_core::error::{rdf_star_not_supported_error, Error}; use rdftk_core::model::graph::GraphRef; use serde_json::{Map, Value}; use std::io::Write; +use tracing::error; // ------------------------------------------------------------------------------------------------ // Public Types @@ -43,14 +45,6 @@ pub struct JsonOptions { pretty_print: bool, } -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ @@ -153,10 +147,9 @@ impl ObjectWriter for JsonWriter { // ------------------------------------------------------------------------------------------------ fn json_error(e: serde_json::Error) -> Error { - log::error!("Error parsing JSON source: {:?}", e); - read_write_error_with(NAME, e) + error!("Error generating JSON source: {:?}", e); + Error::Tokenizer { + representation: super::NAME.into(), + source: Box::new(e), + } } - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/json_ld/mod.rs b/rdftk_io/src/json_ld/mod.rs index dad7945..49bc53c 100644 --- a/rdftk_io/src/json_ld/mod.rs +++ b/rdftk_io/src/json_ld/mod.rs @@ -13,26 +13,6 @@ pub const FILE_EXTENSION: &str = "jsonld"; /// The MIME type used for this serialization format. pub const MIME_TYPE: &str = "application/ld+json"; -// ------------------------------------------------------------------------------------------------ -// Public Types -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Implementations -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Modules // ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/lib.rs b/rdftk_io/src/lib.rs index 4a51922..bc949f6 100644 --- a/rdftk_io/src/lib.rs +++ b/rdftk_io/src/lib.rs @@ -29,16 +29,17 @@ Each module will also provide public constants `NAME`, `FILE_EXTENSION`, and `MI An example, reading an existing NTriple file. ```rust -use rdftk_io::nt::reader::NTriplesReader; -use rdftk_io::GraphReader; +use objio::{HasOptions, ObjectReader}; +use rdftk_io::nt::{NTripleReaderOptions, NTripleReader}; use rdftk_core::simple::graph_factory; use std::fs::File; use std::path::PathBuf; let file_path = PathBuf::from("tests/w3c/nt/literal.nt"); let mut file = File::open(file_path).unwrap(); -let reader = NTriplesReader::default(); -let graph = reader.read(&mut file, graph_factory()).unwrap(); +let reader = NTripleReader::default() + .with_options(NTripleReaderOptions::default().with_factory(graph_factory())); +let graph = reader.read(&mut file).unwrap(); ``` */ diff --git a/rdftk_io/src/n3/mod.rs b/rdftk_io/src/n3/mod.rs index 596f45d..923b436 100644 --- a/rdftk_io/src/n3/mod.rs +++ b/rdftk_io/src/n3/mod.rs @@ -4,34 +4,28 @@ proposed W3C [Notation3 (N3)](https://www.w3.org/TeamSubmission/n3/), _a readabl format. */ -// use ... // ------------------------------------------------------------------------------------------------ -// Public Types +// Public Values // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ +/// The display name of this serialization format. +pub const NAME: &str = "N3"; -// ------------------------------------------------------------------------------------------------ -// Implementations -// ------------------------------------------------------------------------------------------------ +/// The common file extension for this serialization format. +pub const FILE_EXTENSION: &str = "n3"; -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ +/// The MIME type used for this serialization format. +pub const MIME_TYPE: &str = "application/n3"; -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ +/// An IRI that defines the language. +pub const FORMAT_IRI: &str = "http://www.w3.org/ns/formats/N3"; // ------------------------------------------------------------------------------------------------ // Modules // ------------------------------------------------------------------------------------------------ -#[doc(hidden)] -pub mod reader; +mod reader; +pub use reader::{N3Reader, N3ReaderOptions}; -#[doc(hidden)] pub mod writer; diff --git a/rdftk_io/src/n3/n3.g4 b/rdftk_io/src/n3/n3.g4 deleted file mode 100644 index ab036eb..0000000 --- a/rdftk_io/src/n3/n3.g4 +++ /dev/null @@ -1,325 +0,0 @@ -/* - [The "BSD licence"] - Copyright (c) 2019, William Van Woensel (W3C N3 Community Group), - Łukasz Szeremeta (University of Bialystok, http://www.uwb.edu.pl/) - Dominik Tomaszuk (University of Bialystok, W3C N3 Community Group) - - All rights reserved. - - Based on ANTLR TURTLE grammar - (https://github.com/antlr/grammars-v4/blob/master/turtle/TURTLE.g4) - distributed under BSD licence. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -*/ - -grammar n3; - -@header { - package w3c.n3dev.parser.antlr; -} - -n3Doc - : (n3Statement '.' | sparqlDirective)* EOF - ; - -COMMENT - : '#' ~[\r\n\f]* -> skip - /* N3 nor Turtle syntax actually allows for comments (?) */ - ; - -n3Statement - : n3Directive - | triples - ; - -n3Directive - : prefixID - | base - ; - -sparqlDirective - : sparqlBase - | sparqlPrefix - ; - -sparqlBase - : BASE IRIREF - ; - -sparqlPrefix - : PREFIX PNAME_NS IRIREF - ; - -prefixID - : '@prefix' PNAME_NS IRIREF - ; - -base - : '@base' IRIREF - ; - -triples - : subject predicateObjectList? - ; - -predicateObjectList - : verb objectList (';' (verb objectList)?)* - ; - -objectList - : object (',' object)* - ; - -verb - : predicate - | 'a' - | 'has' expression - | 'is' expression 'of' - | '=' - | '<=' - | '=>' - ; - -subject - : expression - ; - -predicate - : (expression | '<-' expression) -/* allow first predicate in a path to also be inverted */ - ; - -object - : expression - ; - -expression - : path - ; - -path - : pathItem ('!' path | '^' path)? - ; - -pathItem - : iri - | blankNode - | quickVar - | collection - | blankNodePropertyList - | literal - | formula - ; - -literal - : rdfLiteral - | numericLiteral - | BooleanLiteral - ; - -blankNodePropertyList - : '[' predicateObjectList ']' - ; - -collection - : '(' object* ')' - ; - -formula - : '{' formulaContent? '}' - ; - -formulaContent - : n3Statement ('.' formulaContent?)? - | sparqlDirective formulaContent? - ; - -numericLiteral - : INTEGER - | DECIMAL - | DOUBLE - ; - -rdfLiteral - : String (LANGTAG | '^^' iri)? - ; - -BooleanLiteral - : 'true' - | 'false' - ; - -String - : STRING_LITERAL_QUOTE - | STRING_LITERAL_SINGLE_QUOTE - | STRING_LITERAL_LONG_SINGLE_QUOTE - | STRING_LITERAL_LONG_QUOTE - ; - -iri - : IRIREF - | prefixedName - ; - -prefixedName - : PNAME_NS - | PNAME_LN -/* PNAME_NS will be matched for ':' (i.e., "empty") prefixedNames - * hence this cannot be a lexer rule; for s/p/o of only ':', PNAME_NS will be returned - * instead of PrefixedName token */ - ; - -blankNode - : BLANK_NODE_LABEL - | ANON - ; - -quickVar - : QuickVarName - // only made this a parser rule for consistency - // (all other path-items are also parser rules) - ; - -IRIREF - : '<' (~[\u0000-\u0020<>"{}|^`\\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */ -/* production below allows non-encoded spaces */ -// : '<' (~[\u0000-\u001F<>"{}|^`\\] | UCHAR)* '>' - ; - -PNAME_NS - : PN_PREFIX? ':' -/* when using PrefixedName (i.e., lexer rule), this must occur *before* PrefixedName - * else, PNAME_NS will not be chosen for prefix decl (e.g., turtle-syntax-bnode-01) - * regardless, it should occur before PNAME_LN - */ - ; - -PNAME_LN - : PNAME_NS PN_LOCAL -/* rule must be above PN_LOCAL */ - ; - -BLANK_NODE_LABEL - : '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? - ; - -LANGTAG - : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* - ; - -INTEGER - : [+-]? [0-9]+ - ; - -DECIMAL - : [+-]? [0-9]* '.' [0-9]+ - ; - -DOUBLE - : [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) - ; - -EXPONENT - : [eE] [+-]? [0-9]+ - ; - -STRING_LITERAL_LONG_SINGLE_QUOTE - : '\'\'\'' (('\'' | '\'\'')? (~['\\] | ECHAR | UCHAR))* '\'\'\'' -/* needs to be above the "regular" quotes ; else first two '' will be matched as empty string */ - ; - -STRING_LITERAL_LONG_QUOTE - : '"""' (('"' | '""')? (~["\\] | ECHAR | UCHAR))* '"""' -/* needs to be above the "regular" quotes ; else first two "" will be matched as empty string */ - ; - -STRING_LITERAL_QUOTE - : '"' (~[\u0022\u005C\u000A\u000D] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */ - ; - -STRING_LITERAL_SINGLE_QUOTE - : '\'' (~[\u0027\u005C\u000A\u000D] | ECHAR | UCHAR)* '\'' /* #x27=' #x5C=\ #xA=new line #xD=carriage return */ - ; - -UCHAR - : '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX - ; - -ECHAR - : '\\' [tbnrf"'\\] - ; - -WS - : [\u0020\u0009\u000D\u000A] -> skip /* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */ - ; - - ANON - : '[' WS* ']' - ; - -QuickVarName - : '?' PN_CHARS_U PN_CHARS* -/* approximating "barename" with PN_CHARS - they seem similar enough */ - ; - -PN_CHARS_U - : PN_CHARS_BASE - | '_' - ; - -PN_CHARS_BASE - : [A-Z] | [a-z] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD] -/* antlr cannot include (at least on Java) the following: #x10000-#xEFFFF */ - ; - -PN_CHARS - : PN_CHARS_U | '-' | [0-9] | '\u00B7' | [\u0300-\u036F] | [\u203F-\u2040] - ; - -BASE: - ('B'|'b') ('A'|'a') ('S'|'s') ('E'|'e') - ; - -PREFIX: - ('P'|'p') ('R'|'r') ('E'|'e') ('F'|'f') ('I'|'i') ('X'|'x') -/* BASE and PREFIX must be case-insensitive, hence these monstrosities */ -/* BASE and PREFIX must be above PN_PREFIX, PN_LOCAL */ - ; - -PN_PREFIX - : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? - ; - -PN_LOCAL - : (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? - ; - -PLX - : PERCENT - | PN_LOCAL_ESC - ; - -PERCENT - : '%' HEX HEX - ; - -HEX - : [0-9] | [A-F] | [a-f] - ; - -PN_LOCAL_ESC - : '\\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%') - ; diff --git a/rdftk_io/src/n3/reader.rs b/rdftk_io/src/n3/reader.rs index 1f52a31..ca2c78f 100644 --- a/rdftk_io/src/n3/reader.rs +++ b/rdftk_io/src/n3/reader.rs @@ -1,34 +1,38 @@ -/*! -One-line description. - -More detailed description, with - -# Example - -*/ - -// use ... +use crate::common::parser::parse_n3_doc; +use crate::make_factory_options; +use objio::{impl_has_options, HasOptions, ObjectReader}; +use rdftk_core::error::Error; +use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; +use rdftk_core::simple::graph_factory; +use std::io::Read; // ------------------------------------------------------------------------------------------------ // Public Types // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ +make_factory_options!(N3ReaderOptions, GraphFactoryRef, graph_factory); -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ +#[derive(Debug, Default)] +pub struct N3Reader { + options: N3ReaderOptions, +} // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ +impl_has_options!(N3Reader, N3ReaderOptions); -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ +impl ObjectReader for N3Reader { + type Error = Error; + + fn read(&self, r: &mut R) -> Result + where + R: Read, + { + let mut buffer = String::new(); + r.read_to_string(&mut buffer)?; + let factory = self.options().factory().clone(); + parse_n3_doc(buffer, factory) + } +} diff --git a/rdftk_io/src/nq/mod.rs b/rdftk_io/src/nq/mod.rs index 356a357..d8f9ad0 100644 --- a/rdftk_io/src/nq/mod.rs +++ b/rdftk_io/src/nq/mod.rs @@ -9,17 +9,17 @@ Provides the `NQuadDataSetWriter` implementation of the `DataSetWriter` trait an # Example ```rust -use rdftk_io::nq::writer::NQuadDataSetWriter; -use rdftk_io::write_data_set_to_string; +use rdftk_io::nq::NQuadWriter; +# use objio::ObjectWriter; # use std::cell::RefCell; # use std::rc::Rc; # use rdftk_core::model::data_set::DataSetRef; # use rdftk_core::simple::data_set::data_set_factory; -# fn make_data_set() -> DataSetRef { data_set_factory().data_set(None) } +# fn make_data_set() -> DataSetRef { data_set_factory().data_set() } -let writer = NQuadDataSetWriter::default(); +let writer = NQuadWriter::default(); -let result = write_data_set_to_string(&writer, &make_data_set()); +let result = writer.write_to_string(&make_data_set()); ``` */ @@ -45,7 +45,7 @@ pub const FORMAT_IRI: &str = "http://www.w3.org/ns/formats/N-Quads"; // ------------------------------------------------------------------------------------------------ mod reader; -pub use reader::NQuadReader; +pub use reader::{NQuadReader, NQuadReaderOptions}; mod writer; pub use writer::NQuadWriter; diff --git a/rdftk_io/src/nq/nq-in.pest b/rdftk_io/src/nq/nq-in.pest deleted file mode 100644 index f119911..0000000 --- a/rdftk_io/src/nq/nq-in.pest +++ /dev/null @@ -1,35 +0,0 @@ -nquadsDoc = { - SOI ~ quad* ~ EOI -} - -quad = { - subject ~ predicate ~ object ~ graphLabel? ~ "." -} - -subject = { - IRIREF - | BlankNode -} - -predicate = { - IRIREF -} - -object = { - IRIREF - | BlankNode - | literal -} - -graphLabel = { - IRIREF - | BlankNode -} - -literal = { - rdfLiteral -} - -String = { - STRING_LITERAL_QUOTE -} diff --git a/rdftk_io/src/nq/nq.pest b/rdftk_io/src/nq/nq.pest deleted file mode 100644 index 5639761..0000000 --- a/rdftk_io/src/nq/nq.pest +++ /dev/null @@ -1,297 +0,0 @@ -nquadsDoc = { - SOI ~ quad* ~ EOI -} - -quad = { - subject ~ predicate ~ object ~ graphLabel? ~ "." -} - -subject = { - IRIREF - | BlankNode -} - -predicate = { - IRIREF -} - -object = { - IRIREF - | BlankNode - | literal -} - -graphLabel = { - IRIREF - | BlankNode -} - -literal = { - rdfLiteral -} - -String = { - STRING_LITERAL_QUOTE -} - -// ------------------------------------------------------------------------------------------------ -// IRIs -// ------------------------------------------------------------------------------------------------ - -iri = { - IRIREF - | prefixedName -} - -IRIREF = ${ - "<" ~ IRIREF_INNER ~ ">" -} - -IRIREF_INNER = @{ - IRIREF_CHAR* -} - -IRIREF_CHAR = { - /* #x00=NULL #01-#x1F=control codes #x20=space */ - !(">" | '\u{0000}'..'\u{0020}' | "\"" | "{" | "}" | "|" | "^" | "`" | "\\") ~ ANY - | UCHAR -} - -// ------------------------------------------------------------------------------------------------ -// Blank Nodes -// ------------------------------------------------------------------------------------------------ - -BlankNode = { - BLANK_NODE_LABEL - | ANON -} - -BLANK_NODE_LABEL = @{ - "_:" ~ (PN_CHARS_U | ASCII_DIGIT) ~ BLANK_NODE_LABEL_TAIL* -} - -BLANK_NODE_LABEL_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." -} - -ANON = { - "[" ~ "]" -} - -// ------------------------------------------------------------------------------------------------ -// RDF (string-like) Literals -// ------------------------------------------------------------------------------------------------ - -rdfLiteral = { - String ~ (LANGTAG | "^^" ~ iri)? -} - -LANGTAG = { - "@" ~ ASCII_ALPHA+ ~ ("-" ~ ASCII_ALPHANUMERIC+)* -} - -// ------------------------------------------------------------------------------------------------ -// Numeric Literals -// ------------------------------------------------------------------------------------------------ - -NumericLiteral = { - SIGN? ~ - ( DOUBLE - | DECIMAL - | INTEGER ) -} - -INTEGER = { - ASCII_DIGIT+ -} - -DECIMAL = { - ASCII_DIGIT* ~ "." ~ ASCII_DIGIT+ -} - -DOUBLE = { - (ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT* ~ EXPONENT | "." ~ ASCII_DIGIT+ ~EXPONENT | ASCII_DIGIT+ ~ EXPONENT) -} - -SIGN = { - ("+" | "-") -} - -EXPONENT = { - ^"e" ~ SIGN? ~ ASCII_DIGIT+ -} - -// ------------------------------------------------------------------------------------------------ -// Boolean Literals -// ------------------------------------------------------------------------------------------------ - -BooleanLiteral = { - "true" - | "false" -} - -// ------------------------------------------------------------------------------------------------ -// String Literals -// ------------------------------------------------------------------------------------------------ - -STRING_LITERAL_LONG_SINGLE_QUOTE = ${ - "'''" ~ LONG_SINGLE_QUOTE_INNER ~ "'''" -} - -LONG_SINGLE_QUOTE_INNER = @{ - LONG_SINGLE_QUOTE_CHAR* -} - -LONG_SINGLE_QUOTE_CHAR = { - !("'''" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR -} - -STRING_LITERAL_LONG_QUOTE = ${ - "\"\"\"" ~ LONG_QUOTE_INNER ~ "\"\"\"" -} - -LONG_QUOTE_INNER = @{ - LONG_QUOTE_CHAR* -} - -LONG_QUOTE_CHAR = { - !("\"\"\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR -} - -STRING_LITERAL_QUOTE = ${ - "\"" ~ QUOTE_INNER ~ "\"" -} - -QUOTE_INNER = @{ - QUOTE_CHAR* -} - -QUOTE_CHAR = { - !("\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR -} - -STRING_LITERAL_SINGLE_QUOTE = ${ - "'" ~ SINGLE_QUOTE_INNER ~ "'" -} - -SINGLE_QUOTE_INNER = @{ - SINGLE_QUOTE_CHAR* -} - -SINGLE_QUOTE_CHAR = { - !( "'" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR -} - -UCHAR = @{ - "\\U" ~ ASCII_HEX_DIGIT{8} | "\\u" ~ ASCII_HEX_DIGIT{4} -} - -ECHAR = @{ - "\\" ~ ("t" | "b" | "n" | "r" | "f" | "\"" | "'" | "\\") -} - -// ------------------------------------------------------------------------------------------------ -// Prefixed Names -// ------------------------------------------------------------------------------------------------ - -prefixedName = { - PNAME_LN - | PNAME_NS -} - -PNAME_NS = { - PN_PREFIX? ~ ":" -} - -PNAME_LN = { - PNAME_NS ~ PN_LOCAL -} - -PN_CHARS_BASE = { - ASCII_ALPHA - | '\u{00C0}'..'\u{00D6}' - | '\u{00D8}'..'\u{00F6}' - | '\u{00F8}'..'\u{02FF}' - | '\u{0370}'..'\u{037D}' - | '\u{037F}'..'\u{1FFF}' - | '\u{200C}'..'\u{200D}' - | '\u{2070}'..'\u{218F}' - | '\u{2C00}'..'\u{2FEF}' - | '\u{3001}'..'\u{D7FF}' - | '\u{F900}'..'\u{FDCF}' - | '\u{FDF0}'..'\u{FFFD}' - | '\u{10000}'..'\u{EFFFF}' -} - -PN_CHARS_U = { - PN_CHARS_BASE - | "_" -} - -PN_CHARS = { - PN_CHARS_U - | "-" - | ASCII_DIGIT - | '\u{00B7}'..'\u{00B7}' - | '\u{0300}'..'\u{036F}' - | '\u{203F}'..'\u{2040}' -} - -PN_PREFIX = @{ - PN_CHARS_BASE ~ PN_PREFIX_TAIL* -} - -PN_PREFIX_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." -} - -PN_LOCAL = @{ - (PN_CHARS_U | ":" | ASCII_DIGIT | PLX) ~ PN_LOCAL_TAIL* -} - -PN_LOCAL_TAIL = { - PN_LOCAL_CHARS - | &("." ~ PN_LOCAL_CHARS) ~ "." -} - -PN_LOCAL_CHARS = { - PN_CHARS | ":" | PLX -} - -PLX = { - PERCENT - | PN_LOCAL_ESC -} - -PERCENT = { - "%" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT -} - -PN_LOCAL_ESC = { - "\\" ~ ("_" | "~" | "." | "-" | "!" | "$" | "&" | "'\''" | "(" | ")" - | "*" | "+" | "," | ";" | "=" | "/" | "?" | "#" | "@" | "%") -} - -// ------------------------------------------------------------------------------------------------ -// Implicit Whitespace -// ------------------------------------------------------------------------------------------------ - -newline = _{ "\n" | "\r\n" } - -COMMENT = _{ - "#" ~ (!newline ~ ANY)* -} - -WHITESPACE = _{ - " " | "\t" | newline -} diff --git a/rdftk_io/src/nq/reader.rs b/rdftk_io/src/nq/reader.rs index 329a703..b64988d 100644 --- a/rdftk_io/src/nq/reader.rs +++ b/rdftk_io/src/nq/reader.rs @@ -1,12 +1,10 @@ -#![allow(clippy::upper_case_acronyms)] // << generated by pest. - -use crate::common::ReaderOptions; +use crate::common::parser::parse_nquad_doc; +use crate::make_factory_options; use objio::{impl_has_options, HasOptions, ObjectReader}; -use pest::Parser as _; -use pest_derive::Parser; +use rdftk_core::simple::data_set_factory; use rdftk_core::{ - error::{read_write_error_with, Error}, - model::graph::NamedGraphRef, + error::Error, + model::data_set::{DataSetFactoryRef, DataSetRef}, }; use std::io::Read; @@ -14,83 +12,29 @@ use std::io::Read; // Public Types // ------------------------------------------------------------------------------------------------ +make_factory_options!(NQuadReaderOptions, DataSetFactoryRef, data_set_factory); + #[derive(Debug, Default)] pub struct NQuadReader { - options: ReaderOptions, + options: NQuadReaderOptions, } -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -#[derive(Parser)] -#[grammar = "nq/nq.pest"] -struct NQuadParser; - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ -impl_has_options!(NQuadReader, ReaderOptions); +impl_has_options!(NQuadReader, NQuadReaderOptions); -impl ObjectReader for NQuadReader { +impl ObjectReader for NQuadReader { type Error = Error; - fn read(&self, r: &mut R) -> Result + fn read(&self, r: &mut R) -> Result where R: Read, { let mut buffer = String::new(); r.read_to_string(&mut buffer)?; - - let mut parsed = NQuadParser::parse(Rule::nquadsDoc, &buffer) - .map_err(|e| read_write_error_with(super::NAME, e))?; - let _top_node = parsed.next().unwrap(); - - let _factory = self.options().factory().clone(); - //ntriples_doc(top_node, factory) - todo!() - } -} - -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_simple_1() { - let reader = NQuadReader::default(); - assert!(reader.read_from_string( - r###" - . # comments here -# or on a line by themselves -_:subject1 "object1" . -_:subject2 "object2" . -"###).is_ok()); - } - - #[test] - fn parse_simple_2() { - let reader = NQuadReader::default(); - assert!(reader - .read_from_string( - r###" -_:alice _:bob . -_:bob _:alice . -"###, - ) - .is_ok()); + let factory = self.options().factory().clone(); + parse_nquad_doc(buffer, factory) } } diff --git a/rdftk_io/src/nt/mod.rs b/rdftk_io/src/nt/mod.rs index 9ce397e..38cac80 100644 --- a/rdftk_io/src/nt/mod.rs +++ b/rdftk_io/src/nt/mod.rs @@ -7,8 +7,8 @@ format. # Writer Example ```rust -use rdftk_io::nt::writer::NTripleWriter; -use rdftk_io::write_graph_to_string; +use rdftk_io::nt::NTripleWriter; +# use objio::ObjectWriter; # use rdftk_core::model::graph::GraphRef; # fn make_graph() -> GraphRef { rdftk_core::simple::graph::graph_factory().graph() } @@ -37,7 +37,7 @@ pub const MIME_TYPE: &str = "application/n-triples"; // ------------------------------------------------------------------------------------------------ mod reader; -pub use reader::NTripleReader; +pub use reader::{NTripleReader, NTripleReaderOptions}; mod writer; pub use writer::NTripleWriter; diff --git a/rdftk_io/src/nt/nt-in.pest b/rdftk_io/src/nt/nt-in.pest deleted file mode 100644 index abfa64e..0000000 --- a/rdftk_io/src/nt/nt-in.pest +++ /dev/null @@ -1,30 +0,0 @@ -ntriplesDoc = { - SOI ~ triple* ~ EOI -} - -triple = { - subject ~ predicate ~ object ~ "." -} - -subject = { - IRIREF - | BlankNode -} - -predicate = { - IRIREF -} - -object = { - IRIREF - | BlankNode - | literal -} - -literal = { - rdfLiteral -} - -String = { - STRING_LITERAL_QUOTE -} diff --git a/rdftk_io/src/nt/nt.pest b/rdftk_io/src/nt/nt.pest deleted file mode 100644 index 26bca1d..0000000 --- a/rdftk_io/src/nt/nt.pest +++ /dev/null @@ -1,292 +0,0 @@ -ntriplesDoc = { - SOI ~ triple* ~ EOI -} - -triple = { - subject ~ predicate ~ object ~ "." -} - -subject = { - IRIREF - | BlankNode -} - -predicate = { - IRIREF -} - -object = { - IRIREF - | BlankNode - | literal -} - -literal = { - rdfLiteral -} - -String = { - STRING_LITERAL_QUOTE -} - -// ------------------------------------------------------------------------------------------------ -// IRIs -// ------------------------------------------------------------------------------------------------ - -iri = { - IRIREF - | prefixedName -} - -IRIREF = ${ - "<" ~ IRIREF_INNER ~ ">" -} - -IRIREF_INNER = @{ - IRIREF_CHAR* -} - -IRIREF_CHAR = { - /* #x00=NULL #01-#x1F=control codes #x20=space */ - !(">" | '\u{0000}'..'\u{0020}' | "\"" | "{" | "}" | "|" | "^" | "`" | "\\") ~ ANY - | UCHAR -} - -// ------------------------------------------------------------------------------------------------ -// Blank Nodes -// ------------------------------------------------------------------------------------------------ - -BlankNode = { - BLANK_NODE_LABEL - | ANON -} - -BLANK_NODE_LABEL = @{ - "_:" ~ (PN_CHARS_U | ASCII_DIGIT) ~ BLANK_NODE_LABEL_TAIL* -} - -BLANK_NODE_LABEL_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." -} - -ANON = { - "[" ~ "]" -} - -// ------------------------------------------------------------------------------------------------ -// RDF (string-like) Literals -// ------------------------------------------------------------------------------------------------ - -rdfLiteral = { - String ~ (LANGTAG | "^^" ~ iri)? -} - -LANGTAG = @{ - "@" ~ ASCII_ALPHA+ ~ ("-" ~ ASCII_ALPHANUMERIC+)* -} - -// ------------------------------------------------------------------------------------------------ -// Numeric Literals -// ------------------------------------------------------------------------------------------------ - -NumericLiteral = { - SIGN? ~ - ( DOUBLE - | DECIMAL - | INTEGER ) -} - -INTEGER = { - ASCII_DIGIT+ -} - -DECIMAL = { - ASCII_DIGIT* ~ "." ~ ASCII_DIGIT+ -} - -DOUBLE = { - (ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT* ~ EXPONENT | "." ~ ASCII_DIGIT+ ~EXPONENT | ASCII_DIGIT+ ~ EXPONENT) -} - -SIGN = { - ("+" | "-") -} - -EXPONENT = { - ^"e" ~ SIGN? ~ ASCII_DIGIT+ -} - -// ------------------------------------------------------------------------------------------------ -// Boolean Literals -// ------------------------------------------------------------------------------------------------ - -BooleanLiteral = { - "true" - | "false" -} - -// ------------------------------------------------------------------------------------------------ -// String Literals -// ------------------------------------------------------------------------------------------------ - -STRING_LITERAL_LONG_SINGLE_QUOTE = ${ - "'''" ~ LONG_SINGLE_QUOTE_INNER ~ "'''" -} - -LONG_SINGLE_QUOTE_INNER = @{ - LONG_SINGLE_QUOTE_CHAR* -} - -LONG_SINGLE_QUOTE_CHAR = { - !("'''" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR -} - -STRING_LITERAL_LONG_QUOTE = ${ - "\"\"\"" ~ LONG_QUOTE_INNER ~ "\"\"\"" -} - -LONG_QUOTE_INNER = @{ - LONG_QUOTE_CHAR* -} - -LONG_QUOTE_CHAR = { - !("\"\"\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR -} - -STRING_LITERAL_QUOTE = ${ - "\"" ~ QUOTE_INNER ~ "\"" -} - -QUOTE_INNER = @{ - QUOTE_CHAR* -} - -QUOTE_CHAR = { - !("\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR -} - -STRING_LITERAL_SINGLE_QUOTE = ${ - "'" ~ SINGLE_QUOTE_INNER ~ "'" -} - -SINGLE_QUOTE_INNER = @{ - SINGLE_QUOTE_CHAR* -} - -SINGLE_QUOTE_CHAR = { - !( "'" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR -} - -UCHAR = @{ - "\\U" ~ ASCII_HEX_DIGIT{8} | "\\u" ~ ASCII_HEX_DIGIT{4} -} - -ECHAR = @{ - "\\" ~ ("t" | "b" | "n" | "r" | "f" | "\"" | "'" | "\\") -} - -// ------------------------------------------------------------------------------------------------ -// Prefixed Names -// ------------------------------------------------------------------------------------------------ - -prefixedName = { - PNAME_LN - | PNAME_NS -} - -PNAME_NS = { - PN_PREFIX? ~ ":" -} - -PNAME_LN = { - PNAME_NS ~ PN_LOCAL -} - -PN_CHARS_BASE = { - ASCII_ALPHA - | '\u{00C0}'..'\u{00D6}' - | '\u{00D8}'..'\u{00F6}' - | '\u{00F8}'..'\u{02FF}' - | '\u{0370}'..'\u{037D}' - | '\u{037F}'..'\u{1FFF}' - | '\u{200C}'..'\u{200D}' - | '\u{2070}'..'\u{218F}' - | '\u{2C00}'..'\u{2FEF}' - | '\u{3001}'..'\u{D7FF}' - | '\u{F900}'..'\u{FDCF}' - | '\u{FDF0}'..'\u{FFFD}' - | '\u{10000}'..'\u{EFFFF}' -} - -PN_CHARS_U = { - PN_CHARS_BASE - | "_" -} - -PN_CHARS = { - PN_CHARS_U - | "-" - | ASCII_DIGIT - | '\u{00B7}'..'\u{00B7}' - | '\u{0300}'..'\u{036F}' - | '\u{203F}'..'\u{2040}' -} - -PN_PREFIX = @{ - PN_CHARS_BASE ~ PN_PREFIX_TAIL* -} - -PN_PREFIX_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." -} - -PN_LOCAL = @{ - (PN_CHARS_U | ":" | ASCII_DIGIT | PLX) ~ PN_LOCAL_TAIL* -} - -PN_LOCAL_TAIL = { - PN_LOCAL_CHARS - | &("." ~ PN_LOCAL_CHARS) ~ "." -} - -PN_LOCAL_CHARS = { - PN_CHARS | ":" | PLX -} - -PLX = { - PERCENT - | PN_LOCAL_ESC -} - -PERCENT = { - "%" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT -} - -PN_LOCAL_ESC = { - "\\" ~ ("_" | "~" | "." | "-" | "!" | "$" | "&" | "'\''" | "(" | ")" - | "*" | "+" | "," | ";" | "=" | "/" | "?" | "#" | "@" | "%") -} - -// ------------------------------------------------------------------------------------------------ -// Implicit Whitespace -// ------------------------------------------------------------------------------------------------ - -newline = _{ "\n" | "\r\n" } - -COMMENT = _{ - "#" ~ (!newline ~ ANY)* -} - -WHITESPACE = _{ - " " | "\t" | newline -} diff --git a/rdftk_io/src/nt/reader.rs b/rdftk_io/src/nt/reader.rs index d5cfeed..e0f9b21 100644 --- a/rdftk_io/src/nt/reader.rs +++ b/rdftk_io/src/nt/reader.rs @@ -1,59 +1,27 @@ -/*! -One-line description. - -More detailed description, with - -# Example - -*/ - -#![allow(clippy::upper_case_acronyms)] // << generated by pest. - -use crate::common::parser_error::ParserErrorFactory; -use crate::common::ReaderOptions; +use crate::common::parser::parse_ntriple_doc; +use crate::make_factory_options; use objio::{impl_has_options, HasOptions, ObjectReader}; -use pest::iterators::Pair; -use pest::Parser as _; -use pest_derive::Parser; use rdftk_core::error::Error; use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; -use rdftk_core::model::literal::{DataType, LanguageTag, LiteralFactoryRef, LiteralRef}; -use rdftk_core::model::statement::{ - ObjectNodeRef, StatementFactoryRef, StatementRef, SubjectNodeRef, -}; -use rdftk_iri::{Iri, IriRef}; -use regex::Regex; +use rdftk_core::simple::graph_factory; use std::io::Read; -use std::str::FromStr; // ------------------------------------------------------------------------------------------------ // Public Types // ------------------------------------------------------------------------------------------------ +make_factory_options!(NTripleReaderOptions, GraphFactoryRef, graph_factory); + #[derive(Debug, Default)] pub struct NTripleReader { - options: ReaderOptions, + options: NTripleReaderOptions, } -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -#[derive(Parser)] -#[grammar = "nt/nt.pest"] -struct NTripleParser; - -const ERROR: ParserErrorFactory = ParserErrorFactory { repr: super::NAME }; - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ -impl_has_options!(NTripleReader, ReaderOptions); +impl_has_options!(NTripleReader, NTripleReaderOptions); impl ObjectReader for NTripleReader { type Error = Error; @@ -64,339 +32,7 @@ impl ObjectReader for NTripleReader { { let mut buffer = String::new(); r.read_to_string(&mut buffer)?; - - let mut parsed = - NTripleParser::parse(Rule::ntriplesDoc, &buffer).map_err(|e| ERROR.parser(e))?; - let top_node = parsed.next().unwrap(); - let factory = self.options().factory().clone(); - ntriples_doc(top_node, factory) - } -} - -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ - -fn ntriples_doc(input_pair: Pair<'_, Rule>, factory: GraphFactoryRef) -> Result { - log::trace!("ntriples_doc({:?})", &input_pair.as_rule()); - - let graph = factory.graph(); - - if input_pair.as_rule() == Rule::ntriplesDoc { - for inner_pair in input_pair.into_inner() { - match inner_pair.as_rule() { - Rule::triple => { - let mut graph = graph.borrow_mut(); - let st = triple( - inner_pair, - &graph.statement_factory(), - &graph.literal_factory(), - )?; - graph.insert(st); - } - Rule::EOI => { - log::trace!("Done.") - } - _ => { - unexpected!("ntriples_doc", inner_pair) - } - } - } - } else { - unexpected!("ntriples_doc", input_pair); - } - - Ok(graph) -} - -fn triple( - input_pair: Pair<'_, Rule>, - statements: &StatementFactoryRef, - literals: &LiteralFactoryRef, -) -> Result { - log::trace!("triple({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::triple { - let mut inner_pairs = input_pair.into_inner(); - let subject = subject(inner_pairs.next().unwrap(), statements)?; - let predicate = predicate(inner_pairs.next().unwrap())?; - let object = object(inner_pairs.next().unwrap(), statements, literals)?; - statements.statement(subject, predicate, object) - } else { - unexpected!("triple", input_pair); - } -} - -fn subject( - input_pair: Pair<'_, Rule>, - factory: &StatementFactoryRef, -) -> Result { - log::trace!("subject({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::subject { - let inner_pair = input_pair.into_inner().next().unwrap(); - match inner_pair.as_rule() { - Rule::IRIREF => Ok(factory.named_subject(iri_ref(inner_pair)?)), - Rule::BlankNode => { - let node = inner_pair.as_str().to_string(); - // strip the leading '_:' - let node = &node[2..]; - factory.blank_subject_named(node) - } - _ => { - unexpected!("subject", inner_pair) - } - } - } else { - unexpected!("subject", input_pair); - } -} - -fn predicate(input_pair: Pair<'_, Rule>) -> Result { - log::trace!("predicate({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::predicate { - let inner_pair = input_pair.into_inner().next().unwrap(); - if inner_pair.as_rule() == Rule::IRIREF { - Ok(iri_ref(inner_pair)?) - } else { - unexpected!("subject", inner_pair); - } - } else { - unexpected!("subject", input_pair); - } -} - -fn object( - input_pair: Pair<'_, Rule>, - factory: &StatementFactoryRef, - literals: &LiteralFactoryRef, -) -> Result { - log::trace!("object({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::object { - let inner_pair = input_pair.into_inner().next().unwrap(); - match inner_pair.as_rule() { - Rule::IRIREF => Ok(factory.named_object(iri_ref(inner_pair)?)), - Rule::BlankNode => { - let node = inner_pair.as_str().to_string(); - // strip the leading '_:' - let node = &node[2..]; - Ok(factory.blank_object_named(node)?) - } - Rule::literal => { - let literal = literal(inner_pair, literals)?; - Ok(factory.literal_object(literal)) - } - _ => { - unexpected!("object", inner_pair) - } - } - } else { - unexpected!("object", input_pair); - } -} - -fn literal(input_pair: Pair<'_, Rule>, literals: &LiteralFactoryRef) -> Result { - log::trace!("literal({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::literal { - let inner_pair = input_pair.into_inner().next().unwrap(); - rdf_literal(inner_pair, literals) - } else { - unexpected!("literal", input_pair); - } -} - -fn rdf_literal( - input_pair: Pair<'_, Rule>, - literals: &LiteralFactoryRef, -) -> Result { - log::trace!("literal({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::rdfLiteral { - let mut inner_pair = input_pair.into_inner(); - let lexical_form = string(inner_pair.next().unwrap())?; - - if let Some(other) = inner_pair.next() { - match other.as_rule() { - Rule::iri => { - let data_type = DataType::Other(iri(other)?); - Ok(literals.with_data_type(&lexical_form, data_type)) - } - Rule::LANGTAG => { - let lang_tag = lang_tag(other)?; - Ok(literals.with_language(&lexical_form, lang_tag)) - } - _ => { - unexpected!("literal", other); - } - } - } else { - Ok(literals.literal(&lexical_form)) - } - } else { - unexpected!("literal", input_pair); - } -} - -fn string(input_pair: Pair<'_, Rule>) -> Result { - log::trace!("string({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::String { - let inner_pair = input_pair.into_inner().next().unwrap(); - match inner_pair.as_rule() { - Rule::STRING_LITERAL_QUOTE => { - let inner_pair = inner_pair.into_inner().next().unwrap(); - if inner_pair.as_rule() == Rule::QUOTE_INNER { - Ok(inner_pair.as_str().to_string()) - } else { - unexpected!("string", inner_pair); - } - } - _ => { - unexpected!("string", inner_pair) - } - } - } else { - unexpected!("string", input_pair); - } -} - -fn iri(input_pair: Pair<'_, Rule>) -> Result { - log::trace!("iri({:?})", &input_pair.as_rule()); - - if input_pair.as_rule() == Rule::iri { - let inner_pair = input_pair.into_inner().next().unwrap(); - if inner_pair.as_rule() == Rule::IRIREF { - iri_ref(inner_pair) - } else { - unexpected!("iri", inner_pair); - } - } else { - unexpected!("iri", input_pair); - } -} - -fn iri_ref(input_pair: Pair<'_, Rule>) -> Result { - log::trace!("iri_ref({:?})", &input_pair.as_rule()); - if input_pair.as_rule() == Rule::IRIREF { - let iri = input_pair.as_str().to_string(); - // strip the '<' and '>' characters. - let iri_str = unescape_iri(&iri[1..iri.len() - 1]); - Ok(IriRef::new(Iri::from_str(&iri_str)?)) - } else { - unexpected!("iri_ref", input_pair); - } -} - -fn lang_tag(input_pair: Pair<'_, Rule>) -> Result { - log::trace!("lang_tag({:?})", &input_pair.as_rule()); - if input_pair.as_rule() == Rule::LANGTAG { - let tag = input_pair.as_str().to_string(); - // strip the leading '@' - let tag = &tag[1..]; - Ok(LanguageTag::from_str(tag)?) - } else { - unexpected!("lang_tag", input_pair); - } -} - -// ------------------------------------------------------------------------------------------------ - -lazy_static::lazy_static! { - static ref UNICODE_ESC: Regex = - Regex::new(r"(\\U[[:xdigit:]]{8})|(\\u[[:xdigit:]]{4})").unwrap(); -} - -fn unescape_iri(iri: &str) -> String { - let (new_iri, end) = - UNICODE_ESC - .captures_iter(iri) - .fold((String::new(), 0), |(so_far, start), cap| { - let cap = cap.get(0).unwrap(); - ( - format!( - "{}{}{}", - so_far, - &iri[start..cap.start()], - unescape_uchar(cap.as_str()) - ), - cap.end(), - ) - }); - - format!("{}{}", new_iri, &iri[end..]) -} - -fn unescape_uchar(uchar: &str) -> char { - use std::char; - let uchar = &uchar[2..]; - let uchar_u32 = u32::from_str_radix(uchar, 16).unwrap(); - char::from_u32(uchar_u32).unwrap() -} - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ - -#[cfg(test)] -mod tests { - use super::*; - use crate::nt::writer::NTripleWriter; - use objio::ObjectWriter; - use rdftk_core::error::Error; - - fn write_graph(graph: &GraphRef) { - let writer = NTripleWriter::default(); - let _ = writer.write(&mut std::io::stdout(), graph); - } - - #[test] - fn parse_simple() { - let reader = NTripleReader::default(); - let result: Result = reader.read_from_string( - r###" - "That Seventies Show"^^ . # literal with XML Schema string datatype - "That Seventies Show" . # same as above - "That Seventies Show"@en . # literal with a language tag - "Cette Série des Années Septante"@fr-be . # literal outside of ASCII range with a region subtag - "This is a multi-line\nliteral with many quotes (\"\"\"\"\")\nand two apostrophes ('')." . - "2"^^ . # xsd:integer - "1.663E-4"^^ . # xsd:double -"### ); - match result { - Ok(g) => { - println!("ok"); - write_graph(&g); - } - Err(e) => { - println!("{:?}", e); - panic!(); - } - } - } - - #[test] - fn parse_simple_with_blanks() { - let reader = NTripleReader::default(); - let result: Result = reader.read_from_string( - r###" - . # comments here -# or on a line by themselves -_:subject1 "object1" . -_:subject2 "object2" . -"### ); - match result { - Ok(g) => { - println!("ok"); - write_graph(&g); - } - Err(e) => { - println!("{:?}", e); - panic!(); - } - } + parse_ntriple_doc(buffer, factory) } } diff --git a/rdftk_io/src/trig/mod.rs b/rdftk_io/src/trig/mod.rs index 76d370d..e5da534 100644 --- a/rdftk_io/src/trig/mod.rs +++ b/rdftk_io/src/trig/mod.rs @@ -3,6 +3,25 @@ Provides for reading and writing a `Graph` instance in the W3C [RDF 1.1 TriG](https://www.w3.org/TR/trig/), _RDF Dataset Language_ format. */ -pub mod reader; +// ------------------------------------------------------------------------------------------------ +// Public Values +// ------------------------------------------------------------------------------------------------ -pub mod writer; +/// The display name of this serialization format. +pub const NAME: &str = "TriG"; + +/// The common file extension for this serialization format. +pub const FILE_EXTENSION: &str = "trig"; + +/// The MIME type used for this serialization format. +pub const MIME_TYPE: &str = "text/trig"; + +// ------------------------------------------------------------------------------------------------ +// Modules +// ------------------------------------------------------------------------------------------------ + +mod reader; +pub use reader::{TrigReader, TrigReaderOptions}; + +mod writer; +pub use writer::{TrigWriter, TrigWriterOptions}; diff --git a/rdftk_io/src/trig/reader.rs b/rdftk_io/src/trig/reader.rs index 1f52a31..b7232ae 100644 --- a/rdftk_io/src/trig/reader.rs +++ b/rdftk_io/src/trig/reader.rs @@ -1,34 +1,38 @@ -/*! -One-line description. - -More detailed description, with - -# Example - -*/ - -// use ... +use crate::common::parser::parse_trig_doc; +use crate::make_factory_options; +use objio::{impl_has_options, HasOptions, ObjectReader}; +use rdftk_core::error::Error; +use rdftk_core::model::data_set::{DataSetFactoryRef, DataSetRef}; +use rdftk_core::simple::data_set_factory; +use std::io::Read; // ------------------------------------------------------------------------------------------------ // Public Types // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ +make_factory_options!(TrigReaderOptions, DataSetFactoryRef, data_set_factory); -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ +#[derive(Debug, Default)] +pub struct TrigReader { + options: TrigReaderOptions, +} // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ +impl_has_options!(TrigReader, TrigReaderOptions); -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ +impl ObjectReader for TrigReader { + type Error = Error; + + fn read(&self, r: &mut R) -> Result + where + R: Read, + { + let mut buffer = String::new(); + r.read_to_string(&mut buffer)?; + let factory = self.options().factory().clone(); + parse_trig_doc(buffer, factory) + } +} diff --git a/rdftk_io/src/trig/writer.rs b/rdftk_io/src/trig/writer.rs index 4289c01..df62a8d 100644 --- a/rdftk_io/src/trig/writer.rs +++ b/rdftk_io/src/trig/writer.rs @@ -7,8 +7,8 @@ More detailed description, with */ -use crate::turtle::writer::TurtleOptions; -use objio::{impl_has_options, ObjectWriter}; +use crate::turtle::TurtleOptions; +use objio::{impl_has_options, HasOptions, ObjectWriter}; use rdftk_core::error::Error; use rdftk_core::model::{data_set::DataSetRef, graph::NamedGraphRef}; @@ -16,24 +16,56 @@ use rdftk_core::model::{data_set::DataSetRef, graph::NamedGraphRef}; // Public Types // ------------------------------------------------------------------------------------------------ +#[derive(Debug, Default)] +pub struct TrigWriterOptions { + turtle: TurtleOptions, + omit_graph_keyword: bool, +} + #[derive(Debug, Default)] pub struct TrigWriter { - options: TurtleOptions, + options: TrigWriterOptions, } // ------------------------------------------------------------------------------------------------ -// Private Types +// Implementations // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ +impl From for TrigWriterOptions { + fn from(value: TurtleOptions) -> Self { + Self { + turtle: value, + ..Default::default() + } + } +} + +impl AsRef for TrigWriterOptions { + fn as_ref(&self) -> &TurtleOptions { + &self.turtle + } +} + +impl TrigWriterOptions { + pub fn with_omit_graph_keyword(self, omit_graph_keyword: bool) -> Self { + Self { + omit_graph_keyword, + ..self + } + } + + pub fn omit_graph_keyword(&self) -> bool { + self.omit_graph_keyword + } + + pub fn set_omit_graph_keyword(&mut self, omit_graph_keyword: bool) { + self.omit_graph_keyword = omit_graph_keyword; + } +} -// ------------------------------------------------------------------------------------------------ -// Implementations // ------------------------------------------------------------------------------------------------ -impl_has_options!(TrigWriter, TurtleOptions); +impl_has_options!(TrigWriter, TrigWriterOptions); impl ObjectWriter for TrigWriter { type Error = Error; @@ -59,6 +91,9 @@ impl ObjectWriter for TrigWriter { { let graph = graph.borrow(); if let Some(name) = graph.name() { + if !self.options().omit_graph_keyword() { + w.write_all(b"GRAPH ")?; + } w.write_all(name.to_string().as_bytes())?; w.write_all(b" ")?; } @@ -70,11 +105,3 @@ impl ObjectWriter for TrigWriter { Ok(()) } } - -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/turtle/mod.rs b/rdftk_io/src/turtle/mod.rs index 7fd7d2a..2b75eca 100644 --- a/rdftk_io/src/turtle/mod.rs +++ b/rdftk_io/src/turtle/mod.rs @@ -21,9 +21,8 @@ pub const MIME_TYPE: &str = "text/turtle"; // Modules // ------------------------------------------------------------------------------------------------ -mod parser; +mod reader; +pub use reader::{TurtleReader, TurtleReaderOptions}; -#[doc(hidden)] -pub mod reader; - -pub mod writer; +mod writer; +pub use writer::{TurtleOptions, TurtleWriter}; diff --git a/rdftk_io/src/turtle/parser/mod.rs b/rdftk_io/src/turtle/parser/mod.rs deleted file mode 100644 index 9380882..0000000 --- a/rdftk_io/src/turtle/parser/mod.rs +++ /dev/null @@ -1,102 +0,0 @@ -/*! -One-line description. - -More detailed description, with - -# Example - -*/ - -#![allow(clippy::upper_case_acronyms)] // << generated by pest. - -use crate::common::parser_error::ParserErrorFactory; -use pest::iterators::Pair; -use pest::Parser as _; -use pest_derive::Parser; -use rdftk_core::error::Error; -use rdftk_core::model::graph::GraphRef; -use rdftk_core::simple::graph::graph_factory; - -// ------------------------------------------------------------------------------------------------ -// Public Types -// ------------------------------------------------------------------------------------------------ - -#[derive(Parser)] -#[grammar = "turtle/turtle.pest"] -struct TurtleParser; - -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ - -#[allow(dead_code)] -const ERROR: ParserErrorFactory = ParserErrorFactory { repr: super::NAME }; - -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - -#[allow(dead_code)] -pub(super) fn parse_text(input: &str) -> Result { - let mut parsed = - TurtleParser::parse(Rule::turtleStarDoc, input).map_err(|e| ERROR.parser(e))?; - let top_node = parsed.next().unwrap(); - turtle_star_doc(top_node) -} - -// ------------------------------------------------------------------------------------------------ -// Implementations -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ - -#[allow(dead_code)] -fn turtle_star_doc(input_pair: Pair<'_, Rule>) -> Result { - let graph: GraphRef = graph_factory().graph(); - - log::trace!("turtle_star_doc({:?})", &input_pair.as_rule()); - - match input_pair.as_rule() { - Rule::turtleStarDoc => {} - _ => unexpected!("parse_idl", input_pair), - } - - Ok(graph) -} - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ - -// ------------------------------------------------------------------------------------------------ -// Unit Tests -// ------------------------------------------------------------------------------------------------ - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_simple() { - let result: Result = parse_text( - r###"@base . -@prefix rdf: . -@prefix rdfs: . -@prefix foaf: . -@prefix rel: . - -<#green-goblin> - rel:enemyOf <#spiderman> ; - a foaf:Person ; # in the context of the Marvel universe - foaf:name "Green Goblin" . - -<#spiderman> - rel:enemyOf <#green-goblin> ; - a foaf:Person ; - foaf:name "Spiderman", "Человек-паук"@ru ."###, - ); - assert!(result.is_ok()); - } -} diff --git a/rdftk_io/src/turtle/reader.rs b/rdftk_io/src/turtle/reader.rs index 60fc24f..7b7b6d3 100644 --- a/rdftk_io/src/turtle/reader.rs +++ b/rdftk_io/src/turtle/reader.rs @@ -1,32 +1,38 @@ -/*! -One-line description. - -More detailed description, with - -# Example - -*/ +use crate::common::parser::parse_turtle_doc; +use crate::make_factory_options; +use objio::{impl_has_options, HasOptions, ObjectReader}; +use rdftk_core::error::Error; +use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; +use rdftk_core::simple::graph_factory; +use std::io::Read; // ------------------------------------------------------------------------------------------------ // Public Types // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Private Types -// ------------------------------------------------------------------------------------------------ +make_factory_options!(TurtleReaderOptions, GraphFactoryRef, graph_factory); -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ +#[derive(Debug, Default)] +pub struct TurtleReader { + options: TurtleReaderOptions, +} // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ -// ------------------------------------------------------------------------------------------------ -// Private Functions -// ------------------------------------------------------------------------------------------------ +impl_has_options!(TurtleReader, TurtleReaderOptions); -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ +impl ObjectReader for TurtleReader { + type Error = Error; + + fn read(&self, r: &mut R) -> Result + where + R: Read, + { + let mut buffer = String::new(); + r.read_to_string(&mut buffer)?; + let factory = self.options().factory().clone(); + parse_turtle_doc(buffer, factory) + } +} diff --git a/rdftk_io/src/turtle/turtle-in.pest b/rdftk_io/src/turtle/turtle-in.pest deleted file mode 100644 index 81de561..0000000 --- a/rdftk_io/src/turtle/turtle-in.pest +++ /dev/null @@ -1,114 +0,0 @@ -/* -This file is based turtlestar.g4 in this same directory. The license for that file remains intact. -*/ - -turtleStarDoc = { - SOI ~ statement* ~ EOI -} - -statement = { - directive | triples ~ "." -} - -directive = { - prefixID - | base - | sparqlPrefix - | sparqlBase -} - - -prefixID = { - "@prefix" ~ PNAME_NS ~ IRIREF ~ "." -} - -base = { - "@base" ~ IRIREF ~ "." -} - -sparqlBase = { - ^"BASE" ~ IRIREF -} - -sparqlPrefix = { - ^"PREFIX" ~ PNAME_NS ~ IRIREF -} - -triples = { - subject ~ predicateObjectList - | blankNodePropertyList ~ predicateObjectList? -} - -predicateObjectList = { - verbObjectList ~ (";" ~ verbObjectList?)* -} - -verbObjectList = { - verb ~ objectList -} - -objectList = { - object ~ ("," ~ object)* -} - -verb = { - predicate | "a" -} - -subject = { - iri - | BlankNode - | collection - | tripleX -} - -predicate = { - iri -} - -object = { - iri - | BlankNode - | literal - | collection - | blankNodePropertyList - | tripleX -} - -tripleX = { - "<<" ~ subjectX ~ predicate ~ objectX ~ ">>" -} - -subjectX = { - iri - | BlankNode - | tripleX -} - -objectX = { - iri - | BlankNode - | literal - | tripleX -} - -literal = { - rdfLiteral - | NumericLiteral - | BooleanLiteral -} - -String = { - STRING_LITERAL_LONG_SINGLE_QUOTE - | STRING_LITERAL_LONG_QUOTE - | STRING_LITERAL_QUOTE - | STRING_LITERAL_SINGLE_QUOTE -} - -blankNodePropertyList = { - "[" ~ predicateObjectList ~ "]" -} - -collection = { - "(" ~ object* ~ ")" -} diff --git a/rdftk_io/src/turtle/turtle.pest b/rdftk_io/src/turtle/turtle.pest deleted file mode 100644 index 657ccb5..0000000 --- a/rdftk_io/src/turtle/turtle.pest +++ /dev/null @@ -1,376 +0,0 @@ -/* -This file is based turtlestar.g4 in this same directory. The license for that file remains intact. -*/ - -turtleStarDoc = { - SOI ~ statement* ~ EOI -} - -statement = { - directive | triples ~ "." -} - -directive = { - prefixID - | base - | sparqlPrefix - | sparqlBase -} - - -prefixID = { - "@prefix" ~ PNAME_NS ~ IRIREF ~ "." -} - -base = { - "@base" ~ IRIREF ~ "." -} - -sparqlBase = { - ^"BASE" ~ IRIREF -} - -sparqlPrefix = { - ^"PREFIX" ~ PNAME_NS ~ IRIREF -} - -triples = { - subject ~ predicateObjectList - | blankNodePropertyList ~ predicateObjectList? -} - -predicateObjectList = { - verbObjectList ~ (";" ~ verbObjectList?)* -} - -verbObjectList = { - verb ~ objectList -} - -objectList = { - object ~ ("," ~ object)* -} - -verb = { - predicate | "a" -} - -subject = { - iri - | BlankNode - | collection - | tripleX -} - -predicate = { - iri -} - -object = { - iri - | BlankNode - | literal - | collection - | blankNodePropertyList - | tripleX -} - -tripleX = { - "<<" ~ subjectX ~ predicate ~ objectX ~ ">>" -} - -subjectX = { - iri - | BlankNode - | tripleX -} - -objectX = { - iri - | BlankNode - | literal - | tripleX -} - -literal = { - rdfLiteral - | NumericLiteral - | BooleanLiteral -} - -String = { - STRING_LITERAL_LONG_SINGLE_QUOTE - | STRING_LITERAL_LONG_QUOTE - | STRING_LITERAL_QUOTE - | STRING_LITERAL_SINGLE_QUOTE -} - -blankNodePropertyList = { - "[" ~ predicateObjectList ~ "]" -} - -collection = { - "(" ~ object* ~ ")" -} - -// ------------------------------------------------------------------------------------------------ -// IRIs -// ------------------------------------------------------------------------------------------------ - -iri = { - IRIREF - | prefixedName -} - -IRIREF = ${ - "<" ~ IRIREF_INNER ~ ">" -} - -IRIREF_INNER = @{ - IRIREF_CHAR* -} - -IRIREF_CHAR = { - /* #x00=NULL #01-#x1F=control codes #x20=space */ - !(">" | '\u{0000}'..'\u{0020}' | "\"" | "{" | "}" | "|" | "^" | "`" | "\\") ~ ANY - | UCHAR -} - -// ------------------------------------------------------------------------------------------------ -// Blank Nodes -// ------------------------------------------------------------------------------------------------ - -BlankNode = { - BLANK_NODE_LABEL - | ANON -} - -BLANK_NODE_LABEL = @{ - "_:" ~ (PN_CHARS_U | ASCII_DIGIT) ~ BLANK_NODE_LABEL_TAIL* -} - -BLANK_NODE_LABEL_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." -} - -ANON = { - "[" ~ "]" -} - -// ------------------------------------------------------------------------------------------------ -// RDF (string-like) Literals -// ------------------------------------------------------------------------------------------------ - -rdfLiteral = { - String ~ (LANGTAG | "^^" ~ iri)? -} - -LANGTAG = { - "@" ~ ASCII_ALPHA+ ~ ("-" ~ ASCII_ALPHANUMERIC+)* -} - -// ------------------------------------------------------------------------------------------------ -// Numeric Literals -// ------------------------------------------------------------------------------------------------ - -NumericLiteral = { - SIGN? ~ - ( DOUBLE - | DECIMAL - | INTEGER ) -} - -INTEGER = { - ASCII_DIGIT+ -} - -DECIMAL = { - ASCII_DIGIT* ~ "." ~ ASCII_DIGIT+ -} - -DOUBLE = { - (ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT* ~ EXPONENT | "." ~ ASCII_DIGIT+ ~EXPONENT | ASCII_DIGIT+ ~ EXPONENT) -} - -SIGN = { - ("+" | "-") -} - -EXPONENT = { - ^"e" ~ SIGN? ~ ASCII_DIGIT+ -} - -// ------------------------------------------------------------------------------------------------ -// Boolean Literals -// ------------------------------------------------------------------------------------------------ - -BooleanLiteral = { - "true" - | "false" -} - -// ------------------------------------------------------------------------------------------------ -// String Literals -// ------------------------------------------------------------------------------------------------ - -STRING_LITERAL_LONG_SINGLE_QUOTE = ${ - "'''" ~ LONG_SINGLE_QUOTE_INNER ~ "'''" -} - -LONG_SINGLE_QUOTE_INNER = @{ - LONG_SINGLE_QUOTE_CHAR* -} - -LONG_SINGLE_QUOTE_CHAR = { - !("'''" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR -} - -STRING_LITERAL_LONG_QUOTE = ${ - "\"\"\"" ~ LONG_QUOTE_INNER ~ "\"\"\"" -} - -LONG_QUOTE_INNER = @{ - LONG_QUOTE_CHAR* -} - -LONG_QUOTE_CHAR = { - !("\"\"\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR -} - -STRING_LITERAL_QUOTE = ${ - "\"" ~ QUOTE_INNER ~ "\"" -} - -QUOTE_INNER = @{ - QUOTE_CHAR* -} - -QUOTE_CHAR = { - !("\"" | "\\" | "\r" | "\n" ) ~ ANY - | UCHAR - | ECHAR -} - -STRING_LITERAL_SINGLE_QUOTE = ${ - "'" ~ SINGLE_QUOTE_INNER ~ "'" -} - -SINGLE_QUOTE_INNER = @{ - SINGLE_QUOTE_CHAR* -} - -SINGLE_QUOTE_CHAR = { - !( "'" | "\\" | "\r" | "\n" ) ~ ANY - | ECHAR - | UCHAR -} - -UCHAR = @{ - "\\U" ~ ASCII_HEX_DIGIT{8} | "\\u" ~ ASCII_HEX_DIGIT{4} -} - -ECHAR = @{ - "\\" ~ ("t" | "b" | "n" | "r" | "f" | "\"" | "'" | "\\") -} - -// ------------------------------------------------------------------------------------------------ -// Prefixed Names -// ------------------------------------------------------------------------------------------------ - -prefixedName = { - PNAME_LN - | PNAME_NS -} - -PNAME_NS = { - PN_PREFIX? ~ ":" -} - -PNAME_LN = { - PNAME_NS ~ PN_LOCAL -} - -PN_CHARS_BASE = { - ASCII_ALPHA - | '\u{00C0}'..'\u{00D6}' - | '\u{00D8}'..'\u{00F6}' - | '\u{00F8}'..'\u{02FF}' - | '\u{0370}'..'\u{037D}' - | '\u{037F}'..'\u{1FFF}' - | '\u{200C}'..'\u{200D}' - | '\u{2070}'..'\u{218F}' - | '\u{2C00}'..'\u{2FEF}' - | '\u{3001}'..'\u{D7FF}' - | '\u{F900}'..'\u{FDCF}' - | '\u{FDF0}'..'\u{FFFD}' - | '\u{10000}'..'\u{EFFFF}' -} - -PN_CHARS_U = { - PN_CHARS_BASE - | "_" -} - -PN_CHARS = { - PN_CHARS_U - | "-" - | ASCII_DIGIT - | '\u{00B7}'..'\u{00B7}' - | '\u{0300}'..'\u{036F}' - | '\u{203F}'..'\u{2040}' -} - -PN_PREFIX = @{ - PN_CHARS_BASE ~ PN_PREFIX_TAIL* -} - -PN_PREFIX_TAIL = { - PN_CHARS - | &("." ~ PN_CHARS) ~ "." -} - -PN_LOCAL = @{ - (PN_CHARS_U | ":" | ASCII_DIGIT | PLX) ~ PN_LOCAL_TAIL* -} - -PN_LOCAL_TAIL = { - PN_LOCAL_CHARS - | &("." ~ PN_LOCAL_CHARS) ~ "." -} - -PN_LOCAL_CHARS = { - PN_CHARS | ":" | PLX -} - -PLX = { - PERCENT - | PN_LOCAL_ESC -} - -PERCENT = { - "%" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT -} - -PN_LOCAL_ESC = { - "\\" ~ ("_" | "~" | "." | "-" | "!" | "$" | "&" | "'\''" | "(" | ")" - | "*" | "+" | "," | ";" | "=" | "/" | "?" | "#" | "@" | "%") -} - -// ------------------------------------------------------------------------------------------------ -// Implicit Whitespace -// ------------------------------------------------------------------------------------------------ - -newline = _{ "\n" | "\r\n" } - -COMMENT = _{ - "#" ~ (!newline ~ ANY)* -} - -WHITESPACE = _{ - " " | "\t" | newline -} diff --git a/rdftk_io/src/turtle/turtlestar.g4 b/rdftk_io/src/turtle/turtlestar.g4 deleted file mode 100644 index 984c05a..0000000 --- a/rdftk_io/src/turtle/turtlestar.g4 +++ /dev/null @@ -1,304 +0,0 @@ -/* - [The "BSD licence"] - Copyright (c) 2019, Łukasz Szeremeta (@ University of Bialystok, http://www.uwb.edu.pl/), - William Van Woensel (W3C N3 Community Group) - - All rights reserved. - - Based on ANTLR TURTLE grammar - (https://github.com/antlr/grammars-v4/blob/master/turtle/TURTLE.g4) - distributed under BSD licence. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the author may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -*/ - -grammar turtlestar; - -@header { - package w3c.n3dev.parser.antlr; -} - -turtleStarDoc - : statement* EOF - ; - -COMMENT - : '#' (~[\n\r])* -> skip -/* above code comes from Łukasz' n3.g4 */ -/* comments are found in turtle test cases but not in grammar */ - ; - -statement - : directive - | triples '.' - ; - -directive - : prefixID - | base - | sparqlPrefix - | sparqlBase - ; - - -prefixID - : '@prefix' PNAME_NS IRIREF '.' - ; - -base - : '@base' IRIREF '.' - ; - -sparqlBase - : BASE IRIREF /* case insensitive */ - ; - -sparqlPrefix - : PREFIX PNAME_NS IRIREF /* case insensitive */ - ; - -triples - : subject predicateObjectList - | blankNodePropertyList predicateObjectList? - ; - -predicateObjectList - : verb objectList (';' (verb objectList)?)* - ; - -objectList - : object (',' object)* - ; - -verb - : predicate - | 'a' - ; - -subject - : iri - | BlankNode - | collection - | tripleX - ; - -predicate - : iri - ; - -object - : iri - | BlankNode - | literal - | collection - | blankNodePropertyList - | tripleX - ; - -tripleX - : '<<' subjectX predicate objectX '>>' - ; - -subjectX - : iri - | BlankNode - | tripleX - ; - -objectX - :iri - | BlankNode - | literal - | tripleX - ; - -literal - : rdfLiteral - | NumericLiteral - | BooleanLiteral - ; - -blankNodePropertyList - : '[' predicateObjectList ']' - ; - -collection - : '(' object* ')' - ; - -NumericLiteral - : INTEGER - | DECIMAL - | DOUBLE - ; - -rdfLiteral - : String (LANGTAG | '^^' iri)? - ; - -BooleanLiteral - : 'true' - | 'false' - ; - -String - : STRING_LITERAL_QUOTE - | STRING_LITERAL_SINGLE_QUOTE - | STRING_LITERAL_LONG_SINGLE_QUOTE - | STRING_LITERAL_LONG_QUOTE - ; - -iri - : IRIREF - | prefixedName - ; - -prefixedName - : PNAME_NS - | PNAME_LN -/* PNAME_NS will be matched for ':' (i.e., "empty") prefixedNames - * hence this cannot be a lexer rule; for s/p/o of only ':', PNAME_NS will be returned - * instead of PrefixedName token */ - ; - -BlankNode - : BLANK_NODE_LABEL - | ANON - ; - -IRIREF - : '<' (~[\u0000-\u0020<>"{}|^`\\] | UCHAR)* '>' /* #x00=NULL #01-#x1F=control codes #x20=space */ - ; - -PNAME_NS - : PN_PREFIX? ':' -/* when using PrefixedName (i.e., lexer rule), this must occur *before* PrefixedName - * else, PNAME_NS will not be chosen for prefix decl (e.g., turtle-syntax-bnode-01) - * regardless, it should occur before PNAME_LN - */ - ; - -PNAME_LN - : PNAME_NS PN_LOCAL -/* rule must be above PN_LOCAL */ - ; - -BLANK_NODE_LABEL - : '_:' (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)? - ; - -LANGTAG - : '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* - ; - -INTEGER - : [+-]? [0-9]+ - ; - -DECIMAL - : [+-]? [0-9]* '.' [0-9]+ - ; - -DOUBLE - : [+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT) - ; - -EXPONENT - : [eE] [+-]? [0-9]+ - ; - -STRING_LITERAL_LONG_SINGLE_QUOTE - : '\'\'\'' (('\'' | '\'\'')? (~['\\] | ECHAR | UCHAR))* '\'\'\'' -/* needs to be above the "regular" quotes ; else first two '' will be matched as empty string */ - ; - -STRING_LITERAL_LONG_QUOTE - : '"""' (('"' | '""')? (~["\\] | ECHAR | UCHAR))* '"""' -/* needs to be above the "regular" quotes ; else first two "" will be matched as empty string */ - ; - -STRING_LITERAL_QUOTE - : '"' (~[\u0022\u005C\u000A\u000D] | ECHAR | UCHAR)* '"' /* #x22=" #x5C=\ #xA=new line #xD=carriage return */ - ; - -STRING_LITERAL_SINGLE_QUOTE - : '\'' (~[\u0027\u005C\u000A\u000D] | ECHAR | UCHAR)* '\'' /* #x27=' #x5C=\ #xA=new line #xD=carriage return */ - ; - -UCHAR - : '\\u' HEX HEX HEX HEX | '\\U' HEX HEX HEX HEX HEX HEX HEX HEX - ; - -ECHAR - : '\\' [tbnrf"'\\] - ; - -WS - : [\u0020\u0009\u000D\u000A] -> skip -/* #x20=space #x9=character tabulation #xD=carriage return #xA=new line */ - ; - -ANON - : '[' WS* ']' - ; - -PN_CHARS_BASE - : [A-Z] | [a-z] | [\u00C0-\u00D6] | [\u00D8-\u00F6] | [\u00F8-\u02FF] | [\u0370-\u037D] | [\u037F-\u1FFF] | [\u200C-\u200D] | [\u2070-\u218F] | [\u2C00-\u2FEF] | [\u3001-\uD7FF] | [\uF900-\uFDCF] | [\uFDF0-\uFFFD] -/* antlr cannot include (at least on Java) the following: #x10000-#xEFFFF */ - ; - -PN_CHARS_U - : PN_CHARS_BASE - | '_' - ; - -PN_CHARS - : PN_CHARS_U | '-' | [0-9] | '\u00B7' | [\u0300-\u036F] | [\u203F-\u2040] - ; - -BASE: - ('B'|'b') ('A'|'a') ('S'|'s') ('E'|'e') - ; - -PREFIX: - ('P'|'p') ('R'|'r') ('E'|'e') ('F'|'f') ('I'|'i') ('X'|'x') -/* BASE and PREFIX must be case-insensitive, hence these monstrosities */ -/* BASE and PREFIX must be above PN_PREFIX, PN_LOCAL */ - ; - -PN_PREFIX - : PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)? - ; - -PN_LOCAL - : (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? - ; - -PLX - : PERCENT - | PN_LOCAL_ESC - ; - -PERCENT - : '%' HEX HEX - ; - -HEX - : [0-9] | [A-F] | [a-f] - ; - -PN_LOCAL_ESC - : '\\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%') - ; \ No newline at end of file diff --git a/rdftk_io/src/turtle/writer/mod.rs b/rdftk_io/src/turtle/writer/mod.rs index 09214d2..b74b77a 100644 --- a/rdftk_io/src/turtle/writer/mod.rs +++ b/rdftk_io/src/turtle/writer/mod.rs @@ -4,23 +4,22 @@ Provides the `TurtleWriter` implementation of the `GraphWriter` trait. # Example ```rust -use rdftk_io::turtle::writer::TurtleWriter; -use rdftk_io::turtle::writer::TurtleOptions; -use rdftk_io::write_graph_to_string; -use rdftk_iri::{IRIRef, IRI}; +use rdftk_io::turtle::{TurtleWriter, TurtleOptions}; +use rdftk_iri::{IriRef, Iri}; use std::str::FromStr; +# use objio::{HasOptions, ObjectWriter}; # use rdftk_core::model::graph::GraphRef; # fn make_graph() -> GraphRef { rdftk_core::simple::graph::graph_factory().graph() } -let mut options = TurtleOptions::default(); -options.use_sparql_style = true; -options.nest_blank_nodes = false; -let writer = TurtleWriter::with_id_base( - &IRIRef::from(IRI::from_str("http://en.wikipedia.org/wiki/").unwrap()), - options, -); +let mut options = TurtleOptions::default() + .with_id_base(Iri::from_str("http://en.wikipedia.org/wiki/").unwrap().into()) + .with_sparql_style() + .without_nested_blank_nodes(); -let result = write_graph_to_string(&writer, &make_graph()); +let writer = TurtleWriter::default() + .with_options(options); + +let result = writer.write_to_string(&make_graph()); ``` */ diff --git a/rdftk_io/src/turtle/writer/options.rs b/rdftk_io/src/turtle/writer/options.rs index a4849a8..018a023 100644 --- a/rdftk_io/src/turtle/writer/options.rs +++ b/rdftk_io/src/turtle/writer/options.rs @@ -72,6 +72,13 @@ impl TurtleOptions { } } + pub fn without_nested_blank_nodes(self) -> Self { + Self { + nest_blank_nodes: false, + ..self + } + } + pub fn id_base(&self) -> Option<&IriRef> { self.id_base.as_ref() } diff --git a/rdftk_io/src/xml/mod.rs b/rdftk_io/src/xml/mod.rs index 196becd..c25324e 100644 --- a/rdftk_io/src/xml/mod.rs +++ b/rdftk_io/src/xml/mod.rs @@ -9,15 +9,15 @@ pretty-printed for readability. It is also possible to pick one of the type sty in the specification, "flat" or "striped". ```rust -use rdftk_io::xml::writer::{XmlOptions, XmlWriter}; -use rdftk_io::write_graph_to_string; +use rdftk_io::xml::{XmlOptions, XmlWriter}; +# use objio::{HasOptions, ObjectWriter}; # let graph = rdftk_core::simple::graph::graph_factory().graph(); -let options: XmlOptions = XmlOptions::flat().pretty().clone(); +let options: XmlOptions = XmlOptions::default().flat().pretty(); -let writer = XmlWriter::new(options); +let writer = XmlWriter::default().with_options(options); -println!("{}", write_graph_to_string(&writer, &graph).unwrap()); +println!("{}", writer.write_to_string(&graph).unwrap()); ``` */ @@ -41,8 +41,8 @@ pub const MIME_TYPE: &str = "application/rdf+xml"; mod syntax; -#[doc(hidden)] -pub mod reader; +mod reader; +pub use reader::XmlReader; mod writer; pub use writer::{XmlOptions, XmlStyle, XmlWriter}; diff --git a/rdftk_io/src/xml/reader.rs b/rdftk_io/src/xml/reader.rs index 3eab6cf..c501784 100644 --- a/rdftk_io/src/xml/reader.rs +++ b/rdftk_io/src/xml/reader.rs @@ -17,7 +17,7 @@ use crate::xml::syntax::{ PARSE_TYPE_COLLECTION, PARSE_TYPE_LITERAL, PARSE_TYPE_RESOURCE, XML_NAMESPACE, }; use objio::ObjectReader; -use rdftk_core::error::{invalid_state_error, read_write_error_with, Error}; +use rdftk_core::error::{invalid_state_error, Error}; use rdftk_core::model::graph::{GraphFactoryRef, GraphRef}; use rdftk_core::model::literal::{DataType, LanguageTag}; use rdftk_core::model::statement::SubjectNodeRef; @@ -26,6 +26,7 @@ use rdftk_iri::{Iri, IriRef}; use rdftk_names::rdf; use std::io::Read; use std::str::FromStr; +use tracing::{error, trace}; use xml::attribute::OwnedAttribute; use xml::name::OwnedName; use xml::reader::XmlEvent; @@ -72,10 +73,6 @@ struct Attributes<'a> { inner: Vec<&'a OwnedAttribute>, } -// ------------------------------------------------------------------------------------------------ -// Public Functions -// ------------------------------------------------------------------------------------------------ - // ------------------------------------------------------------------------------------------------ // Implementations // ------------------------------------------------------------------------------------------------ @@ -113,26 +110,33 @@ impl ExpectedName { macro_rules! trace_event { ($fn_name:expr => $event:expr) => { - log::trace!("XmlReader::{} event: {:?}", $fn_name, $event); + trace!("XmlReader::{} event: {:?}", $fn_name, $event); }; ($fn_name:expr => ignore $event:expr) => { - log::trace!("XmlReader::{} ignoring event: {:?}", $fn_name, &$event); + trace!("XmlReader::{} ignoring event: {:?}", $fn_name, &$event); }; } macro_rules! error_event { (parse => $fn_name:expr, $error:expr) => { - let inner: rdftk_core::error::Error = read_write_error_with(super::NAME, $error.clone()); + let inner = Error::Tokenizer { + representation: super::NAME.into(), + source: Box::new($error.clone()), + }; error_event!($fn_name, inner); }; (state => $fn_name:expr, $msg:expr) => { - log::error!("Invalid state: {}", $msg,); - let inner: rdftk_core::error::Error = invalid_state_error(); + error!("Invalid state: {}", $msg,); + let inner = invalid_state_error(); error_event!($fn_name, inner); }; - ($fn_name:expr, $inner:expr) => { - log::error!("XmlReader::{} {}", $fn_name, $inner); - return read_write_error_with(super::NAME, $inner).into(); + ($fn_name:expr, $error:expr) => { + error!("XmlReader::{} {}", $fn_name, $error); + return Error::Tokenizer { + representation: super::NAME.into(), + source: Box::new($error), + } + .into(); }; } @@ -290,7 +294,7 @@ fn parse_predicate_attributes( // SPEC: §2.5 Property Attributes // SPEC: §2.12 Omitting Nodes: Property Attributes on an empty Property Element for attribute in attributes { - log::trace!( + trace!( "XmlReader::parse_predicate_attributes attribute: {:?}", attribute ); @@ -582,11 +586,7 @@ fn parse_attributes(attributes: &[OwnedAttribute]) -> Result, Err } } - log::trace!("parse_attributes -> {:?}", response); + trace!("parse_attributes -> {:?}", response); Ok(response) } - -// ------------------------------------------------------------------------------------------------ -// Modules -// ------------------------------------------------------------------------------------------------ diff --git a/rdftk_io/src/xml/writer.rs b/rdftk_io/src/xml/writer.rs index 83df2f6..32ac01e 100644 --- a/rdftk_io/src/xml/writer.rs +++ b/rdftk_io/src/xml/writer.rs @@ -4,7 +4,7 @@ use super::syntax::{ }; use crate::xml::syntax::ATTRIBUTE_XML_LANG_PREFIXED; use objio::{impl_has_options, ObjectWriter}; -use rdftk_core::error::{rdf_star_not_supported_error, read_write_error_with, Error}; +use rdftk_core::error::{rdf_star_not_supported_error, Error}; use rdftk_core::model::graph::{Graph, GraphRef}; use rdftk_core::model::statement::SubjectNodeRef; use rdftk_iri::IriRef; @@ -240,7 +240,7 @@ impl XmlWriter { writer .write( XmlEvent::start_element(RDF_DESCRIPTION.as_str()) - .attr(RDF_NODE_ID.as_str(), blank.as_ref()), + .attr(RDF_NODE_ID.as_str(), blank.as_ref().as_ref()), ) .map_err(xml_error)?; } else { @@ -276,7 +276,7 @@ impl XmlWriter { writer.write(element).map_err(xml_error)?; } else if let Some(blank) = object.as_blank() { if flat { - element = element.attr(RDF_NODE_ID.as_str(), blank.as_ref()); + element = element.attr(RDF_NODE_ID.as_str(), blank.as_ref().as_ref()); writer.write(element).map_err(xml_error)?; } else { writer.write(element).map_err(xml_error)?; @@ -328,8 +328,11 @@ impl XmlWriter { // ------------------------------------------------------------------------------------------------ #[inline] -fn xml_error(e: xml::writer::Error) -> rdftk_core::error::Error { - read_write_error_with(super::NAME, e) +fn xml_error(e: xml::writer::Error) -> Error { + Error::Tokenizer { + representation: super::NAME.into(), + source: Box::new(e), + } } fn split_uri(iri: &IriRef) -> (String, String) { diff --git a/rdftk_io/tests/read_json.rs b/rdftk_io/tests/read_json.rs index 2ed7545..ba8d98b 100644 --- a/rdftk_io/tests/read_json.rs +++ b/rdftk_io/tests/read_json.rs @@ -1,5 +1,5 @@ use objio::ObjectReader; -use rdftk_io::json::reader::JsonReader; +use rdftk_io::json::JsonReader; #[test] fn read_example_01() { diff --git a/rdftk_io/tests/read_nq.rs b/rdftk_io/tests/read_nq.rs new file mode 100644 index 0000000..432c426 --- /dev/null +++ b/rdftk_io/tests/read_nq.rs @@ -0,0 +1,27 @@ +use objio::ObjectReader; +use rdftk_io::nq::NQuadReader; + +#[test] +fn parse_simple_1() { + let reader = NQuadReader::default(); + assert!(reader.read_from_string( + r###" + . # comments here +# or on a line by themselves +_:subject1 "object1" . +_:subject2 "object2" . +"###).is_ok()); +} + +#[test] +fn parse_simple_2() { + let reader = NQuadReader::default(); + assert!(reader + .read_from_string( + r###" +_:alice _:bob . +_:bob _:alice . +"###, + ) + .is_ok()); +} diff --git a/rdftk_io/tests/read_nt.rs b/rdftk_io/tests/read_nt.rs index 8b13789..525cae3 100644 --- a/rdftk_io/tests/read_nt.rs +++ b/rdftk_io/tests/read_nt.rs @@ -1 +1,55 @@ +use objio::{ObjectReader, ObjectWriter}; +use rdftk_core::{error::Error, model::graph::GraphRef}; +use rdftk_io::nt::{NTripleReader, NTripleWriter}; +fn write_graph(graph: &GraphRef) { + let writer = NTripleWriter::default(); + let _ = writer.write(&mut std::io::stdout(), graph); +} + +#[test] +fn parse_simple() { + let reader = NTripleReader::default(); + let result: Result = reader.read_from_string( + r###" + "That Seventies Show"^^ . # literal with XML Schema string datatype + "That Seventies Show" . # same as above + "That Seventies Show"@en . # literal with a language tag + "Cette Série des Années Septante"@fr-be . # literal outside of ASCII range with a region subtag + "This is a multi-line\nliteral with many quotes (\"\"\"\"\")\nand two apostrophes ('')." . + "2"^^ . # xsd:integer + "1.663E-4"^^ . # xsd:double +"### ); + match result { + Ok(g) => { + println!("ok"); + write_graph(&g); + } + Err(e) => { + println!("{:?}", e); + panic!(); + } + } +} + +#[test] +fn parse_simple_with_blanks() { + let reader = NTripleReader::default(); + let result: Result = reader.read_from_string( + r###" + . # comments here +# or on a line by themselves +_:subject1 "object1" . +_:subject2 "object2" . +"### ); + match result { + Ok(g) => { + println!("ok"); + write_graph(&g); + } + Err(e) => { + println!("{:?}", e); + panic!(); + } + } +} diff --git a/rdftk_io/tests/read_turtle.rs b/rdftk_io/tests/read_turtle.rs new file mode 100644 index 0000000..640d588 --- /dev/null +++ b/rdftk_io/tests/read_turtle.rs @@ -0,0 +1,27 @@ +use objio::ObjectReader; +use rdftk_core::{error::Error, model::graph::GraphRef}; +use rdftk_io::turtle::TurtleReader; + +#[test] +#[ignore] +fn parse_simple_turtle() { + let reader = TurtleReader::default(); + let result: Result = reader.read_from_string( + r###"@base . +@prefix rdf: . +@prefix rdfs: . +@prefix foaf: . +@prefix rel: . + +<#green-goblin> + rel:enemyOf <#spiderman> ; + a foaf:Person ; # in the context of the Marvel universe + foaf:name "Green Goblin" . + +<#spiderman> + rel:enemyOf <#green-goblin> ; + a foaf:Person ; + foaf:name "Spiderman", "Человек-паук"@ru ."###, + ); + assert!(result.is_ok()); +} diff --git a/rdftk_io/tests/read_xml.rs b/rdftk_io/tests/read_xml.rs index 6949f27..36ef253 100644 --- a/rdftk_io/tests/read_xml.rs +++ b/rdftk_io/tests/read_xml.rs @@ -1,12 +1,12 @@ use objio::ObjectReader; -use rdftk_io::xml::reader::XmlReader; +use rdftk_io::xml::XmlReader; // https://www.w3.org/RDF/Validator/rdfval pub mod logging; #[test] -fn read_example_00() { +fn read_example_empty_graph() { let mut xml = r##" .\n")); - assert!(output.contains(" dc:")); - assert!(output.contains("dc:description [\n")); + assert!(output.contains("")); + assert!(output.contains(" dc:description [\n")); + assert!(output.contains(" a foaf:Person")); assert!(output.contains(" foaf:name \"Tony Benn\"")); - assert!(output.contains(" rdf:type foaf:Person")); + assert!(output.contains(" ] ;\n")); + assert!(output.contains(" dc:publisher \"Wikipedia\" .\n")); } #[test] @@ -41,10 +43,12 @@ fn write_to_turtle_with_base() { assert!(output.starts_with("@base .\n")); assert!(output.contains("@prefix dc: .\n")); - assert!(output.contains(" dc:")); - assert!(output.contains("dc:description [\n")); + assert!(output.contains("")); + assert!(output.contains(" dc:description [\n")); + assert!(output.contains(" a foaf:Person")); assert!(output.contains(" foaf:name \"Tony Benn\"")); - assert!(output.contains(" rdf:type foaf:Person")); + assert!(output.contains(" ] ;\n")); + assert!(output.contains(" dc:publisher \"Wikipedia\" .\n")); } #[test] @@ -66,7 +70,10 @@ fn write_to_turtle_with_options() { assert!(output.starts_with("BASE \n")); assert!(output.contains("PREFIX dc: \n")); - assert!(output.contains(" dc:")); - assert!(output.contains("dc:description _:B1")); - assert!(output.contains("\n_:B1")); + assert!(output.contains("")); + assert!(output.contains(" dc:description [\n")); + assert!(output.contains(" a foaf:Person")); + assert!(output.contains(" foaf:name \"Tony Benn\"")); + assert!(output.contains(" ] ;\n")); + assert!(output.contains(" dc:publisher \"Wikipedia\" .\n")); }