diff --git a/Cargo.toml b/Cargo.toml index 6ca4850..c66c681 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,5 +20,6 @@ exclude = ["testing-tools"] default = ["std", "positions"] std = [] # Enables Nodes and Attributes position in the original document preserving. -# Increases memory usage by `Range` for each Node and Attribute. +# Increases memory usage by `Range` for each Node. +# Increases memory usage by `Range` + `u16` + `u8` for each Attribute. positions = [] diff --git a/src/lib.rs b/src/lib.rs index a851aa3..71a00a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -490,6 +490,10 @@ struct AttributeData<'input> { value: StringStorage<'input>, #[cfg(feature = "positions")] range: Range, + #[cfg(feature = "positions")] + qname_len: u16, + #[cfg(feature = "positions")] + eq_len: u8, // includes any surrounding spaces } /// An attribute. @@ -587,6 +591,43 @@ impl<'a, 'input> Attribute<'a, 'input> { pub fn range(&self) -> Range { self.data.range.clone() } + + /// Returns attribute's qname's range in bytes in the original document. + /// + /// ```text + /// + /// ^^^^^^ + /// ``` + /// + /// To reduce memory usage the qname length is limited by u16::MAX. + /// If the attribute exceeds that limit then the end of the returned range will be incorrect. + #[cfg(feature = "positions")] + #[inline] + pub fn range_qname(&self) -> Range { + let end = self.data.range.start + usize::from(self.data.qname_len); + self.data.range.start..end + } + + /// Returns attribute's value's range in bytes in the original document, excluding the surrounding quotes. + /// + /// If the attribute's value is an empty string then the `start` and `end` of this `Range` are equal, and indicate the closing quote. + /// + /// ```text + /// + /// ^^^^^ + /// ``` + /// + /// To reduce memory usage the qname length is limited by u16::MAX, + /// and the number of spaces around the equal sign is limited by u8::MAX. + /// If the attribute exceeds those limits then the start of the returned range will be incorrect. + #[cfg(feature = "positions")] + #[inline] + pub fn range_value(&self) -> Range { + // +1 on start and -1 on end are to exclude the quotes around the value (all valid quotes are 1 byte) + let start = self.data.range.start + usize::from(self.data.qname_len) + usize::from(self.data.eq_len) + 1; + let end = self.data.range.end - 1; + start..end + } } impl PartialEq for Attribute<'_, '_> { diff --git a/src/parse.rs b/src/parse.rs index 5470ef4..cbf718f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -353,6 +353,10 @@ struct TempAttributeData<'input> { local: &'input str, value: StringStorage<'input>, range: Range, + #[allow(unused)] // only used for feature "positions" + qname_len: u16, + #[allow(unused)] // only used for feature "positions" + eq_len: u8, } impl<'input> Document<'input> { @@ -644,8 +648,8 @@ impl<'input> tokenizer::XmlEvents<'input> for Context<'input> { self.after_text = false; } - tokenizer::Token::Attribute(range, prefix, local, value) => { - process_attribute(range, prefix, local, value, self)?; + tokenizer::Token::Attribute(range, qname_len, eq_len, prefix, local, value) => { + process_attribute(range, qname_len, eq_len, prefix, local, value, self)?; } tokenizer::Token::ElementEnd(end, range) => { process_element(end, range, self)?; @@ -666,6 +670,8 @@ impl<'input> tokenizer::XmlEvents<'input> for Context<'input> { #[allow(clippy::too_many_arguments)] fn process_attribute<'input>( range: Range, + qname_len: u16, + eq_len: u8, prefix: &'input str, local: &'input str, value: StrSpan<'input>, @@ -732,6 +738,8 @@ fn process_attribute<'input>( local, value, range, + qname_len, + eq_len, }); } @@ -909,6 +917,10 @@ fn resolve_attributes(namespaces: ShortRange, ctx: &mut Context) -> Result { ElementStart(&'input str, &'input str, usize), // ns:attr="value" - Attribute(Range, &'input str, &'input str, StrSpan<'input>), + Attribute(Range, u16, u8, &'input str, &'input str, StrSpan<'input>), ElementEnd(ElementEnd<'input>, Range), @@ -553,7 +553,10 @@ fn parse_element<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'inp // We cannot mark `parse_attribute` as `#[inline(always)]` // because it will blow up the binary size. let (prefix, local) = s.consume_qname()?; + let qname_end = s.pos(); + let qname_len = u16::try_from(qname_end - start).unwrap_or(u16::MAX); s.consume_eq()?; + let eq_len = u8::try_from(s.pos() - qname_end).unwrap_or(u8::MAX); let quote = s.consume_quote()?; let quote_c = quote as char; // The attribute value must not contain the < character. @@ -562,7 +565,7 @@ fn parse_element<'input>(s: &mut Stream<'input>, events: &mut dyn XmlEvents<'inp let value = s.slice_back_span(value_start); s.consume_byte(quote)?; let end = s.pos(); - events.token(Token::Attribute(start..end, prefix, local, value))?; + events.token(Token::Attribute(start..end, qname_len, eq_len, prefix, local, value))?; } } } diff --git a/src/tokenizer_tests.rs b/src/tokenizer_tests.rs index 8688506..5fde7de 100644 --- a/src/tokenizer_tests.rs +++ b/src/tokenizer_tests.rs @@ -90,7 +90,7 @@ impl<'a> xml::XmlEvents<'a> for EventsCollector<'a> { xml::Token::ElementStart(prefix, local, start) => { Token::ElementStart(prefix, local, start) } - xml::Token::Attribute(_, prefix, local, value) => { + xml::Token::Attribute(_, _, _, prefix, local, value) => { Token::Attribute(prefix, local, value.as_str()) } xml::Token::ElementEnd(end, range) => Token::ElementEnd( diff --git a/tests/api.rs b/tests/api.rs index 211f7d3..85f7a39 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -158,6 +158,10 @@ fn text_pos_01() { if let Some(attr) = node.attribute_node("a") { assert_eq!(doc.text_pos_at(attr.range().start), TextPos::new(1, 4)); assert_eq!(doc.text_pos_at(attr.range().end), TextPos::new(1, 9)); + assert_eq!(doc.text_pos_at(attr.range_qname().start), TextPos::new(1, 4)); + assert_eq!(doc.text_pos_at(attr.range_qname().end), TextPos::new(1, 5)); + assert_eq!(doc.text_pos_at(attr.range_value().start), TextPos::new(1, 7)); + assert_eq!(doc.text_pos_at(attr.range_value().end), TextPos::new(1, 8)); } // first child is a text/whitespace, not a comment @@ -184,6 +188,10 @@ fn text_pos_02() { if let Some(attr) = node.attribute_node(("http://www.w3.org", "a")) { assert_eq!(doc.text_pos_at(attr.range().start), TextPos::new(1, 36)); assert_eq!(doc.text_pos_at(attr.range().end), TextPos::new(1, 44)); + assert_eq!(doc.text_pos_at(attr.range_qname().start), TextPos::new(1, 36)); + assert_eq!(doc.text_pos_at(attr.range_qname().end), TextPos::new(1, 40)); + assert_eq!(doc.text_pos_at(attr.range_value().start), TextPos::new(1, 42)); + assert_eq!(doc.text_pos_at(attr.range_value().end), TextPos::new(1, 43)); } } @@ -202,6 +210,60 @@ fn text_pos_03() { assert_eq!(doc.text_pos_at(node.range().end), TextPos::new(2, 5)); } +#[cfg(feature = "positions")] +#[test] +fn text_pos_04() { + let data = ""; + + let doc = Document::parse(data).unwrap(); + let node = doc.root_element(); + + if let Some(attr) = node.attribute_node("a") { + assert_eq!(doc.text_pos_at(attr.range().start), TextPos::new(1, 36)); + assert_eq!(doc.text_pos_at(attr.range().end), TextPos::new(1, 43)); + assert_eq!(doc.text_pos_at(attr.range_qname().start), TextPos::new(1, 36)); + assert_eq!(doc.text_pos_at(attr.range_qname().end), TextPos::new(1, 40)); + assert_eq!(doc.text_pos_at(attr.range_value().start), TextPos::new(1, 42)); + assert_eq!(doc.text_pos_at(attr.range_value().end), TextPos::new(1, 42)); +} +} + +#[cfg(feature = "positions")] +#[test] +fn text_pos_05() { + let data = ""; + + let doc = Document::parse(data).unwrap(); + let node = doc.root_element(); + + if let Some(attr) = node.attribute_node("a") { + assert_eq!(doc.text_pos_at(attr.range().start), TextPos::new(1, 36)); + assert_eq!(doc.text_pos_at(attr.range().end), TextPos::new(1, 48)); + assert_eq!(doc.text_pos_at(attr.range_qname().start), TextPos::new(1, 36)); + assert_eq!(doc.text_pos_at(attr.range_qname().end), TextPos::new(1, 40)); + assert_eq!(doc.text_pos_at(attr.range_value().start), TextPos::new(1, 47)); + assert_eq!(doc.text_pos_at(attr.range_value().end), TextPos::new(1, 48)); + } +} + +#[cfg(feature = "positions")] +#[test] +fn text_pos_06() { + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 + let data = ""; + + let doc = Document::parse(data).unwrap(); + let node = doc.root_element(); + + if let Some(attr) = node.attribute_node("a") { + assert_eq!(doc.text_pos_at(attr.range().start), TextPos::new(1, 4)); + assert_eq!(doc.text_pos_at(attr.range().end), TextPos::new(1, 269)); + assert_eq!(doc.text_pos_at(attr.range_qname().start), TextPos::new(1, 4)); + assert_eq!(doc.text_pos_at(attr.range_qname().end), TextPos::new(1, 5)); + attr.range_value(); // unreliable since >254 spaces around equal sign, but still shouldn't panic + } +} + #[test] fn next_sibling_element_01() { let data = "";