Skip to content

Commit

Permalink
Introduce Document struct, representing the parse result for a single…
Browse files Browse the repository at this point in the history
… AsciiDoc string/file
  • Loading branch information
scouten committed Dec 27, 2023
1 parent 60c094d commit b4ecb02
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 0 deletions.
68 changes: 68 additions & 0 deletions src/document.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
use std::slice::Iter;

use nom::{Finish, IResult};

use crate::{blocks::Block, primitives::consume_empty_lines, Error, Span};

/// A document represents the top-level block element in AsciiDoc. It consists
/// of an optional document header and either a) one or more sections preceded
/// by an optional preamble or b) a sequence of top-level blocks only.
///
/// The document can be configured using a document header. The header is not a
/// block itself, but contributes metadata to the document, such as the document
/// title and document attributes.
#[allow(dead_code)] // TEMPORARY while building
#[derive(Debug)]
pub struct Document<'a> {
    /// Blocks found in the source, in the order they were parsed.
    blocks: Vec<Block<'a>>,

    /// Span covering the entire source text this document was parsed from.
    source: Span<'a>,
}

impl<'a> Document<'a> {
/// Parse a UTF-8 string as an AsciiDoc document.
///
/// Note that the document references the underlying source string and
/// necessarily has the same lifetime as the source.
pub fn parse(source: &'a str) -> Result<Self, Error> {
let source = Span::new(source, true);
let i = source.clone();

// TO DO: Look for document header.
// TO DO: Add option for best-guess parsing?

let _ = parse_blocks(i).finish();

let blocks = match parse_blocks(i) {
Ok((_, blocks)) => blocks,
Err(e) => {
return Err(e.into());
}
};

// let blocks: Vec<Block<'a>> = vec![]; // TEMPORARY
Ok(Self { source, blocks })
}

/// Return a [`Span`] describing the entire document as parsed.
pub fn span(&'a self) -> &'a Span<'a> {
&self.source
}

/// Return an iterator over the blocks in this document.
pub fn blocks(&'a self) -> Iter<'a, Block<'a>> {
self.blocks.iter()
}
}

/// Parse blocks from `i` one after another until no input remains.
///
/// `consume_empty_lines` is applied to the input once before the first block
/// is parsed. On success, returns the remaining (empty) span together with the
/// blocks in the order they were parsed. The first error reported by
/// `Block::parse` is propagated unchanged.
fn parse_blocks<'a>(mut i: Span<'a>) -> IResult<Span, Vec<Block<'a>>> {
    let mut parsed = Vec::new();

    i = consume_empty_lines(i);

    loop {
        if i.data().is_empty() {
            break Ok((i, parsed));
        }

        // TO DO: Handle other kinds of blocks.
        let (rest, block) = Block::parse(i)?;
        parsed.push(block);
        i = rest;
    }
}
15 changes: 15 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ pub enum Error {
/// Error from nom parsing framework.
#[error("nom error: {0:?}")]
NomError(ErrorKind),

/// Error with location info.
#[error("temporary error from nom: {0:?}")]
TemporaryError(String),
}

impl<'a> ParseError<Spanned<&'a str>> for Error {
Expand All @@ -36,6 +40,17 @@ impl From<nom::Err<Error>> for Error {
}
}

impl From<nom::Err<nom::error::Error<Spanned<&str>>>> for Error {
fn from(e: nom::Err<nom::error::Error<Spanned<&str>>>) -> Self {
match e {
nom::Err::Incomplete(n) => Self::Incomplete(n),
nom::Err::Error(e) | nom::Err::Failure(e) => {
Self::TemporaryError(format!("TEMPORARY: {e:#?}"))
} // TO DO: Find better solution for error lifetime issues.
}
}
}

impl<I, E> FromExternalError<I, E> for Error {
fn from_external_error(_input: I, kind: ErrorKind, _e: E) -> Error {
Error::NomError(kind)
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

pub mod blocks;

mod document;
pub use document::Document;

mod error;
pub use error::{Error, ParseResult};

Expand Down
82 changes: 82 additions & 0 deletions src/tests/document.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use crate::{
blocks::{Block, SimpleBlock},
Document, Span,
};

#[test]
fn empty_source() {
    // An empty source parses successfully and yields no blocks.
    let document = Document::parse("").unwrap();

    let whole = document.span();
    assert_eq!(whole.data(), &"");
    assert_eq!((whole.line(), whole.col(), whole.byte_offset()), (1, 1, 0));

    assert!(document.blocks().next().is_none());
}

#[test]
fn only_spaces() {
    // A source made up solely of spaces parses successfully and yields no
    // blocks, while the document span still covers the original text.
    let document = Document::parse(" ").unwrap();

    let whole = document.span();
    assert_eq!(whole.data(), &" ");
    assert_eq!((whole.line(), whole.col(), whole.byte_offset()), (1, 1, 0));

    assert!(document.blocks().next().is_none());
}

#[test]
fn one_simple_block() {
    // A single line of text yields exactly one simple block.
    let document = Document::parse("abc").unwrap();

    let whole = document.span();
    assert_eq!(whole.data(), &"abc");
    assert_eq!((whole.line(), whole.col(), whole.byte_offset()), (1, 1, 0));

    let expected = Block::Simple(SimpleBlock {
        inlines: vec![Span::new("abc", true)],
    });

    // Comparing the collected blocks checks both the content of the first
    // block and that nothing follows it.
    let found: Vec<&Block<'_>> = document.blocks().collect();
    assert_eq!(found, vec![&expected]);
}

#[test]
fn two_simple_blocks() {
    // Two paragraphs separated by a blank line yield two simple blocks.
    let document = Document::parse("abc\n\ndef").unwrap();

    let whole = document.span();
    assert_eq!(whole.data(), &"abc\n\ndef");
    assert_eq!((whole.line(), whole.col(), whole.byte_offset()), (1, 1, 0));

    let mut remaining = document.blocks();

    // First block: compared wholesale against an equivalent block.
    let first = Block::Simple(SimpleBlock {
        inlines: vec![Span::new("abc", true)],
    });
    assert_eq!(remaining.next(), Some(&first));

    // Second block: inspected field by field to pin down its location.
    let Block::Simple(second) = remaining.next().unwrap();
    // else ... error

    assert_eq!(second.inlines.len(), 1);

    let def_span = second.inlines.first().unwrap();
    assert_eq!(def_span.data(), &"def");
    assert_eq!(def_span.line(), 3);
    assert_eq!(def_span.col(), 1);
    assert_eq!(def_span.byte_offset(), 5);

    assert!(remaining.next().is_none());
}
1 change: 1 addition & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#![allow(clippy::unwrap_used)]

mod blocks;
mod document;
mod fixtures;
mod primitives;
mod strings;

0 comments on commit b4ecb02

Please sign in to comment.