diff --git a/src/iri.rs b/src/iri.rs index 9c6d6e9..6fbc46d 100644 --- a/src/iri.rs +++ b/src/iri.rs @@ -1,24 +1,52 @@ -pub struct IRI { - pub scheme: Option, - pub i_hier_part: Option, +#[derive(Debug)] +pub enum IRI { + IRI { + scheme: String, + hier_part: IHierPart, + query: String, + fragment: String, + }, + Reference(RelativeRef), + Absolute { + scheme: String, + hier_part: IHierPart, + query: String, + }, } +#[derive(Debug)] pub struct IHierPart { - pub authority: Option, + pub authority: Authority, + pub ipath: IPath, +} + +#[derive(Debug)] +pub struct RelativePart { + pub authority: Authority, + pub ipath: IPath, } +#[derive(Debug)] +pub struct RelativeRef { + pub relative_part: RelativePart, + pub query: String, + pub fragment: String, +} +#[derive(Debug)] pub struct Authority { pub user_info: Option, pub host: Host, pub port: Option, } +#[derive(Debug)] pub enum Host { IPV4(Vec), IPV6(Vec), RegName(Option), } +#[derive(Debug)] pub enum IPath { AbEmpty(Vec), // starts with / or is empty AbAbsolute { @@ -32,6 +60,14 @@ pub enum IPath { Empty, // ipath-empty } +use parser::{parse_absolute_iri, parse_iri, parse_iri_reference}; + +use crate::prelude::{alt, ParserResult}; + +pub fn parse(s: &str) -> ParserResult { + alt((parse_iri, parse_absolute_iri, parse_iri_reference))(s) +} + #[allow(unused)] mod ip { use nom::{ @@ -126,9 +162,19 @@ mod parser { use super::{ ip::{self, parse_ip_v4, parse_ip_v6}, - Authority, Host, IPath, + Authority, Host, IHierPart, IPath, RelativePart, RelativeRef, IRI, }; + fn parse_i_query(s: &str) -> ParserResult { + fold_many0( + alt((parse_ip_char, parse_i_private, tag("/"), tag("?"))), + String::new, + |mut acc, item| { + acc.push_str(item); + acc + }, + )(s) + } fn parse_authority(s: &str) -> ParserResult { map( tuple(( @@ -144,6 +190,82 @@ mod parser { )(s) } + pub(super) fn parse_iri_reference(s: &str) -> ParserResult { + map(parse_i_relative_ref, IRI::Reference)(s) + } + pub(super) fn parse_iri(s: &str) -> ParserResult { + map( + tuple(( + parse_scheme, + preceded(tag(":"), parse_i_hier_part), + preceded(opt(tag("?")), parse_i_query), + preceded(opt(tag("#")), parse_i_fragment), + )), + |(scheme, hier_part, query, fragment)| IRI::IRI { + scheme: scheme.into(), + hier_part, + query: query.into(), + fragment, + }, + )(s) + } + pub(super) fn parse_absolute_iri(s: &str) -> ParserResult { + map( + tuple(( + parse_scheme, + preceded(tag(":"), parse_i_hier_part), + preceded(opt(tag("?")), parse_i_query), + )), + |(scheme, hier_part, query)| IRI::Absolute { + scheme: scheme.into(), + hier_part, + query: query.into(), + }, + )(s) + } + fn parse_i_relative_ref(s: &str) -> ParserResult { + map( + tuple(( + parse_i_relative_part, + preceded(opt(tag("?")), parse_i_query), + preceded(opt(tag("#")), parse_i_fragment), + )), + |(relative_part, query, fragment)| RelativeRef { + relative_part, + query, + fragment, + }, + )(s) + } + fn parse_i_relative_part(s: &str) -> ParserResult { + map( + preceded( + tag("//"), + pair( + parse_authority, + alt((parse_ipath_abempty, parse_ipath_absolute)), + ), + ), + |(authority, ipath)| RelativePart { authority, ipath }, + )(s) + } + fn parse_i_hier_part(s: &str) -> ParserResult { + map( + preceded( + tag("//"), + pair( + parse_authority, + alt(( + parse_ipath_abempty, + parse_ipath_absolute, + parse_ipath_rootless, + parse_ipath_empty, + )), + ), + ), + |(authority, ipath)| IHierPart { authority, ipath }, + )(s) + } fn parse_host(s: &str) -> ParserResult { alt(( map(parse_ip_v4, Host::IPV4), diff --git a/src/iri_spect.txt b/src/iri_spect.txt index fe6a475..690aa09 100644 --- a/src/iri_spect.txt +++ b/src/iri_spect.txt @@ -2,35 +2,18 @@ IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ] - ihier-part = "//" iauthority ipath-abempty - / ipath-absolute - / ipath-rootless - / ipath-empty - - IRI-reference = IRI / irelative-ref - absolute-IRI = scheme ":" ihier-part [ "?" iquery ] - irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] - irelative-part = "//" iauthority ipath-abempty - / ipath-absolute - ipath = ipath-abempty ; begins with "/" or is empty - / ipath-absolute ; begins with "/" but not "//" - / ipath-noscheme ; begins with a non-colon segment - / ipath-rootless ; begins with a segment - / ipath-empty ; zero characters - ipath-noscheme = isegment-nz-nc *( "/" isegment ) - iquery = *( ipchar / iprivate / "/" / "?" ) @@ -72,6 +55,18 @@ RFC 3987 Internationalized Resource Identifiers January 2005 DONE: + ipath = ipath-abempty ; begins with "/" or is empty + / ipath-absolute ; begins with "/" but not "//" + / ipath-noscheme ; begins with a non-colon segment + / ipath-rootless ; begins with a segment + / ipath-empty ; zero characters + + + ihier-part = "//" iauthority ipath-abempty + / ipath-absolute + / ipath-rootless + / ipath-empty + ipath-abempty = *( "/" isegment ) ipchar = iunreserved / pct-encoded / sub-delims / ":" @@ -130,3 +125,13 @@ ls32 = ( h16 ":" h16 ) / IPv4address ifragment = *( ipchar / "/" / "?" ) iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD + iquery = *( ipchar / iprivate / "/" / "?" ) + + irelative-part = "//" iauthority ipath-abempty + / ipath-absolute + irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] + + absolute-IRI = scheme ":" ihier-part [ "?" iquery ] + + IRI-reference = IRI / irelative-ref +