Skip to content

Commit

Permalink
IRI parser mostly written but untested
Browse files Browse the repository at this point in the history
  • Loading branch information
nbittich committed Nov 29, 2024
1 parent 3d44931 commit 25c9668
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 22 deletions.
132 changes: 127 additions & 5 deletions src/iri.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,52 @@
pub struct IRI {
pub scheme: Option<String>,
pub i_hier_part: Option<IHierPart>,
/// A parsed IRI, in one of the three top-level shapes the parsers in this file build.
#[derive(Debug)]
pub enum IRI {
/// Full IRI: `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
IRI {
/// Scheme text that precedes the `:` separator.
scheme: String,
/// Hierarchical part that follows the `:` separator.
hier_part: IHierPart,
/// Parsed `iquery` text (an empty string when nothing was matched).
query: String,
/// Parsed `ifragment` text.
fragment: String,
},
/// Relative reference (`irelative-ref` in the grammar); it carries no scheme.
Reference(RelativeRef),
/// Absolute IRI: `scheme ":" ihier-part [ "?" iquery ]`, i.e. the full form without a fragment.
Absolute {
/// Scheme text that precedes the `:` separator.
scheme: String,
/// Hierarchical part that follows the `:` separator.
hier_part: IHierPart,
/// Parsed `iquery` text (an empty string when nothing was matched).
query: String,
},
}

/// Hierarchical part of an IRI (`ihier-part` in the grammar): an authority plus a path.
#[derive(Debug)]
pub struct IHierPart {
pub authority: Option<Authority>,
pub authority: Authority,
/// Path that follows the authority.
pub ipath: IPath,
}

/// Relative part of a relative reference (`irelative-part` in the grammar): an authority plus a
/// path.
#[derive(Debug)]
pub struct RelativePart {
/// Authority that follows the leading `//`.
pub authority: Authority,
/// Path that follows the authority.
pub ipath: IPath,
}

/// Relative reference: `irelative-part [ "?" iquery ] [ "#" ifragment ]`.
#[derive(Debug)]
pub struct RelativeRef {
/// The authority-and-path portion of the reference.
pub relative_part: RelativePart,
/// Parsed `iquery` text (an empty string when nothing was matched).
pub query: String,
/// Parsed `ifragment` text.
pub fragment: String,
}
/// Authority component of an IRI (`iauthority` in the grammar).
#[derive(Debug)]
pub struct Authority {
/// User information, when present.
pub user_info: Option<String>,
/// Host the authority refers to.
pub host: Host,
/// Port, when present; kept as text rather than converted to a number.
pub port: Option<String>,
}

/// Host of an authority: an IP literal or a registered name.
#[derive(Debug)]
pub enum Host {
/// IPv4 address as produced by `parse_ip_v4` (presumably one `u8` per octet — confirm).
IPV4(Vec<u8>),
/// IPv6 address (presumably one `u16` per 16-bit group — confirm against `parse_ip_v6`).
IPV6(Vec<u16>),
/// Registered name; `None` presumably stands for an empty name — confirm against `parse_host`.
RegName(Option<String>),
}

#[derive(Debug)]
pub enum IPath {
AbEmpty(Vec<String>), // starts with / or is empty
AbAbsolute {
Expand All @@ -32,6 +60,14 @@ pub enum IPath {
Empty, // ipath-empty
}

use parser::{parse_absolute_iri, parse_iri, parse_iri_reference};

use crate::prelude::{alt, ParserResult};

/// Parses `s` as an IRI, trying each top-level form in order.
///
/// The alternatives are attempted as: full IRI, absolute IRI, then relative reference; the
/// first one that succeeds determines the returned [`IRI`] variant.
///
/// NOTE(review): `parse_iri` is tried first and its query part may match nothing; if its
/// fragment part can also match nothing, the `parse_absolute_iri` branch can never be the one
/// that succeeds — confirm whether `IRI::Absolute` is reachable from here.
pub fn parse(s: &str) -> ParserResult<IRI> {
    let mut any_form = alt((parse_iri, parse_absolute_iri, parse_iri_reference));
    any_form(s)
}

#[allow(unused)]
mod ip {
use nom::{
Expand Down Expand Up @@ -126,9 +162,19 @@ mod parser {

use super::{
ip::{self, parse_ip_v4, parse_ip_v6},
Authority, Host, IPath,
Authority, Host, IHierPart, IPath, RelativePart, RelativeRef, IRI,
};

/// Parses an `iquery`: `*( ipchar / iprivate / "/" / "?" )`.
///
/// Zero or more pieces are matched and concatenated, in order, into one owned `String`.
/// The leading `?` delimiter is not handled here; callers strip it before calling.
fn parse_i_query(s: &str) -> ParserResult<String> {
    // One unit of query text: a path character, a private-use character, or a literal
    // "/" or "?".
    let query_piece = alt((parse_ip_char, parse_i_private, tag("/"), tag("?")));

    fold_many0(query_piece, String::new, |mut query, piece| {
        query.push_str(piece);
        query
    })(s)
}
fn parse_authority(s: &str) -> ParserResult<Authority> {
map(
tuple((
Expand All @@ -144,6 +190,82 @@ mod parser {
)(s)
}

pub(super) fn parse_iri_reference(s: &str) -> ParserResult<IRI> {
map(parse_i_relative_ref, IRI::Reference)(s)
}
/// Parses a full IRI: `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
///
/// Returns [`IRI::IRI`]. The query and fragment are each optional as a unit: a query is only
/// parsed after a literal `?`, and a fragment only after a literal `#`. When a component is
/// absent its field is the empty string.
pub(super) fn parse_iri(s: &str) -> ParserResult<IRI> {
    map(
        tuple((
            parse_scheme,
            preceded(tag(":"), parse_i_hier_part),
            // The delimiter and its component are optional together, as in the grammar;
            // making only the delimiter optional would accept query text with no `?`.
            opt(preceded(tag("?"), parse_i_query)),
            opt(preceded(tag("#"), parse_i_fragment)),
        )),
        |(scheme, hier_part, query, fragment)| IRI::IRI {
            scheme: scheme.into(),
            hier_part,
            query: query.unwrap_or_default(),
            fragment: fragment.unwrap_or_default(),
        },
    )(s)
}
/// Parses an absolute IRI: `scheme ":" ihier-part [ "?" iquery ]` (no fragment).
///
/// Returns [`IRI::Absolute`]. The query is optional as a unit: it is only parsed after a
/// literal `?`, and the field is the empty string when no query is present.
pub(super) fn parse_absolute_iri(s: &str) -> ParserResult<IRI> {
    map(
        tuple((
            parse_scheme,
            preceded(tag(":"), parse_i_hier_part),
            // `?` and the query are optional together, as in the grammar; making only
            // the delimiter optional would accept query text with no `?`.
            opt(preceded(tag("?"), parse_i_query)),
        )),
        |(scheme, hier_part, query)| IRI::Absolute {
            scheme: scheme.into(),
            hier_part,
            query: query.unwrap_or_default(),
        },
    )(s)
}
/// Parses a relative reference: `irelative-part [ "?" iquery ] [ "#" ifragment ]`.
///
/// The query and fragment are each optional as a unit: a query is only parsed after a
/// literal `?`, and a fragment only after a literal `#`. An absent component is stored as
/// the empty string.
fn parse_i_relative_ref(s: &str) -> ParserResult<RelativeRef> {
    map(
        tuple((
            parse_i_relative_part,
            // The delimiter and its component are optional together, as in the grammar;
            // making only the delimiter optional would accept query text with no `?`.
            opt(preceded(tag("?"), parse_i_query)),
            opt(preceded(tag("#"), parse_i_fragment)),
        )),
        |(relative_part, query, fragment)| RelativeRef {
            relative_part,
            query: query.unwrap_or_default(),
            fragment: fragment.unwrap_or_default(),
        },
    )(s)
}
fn parse_i_relative_part(s: &str) -> ParserResult<RelativePart> {
map(
preceded(
tag("//"),
pair(
parse_authority,
alt((parse_ipath_abempty, parse_ipath_absolute)),
),
),
|(authority, ipath)| RelativePart { authority, ipath },
)(s)
}
fn parse_i_hier_part(s: &str) -> ParserResult<IHierPart> {
map(
preceded(
tag("//"),
pair(
parse_authority,
alt((
parse_ipath_abempty,
parse_ipath_absolute,
parse_ipath_rootless,
parse_ipath_empty,
)),
),
),
|(authority, ipath)| IHierPart { authority, ipath },
)(s)
}
fn parse_host(s: &str) -> ParserResult<Host> {
alt((
map(parse_ip_v4, Host::IPV4),
Expand Down
39 changes: 22 additions & 17 deletions src/iri_spect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,18 @@
IRI = scheme ":" ihier-part [ "?" iquery ]
[ "#" ifragment ]

ihier-part = "//" iauthority ipath-abempty
/ ipath-absolute
/ ipath-rootless
/ ipath-empty

IRI-reference = IRI / irelative-ref

absolute-IRI = scheme ":" ihier-part [ "?" iquery ]

irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]

irelative-part = "//" iauthority ipath-abempty
/ ipath-absolute



ipath = ipath-abempty ; begins with "/" or is empty
/ ipath-absolute ; begins with "/" but not "//"
/ ipath-noscheme ; begins with a non-colon segment
/ ipath-rootless ; begins with a segment
/ ipath-empty ; zero characters

ipath-noscheme = isegment-nz-nc *( "/" isegment )





iquery = *( ipchar / iprivate / "/" / "?" )



Expand Down Expand Up @@ -72,6 +55,18 @@ RFC 3987 Internationalized Resource Identifiers January 2005


DONE:
ipath = ipath-abempty ; begins with "/" or is empty
/ ipath-absolute ; begins with "/" but not "//"
/ ipath-noscheme ; begins with a non-colon segment
/ ipath-rootless ; begins with a segment
/ ipath-empty ; zero characters


ihier-part = "//" iauthority ipath-abempty
/ ipath-absolute
/ ipath-rootless
/ ipath-empty


ipath-abempty = *( "/" isegment )
ipchar = iunreserved / pct-encoded / sub-delims / ":"
Expand Down Expand Up @@ -130,3 +125,13 @@ ls32 = ( h16 ":" h16 ) / IPv4address
ifragment = *( ipchar / "/" / "?" )
iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD

iquery = *( ipchar / iprivate / "/" / "?" )

irelative-part = "//" iauthority ipath-abempty
/ ipath-absolute
irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]

absolute-IRI = scheme ":" ihier-part [ "?" iquery ]

IRI-reference = IRI / irelative-ref

0 comments on commit 25c9668

Please sign in to comment.