Skip to content

Commit

Permalink
WIP iri
Browse files Browse the repository at this point in the history
  • Loading branch information
nbittich committed Nov 28, 2024
1 parent 258163b commit 20386c7
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 36 deletions.
125 changes: 97 additions & 28 deletions src/iri.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,37 @@
/// Parsed components of an IRI as described by the RFC 3987 grammar
/// (`IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`).
///
/// Only the scheme and the hierarchical part are modelled here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IRI {
    /// The `scheme` component, if one was found.
    pub scheme: Option<String>,
    /// The `ihier-part` component, if one was found.
    pub i_hier_part: Option<IHierPart>,
}

/// The `ihier-part` of an IRI; currently it only carries the authority.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IHierPart {
    /// The `iauthority` component, if one was found.
    pub authority: Option<Authority>,
}

/// An `iauthority`: `[ iuserinfo "@" ] ihost [ ":" port ]`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Authority {
    /// Optional `iuserinfo` (the part before `@`).
    pub user_info: Option<String>,
    /// The mandatory `ihost`.
    pub host: Host,
    /// Optional `port` (the part after `:`), kept as text.
    pub port: Option<String>,
}

/// The `ihost` alternatives.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Host {
    /// An IPv4 address stored as a list of `u8` values.
    IPV4(Vec<u8>),
    /// An IPv6 address stored as a list of `u16` values.
    IPV6(Vec<u16>),
    /// A registered name; `None` when the name is absent.
    RegName(Option<String>),
}

/// The `ipath-*` alternatives of the grammar.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IPath {
    /// `ipath-abempty = *( "/" isegment )`: starts with `/` or is empty.
    AbEmpty(Vec<String>),
    /// `ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ]`.
    AbAbsolute {
        /// The first, non-zero-length segment (`isegment-nz`).
        snz: String,
        /// The remaining `isegment`s, each of which followed a `/`.
        segments: Vec<String>,
    },
    /// A path that begins with a segment rather than a `/`.
    Rootless {
        /// The first segment; the field name refers to `isegment-nz-nc`.
        snz_nc: String,
        /// The remaining `isegment`s, each of which followed a `/`.
        segments: Vec<String>,
    },
    /// `ipath-empty = 0<ipchar>`: zero characters.
    Empty,
}

#[allow(unused)]
mod ip {
use nom::{
Expand Down Expand Up @@ -79,27 +113,6 @@ mod ip {
}
}

// NOTE(review): this private set of definitions appears to be the pre-change
// ("removed") side of the diff hunk; confirm it is gone from the committed file.

/// Parsed components of an IRI: an optional scheme and an optional
/// hierarchical part.
struct IRI {
scheme: Option<String>,
i_hier_part: Option<IHierPart>,
}

/// The hierarchical part of an IRI; it only carries an optional authority.
struct IHierPart {
authority: Option<Authority>,
}

/// An authority: optional user info, a mandatory host and an optional port.
struct Authority {
user_info: Option<String>,
host: Host,
port: Option<String>,
}

/// Host alternatives: an IPv4 address (`u8` values), an IPv6 address
/// (`u16` values) or an optional registered name.
enum Host {
IPV4(Vec<u8>),
IPV6(Vec<u16>),
RegName(Option<String>),
}

#[allow(unused)]
mod parser {
use nom::{
Expand All @@ -113,7 +126,7 @@ mod parser {

use super::{
ip::{self, parse_ip_v4, parse_ip_v6},
Authority, Host,
Authority, Host, IPath,
};

fn parse_authority(s: &str) -> ParserResult<Authority> {
Expand All @@ -138,6 +151,51 @@ mod parser {
map(opt(parse_i_reg_name), Host::RegName),
))(s)
}

/// Parses an `ifragment`: `*( ipchar / "/" / "?" )`.
///
/// Each matched piece is appended to a single owned `String`. Because the
/// repetition allows zero matches, the result may be empty.
fn parse_i_fragment(s: &str) -> ParserResult<String> {
    // One unit of a fragment: an ipchar, a slash or a question mark.
    let fragment_piece = alt((parse_ip_char, tag("/"), tag("?")));
    fold_many0(fragment_piece, String::new, |mut fragment, piece| {
        fragment.push_str(piece);
        fragment
    })(s)
}
/// Parses an `ipath-empty` (`0<ipchar>`).
///
/// Succeeds with [`IPath::Empty`] only when the input does not start with an
/// `ipchar`. The look-ahead is wrapped in `peek`, so no input is consumed.
fn parse_ipath_empty(s: &str) -> ParserResult<IPath> {
    let next_ip_char = peek(opt(parse_ip_char));
    let nothing_ahead = verify(next_ip_char, |next: &Option<&str>| next.is_none());
    map(nothing_ahead, |_| IPath::Empty)(s)
}

fn parse_ipath_rootless(s: &str) -> ParserResult<IPath> {
map(
pair(
parse_i_segmentnz_nc,
many0(preceded(tag("/"), parse_i_segment0)),
),
|(snz_nc, segments)| IPath::Rootless { snz_nc, segments },
)(s)
}

/// Parses an `ipath-abempty`: `*( "/" isegment )`.
///
/// Collects every `/`-prefixed segment into [`IPath::AbEmpty`]; the list is
/// empty when the input does not start with `/`.
fn parse_ipath_abempty(s: &str) -> ParserResult<IPath> {
    let slash_segment = preceded(tag("/"), parse_i_segment0);
    let all_segments = many0(slash_segment);
    map(all_segments, IPath::AbEmpty)(s)
}
fn parse_ipath_absolute(s: &str) -> ParserResult<IPath> {
let (first_two, _) = peek(take(2usize))(s)?;
let parser = pair(
parse_i_segmentnz,
many0(preceded(tag("/"), parse_i_segment0)),
);
verify(
map(parser, |(snz, segments)| IPath::AbAbsolute {
snz,
segments,
}),
move |_| first_two.starts_with("/") && first_two != "//",
)(s)
}
fn parse_i_segmentnz(s: &str) -> ParserResult<String> {
fold_many0(parse_ip_char, String::new, |mut acc, item| {
acc.push_str(item);
Expand Down Expand Up @@ -209,6 +267,18 @@ mod parser {
},
)(s)
}
/// Parses a single `iprivate` character:
/// `%xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD`.
///
/// Takes exactly one character from the input and succeeds only when it falls
/// in one of those ranges.
fn parse_i_private(s: &str) -> ParserResult<&str> {
    /// True when `c` lies in one of the `iprivate` code-point ranges.
    fn is_private_use(c: char) -> bool {
        matches!(
            c,
            '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'
        )
    }

    let one_char = take(1usize);
    verify(one_char, |candidate: &str| {
        candidate.starts_with(is_private_use)
    })(s)
}
fn parse_i_unreserved(s: &str) -> ParserResult<&str> {
fn is_ucs_char(c: &char) -> bool {
matches!(c,
Expand All @@ -231,12 +301,11 @@ mod parser {
'\u{E1000}'..='\u{EFFFD}'
)
}
alt((
verify(take(2usize), |hex: &str| {
hex_to_char(hex).filter(|x| is_ucs_char(x)).is_some()
}),
take_while1(|c: char| c.is_alphanum() || c == '-' || c == '.' || c == '_' || c == '~'),
))(s)
verify(take(1usize), |unres: &str| {
unres.starts_with(|c: char| {
c.is_alphanum() || c == '-' || c == '.' || c == '_' || c == '~' || is_ucs_char(&c)
})
})(s)
}
fn parse_sub_delims(s: &str) -> ParserResult<&str> {
verify(take(1usize), |c: &str| {
Expand Down
30 changes: 22 additions & 8 deletions src/iri_spect.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
RFC 3987 Internationalized Resource Identifiers January 2005

IRI = scheme ":" ihier-part [ "?" iquery ]
[ "#" ifragment ]

/ ipath-noscheme
ihier-part = "//" iauthority ipath-abempty
/ ipath-absolute
/ ipath-rootless
/ ipath-empty

IRI-reference = IRI / irelative-ref

absolute-IRI = scheme ":" ihier-part [ "?" iquery ]

irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]

irelative-part = "//" iauthority ipath-abempty
/ ipath-absolute



Expand All @@ -13,23 +24,17 @@ FC 3987 Internationalized Resource Identifiers January 2005
/ ipath-rootless ; begins with a segment
/ ipath-empty ; zero characters

ipath-abempty = *( "/" isegment )
ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ]
ipath-noscheme = isegment-nz-nc *( "/" isegment )
ipath-rootless = isegment-nz *( "/" isegment )
ipath-empty = 0<ipchar>





iquery = *( ipchar / iprivate / "/" / "?" )

ifragment = *( ipchar / "/" / "?" )



iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD

Some productions are ambiguous. The "first-match-wins" (a.k.a.
"greedy") algorithm applies. For details, see [RFC3986].
Expand Down Expand Up @@ -68,6 +73,7 @@ RFC 3987 Internationalized Resource Identifiers January 2005

DONE:

ipath-abempty = *( "/" isegment )
ipchar = iunreserved / pct-encoded / sub-delims / ":"
/ "@"
iauthority = [ iuserinfo "@" ] ihost [ ":" port ]
Expand Down Expand Up @@ -116,3 +122,11 @@ ls32 = ( h16 ":" h16 ) / IPv4address
isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
/ "@" )
; non-zero-length segment without any colon ":"
ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ]

ipath-rootless = isegment-nz *( "/" isegment )
ipath-empty = 0<ipchar>

ifragment = *( ipchar / "/" / "?" )
iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD

0 comments on commit 20386c7

Please sign in to comment.