diff --git a/src/iri.rs b/src/iri.rs index 7125d34..507f588 100644 --- a/src/iri.rs +++ b/src/iri.rs @@ -90,31 +90,94 @@ struct IHierPart { struct Authority { user_info: Option, - host: Option, + host: Host, port: Option, } enum Host { IPV4(Vec), - IPV6(Vec), - RegName(String), + IPV6(Vec), + RegName(Option), } #[allow(unused)] mod parser { use nom::{ + bytes::streaming::take_while1, character::complete::anychar, error::{ParseError, VerboseError}, - multi::{fold_many1, many1}, + multi::{fold_many0, fold_many1, many1}, }; use crate::prelude::*; + + use super::{ + ip::{self, parse_ip_v4, parse_ip_v6}, + Authority, Host, + }; + + fn parse_authority(s: &str) -> ParserResult { + map( + tuple(( + opt(parse_userinfo), + parse_host, + opt(preceded(tag(":"), parse_port)), + )), + |(user_info, host, port)| Authority { + user_info, + host, + port: port.map(String::from), + }, + )(s) + } + + fn parse_host(s: &str) -> ParserResult { + alt(( + map(parse_ip_v4, Host::IPV4), + map(parse_ip_v6, Host::IPV6), + map(opt(parse_i_reg_name), Host::RegName), + ))(s) + } + fn parse_i_segmentnz(s: &str) -> ParserResult { + fold_many0(parse_ip_char, String::new, |mut acc, item| { + acc.push_str(item); + acc + })(s) + } + fn parse_i_segment0(s: &str) -> ParserResult { + fold_many0(parse_ip_char, String::new, |mut acc, item| { + acc.push_str(item); + acc + })(s) + } + + fn parse_i_segmentnz_nc(s: &str) -> ParserResult { + fold_many1( + alt(( + parse_i_unreserved, + parse_pct_encoded, + parse_sub_delims, + tag("@"), + )), + String::new, + |mut acc, item| { + acc.push_str(item); + acc + }, + )(s) + } + fn parse_ip_char(s: &str) -> ParserResult<&str> { + alt(( + parse_i_unreserved, + parse_pct_encoded, + parse_sub_delims, + tag(":"), + tag("@"), + ))(s) + } fn parse_scheme(s: &str) -> ParserResult<&str> { verify( - terminated( - take_while1(|c: char| c.is_alphanumeric() || c == '.' || c == '-' || c == '+'), - tag(":"), - ), + take_while1(|c: char| c.is_alphanumeric() || c == '.' || c == '-' || c == '+'), |scheme: &str| scheme.starts_with(|c: char| c.is_alphabetic()), )(s) } @@ -133,6 +196,9 @@ mod parser { }, )(s) } + fn parse_port(s: &str) -> ParserResult<&str> { + take_while1(|p: char| p.is_numeric())(s) + } fn parse_i_reg_name(s: &str) -> ParserResult { fold_many1( alt((parse_pct_encoded, parse_i_unreserved, parse_sub_delims)), diff --git a/src/iri_spect.txt b/src/iri_spect.txt index 55da51e..3fe44cf 100644 --- a/src/iri_spect.txt +++ b/src/iri_spect.txt @@ -4,9 +4,7 @@ FC 3987 Internationalized Resource Identifiers January 2005 / ipath-noscheme / ipath-empty - iauthority = [ iuserinfo "@" ] ihost [ ":" port ] - ihost = IP-literal / IPv4address / ireg-name ipath = ipath-abempty ; begins with "/" or is empty @@ -21,14 +19,9 @@ FC 3987 Internationalized Resource Identifiers January 2005 ipath-rootless = isegment-nz *( "/" isegment ) ipath-empty = 0 - isegment = *ipchar - isegment-nz = 1*ipchar - isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims - / "@" ) - ; non-zero-length segment without any colon ":" - ipchar = iunreserved / pct-encoded / sub-delims / ":" - / "@" + + iquery = *( ipchar / iprivate / "/" / "?" ) @@ -53,7 +46,6 @@ RFC 3987 Internationalized Resource Identifiers January 2005 - port = *DIGIT IP-literal = "[" ( IPv6address / IPvFuture ) "]" @@ -75,6 +67,13 @@ RFC 3987 Internationalized Resource Identifiers January 2005 DONE: + + ipchar = iunreserved / pct-encoded / sub-delims / ":" + / "@" + iauthority = [ iuserinfo "@" ] ihost [ ":" port ] + + port = *DIGIT + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" @@ -111,3 +110,9 @@ ls32 = ( h16 ":" h16 ) / IPv4address ireg-name = *( iunreserved / pct-encoded / sub-delims ) iuserinfo = *( iunreserved / ncoded / sub-delims / ":" ) + ihost = IP-literal / IPv4address / ireg-name + isegment = *ipchar + isegment-nz = 1*ipchar + isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims + / "@" ) + ; non-zero-length segment without any colon ":"