From 394ece3fbf8578b75c81f28f65b52de0ad71f9b9 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Sun, 9 Jun 2024 09:50:02 +0000 Subject: [PATCH] Merge java_string from https://github.com/valence-rs/valence_nbt --- crates/java_string/Cargo.toml | 13 ++-- crates/java_string/src/cesu8.rs | 33 ++++----- crates/java_string/src/char.rs | 52 +++++++-------- crates/java_string/src/iter.rs | 14 ++-- crates/java_string/src/owned.rs | 43 ++++++------ crates/java_string/src/pattern.rs | 96 ++++++++------------------- crates/java_string/src/serde.rs | 19 +++--- crates/java_string/src/slice.rs | 65 +++++++++--------- crates/java_string/src/validations.rs | 30 ++++----- 9 files changed, 154 insertions(+), 211 deletions(-) diff --git a/crates/java_string/Cargo.toml b/crates/java_string/Cargo.toml index f7300de90..4babf5d80 100644 --- a/crates/java_string/Cargo.toml +++ b/crates/java_string/Cargo.toml @@ -1,19 +1,18 @@ [package] name = "java_string" -description = "An implementation of Java strings, tolerant of invalid UTF-16 encoding" -readme = "README.md" version = "0.1.2" +description = "An implementation of Java strings, tolerant of invalid UTF-16 encoding" keywords = ["java", "string", "utf16"] edition.workspace = true repository.workspace = true -documentation.workspace = true license.workspace = true -[lints] -workspace = true - [features] serde = ["dep:serde"] [dependencies] -serde = { workspace = true, optional = true } +serde = { version = "1.0.200", optional = true } + +[lints] +workspace = true + diff --git a/crates/java_string/src/cesu8.rs b/crates/java_string/src/cesu8.rs index eb94ee6c1..dc1796ca5 100644 --- a/crates/java_string/src/cesu8.rs +++ b/crates/java_string/src/cesu8.rs @@ -15,16 +15,16 @@ impl JavaStr { /// assert_eq!(JavaStr::from_str("Hello World!"), result); /// /// let result = JavaStr::from_modified_utf8(&[ - /// 0x61, 0x62, 0x63, 0xc0, 0x80, 0xe2, 0x84, 0x9d, 0xed, 0xa0, 0xbd, 0xed, 0xb2, 0xa3, 0xed, - /// 0xa0, 0x80, + /// 0x61, 0x62, 0x63, 0xC0, 0x80, 0xE2, 0x84, 0x9D, 0xED, 0xA0, 0xBD, 0xED, 0xB2, 0xA3, 0xED, + /// 0xA0, 0x80, /// ]) /// .unwrap(); /// assert!(matches!(result, Cow::Owned(_))); /// let mut expected = JavaString::from("abc\0ℝ💣"); - /// expected.push_java(JavaCodePoint::from_u32(0xd800).unwrap()); + /// expected.push_java(JavaCodePoint::from_u32(0xD800).unwrap()); /// assert_eq!(expected, result); /// - /// let result = JavaStr::from_modified_utf8(&[0xed]); + /// let result = JavaStr::from_modified_utf8(&[0xED]); /// assert!(result.is_err()); /// ``` #[inline] @@ -46,11 +46,11 @@ impl JavaStr { /// assert_eq!(result, &b"Hello World!"[..]); /// /// let mut str = JavaString::from("abc\0ℝ💣"); - /// str.push_java(JavaCodePoint::from_u32(0xd800).unwrap()); + /// str.push_java(JavaCodePoint::from_u32(0xD800).unwrap()); /// let result = str.to_modified_utf8(); /// let expected = [ - /// 0x61, 0x62, 0x63, 0xc0, 0x80, 0xe2, 0x84, 0x9d, 0xed, 0xa0, 0xbd, 0xed, 0xb2, 0xa3, 0xed, - /// 0xa0, 0x80, + /// 0x61, 0x62, 0x63, 0xC0, 0x80, 0xE2, 0x84, 0x9D, 0xED, 0xA0, 0xBD, 0xED, 0xB2, 0xA3, 0xED, + /// 0xA0, 0x80, /// ]; /// assert!(matches!(result, Cow::Owned(_))); /// assert_eq!(result, &expected[..]); @@ -73,7 +73,7 @@ impl JavaStr { while i < bytes.len() { let b = bytes[i]; if b == 0 { - encoded.extend([0xc0, 0x80]); + encoded.extend([0xC0, 0x80]); i += 1; } else if b < 128 { // Pass ASCII through quickly. @@ -100,7 +100,7 @@ impl JavaStr { // SAFETY: s contains a single char of width 4 s.chars().next().unwrap_unchecked().as_u32() - 0x10000 }; - let s = [((c >> 10) as u16) | 0xd800, ((c & 0x3ff) as u16) | 0xdc00]; + let s = [((c >> 10) as u16) | 0xD800, ((c & 0x3FF) as u16) | 0xDC00]; encoded.extend(enc_surrogate(s[0])); encoded.extend(enc_surrogate(s[1])); } @@ -114,7 +114,7 @@ impl JavaStr { impl JavaString { /// Converts from Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format to a `JavaString`. /// - /// See [JavaStr::from_modified_utf8]. + /// See [`JavaStr::from_modified_utf8`]. #[inline] pub fn from_modified_utf8(bytes: Vec) -> Result { match JavaString::from_full_utf8(bytes) { @@ -168,7 +168,7 @@ impl JavaString { } else if first < 128 { // Pass ASCII through directly. decoded.push(first); - } else if first == 0xc0 { + } else if first == 0xC0 { // modified UTF-8 encoding of null character match next!() { 0x80 => decoded.push(0), @@ -184,6 +184,7 @@ impl JavaString { } 3 => { let third = next_cont!(Some(2)); + #[allow(clippy::unnested_or_patterns)] // Justification: readability match (first, second) { // These are valid UTF-8, so pass them through. (0xe0, 0xa0..=0xbf) @@ -227,7 +228,7 @@ impl JavaString { /// Converts to Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format. /// - /// See [JavaStr::to_modified_utf8]. + /// See [`JavaStr::to_modified_utf8`]. #[inline] #[must_use] pub fn into_modified_utf8(self) -> Vec { @@ -241,7 +242,7 @@ impl JavaString { #[inline] fn dec_surrogate(second: u8, third: u8) -> u32 { - 0xd000 | ((second & CONT_MASK) as u32) << 6 | (third & CONT_MASK) as u32 + 0xD000 | u32::from(second & CONT_MASK) << 6 | u32::from(third & CONT_MASK) } #[inline] @@ -249,13 +250,13 @@ fn dec_surrogates(second: u8, third: u8, fifth: u8, sixth: u8) -> [u8; 4] { // Convert to a 32-bit code point. let s1 = dec_surrogate(second, third); let s2 = dec_surrogate(fifth, sixth); - let c = 0x10000 + (((s1 - 0xd800) << 10) | (s2 - 0xdc00)); - assert!((0x010000..=0x10ffff).contains(&c)); + let c = 0x10000 + (((s1 - 0xD800) << 10) | (s2 - 0xDC00)); + assert!((0x010000..=0x10FFFF).contains(&c)); // Convert to UTF-8. // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx [ - 0b1111_0000u8 | ((c & 0b1_1100_0000_0000_0000_0000) >> 18) as u8, + 0b1111_0000_u8 | ((c & 0b1_1100_0000_0000_0000_0000) >> 18) as u8, TAG_CONT | ((c & 0b0_0011_1111_0000_0000_0000) >> 12) as u8, TAG_CONT | ((c & 0b0_0000_0000_1111_1100_0000) >> 6) as u8, TAG_CONT | (c & 0b0_0000_0000_0000_0011_1111) as u8, diff --git a/crates/java_string/src/char.rs b/crates/java_string/src/char.rs index f338a25c4..3dea4f705 100644 --- a/crates/java_string/src/char.rs +++ b/crates/java_string/src/char.rs @@ -62,7 +62,7 @@ impl JavaCodePoint { #[inline] #[must_use] pub const fn from_u32(i: u32) -> Option { - if i <= 0x10ffff { + if i <= 0x10FFFF { unsafe { Some(Self::from_u32_unchecked(i)) } } else { None @@ -94,7 +94,7 @@ impl JavaCodePoint { /// ``` /// # use java_string::JavaCodePoint; /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32()); - /// assert_eq!(0xd800, JavaCodePoint::from_u32(0xd800).unwrap().as_u32()); + /// assert_eq!(0xD800, JavaCodePoint::from_u32(0xD800).unwrap().as_u32()); /// ``` #[inline] #[must_use] @@ -103,7 +103,7 @@ impl JavaCodePoint { // SAFETY: JavaCodePoint has the same repr as a u32 let result = std::mem::transmute(self); - if result > 0x10ffff { + if result > 0x10FFFF { // SAFETY: JavaCodePoint can never have a value > 0x10FFFF. // This statement may allow the optimizer to remove branches in the calling code // associated with out of bounds chars. @@ -119,7 +119,7 @@ impl JavaCodePoint { /// ``` /// # use java_string::JavaCodePoint; /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char()); - /// assert_eq!(None, JavaCodePoint::from_u32(0xd800).unwrap().as_char()); + /// assert_eq!(None, JavaCodePoint::from_u32(0xD800).unwrap().as_char()); /// ``` #[inline] #[must_use] @@ -148,7 +148,7 @@ impl JavaCodePoint { /// ); /// assert_eq!( /// 1, - /// JavaCodePoint::from_u32(0xd800) + /// JavaCodePoint::from_u32(0xD800) /// .unwrap() /// .encode_utf16(&mut [0; 2]) /// .len() @@ -170,7 +170,7 @@ impl JavaCodePoint { } /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with - /// surrogate code points. See also [char::encode_utf8]. + /// surrogate code points. See also [`char::encode_utf8`]. /// /// ``` /// # use java_string::JavaCodePoint; @@ -182,7 +182,7 @@ impl JavaCodePoint { /// ); /// assert_eq!( /// 3, - /// JavaCodePoint::from_u32(0xd800) + /// JavaCodePoint::from_u32(0xD800) /// .unwrap() /// .encode_semi_utf8(&mut [0; 4]) /// .len() @@ -202,19 +202,19 @@ impl JavaCodePoint { *a = code as u8; } (2, [a, b, ..]) => { - *a = (code >> 6 & 0x1f) as u8 | TAG_TWO_B; - *b = (code & 0x3f) as u8 | TAG_CONT; + *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + *b = (code & 0x3F) as u8 | TAG_CONT; } (3, [a, b, c, ..]) => { - *a = (code >> 12 & 0x0f) as u8 | TAG_THREE_B; - *b = (code >> 6 & 0x3f) as u8 | TAG_CONT; - *c = (code & 0x3f) as u8 | TAG_CONT; + *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + *b = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *c = (code & 0x3F) as u8 | TAG_CONT; } (4, [a, b, c, d, ..]) => { *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - *b = (code >> 12 & 0x3f) as u8 | TAG_CONT; - *c = (code >> 6 & 0x3f) as u8 | TAG_CONT; - *d = (code & 0x3f) as u8 | TAG_CONT; + *b = (code >> 12 & 0x3F) as u8 | TAG_CONT; + *c = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *d = (code & 0x3F) as u8 | TAG_CONT; } _ => panic!( "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", @@ -250,7 +250,7 @@ impl JavaCodePoint { /// ); /// assert_eq!( /// "\\u{d800}", - /// JavaCodePoint::from_u32(0xd800) + /// JavaCodePoint::from_u32(0xD800) /// .unwrap() /// .escape_debug() /// .to_string() @@ -317,7 +317,7 @@ impl JavaCodePoint { /// ); /// assert_eq!( /// "\\u{d800}", - /// JavaCodePoint::from_u32(0xd800) + /// JavaCodePoint::from_u32(0xD800) /// .unwrap() /// .escape_default() /// .to_string() @@ -342,7 +342,7 @@ impl JavaCodePoint { SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']), DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']), BACKSLASH => CharEscapeIter::new([b'\\', b'\\']), - 0x20..=0x7e => CharEscapeIter::new([self.as_u32() as u8]), + 0x20..=0x7E => CharEscapeIter::new([self.as_u32() as u8]), _ => self.escape_unicode(), } } @@ -358,7 +358,7 @@ impl JavaCodePoint { /// ); /// assert_eq!( /// "\\u{d800}", - /// JavaCodePoint::from_u32(0xd800) + /// JavaCodePoint::from_u32(0xD800) /// .unwrap() /// .escape_unicode() /// .to_string() @@ -410,7 +410,7 @@ impl JavaCodePoint { #[inline] #[must_use] pub fn is_ascii(self) -> bool { - self.as_u32() <= 0x7f + self.as_u32() <= 0x7F } /// See [`char::is_ascii_alphabetic`]. @@ -431,7 +431,7 @@ impl JavaCodePoint { #[inline] #[must_use] pub const fn is_ascii_control(self) -> bool { - matches!(self.as_u32(), 0..=0x1f | 0x7f) + matches!(self.as_u32(), 0..=0x1F | 0x7F) } /// See [`char::is_ascii_digit`]. @@ -447,7 +447,7 @@ impl JavaCodePoint { #[inline] #[must_use] pub const fn is_ascii_graphic(self) -> bool { - matches!(self.as_u32(), 0x21..=0x7e) + matches!(self.as_u32(), 0x21..=0x7E) } /// See [`char::is_ascii_hexdigit`]. @@ -485,7 +485,7 @@ impl JavaCodePoint { pub const fn is_ascii_punctuation(self) -> bool { matches!( self.as_u32(), - (0x21..=0x2f) | (0x3a..=0x40) | (0x5b..=0x60) | (0x7b..=0x7e) + (0x21..=0x2F) | (0x3A..=0x40) | (0x5B..=0x60) | (0x7B..=0x7E) ) } @@ -505,7 +505,7 @@ impl JavaCodePoint { const SPACE: u32 = ' ' as u32; const HORIZONTAL_TAB: u32 = '\t' as u32; const LINE_FEED: u32 = '\n' as u32; - const FORM_FEED: u32 = 0xc; + const FORM_FEED: u32 = 0xC; const CARRIAGE_RETURN: u32 = '\r' as u32; matches!( self.as_u32(), @@ -566,7 +566,7 @@ impl JavaCodePoint { /// let len = JavaCodePoint::from_char('💣').len_utf16(); /// assert_eq!(len, 2); /// - /// assert_eq!(1, JavaCodePoint::from_u32(0xd800).unwrap().len_utf16()); + /// assert_eq!(1, JavaCodePoint::from_u32(0xD800).unwrap().len_utf16()); /// ``` #[inline] #[must_use] @@ -595,7 +595,7 @@ impl JavaCodePoint { /// let len = JavaCodePoint::from_char('💣').len_utf8(); /// assert_eq!(len, 4); /// - /// let len = JavaCodePoint::from_u32(0xd800).unwrap().len_utf8(); + /// let len = JavaCodePoint::from_u32(0xD800).unwrap().len_utf8(); /// assert_eq!(len, 3); /// ``` #[inline] diff --git a/crates/java_string/src/iter.rs b/crates/java_string/src/iter.rs index 3762f6d72..f93605393 100644 --- a/crates/java_string/src/iter.rs +++ b/crates/java_string/src/iter.rs @@ -649,17 +649,17 @@ where } match self.next_match_back() { - Some((index, len)) => unsafe { + Some((index, len)) => { // SAFETY: pattern guarantees valid indices - let elt = self.haystack.get_unchecked(index + len..self.end); + let elt = unsafe { self.haystack.get_unchecked(index + len..self.end) }; self.end = index + len; Some(elt) - }, - None => unsafe { - // SAFETY: `self.start` and `self.end` always lie on unicode boundaries. + } + None => { self.finished = true; - Some(self.haystack.get_unchecked(self.start..self.end)) - }, + // SAFETY: `self.start` and `self.end` always lie on unicode boundaries. + Some(unsafe { self.haystack.get_unchecked(self.start..self.end) }) + } } } } diff --git a/crates/java_string/src/owned.rs b/crates/java_string/src/owned.rs index 786855b58..2e16970c0 100644 --- a/crates/java_string/src/owned.rs +++ b/crates/java_string/src/owned.rs @@ -23,6 +23,7 @@ pub struct JavaString { vec: Vec, } +#[allow(clippy::multiple_inherent_impl)] impl JavaString { #[inline] #[must_use] @@ -62,14 +63,14 @@ impl JavaString { /// "Hello World!" /// ); /// assert_eq!( - /// JavaString::from_semi_utf8(vec![0xf0, 0x9f, 0x92, 0x96]).unwrap(), + /// JavaString::from_semi_utf8(vec![0xF0, 0x9F, 0x92, 0x96]).unwrap(), /// "💖" /// ); /// assert_eq!( - /// JavaString::from_semi_utf8(vec![0xed, 0xa0, 0x80]).unwrap(), - /// JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()) + /// JavaString::from_semi_utf8(vec![0xED, 0xA0, 0x80]).unwrap(), + /// JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()) /// ); - /// assert!(JavaString::from_semi_utf8(vec![0xed]).is_err()); + /// assert!(JavaString::from_semi_utf8(vec![0xED]).is_err()); /// ``` pub fn from_semi_utf8(vec: Vec) -> Result { match run_utf8_semi_validation(&vec) { @@ -88,12 +89,12 @@ impl JavaString { /// # use std::borrow::Cow; /// # use java_string::{JavaStr, JavaString}; /// - /// let sparkle_heart = [0xf0, 0x9f, 0x92, 0x96]; + /// let sparkle_heart = [0xF0, 0x9F, 0x92, 0x96]; /// let result = JavaString::from_semi_utf8_lossy(&sparkle_heart); /// assert!(matches!(result, Cow::Borrowed(_))); /// assert_eq!(result, JavaStr::from_str("💖")); /// - /// let foobar_with_error = [b'f', b'o', b'o', 0xed, b'b', b'a', b'r']; + /// let foobar_with_error = [b'f', b'o', b'o', 0xED, b'b', b'a', b'r']; /// let result = JavaString::from_semi_utf8_lossy(&foobar_with_error); /// assert!(matches!(result, Cow::Owned(_))); /// assert_eq!(result, JavaStr::from_str("foo�bar")); @@ -121,9 +122,8 @@ impl JavaString { Ok(()) => { unsafe { // SAFETY: validation succeeded - result - .push_java_str(JavaStr::from_semi_utf8_unchecked(&v[index..])); - } + result.push_java_str(JavaStr::from_semi_utf8_unchecked(&v[index..])) + }; return Cow::Owned(result); } Err(error) => { @@ -131,8 +131,8 @@ impl JavaString { // SAFETY: validation succeeded up to this index result.push_java_str(JavaStr::from_semi_utf8_unchecked( v.get_unchecked(index..index + error.valid_up_to), - )); - } + )) + }; result.push_str(REPLACEMENT); index += error.valid_up_to + error.error_len.unwrap_or(1) as usize; } @@ -195,11 +195,11 @@ impl JavaString { /// ); /// /// let string_with_error = JavaString::from("abc") - /// + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str(); + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str(); /// assert!(string_with_error.into_string().is_err()); /// ``` pub fn into_string(self) -> Result { - run_utf8_full_validation_from_semi(self.as_bytes()).map(|_| unsafe { + run_utf8_full_validation_from_semi(self.as_bytes()).map(|()| unsafe { // SAFETY: validation succeeded self.into_string_unchecked() }) @@ -326,9 +326,7 @@ impl JavaString { pub fn pop(&mut self) -> Option { let ch = self.chars().next_back()?; let newlen = self.len() - ch.len_utf8(); - unsafe { - self.vec.set_len(newlen); - } + unsafe { self.vec.set_len(newlen) }; Some(ch) } @@ -357,9 +355,8 @@ impl JavaString { /// ``` #[inline] pub fn remove(&mut self, idx: usize) -> JavaCodePoint { - let ch = match self[idx..].chars().next() { - Some(ch) => ch, - None => panic!("cannot remove a char from the end of a string"), + let Some(ch) = self[idx..].chars().next() else { + panic!("cannot remove a char from the end of a string") }; let next = idx + ch.len_utf8(); @@ -370,8 +367,8 @@ impl JavaString { self.vec.as_mut_ptr().add(idx), len - next, ); - self.vec.set_len(len - (next - idx)); - } + self.vec.set_len(len - (next - idx)) + }; ch } @@ -829,13 +826,13 @@ impl Extend for JavaString { impl<'a> Extend<&'a char> for JavaString { fn extend>(&mut self, iter: T) { - self.extend(iter.into_iter().cloned()) + self.extend(iter.into_iter().copied()) } } impl<'a> Extend<&'a JavaCodePoint> for JavaString { fn extend>(&mut self, iter: T) { - self.extend(iter.into_iter().cloned()) + self.extend(iter.into_iter().copied()) } } diff --git a/crates/java_string/src/pattern.rs b/crates/java_string/src/pattern.rs index 06cc78041..a17d374a9 100644 --- a/crates/java_string/src/pattern.rs +++ b/crates/java_string/src/pattern.rs @@ -32,21 +32,13 @@ unsafe impl JavaStrPattern for char { #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next()?; - if ch == *self { - Some(ch.len_utf8()) - } else { - None - } + (ch == *self).then(|| ch.len_utf8()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next_back()?; - if ch == *self { - Some(ch.len_utf8()) - } else { - None - } + (ch == *self).then(|| ch.len_utf8()) } #[inline] @@ -68,21 +60,13 @@ unsafe impl JavaStrPattern for JavaCodePoint { #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next()?; - if ch == *self { - Some(ch.len_utf8()) - } else { - None - } + (ch == *self).then(|| ch.len_utf8()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next_back()?; - if ch == *self { - Some(ch.len_utf8()) - } else { - None - } + (ch == *self).then(|| ch.len_utf8()) } #[inline] @@ -103,20 +87,18 @@ unsafe impl JavaStrPattern for JavaCodePoint { unsafe impl JavaStrPattern for &str { #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { - if haystack.as_bytes().starts_with(self.as_bytes()) { - Some(self.len()) - } else { - None - } + haystack + .as_bytes() + .starts_with(self.as_bytes()) + .then_some(self.len()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { - if haystack.as_bytes().ends_with(self.as_bytes()) { - Some(self.len()) - } else { - None - } + haystack + .as_bytes() + .ends_with(self.as_bytes()) + .then_some(self.len()) } #[inline] @@ -133,20 +115,18 @@ unsafe impl JavaStrPattern for &str { unsafe impl JavaStrPattern for &JavaStr { #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { - if haystack.as_bytes().starts_with(self.as_bytes()) { - Some(self.len()) - } else { - None - } + haystack + .as_bytes() + .starts_with(self.as_bytes()) + .then(|| self.len()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { - if haystack.as_bytes().ends_with(self.as_bytes()) { - Some(self.len()) - } else { - None - } + haystack + .as_bytes() + .ends_with(self.as_bytes()) + .then(|| self.len()) } #[inline] @@ -167,21 +147,13 @@ where #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next()?; - if self(ch) { - Some(ch.len_utf8()) - } else { - None - } + self(ch).then(|| ch.len_utf8()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next_back()?; - if self(ch) { - Some(ch.len_utf8()) - } else { - None - } + self(ch).then(|| ch.len_utf8()) } #[inline] @@ -205,21 +177,13 @@ unsafe impl JavaStrPattern for &[char] { #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next()?; - if self.iter().any(|c| ch == *c) { - Some(ch.len_utf8()) - } else { - None - } + self.iter().any(|c| ch == *c).then(|| ch.len_utf8()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next_back()?; - if self.iter().any(|c| ch == *c) { - Some(ch.len_utf8()) - } else { - None - } + self.iter().any(|c| ch == *c).then(|| ch.len_utf8()) } #[inline] @@ -243,21 +207,13 @@ unsafe impl JavaStrPattern for &[JavaCodePoint] { #[inline] fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next()?; - if self.contains(&ch) { - Some(ch.len_utf8()) - } else { - None - } + self.contains(&ch).then(|| ch.len_utf8()) } #[inline] fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option { let ch = haystack.chars().next_back()?; - if self.contains(&ch) { - Some(ch.len_utf8()) - } else { - None - } + self.contains(&ch).then(|| ch.len_utf8()) } #[inline] diff --git a/crates/java_string/src/serde.rs b/crates/java_string/src/serde.rs index e1c152d11..5743313f6 100644 --- a/crates/java_string/src/serde.rs +++ b/crates/java_string/src/serde.rs @@ -175,7 +175,7 @@ impl<'de> Visitor<'de> for JavaCodePointVisitor { where E: Error, { - self.visit_i32(v as i32) + self.visit_i32(v.into()) } #[inline] @@ -183,7 +183,7 @@ impl<'de> Visitor<'de> for JavaCodePointVisitor { where E: Error, { - self.visit_i32(v as i32) + self.visit_i32(v.into()) } fn visit_i32(self, v: i32) -> Result @@ -191,7 +191,7 @@ impl<'de> Visitor<'de> for JavaCodePointVisitor { E: Error, { if v < 0 { - Err(Error::invalid_value(Unexpected::Signed(v as i64), &self)) + Err(Error::invalid_value(Unexpected::Signed(v.into()), &self)) } else { self.visit_u32(v as u32) } @@ -213,7 +213,7 @@ impl<'de> Visitor<'de> for JavaCodePointVisitor { where E: Error, { - self.visit_u32(v as u32) + self.visit_u32(v.into()) } #[inline] @@ -221,7 +221,7 @@ impl<'de> Visitor<'de> for JavaCodePointVisitor { where E: Error, { - self.visit_u32(v as u32) + self.visit_u32(v.into()) } fn visit_u32(self, v: u32) -> Result @@ -229,17 +229,16 @@ impl<'de> Visitor<'de> for JavaCodePointVisitor { E: Error, { JavaCodePoint::from_u32(v) - .ok_or_else(|| Error::invalid_value(Unexpected::Unsigned(v as u64), &self)) + .ok_or_else(|| Error::invalid_value(Unexpected::Unsigned(v.into()), &self)) } fn visit_u64(self, v: u64) -> Result where E: Error, { - if v > u32::MAX as u64 { - Err(Error::invalid_value(Unexpected::Unsigned(v), &self)) - } else { - self.visit_u32(v as u32) + match u32::try_from(v) { + Ok(v) => self.visit_u32(v), + Err(_) => Err(Error::invalid_value(Unexpected::Unsigned(v), &self)), } } diff --git a/crates/java_string/src/slice.rs b/crates/java_string/src/slice.rs index d5dfd17ca..5c5998446 100644 --- a/crates/java_string/src/slice.rs +++ b/crates/java_string/src/slice.rs @@ -23,12 +23,13 @@ use crate::{ SplitInclusive, SplitN, SplitTerminator, SplitWhitespace, Utf8Error, }; -#[repr(transparent)] #[derive(PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] pub struct JavaStr { inner: [u8], } +#[allow(clippy::multiple_inherent_impl)] impl JavaStr { /// Converts `v` to a `&JavaStr` if it is fully-valid UTF-8, i.e. UTF-8 /// without surrogate code points. See [`std::str::from_utf8`]. @@ -194,7 +195,7 @@ impl JavaStr { /// assert_eq!(result, "Hello 🦀 World!"); /// /// let s = JavaString::from("Hello ") - /// + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str() + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() /// + JavaStr::from_str(" World!"); /// let result = s.as_str_lossy(); /// assert!(matches!(result, Cow::Owned(_))); @@ -367,7 +368,7 @@ impl JavaStr { /// /// assert_eq!(s.find('L'), Some(0)); /// assert_eq!(s.find('é'), Some(14)); - /// assert_eq!(s.find("pard"), Some(17)); + /// assert_eq!(s.find("par"), Some(17)); /// /// let x: &[_] = &['1', '2']; /// assert_eq!(s.find(x), None); @@ -735,7 +736,7 @@ impl JavaStr { /// /// assert_eq!(s.rfind('L'), Some(13)); /// assert_eq!(s.rfind('é'), Some(14)); - /// assert_eq!(s.rfind("pard"), Some(24)); + /// assert_eq!(s.rfind("par"), Some(24)); /// /// let x: &[_] = &['1', '2']; /// assert_eq!(s.rfind(x), None); @@ -1422,10 +1423,10 @@ impl JavaStr { /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase()); /// /// let s = JavaString::from("Hello ") - /// + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str() + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() /// + JavaStr::from_str(" World!"); /// let expected = JavaString::from("hello ") - /// + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str() + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() /// + JavaStr::from_str(" world!"); /// assert_eq!(expected, s.to_lowercase()); /// ``` @@ -1435,7 +1436,7 @@ impl JavaStr { self.transform_string(str::to_lowercase, |ch| ch) } - /// See [str::to_uppercase]. + /// See [`str::to_uppercase`]. /// /// ``` /// # use java_string::{JavaCodePoint, JavaStr, JavaString}; @@ -1446,10 +1447,10 @@ impl JavaStr { /// assert_eq!("TSCHÜSS", s.to_uppercase()); /// /// let s = JavaString::from("Hello ") - /// + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str() + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() /// + JavaStr::from_str(" World!"); /// let expected = JavaString::from("HELLO ") - /// + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str() + /// + JavaString::from(JavaCodePoint::from_u32(0xD800).unwrap()).as_java_str() /// + JavaStr::from_str(" WORLD!"); /// assert_eq!(expected, s.to_uppercase()); /// ``` @@ -1459,21 +1460,21 @@ impl JavaStr { self.transform_string(str::to_uppercase, |ch| ch) } - /// See [str::trim]. + /// See [`str::trim`]. #[inline] #[must_use] pub fn trim(&self) -> &JavaStr { self.trim_matches(|c: JavaCodePoint| c.is_whitespace()) } - /// See [str::trim_end]. + /// See [`str::trim_end`]. #[inline] #[must_use] pub fn trim_end(&self) -> &JavaStr { self.trim_end_matches(|c: JavaCodePoint| c.is_whitespace()) } - /// See [str::trim_end_matches]. + /// See [`str::trim_end_matches`]. /// /// ``` /// # use java_string::{JavaCodePoint, JavaStr}; @@ -1509,7 +1510,7 @@ impl JavaStr { str } - /// See [str::trim_matches]. + /// See [`str::trim_matches`]. /// /// ``` /// # use java_string::{JavaCodePoint, JavaStr}; @@ -1549,14 +1550,14 @@ impl JavaStr { str } - /// See [str::trim_start]. + /// See [`str::trim_start`]. #[inline] #[must_use] pub fn trim_start(&self) -> &JavaStr { self.trim_start_matches(|c: JavaCodePoint| c.is_whitespace()) } - /// See [str::trim_start_matches]. + /// See [`str::trim_start_matches`]. /// /// ``` /// # use java_string::{JavaCodePoint, JavaStr}; @@ -1746,8 +1747,8 @@ impl Debug for JavaStr { if esc.len() != 1 || c.as_char().is_none() { unsafe { // SAFETY: any invalid UTF-8 should have been caught by a previous iteration - f.write_str(self[from..i].as_str_unchecked())?; - } + f.write_str(self[from..i].as_str_unchecked())? + }; for c in esc { f.write_char(c)?; } @@ -1756,8 +1757,8 @@ impl Debug for JavaStr { } unsafe { // SAFETY: any invalid UTF-8 should have been caught by the loop above - f.write_str(self[from..].as_str_unchecked())?; - } + f.write_str(self[from..].as_str_unchecked())? + }; f.write_char('"') } } @@ -1854,7 +1855,7 @@ impl Hash for JavaStr { #[inline] fn hash(&self, state: &mut H) { state.write(self.as_bytes()); - state.write_u8(0xff); + state.write_u8(0xFF); } } @@ -1987,35 +1988,35 @@ impl PartialEq for str { impl<'a> PartialEq for &'a str { #[inline] fn eq(&self, other: &JavaStr) -> bool { - *self == other + self.as_bytes() == &other.inner } } impl PartialEq for JavaStr { #[inline] fn eq(&self, other: &str) -> bool { - self == JavaStr::from_str(other) + &self.inner == other.as_bytes() } } impl<'a> PartialEq<&'a str> for JavaStr { #[inline] fn eq(&self, other: &&'a str) -> bool { - self == *other + &self.inner == other.as_bytes() } } impl<'a> PartialEq for &'a JavaStr { #[inline] fn eq(&self, other: &JavaStr) -> bool { - *self == other + self.inner == other.inner } } impl<'a> PartialEq<&'a JavaStr> for JavaStr { #[inline] fn eq(&self, other: &&'a JavaStr) -> bool { - self == *other + self.inner == other.inner } } @@ -2067,20 +2068,14 @@ pub unsafe trait JavaStrSliceIndex: private_slice_index::Sealed + Sized { #[inline] fn get(self, slice: &JavaStr) -> Option<&JavaStr> { - if self.check_bounds(slice) { - Some(unsafe { &*self.get_unchecked(slice) }) - } else { - None - } + self.check_bounds(slice) + .then(|| unsafe { &*self.get_unchecked(slice) }) } #[inline] fn get_mut(self, slice: &mut JavaStr) -> Option<&mut JavaStr> { - if self.check_bounds(slice) { - Some(unsafe { &mut *self.get_unchecked_mut(slice) }) - } else { - None - } + self.check_bounds(slice) + .then(|| unsafe { &mut *self.get_unchecked_mut(slice) }) } #[inline] diff --git a/crates/java_string/src/validations.rs b/crates/java_string/src/validations.rs index 102783f55..a3518dd01 100644 --- a/crates/java_string/src/validations.rs +++ b/crates/java_string/src/validations.rs @@ -10,7 +10,7 @@ pub(crate) const CONT_MASK: u8 = 0b0011_1111; #[inline] const fn utf8_first_byte(byte: u8, width: u32) -> u32 { - (byte & (0x7f >> width)) as u32 + (byte & (0x7F >> width)) as u32 } #[inline] @@ -31,7 +31,7 @@ pub(crate) unsafe fn next_code_point<'a, I: Iterator>(bytes: &mut // Decode UTF-8 let x = *bytes.next()?; if x < 128 { - return Some(x as u32); + return Some(x.into()); } // Multibyte case follows @@ -42,15 +42,15 @@ pub(crate) unsafe fn next_code_point<'a, I: Iterator>(bytes: &mut // so the iterator must produce a value here. let y = unsafe { *bytes.next().unwrap_unchecked() }; let mut ch = utf8_acc_cont_byte(init, y); - if x >= 0xe0 { + if x >= 0xE0 { // [[x y z] w] case // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid // SAFETY: `bytes` produces an UTF-8-like string, // so the iterator must produce a value here. let z = unsafe { *bytes.next().unwrap_unchecked() }; - let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z); + let y_z = utf8_acc_cont_byte((y & CONT_MASK).into(), z); ch = init << 12 | y_z; - if x >= 0xf0 { + if x >= 0xF0 { // [x y z w] case // use only the lower 3 bits of `init` // SAFETY: `bytes` produces an UTF-8-like string, @@ -72,7 +72,7 @@ pub(crate) unsafe fn next_code_point_reverse<'a, I: DoubleEndedIterator Option { // Decode UTF-8 let w = match *bytes.next_back()? { - next_byte if next_byte < 128 => return Some(next_byte as u32), + next_byte if next_byte < 128 => return Some(next_byte.into()), back_byte => back_byte, }; @@ -167,7 +167,7 @@ pub(crate) fn run_utf8_semi_validation(v: &[u8]) -> Result<(), Utf8Error> { } 3 => { match (first, next!()) { - (0xe0, 0xa0..=0xbf) | (0xe1..=0xef, 0x80..=0xbf) => {} /* INCLUDING surrogate codepoints here */ + (0xE0, 0xA0..=0xBF) | (0xE1..=0xEF, 0x80..=0xBF) => {} /* INCLUDING surrogate codepoints here */ _ => err!(Some(1)), } if next!() as i8 >= -64 { @@ -176,7 +176,7 @@ pub(crate) fn run_utf8_semi_validation(v: &[u8]) -> Result<(), Utf8Error> { } 4 => { match (first, next!()) { - (0xf0, 0x90..=0xbf) | (0xf1..=0xf3, 0x80..=0xbf) | (0xf4, 0x80..=0x8f) => {} + (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {} _ => err!(Some(1)), } if next!() as i8 >= -64 { @@ -232,7 +232,7 @@ pub(crate) const fn run_utf8_full_validation_from_semi(v: &[u8]) -> Result<(), U // followed by a >=A0 byte. let mut index = 0; while index + 3 <= v.len() { - if v[index] == 0xed && v[index + 1] >= 0xa0 { + if v[index] == 0xED && v[index + 1] >= 0xA0 { return Err(Utf8Error { valid_up_to: index, error_len: Some(1), @@ -258,7 +258,7 @@ pub(crate) const fn utf8_char_width(first_byte: u8) -> usize { 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; - UTF8_CHAR_WIDTH[first_byte as usize] as _ + UTF8_CHAR_WIDTH[first_byte as usize] as usize } #[inline] @@ -284,11 +284,7 @@ pub(crate) fn slice_error_fail(s: &JavaStr, begin: usize, end: usize) -> ! { // 2. begin <= end assert!( begin <= end, - "begin <= end ({} <= {}) when slicing `{}`{}", - begin, - end, - s_trunc, - ellipsis + "begin <= end ({begin} <= {end}) when slicing `{s_trunc}`{ellipsis}", ); // 3. character boundary @@ -303,8 +299,8 @@ pub(crate) fn slice_error_fail(s: &JavaStr, begin: usize, end: usize) -> ! { let ch = s[char_start..].chars().next().unwrap(); let char_range = char_start..char_start + ch.len_utf8(); panic!( - "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}", - index, ch, char_range, s_trunc, ellipsis + "byte index {index} is not a char boundary; it is inside {ch:?} (bytes {char_range:?}) of \ + `{s_trunc}`{ellipsis}", ); }