From 40ea873e7d559a95b8a28dea3b02c8f5a3468caa Mon Sep 17 00:00:00 2001 From: Joe Date: Mon, 2 Oct 2023 20:42:10 +0100 Subject: [PATCH] Add docs where there is no analogous function in std --- crates/valence_java_string/src/char.rs | 2 ++ crates/valence_java_string/src/iter.rs | 4 ++-- crates/valence_java_string/src/owned.rs | 8 ++++++++ crates/valence_java_string/src/slice.rs | 12 ++++++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/crates/valence_java_string/src/char.rs b/crates/valence_java_string/src/char.rs index 13130aaf5..466de2e1f 100644 --- a/crates/valence_java_string/src/char.rs +++ b/crates/valence_java_string/src/char.rs @@ -122,6 +122,8 @@ impl JavaCodePoint { } } + /// Encodes this `JavaCodePoint` into semi-valid UTF-8, that is, UTF-8 in + /// which surrogate code points are also allowed. #[inline] pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] { let len = self.len_utf8(); diff --git a/crates/valence_java_string/src/iter.rs b/crates/valence_java_string/src/iter.rs index cebd1993e..5c73a0b6e 100644 --- a/crates/valence_java_string/src/iter.rs +++ b/crates/valence_java_string/src/iter.rs @@ -192,7 +192,7 @@ impl<'a> Iterator for Chars<'a> { #[inline] fn next(&mut self) -> Option { - // SAFETY: `JavaStr` invariant says `self.iter` is a semi-valid UTF-8 string and + // SAFETY: `JavaStr` invariant says `self.inner` is a semi-valid UTF-8 string and // the resulting `ch` is a valid Unicode Scalar Value or surrogate code point. unsafe { next_code_point(&mut self.inner).map(|ch| JavaCodePoint::from_u32_unchecked(ch)) } } @@ -227,7 +227,7 @@ impl Debug for Chars<'_> { impl<'a> DoubleEndedIterator for Chars<'a> { #[inline] fn next_back(&mut self) -> Option { - // SAFETY: `JavaStr` invariant says `self.iter` is a semi-valid UTF-8 string and + // SAFETY: `JavaStr` invariant says `self.inner` is a semi-valid UTF-8 string and // the resulting `ch` is a valid Unicode Scalar Value or surrogate code point. 
unsafe { next_code_point_reverse(&mut self.inner).map(|ch| JavaCodePoint::from_u32_unchecked(ch)) diff --git a/crates/valence_java_string/src/owned.rs b/crates/valence_java_string/src/owned.rs index 83d21a62c..7c332f4e7 100644 --- a/crates/valence_java_string/src/owned.rs +++ b/crates/valence_java_string/src/owned.rs @@ -38,6 +38,8 @@ impl JavaString { } } + /// Converts `vec` to a `JavaString` if it is fully-valid UTF-8, i.e. UTF-8 + /// without surrogate code points. #[inline] pub fn from_full_utf8(vec: Vec) -> Result { match std::str::from_utf8(&vec) { @@ -49,6 +51,8 @@ impl JavaString { } } + /// Converts `vec` to a `JavaString` if it is semi-valid UTF-8, i.e. UTF-8 + /// that may contain surrogate code points. pub fn from_semi_utf8(vec: Vec) -> Result { match run_utf8_semi_validation(&vec) { Ok(..) => Ok(JavaString { vec }), @@ -59,6 +63,8 @@ impl JavaString { } } + /// Converts `v` to a `Cow<JavaStr>`, replacing invalid semi-UTF-8 with the + /// replacement character �. #[must_use] pub fn from_semi_utf8_lossy(v: &[u8]) -> Cow<'_, JavaStr> { const REPLACEMENT: &str = "\u{FFFD}"; @@ -137,6 +143,8 @@ impl JavaString { } } + /// Tries to convert this `JavaString` to a `String`, returning an error if + /// it is not fully-valid UTF-8, i.e. if it contains surrogate code points. pub fn into_string(self) -> Result { run_utf8_full_validation_from_semi(self.as_bytes()).map(|_| unsafe { // SAFETY: validation succeeded diff --git a/crates/valence_java_string/src/slice.rs b/crates/valence_java_string/src/slice.rs index 49a5373c5..5d5d91e7d 100644 --- a/crates/valence_java_string/src/slice.rs +++ b/crates/valence_java_string/src/slice.rs @@ -30,6 +30,8 @@ pub struct JavaStr { } impl JavaStr { + /// Converts `v` to a `&JavaStr` if it is fully-valid UTF-8, i.e. UTF-8 + /// without surrogate code points. 
#[inline] pub const fn from_full_utf8(v: &[u8]) -> Result<&JavaStr, Utf8Error> { match std::str::from_utf8(v) { @@ -38,6 +40,8 @@ impl JavaStr { } } + /// Converts `v` to a `&mut JavaStr` if it is fully-valid UTF-8, i.e. UTF-8 + /// without surrogate code points. #[inline] pub fn from_full_utf8_mut(v: &mut [u8]) -> Result<&mut JavaStr, Utf8Error> { match std::str::from_utf8_mut(v) { @@ -46,6 +50,8 @@ impl JavaStr { } } + /// Converts `v` to a `&JavaStr` if it is semi-valid UTF-8, i.e. UTF-8 + /// that may contain surrogate code points. pub fn from_semi_utf8(v: &[u8]) -> Result<&JavaStr, Utf8Error> { match run_utf8_semi_validation(v) { Ok(()) => Ok(unsafe { JavaStr::from_semi_utf8_unchecked(v) }), @@ -53,6 +59,8 @@ impl JavaStr { } } + /// Converts `v` to a `&mut JavaStr` if it is semi-valid UTF-8, i.e. UTF-8 + /// that may contain surrogate code points. pub fn from_semi_utf8_mut(v: &mut [u8]) -> Result<&mut JavaStr, Utf8Error> { match run_utf8_semi_validation(v) { Ok(()) => Ok(unsafe { JavaStr::from_semi_utf8_unchecked_mut(v) }), @@ -146,6 +154,8 @@ impl JavaStr { self.inner.as_ptr() } + /// Tries to convert this `&JavaStr` to a `&str`, returning an error if + /// it is not fully-valid UTF-8, i.e. if it contains surrogate code points. pub const fn as_str(&self) -> Result<&str, Utf8Error> { // Manual implementation of Option::map since it's not const match run_utf8_full_validation_from_semi(self.as_bytes()) { @@ -167,6 +177,8 @@ impl JavaStr { std::str::from_utf8_unchecked(self.as_bytes()) } + /// Converts this `&JavaStr` to a `Cow<str>`, replacing surrogate code + /// points with the replacement character �. #[must_use] pub fn as_str_lossy(&self) -> Cow<'_, str> { match run_utf8_full_validation_from_semi(self.as_bytes()) {