Skip to content

Commit

Permalink
Add a Format parameter to MultiFieldsULE, add Index8 format (unicode-…
Browse files Browse the repository at this point in the history
…org#5522)

<!--
Thank you for your pull request to ICU4X!

Reminder: try to use [Conventional
Comments](https://conventionalcomments.org/) to make comments clearer.

Please see
https://github.com/unicode-org/icu4x/blob/main/CONTRIBUTING.md for
general
information on contributing to ICU4X.
-->
  • Loading branch information
Manishearth authored Sep 10, 2024
1 parent 18b44b9 commit e266337
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 10 deletions.
6 changes: 4 additions & 2 deletions utils/zerovec/derive/src/make_varule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,8 @@ impl<'a> UnsizedFields<'a> {

quote!(
let lengths = [#(#lengths),*];
let mut multi = zerovec::ule::MultiFieldsULE::new_from_lengths_partially_initialized(&lengths, #out);
// Todo: index type should be settable by attribute
let mut multi = zerovec::ule::MultiFieldsULE::<zerovec::vecs::Index32>::new_from_lengths_partially_initialized(&lengths, #out);
unsafe {
#(#writers;)*
}
Expand All @@ -539,7 +540,8 @@ impl<'a> UnsizedFields<'a> {
for field in self.fields.iter() {
lengths.push(field.encode_func(quote!(encode_var_ule_len), quote!()));
}
quote!(zerovec::ule::MultiFieldsULE::compute_encoded_len_for(&[#(#lengths),*]))
// Todo: index type should be settable by attribute
quote!(zerovec::ule::MultiFieldsULE::<zerovec::vecs::Index32>::compute_encoded_len_for(&[#(#lengths),*]))
}
}

Expand Down
2 changes: 1 addition & 1 deletion utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ pub mod vecs {
#[doc(no_inline)]
pub use crate::varzerovec::{VarZeroSlice, VarZeroVec};

pub use crate::varzerovec::{Index16, Index32, VarZeroVecFormat, VarZeroVecOwned};
pub use crate::varzerovec::{Index16, Index32, Index8, VarZeroVecFormat, VarZeroVecOwned};

pub use crate::flexzerovec::{FlexZeroSlice, FlexZeroVec, FlexZeroVecOwned};
}
Expand Down
5 changes: 3 additions & 2 deletions utils/zerovec/src/ule/multi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use super::*;
use crate::varzerovec::Index32;
use crate::vecs::VarZeroVecFormat;
use crate::VarZeroSlice;
use core::mem;

Expand All @@ -17,9 +18,9 @@ use core::mem;
/// Internally, it is represented by a VarZeroSlice.
#[derive(PartialEq, Eq, Debug)]
#[repr(transparent)]
pub struct MultiFieldsULE(VarZeroSlice<[u8], Index32>);
pub struct MultiFieldsULE<Format: VarZeroVecFormat = Index32>(VarZeroSlice<[u8], Format>);

impl MultiFieldsULE {
impl<Format: VarZeroVecFormat> MultiFieldsULE<Format> {
/// Compute the amount of bytes needed to support elements with lengths `lengths`
#[inline]
pub fn compute_encoded_len_for(lengths: &[usize]) -> usize {
Expand Down
35 changes: 35 additions & 0 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ pub(super) const MAX_INDEX: usize = u32::MAX as usize;
/// and all of its associated items are hidden from the docs.
#[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period
pub unsafe trait VarZeroVecFormat: 'static + Sized {
/// The error to show when unable to construct a vec
#[doc(hidden)]
const TOO_LARGE_ERROR: &'static str;
#[doc(hidden)]
const INDEX_WIDTH: usize;
#[doc(hidden)]
Expand All @@ -50,6 +53,14 @@ pub unsafe trait VarZeroVecFormat: 'static + Sized {
fn rawbytes_from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self::RawBytes];
}

/// This is a [`VarZeroVecFormat`] that stores u8s in the index array.
///
/// Each index entry is a single byte, so no index may exceed `u8::MAX`.
/// Will have a smaller data size, but it's *extremely* likely for larger arrays
/// to be unrepresentable (and error on construction). Should probably be used
/// for known-small arrays, where all but the last field are known-small.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // marker type: a field-less unit struct
pub struct Index8;

/// This is a [`VarZeroVecFormat`] that stores u16s in the index array.
/// Will have a smaller data size, but it's more likely for larger arrays
/// to be unrepresentable (and error on construction)
Expand All @@ -66,7 +77,29 @@ pub struct Index16;
#[allow(clippy::exhaustive_structs)] // marker
pub struct Index32;

// Index format in which every index entry is one plain `u8`.
unsafe impl VarZeroVecFormat for Index8 {
    /// A raw index entry is just a byte; no unaligned wrapper type is involved.
    type RawBytes = u8;

    /// Number of bytes occupied by one index entry.
    const INDEX_WIDTH: usize = 1;
    /// Largest index value this format can represent.
    const MAX_VALUE: u32 = u8::MAX as u32;
    /// Message reported when the elements do not fit in this format.
    const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
        cumulatively are larger than a u8 in size";

    /// Widens a stored index byte to a `usize`; this conversion is lossless.
    #[inline]
    fn rawbytes_to_usize(byte: Self::RawBytes) -> usize {
        usize::from(byte)
    }

    /// Narrows `index` to the one-byte stored representation.
    #[inline]
    fn usize_to_rawbytes(index: usize) -> Self::RawBytes {
        // `as` keeps only the low 8 bits of `index`.
        // NOTE(review): presumably callers bound the value by `MAX_VALUE` first — confirm.
        index as u8
    }

    /// Reinterprets a byte buffer as index entries; since an entry is a `u8`,
    /// the buffer is handed back unchanged.
    #[inline]
    fn rawbytes_from_byte_slice_unchecked_mut(buffer: &mut [u8]) -> &mut [Self::RawBytes] {
        buffer
    }
}

unsafe impl VarZeroVecFormat for Index16 {
const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
cumulatively are larger than a u16 in size";
const INDEX_WIDTH: usize = 2;
const MAX_VALUE: u32 = u16::MAX as u32;
type RawBytes = RawBytesULE<2>;
Expand All @@ -85,6 +118,8 @@ unsafe impl VarZeroVecFormat for Index16 {
}

unsafe impl VarZeroVecFormat for Index32 {
const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \
cumulatively are larger than a u32 in size";
const INDEX_WIDTH: usize = 4;
const MAX_VALUE: u32 = u32::MAX;
type RawBytes = RawBytesULE<4>;
Expand Down
2 changes: 1 addition & 1 deletion utils/zerovec/src/varzerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub use crate::{VarZeroSlice, VarZeroVec};
#[doc(hidden)]
pub use components::VarZeroVecComponents;

pub use components::{Index16, Index32, VarZeroVecFormat};
pub use components::{Index16, Index32, Index8, VarZeroVecFormat};

pub use owned::VarZeroVecOwned;

Expand Down
5 changes: 1 addition & 4 deletions utils/zerovec/src/varzerovec/owned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
marker: PhantomData,
// TODO(#1410): Rethink length errors in VZV.
entire_slice: components::get_serializable_bytes_non_empty::<T, A, F>(elements)
.ok_or(
"Attempted to build VarZeroVec out of elements that \
cumulatively are larger than a u32 in size",
)?,
.ok_or(F::TOO_LARGE_ERROR)?,
}
})
}
Expand Down

0 comments on commit e266337

Please sign in to comment.