Skip to content

Commit

Permalink
improve docs
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewgazelka committed Oct 4, 2024
1 parent f42524d commit 321c351
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 56 deletions.
62 changes: 38 additions & 24 deletions src/arrow2/src/array/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ mod iterator;
#[allow(unused)]
pub use iterator::*;


/// An array representing a (key, value), both of arbitrary logical types.
#[derive(Clone)]
pub struct MapArray {
Expand Down Expand Up @@ -202,40 +201,55 @@ impl MapArray {
impl Array for MapArray {
impl_common_array!();

fn convert_logical_type(&self, target: DataType) -> Box<dyn Array> {
let outer_is_map = matches!(target, DataType::Map { .. });

if outer_is_map {
// we can do simple conversion
let mut new = self.to_boxed();
new.change_type(target);
return new;
}
fn convert_logical_type(&self, target_data_type: DataType) -> Box<dyn Array> {
let is_target_map = matches!(target_data_type, DataType::Map { .. });

let DataType::LargeList(target_inner) = &target else {
panic!("MapArray can only be converted to Map or LargeList");
let DataType::Map(current_field, _) = self.data_type() else {
unreachable!(
"Expected MapArray to have Map data type, but found {:?}",
self.data_type()
);
};

let DataType::Map(current_inner, _) = self.data_type() else {
unreachable!("Somehow DataType is not Map for a MapArray");
if is_target_map {
// For Map-to-Map conversions the top-level representation is unchanged (the
// result is still a Map), so we can clone the array and then change its
// logical type in place.
let mut converted_array = self.to_boxed();
converted_array.change_type(target_data_type);
return converted_array;
}

// Otherwise the target must be a LargeList; in that case we rebuild this array as a ListArray with i64 offsets.
let DataType::LargeList(target_field) = &target_data_type else {
panic!("MapArray can only be converted to Map or LargeList, but target type is {target_data_type:?}");
};

let current_inner_physical = current_inner.data_type.to_physical_type();
let target_inner_physical = target_inner.data_type.to_physical_type();

if current_inner_physical != target_inner_physical {
panic!("inner types are not equal");
let current_physical_type = current_field.data_type.to_physical_type();
let target_physical_type = target_field.data_type.to_physical_type();

if current_physical_type != target_physical_type {
panic!(
"Inner physical types must be equal for conversion. Current: {:?}, Target: {:?}",
current_physical_type, target_physical_type
);
}

let mut field = self.field.clone();
field.change_type(target_inner.data_type.clone());
let mut converted_field = self.field.clone();
converted_field.change_type(target_field.data_type.clone());

let offsets = self.offsets().clone();
let offsets = unsafe { offsets.map_unchecked(|offset| offset as i64) };
let original_offsets = self.offsets().clone();
let converted_offsets = unsafe { original_offsets.map_unchecked(|offset| offset as i64) };

let list = ListArray::new(target, offsets, field, self.validity.clone());
let converted_list = ListArray::new(
target_data_type,
converted_offsets,
converted_field,
self.validity.clone(),
);

Box::new(list)
Box::new(converted_list)
}

fn validity(&self) -> Option<&Bitmap> {
Expand Down
73 changes: 41 additions & 32 deletions src/arrow2/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,45 +158,50 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
/// Clone a `&dyn Array` to an owned `Box<dyn Array>`.
fn to_boxed(&self) -> Box<dyn Array>;

/// Overwrites [`Array`]'s type with a different logical type.
/// Changes the logical type of this array in-place.
///
/// This function is useful to assign a different [`DataType`] to the array.
/// Used to change the arrays' logical type (see example). This updates the array
/// in place and does not clone the array.
/// # Example
/// ```rust,ignore
/// use arrow2::array::Int32Array;
/// use arrow2::datatypes::DataType;
/// This method modifies the array's `DataType` without changing its underlying data.
/// It's useful for reinterpreting the logical meaning of the data (e.g., from Int32 to Date32).
///
/// # Arguments
/// * `data_type` - The new [`DataType`] to assign to this array.
///
/// let &mut array = Int32Array::from(&[Some(1), None, Some(2)])
/// array.to(DataType::Date32);
/// assert_eq!(
/// format!("{:?}", array),
/// "Date32[1970-01-02, None, 1970-01-03]"
/// );
/// ```
/// # Panics
/// Panics iff the `data_type`'s [`PhysicalType`] is not equal to array's `PhysicalType`.
/// Panics if the new `data_type`'s [`PhysicalType`] is not equal to the array's current [`PhysicalType`].
///
/// # Example
/// ```
/// # use arrow2::array::{Array, Int32Array};
/// # use arrow2::datatypes::DataType;
/// let mut array = Int32Array::from(&[Some(1), None, Some(2)]);
/// array.change_type(DataType::Date32);
/// assert_eq!(array.data_type(), &DataType::Date32);
/// ```
fn change_type(&mut self, data_type: DataType);

/// Returns a new [`Array`] with a different logical type.
/// Creates a new [`Array`] with a different logical type.
///
/// This function is useful to assign a different [`DataType`] to the array.
/// Used to change the arrays' logical type (see example). Unlike, this clones the array
/// in order to return a new array.
/// # Example
/// ```rust,ignore
/// use arrow2::array::Int32Array;
/// use arrow2::datatypes::DataType;
/// This method returns a new array with the specified `DataType`, leaving the original array unchanged.
/// It's useful for creating a new view of the data with a different logical interpretation.
///
/// # Arguments
/// * `data_type` - The [`DataType`] for the new array.
///
/// # Returns
/// A new `Box<dyn Array>` with the specified `DataType`.
///
/// let array = Int32Array::from(&[Some(1), None, Some(2)]).to(DataType::Date32);
/// assert_eq!(
/// format!("{:?}", array),
/// "Date32[1970-01-02, None, 1970-01-03]"
/// );
/// ```
/// # Panics
/// Panics iff the `data_type`'s [`PhysicalType`] is not equal to array's `PhysicalType`.
/// Panics if the new `data_type`'s [`PhysicalType`] is not equal to the array's current [`PhysicalType`].
///
/// # Example
/// ```
/// # use arrow2::array::{Array, Int32Array};
/// # use arrow2::datatypes::DataType;
/// let array = Int32Array::from(&[Some(1), None, Some(2)]);
/// let new_array = array.convert_logical_type(DataType::Date32);
/// assert_eq!(new_array.data_type(), &DataType::Date32);
/// assert_eq!(array.data_type(), &DataType::Int32); // Original array unchanged
/// ```
fn convert_logical_type(&self, data_type: DataType) -> Box<dyn Array> {
let mut new = self.to_boxed();
new.change_type(data_type);
Expand Down Expand Up @@ -647,7 +652,11 @@ macro_rules! impl_common_array {

fn change_type(&mut self, data_type: DataType) {
if data_type.to_physical_type() != self.data_type().to_physical_type() {
panic!("Cannot change array type from {:?} to {:?}", self.data_type(), data_type);
panic!(
"Cannot change array type from {:?} to {:?}",
self.data_type(),
data_type
);
}

self.data_type = data_type.clone();
Expand Down

0 comments on commit 321c351

Please sign in to comment.