Skip to content

Commit

Permalink
[CHORE] Add tests for decimal casting (#3179)
Browse files Browse the repository at this point in the history
There are no existing tests for decimal casting. This PR adds some
randomized tests.
desmondcheongzx authored Nov 6, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 64e35f8 commit 2829a09
Showing 4 changed files with 177 additions and 1 deletion.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/daft-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@ mur3 = "0.1.0"
ndarray = "0.15.6"
num-traits = {workspace = true}
pyo3 = {workspace = true, optional = true}
rand = "0.8.5"
regex = {workspace = true}
serde = {workspace = true}
sketches-ddsketch = {workspace = true}
153 changes: 153 additions & 0 deletions src/daft-core/src/array/ops/cast.rs
Original file line number Diff line number Diff line change
@@ -2253,3 +2253,156 @@ where
.into_series())
})
}

#[cfg(test)]
mod tests {
use arrow2::array::PrimitiveArray;
use rand::{thread_rng, Rng};

use super::*;
use crate::{
datatypes::DataArray,
prelude::{Decimal128Type, Float64Array},
};

/// Builds a `Decimal128` test array named `"test_decimal"` from raw `i128` values.
///
/// The same `precision` and `scale` are applied to both the backing arrow2 array and the
/// field describing it.
///
/// # Panics
/// Panics if `DataArray::from_arrow` rejects the field/array pair.
fn create_test_decimal_array(
    values: Vec<i128>,
    precision: usize,
    scale: usize,
) -> DataArray<Decimal128Type> {
    let arrow_dtype = arrow2::datatypes::DataType::Decimal(precision, scale);
    let physical = Box::new(PrimitiveArray::from_vec(values).to(arrow_dtype));

    let logical_dtype = DataType::Decimal128(precision, scale);
    let field = Arc::new(Field::new("test_decimal", logical_dtype));

    DataArray::<Decimal128Type>::from_arrow(field, physical)
        .expect("Failed to create test decimal array")
}

/// Builds a `Float64Array` named `"test_float"` holding the given values.
///
/// # Panics
/// Panics if `Float64Array::from_arrow` rejects the field/array pair.
fn create_test_f64_array(values: Vec<f64>) -> Float64Array {
    let physical =
        Box::new(PrimitiveArray::from_vec(values).to(arrow2::datatypes::DataType::Float64));
    let field = Arc::new(Field::new("test_float", DataType::Float64));
    Float64Array::from_arrow(field, physical).expect("Failed to create test float array")
}

/// Builds an `Int64Array` named `"test_int"` holding the given values.
///
/// # Panics
/// Panics if `Int64Array::from_arrow` rejects the field/array pair.
fn create_test_i64_array(values: Vec<i64>) -> Int64Array {
    let physical =
        Box::new(PrimitiveArray::from_vec(values).to(arrow2::datatypes::DataType::Int64));
    let field = Arc::new(Field::new("test_int", DataType::Int64));
    Int64Array::from_arrow(field, physical).expect("Failed to create test int array")
}

// Bounds shared by the randomized decimal casting tests below.
//
// For a Decimal(p, s) to be valid, p, s, and max_val must satisfy:
//   p > ceil(log_9(max_val * 10^s)) - 1
// So with a max_val of 10^10, we get:
//   p > ceil(log_9(10^(10+s))) - 1
// Since the tests never pick p above 32, for this inequality to hold, we need s <= 20.
//
// Random test values are drawn from the half-open range -MAX_VAL..MAX_VAL.
const MAX_VAL: f64 = 1e10;
// Largest scale (inclusive) the tests will pick.
const MAX_SCALE: usize = 20;
// Smallest `precision - scale` gap the tests will pick, i.e. the minimum number of digits
// reserved for the integral component.
const MIN_DIFF_FOR_PRECISION: usize = 12;
/// Round-trips random decimals through a wider `Decimal128` type and back again, and checks
/// that the result equals the original array.
#[test]
fn test_decimal_to_decimal_cast() {
    let mut rng = thread_rng();

    // 100 random samples followed by both signed zeros.
    let values: Vec<f64> = std::iter::repeat_with(|| rng.gen_range(-MAX_VAL..MAX_VAL))
        .take(100)
        .chain([0.0, -0.0])
        .collect();

    let initial_scale: usize = rng.gen_range(0..=MAX_SCALE);
    let initial_precision: usize = rng.gen_range(initial_scale + MIN_DIFF_FOR_PRECISION..=32);
    // Number of integral digits the intermediate type must keep available.
    let min_integral_comp = initial_precision - initial_scale;

    // Shift each float by the scale and truncate it to the decimal's integer representation.
    let scale_factor = 10_f64.powi(initial_scale as i32);
    let i128_values: Vec<i128> = values.iter().map(|&x| (x * scale_factor) as i128).collect();
    let original = create_test_decimal_array(i128_values, initial_precision, initial_scale);

    // We always widen the Decimal, otherwise we lose information and can no longer compare
    // with the original Decimal values.
    let intermediate_scale: usize = rng.gen_range(initial_scale..=32 - min_integral_comp);
    let intermediate_precision: usize =
        rng.gen_range(intermediate_scale + min_integral_comp..=32);

    let intermediate_type = DataType::Decimal128(intermediate_precision, intermediate_scale);
    let initial_type = DataType::Decimal128(initial_precision, initial_scale);

    let widened = original
        .cast(&intermediate_type)
        .expect("Failed to cast to intermediate decimal");
    let result = widened
        .cast(&initial_type)
        .expect("Failed to cast back to original decimal");

    assert!(
        original.into_series() == result,
        "Failed with intermediate decimal({}, {})",
        intermediate_precision,
        intermediate_scale,
    );
}

// We do fuzzy equality when comparing floats converted to and from decimals. This test is
// primarily sanity checking that we don't repeat the mistake of shifting the scale and precision
// of floats during casting, while avoiding flakiness due to small differences in floats.
//
// EPSILON is the per-element absolute tolerance passed to `fuzzy_eq`.
const EPSILON: f64 = 0.1;
/// Casts random decimals to `Float64` and checks that every element is within `EPSILON` of
/// the float it was generated from.
#[test]
fn test_decimal_to_float() {
    let mut rng = thread_rng();

    // 100 random samples followed by both signed zeros.
    let values: Vec<f64> = std::iter::repeat_with(|| rng.gen_range(-MAX_VAL..MAX_VAL))
        .take(100)
        .chain([0.0, -0.0])
        .collect();
    let num_values = values.len();

    let scale: usize = rng.gen_range(0..=MAX_SCALE);
    let precision: usize = rng.gen_range(scale + MIN_DIFF_FOR_PRECISION..=32);

    // Shift each float by the scale and truncate it to the decimal's integer representation.
    let scale_factor = 10_f64.powi(scale as i32);
    let i128_values: Vec<i128> = values.iter().map(|&x| (x * scale_factor) as i128).collect();
    let decimals = create_test_decimal_array(i128_values, precision, scale);

    let result = decimals
        .cast(&DataType::Float64)
        .expect("Failed to cast to float");

    // The source floats are the expected values; one tolerance entry per element.
    let expected = create_test_f64_array(values).into_series();
    let epsilon_series = create_test_f64_array(vec![EPSILON; num_values]).into_series();

    assert!(
        result.fuzzy_eq(&expected, &epsilon_series),
        "Failed with decimal({}, {})",
        precision,
        scale,
    );
}

// 2^63 (~9.22e18) gives us 18 unrestricted digits. So for the Int64 cast test,
// precision - scale has to be <= 18.
const MAX_DIFF_FOR_PRECISION: usize = 18;
/// Casts random decimals to `Int64` and checks that the result equals the source floats
/// converted to `i64` with `as`.
#[test]
fn test_decimal_to_int() {
    let mut rng = thread_rng();

    // 100 random samples followed by both signed zeros.
    let values: Vec<f64> = std::iter::repeat_with(|| rng.gen_range(-MAX_VAL..MAX_VAL))
        .take(100)
        .chain([0.0, -0.0])
        .collect();

    let scale: usize = rng.gen_range(0..=MAX_SCALE);
    let min_precision = scale + MIN_DIFF_FOR_PRECISION;
    let max_precision = scale + MAX_DIFF_FOR_PRECISION;
    let precision: usize = rng.gen_range(min_precision..=max_precision);

    // Shift each float by the scale and truncate it to the decimal's integer representation.
    let scale_factor = 10_f64.powi(scale as i32);
    let i128_values: Vec<i128> = values.iter().map(|&x| (x * scale_factor) as i128).collect();
    let decimals = create_test_decimal_array(i128_values, precision, scale);

    let result = decimals
        .cast(&DataType::Int64)
        .expect("Failed to cast to int64");

    // Convert the original floats directly to integers.
    // NOTE(review): the decimal is built from `x * 10^scale` rounded in f64, while the
    // expectation truncates `x` itself; presumably these always agree for the sampled
    // values -- confirm this cannot flake when `x` sits within rounding distance of an integer.
    let truncated: Vec<i64> = values.iter().map(|&f| f as i64).collect();
    let expected = create_test_i64_array(truncated);

    assert!(
        expected.into_series() == result,
        "Failed with decimal({}, {})",
        precision,
        scale,
    );
}
}
23 changes: 22 additions & 1 deletion src/daft-core/src/series/mod.rs
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ mod ops;
mod serdes;
mod series_like;
mod utils;
use std::sync::Arc;
use std::{ops::Sub, sync::Arc};

pub use array_impl::IntoSeries;
use common_display::table_display::{make_comfy_table, StrValue};
@@ -148,4 +148,25 @@ impl Series {
let data: &DataArray<N::DAFTTYPE> = self.downcast()?;
Ok(data.as_slice())
}

/// Helper function to check that two series of floats are within some `epsilon` of each other.
///
/// Returns `false` when the two series have different data types. For `Float32` and
/// `Float64` series, the absolute difference `|self - other|` is compared against `epsilon`
/// with `lte`, and the result is `true` only if every entry of that comparison is `Some(true)`;
/// a `None` entry, or an error from the comparison itself, yields `false`. Series of any other
/// data type are compared with `==`.
///
/// # Panics
/// Panics if subtracting the series or taking the absolute value of the difference fails.
pub fn fuzzy_eq(&self, other: &Self, epsilon: &Self) -> bool {
    let dtype = self.data_type();
    if dtype != other.data_type() {
        return false;
    }
    // Only floating point series get the tolerance-based comparison.
    if !matches!(dtype, DataType::Float32 | DataType::Float64) {
        return self == other;
    }

    let abs_diff = self
        .sub(other)
        .expect("Failed to subtract one series from the other")
        .abs()
        .expect("Failed to get absolute difference between the two given series");

    match abs_diff.lte(epsilon) {
        Ok(within) => within.into_iter().all(|flag| flag.unwrap_or(false)),
        Err(_) => false,
    }
}
}

0 comments on commit 2829a09

Please sign in to comment.