From 52b520969372cc408b92037d143c4fb209278d20 Mon Sep 17 00:00:00 2001
From: Sammy Sidhu
Date: Tue, 19 Dec 2023 21:08:15 -0800
Subject: [PATCH] [BUG] Concat Fix when Variable Length Array is sliced (#1750)

---
 src/daft-core/src/array/ops/concat.rs |  3 ++-
 tests/series/test_concat.py           | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/daft-core/src/array/ops/concat.rs b/src/daft-core/src/array/ops/concat.rs
index 58906ae2a4..17461c5eb3 100644
--- a/src/daft-core/src/array/ops/concat.rs
+++ b/src/daft-core/src/array/ops/concat.rs
@@ -38,7 +38,8 @@ macro_rules! impl_variable_length_concat {
                         bitmap.extend_constant(arr.len(), true);
                     }
                 }
-                buffer.extend_from_slice(arr.values().as_slice());
+                let range = (*arr.offsets().first() as usize)..(*arr.offsets().last() as usize);
+                buffer.extend_from_slice(&arr.values().as_slice()[range]);
             }
             let dtype = arrays.first().unwrap().data_type().clone();
             #[allow(unused_unsafe)]
diff --git a/tests/series/test_concat.py b/tests/series/test_concat.py
index 635e15ef72..6599097d5e 100644
--- a/tests/series/test_concat.py
+++ b/tests/series/test_concat.py
@@ -42,6 +42,27 @@ def test_series_concat(dtype, chunks) -> None:
             counter += 1
 
 
+@pytest.mark.parametrize(
+    "dtype, chunks", itertools.product(ARROW_FLOAT_TYPES + ARROW_INT_TYPES + ARROW_STRING_TYPES, [1, 2, 3, 10])
+)
+def test_series_concat_with_slicing(dtype, chunks) -> None:
+    series = []
+    for i in range(chunks):
+        s = Series.from_pylist([i] * 4).cast(dtype=DataType.from_arrow_type(dtype))
+        series.append(s.slice(0, 2))
+
+    concated = Series.concat(series)
+
+    assert concated.datatype() == DataType.from_arrow_type(dtype)
+    concated_list = concated.to_pylist()
+
+    counter = 0
+    for i in range(chunks):
+        for _ in range(2):
+            assert float(concated_list[counter]) == i
+            counter += 1
+
+
 @pytest.mark.parametrize("fixed", [False, True])
 @pytest.mark.parametrize("chunks", [1, 2, 3, 10])
 def test_series_concat_list_array(chunks, fixed) -> None: