diff --git a/daft/daft.pyi b/daft/daft.pyi index bbb27200a9..a87403a3b2 100644 --- a/daft/daft.pyi +++ b/daft/daft.pyi @@ -801,6 +801,8 @@ class PyDataType: @staticmethod def date() -> PyDataType: ... @staticmethod + def time(time_unit: PyTimeUnit) -> PyDataType: ... + @staticmethod def timestamp(time_unit: PyTimeUnit, timezone: str | None = None) -> PyDataType: ... @staticmethod def duration(time_unit: PyTimeUnit) -> PyDataType: ... @@ -935,6 +937,7 @@ def eq(expr1: PyExpr, expr2: PyExpr) -> bool: ... def col(name: str) -> PyExpr: ... def lit(item: Any) -> PyExpr: ... def date_lit(item: int) -> PyExpr: ... +def time_lit(item: int, tu: PyTimeUnit) -> PyExpr: ... def timestamp_lit(item: int, tu: PyTimeUnit, tz: str | None) -> PyExpr: ... def series_lit(item: PySeries) -> PyExpr: ... def udf(func: Callable, expressions: list[PyExpr], return_dtype: PyDataType) -> PyExpr: ... diff --git a/daft/datatype.py b/daft/datatype.py index d4e1456399..06b5443a89 100644 --- a/daft/datatype.py +++ b/daft/datatype.py @@ -193,6 +193,13 @@ def date(cls) -> DataType: """Create a Date DataType: A date with a year, month and day""" return cls._from_pydatatype(PyDataType.date()) + @classmethod + def time(cls, timeunit: TimeUnit | str) -> DataType: + """Time DataType. Supported timeunits are "us", "ns".""" + if isinstance(timeunit, str): + timeunit = TimeUnit.from_str(timeunit) + return cls._from_pydatatype(PyDataType.time(timeunit._timeunit)) + @classmethod def timestamp(cls, timeunit: TimeUnit | str, timezone: str | None = None) -> DataType: """Timestamp DataType.""" @@ -359,6 +366,9 @@ def from_arrow_type(cls, arrow_type: pa.lib.DataType) -> DataType: return cls.decimal128(arrow_type.precision, arrow_type.scale) elif pa.types.is_date32(arrow_type): return cls.date() + elif pa.types.is_time64(arrow_type): + timeunit = TimeUnit.from_str(pa.type_for_alias(str(arrow_type)).unit) + return cls.time(timeunit) elif pa.types.is_timestamp(arrow_type): timeunit = TimeUnit.from_str(arrow_type.unit) return cls.timestamp(timeunit=timeunit, timezone=arrow_type.tz) diff --git a/daft/expressions/expressions.py b/daft/expressions/expressions.py index d994a86357..b2ca62815a 100644 --- a/daft/expressions/expressions.py +++ b/daft/expressions/expressions.py @@ -3,7 +3,7 @@ import builtins import os import sys -from datetime import date, datetime +from datetime import date, datetime, time from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, TypeVar, overload import pyarrow as pa @@ -15,6 +15,7 @@ from daft.daft import date_lit as _date_lit from daft.daft import lit as _lit from daft.daft import series_lit as _series_lit +from daft.daft import time_lit as _time_lit from daft.daft import timestamp_lit as _timestamp_lit from daft.daft import udf as _udf from daft.datatype import DataType, TimeUnit @@ -80,6 +81,12 @@ def lit(value: object) -> Expression: # pyo3 date (PyDate) is not available when running in abi3 mode, workaround epoch_time = value - date(1970, 1, 1) lit_value = _date_lit(epoch_time.days) + elif isinstance(value, time): + # pyo3 time (PyTime) is not available when running in abi3 mode, workaround + pa_time = pa.scalar(value) + i64_value = pa_time.cast(pa.int64()).as_py() + time_unit = TimeUnit.from_str(pa.type_for_alias(str(pa_time.type)).unit)._timeunit + lit_value = _time_lit(i64_value, time_unit) elif isinstance(value, Series): lit_value = _series_lit(value._series) else: diff --git a/src/daft-core/src/array/growable/logical_growable.rs b/src/daft-core/src/array/growable/logical_growable.rs index c62345851b..5f0770789b 100644 --- a/src/daft-core/src/array/growable/logical_growable.rs +++ b/src/daft-core/src/array/growable/logical_growable.rs @@ -6,7 +6,7 @@ use crate::{ datatypes::{ logical::LogicalArray, DaftDataType, DaftLogicalType, DateType, Decimal128Type, DurationType, EmbeddingType, Field, FixedShapeImageType, FixedShapeTensorType, ImageType, - TensorType, TimestampType, + TensorType, TimeType, TimestampType, }, DataType, IntoSeries, Series, }; @@ -77,6 +77,7 @@ macro_rules! impl_logical_growable { impl_logical_growable!(LogicalTimestampGrowable, TimestampType); impl_logical_growable!(LogicalDurationGrowable, DurationType); impl_logical_growable!(LogicalDateGrowable, DateType); +impl_logical_growable!(LogicalTimeGrowable, TimeType); impl_logical_growable!(LogicalEmbeddingGrowable, EmbeddingType); impl_logical_growable!(LogicalFixedShapeImageGrowable, FixedShapeImageType); impl_logical_growable!(LogicalFixedShapeTensorGrowable, FixedShapeTensorType); diff --git a/src/daft-core/src/array/growable/mod.rs b/src/daft-core/src/array/growable/mod.rs index f745809e42..b0de488352 100644 --- a/src/daft-core/src/array/growable/mod.rs +++ b/src/daft-core/src/array/growable/mod.rs @@ -5,7 +5,7 @@ use crate::{ datatypes::{ logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray, + FixedShapeTensorArray, ImageArray, TensorArray, TimeArray, TimestampArray, }, BinaryArray, BooleanArray, ExtensionArray, Float32Array, Float64Array, Int128Array, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, UInt16Array, UInt32Array, @@ -192,6 +192,7 @@ impl_growable_array!( ); impl_growable_array!(DurationArray, logical_growable::LogicalDurationGrowable<'a>); impl_growable_array!(DateArray, logical_growable::LogicalDateGrowable<'a>); +impl_growable_array!(TimeArray, logical_growable::LogicalTimeGrowable<'a>); impl_growable_array!( EmbeddingArray, logical_growable::LogicalEmbeddingGrowable<'a> diff --git a/src/daft-core/src/array/ops/as_arrow.rs b/src/daft-core/src/array/ops/as_arrow.rs index 62950bb5e4..f9d2ef566b 100644 --- a/src/daft-core/src/array/ops/as_arrow.rs +++ b/src/daft-core/src/array/ops/as_arrow.rs @@ -4,7 +4,7 @@ use arrow2::array; use crate::{ array::DataArray, datatypes::{ - logical::{DateArray, Decimal128Array, DurationArray, TimestampArray}, + logical::{DateArray, Decimal128Array, DurationArray, TimeArray, TimestampArray}, BinaryArray, BooleanArray, DaftNumericType, NullArray, Utf8Array, }, }; @@ -65,5 +65,6 @@ impl_asarrow_dataarray!(PythonArray, PseudoArrowArray); impl_asarrow_logicalarray!(Decimal128Array, array::PrimitiveArray); impl_asarrow_logicalarray!(DateArray, array::PrimitiveArray); +impl_asarrow_logicalarray!(TimeArray, array::PrimitiveArray); impl_asarrow_logicalarray!(DurationArray, array::PrimitiveArray); impl_asarrow_logicalarray!(TimestampArray, array::PrimitiveArray); diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index 482f2ab168..417cf77874 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -4,20 +4,20 @@ use super::as_arrow::AsArrow; use crate::{ array::{ growable::make_growable, - ops::image::ImageArraySidecarData, - ops::{from_arrow::FromArrow, full::FullNull}, + ops::{from_arrow::FromArrow, full::FullNull, image::ImageArraySidecarData}, DataArray, FixedSizeListArray, ListArray, StructArray, }, datatypes::{ logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, ImageArray, LogicalArray, LogicalArrayImpl, TensorArray, - TimestampArray, + TimeArray, TimestampArray, }, DaftArrowBackedType, DaftLogicalType, DataType, Field, ImageMode, Int64Array, TimeUnit, UInt64Array, Utf8Array, }, series::{IntoSeries, Series}, + utils::display_table::display_time64, with_match_daft_logical_primitive_types, }; use common_error::{DaftError, DaftResult}; @@ -72,7 +72,7 @@ where use DataType::*; let source_arrow_array = match source_dtype { // Wrapped primitives - Decimal128(..) | Date | Timestamp(..) | Duration(..) => { + Decimal128(..) | Date | Timestamp(..) | Duration(..) | Time(..) => { with_match_daft_logical_primitive_types!(source_dtype, |$T| { use arrow2::array::Array; to_cast @@ -115,7 +115,7 @@ where let target_physical_type = dtype.to_physical().to_arrow()?; match dtype { // Primitive wrapper types: change the arrow2 array's type field to primitive - Decimal128(..) | Date | Timestamp(..) | Duration(..) => { + Decimal128(..) | Date | Timestamp(..) | Duration(..) | Time(..) => { with_match_daft_logical_primitive_types!(dtype, |$P| { use arrow2::array::Array; result_arrow_array @@ -363,6 +363,7 @@ impl TimestampArray { match dtype { DataType::Timestamp(..) => arrow_logical_cast(self, dtype), DataType::Date => Ok(self.date()?.into_series()), + DataType::Time(tu) => Ok(self.time(tu)?.into_series()), DataType::Utf8 => { let DataType::Timestamp(unit, timezone) = self.data_type() else { panic!("Wrong dtype for TimestampArray: {}", self.data_type()) @@ -407,6 +408,35 @@ impl TimestampArray { } } +impl TimeArray { + pub fn cast(&self, dtype: &DataType) -> DaftResult { + match dtype { + DataType::Time(..) => arrow_logical_cast(self, dtype), + DataType::Utf8 => { + let time_array = self.as_arrow(); + let time_str: arrow2::array::Utf8Array = time_array + .iter() + .map(|val| { + val.map(|val| { + let DataType::Time(unit) = &self.field.dtype else { + panic!("Wrong dtype for TimeArray: {}", self.field.dtype) + }; + display_time64(*val, unit) + }) + }) + .collect(); + Ok(Utf8Array::from((self.name(), Box::new(time_str))).into_series()) + } + DataType::Int64 => Ok(self.physical.clone().into_series()), + DataType::Float32 => self.cast(&DataType::Int64)?.cast(&DataType::Float32), + DataType::Float64 => self.cast(&DataType::Int64)?.cast(&DataType::Float64), + #[cfg(feature = "python")] + DataType::Python => cast_logical_to_python_array(self, dtype), + _ => arrow_cast(&self.physical, dtype), + } + } +} + impl DurationArray { pub fn cast(&self, dtype: &DataType) -> DaftResult { match dtype { diff --git a/src/daft-core/src/array/ops/date.rs b/src/daft-core/src/array/ops/date.rs index af1e896049..8bba15ee91 100644 --- a/src/daft-core/src/array/ops/date.rs +++ b/src/daft-core/src/array/ops/date.rs @@ -1,12 +1,12 @@ use crate::{ datatypes::{ - logical::{DateArray, TimestampArray}, - Field, Int32Array, UInt32Array, + logical::{DateArray, TimeArray, TimestampArray}, + Field, Int32Array, Int64Array, TimeUnit, UInt32Array, }, DataType, }; use arrow2::compute::arithmetics::ArraySub; -use chrono::{NaiveDate, Timelike}; +use chrono::{NaiveDate, NaiveTime, Timelike}; use common_error::{DaftError, DaftResult}; use super::as_arrow::AsArrow; @@ -108,6 +108,59 @@ impl TimestampArray { )) } + pub fn time(&self, timeunit_for_cast: &TimeUnit) -> DaftResult { + let physical = self.physical.as_arrow(); + let DataType::Timestamp(timeunit, tz) = self.data_type() else { + unreachable!("Timestamp array must have Timestamp datatype") + }; + let tu = timeunit.to_arrow(); + if !matches!( + timeunit_for_cast, + TimeUnit::Microseconds | TimeUnit::Nanoseconds + ) { + return Err(DaftError::ValueError(format!("Only microseconds and nanoseconds time units are supported for the Time dtype, but got {timeunit_for_cast}"))); + } + let time_arrow = match tz { + Some(tz) => match arrow2::temporal_conversions::parse_offset(tz) { + Ok(tz) => Ok(arrow2::array::PrimitiveArray::::from_iter( + physical.iter().map(|ts| { + ts.map(|ts| { + let dt = + arrow2::temporal_conversions::timestamp_to_datetime(*ts, tu, &tz); + let time_delta = dt.time() - NaiveTime::from_hms_opt(0,0,0).unwrap(); + match timeunit_for_cast { + TimeUnit::Microseconds => time_delta.num_microseconds().unwrap(), + TimeUnit::Nanoseconds => time_delta.num_nanoseconds().unwrap(), + _ => unreachable!("Only microseconds and nanoseconds time units are supported for the Time dtype, but got {timeunit_for_cast}"), + } + }) + }), + )), + Err(e) => Err(DaftError::TypeError(format!( + "Cannot parse timezone in Timestamp datatype: {}, error: {}", + tz, e + ))), + }, + None => Ok(arrow2::array::PrimitiveArray::::from_iter( + physical.iter().map(|ts| { + ts.map(|ts| { + let dt = arrow2::temporal_conversions::timestamp_to_naive_datetime(*ts, tu); + let time_delta = dt.time() - NaiveTime::from_hms_opt(0,0,0).unwrap(); + match timeunit_for_cast { + TimeUnit::Microseconds => time_delta.num_microseconds().unwrap(), + TimeUnit::Nanoseconds => time_delta.num_nanoseconds().unwrap(), + _ => unreachable!("Only microseconds and nanoseconds time units are supported for the Time dtype, but got {timeunit_for_cast}"), + } + }) + }), + )), + }?; + Ok(TimeArray::new( + Field::new(self.name(), DataType::Time(*timeunit_for_cast)), + Int64Array::from((self.name(), Box::new(time_arrow))), + )) + } + pub fn hour(&self) -> DaftResult { let physical = self.physical.as_arrow(); let DataType::Timestamp(timeunit, tz) = self.data_type() else { diff --git a/src/daft-core/src/array/ops/get.rs b/src/daft-core/src/array/ops/get.rs index a0f534da07..6f972325ef 100644 --- a/src/daft-core/src/array/ops/get.rs +++ b/src/daft-core/src/array/ops/get.rs @@ -1,7 +1,9 @@ use crate::{ array::{DataArray, FixedSizeListArray, ListArray}, datatypes::{ - logical::{DateArray, Decimal128Array, DurationArray, LogicalArrayImpl, TimestampArray}, + logical::{ + DateArray, Decimal128Array, DurationArray, LogicalArrayImpl, TimeArray, TimestampArray, + }, BinaryArray, BooleanArray, DaftLogicalType, DaftNumericType, ExtensionArray, NullArray, Utf8Array, }, @@ -66,6 +68,7 @@ impl_array_arrow_get!(BooleanArray, bool); impl_array_arrow_get!(BinaryArray, &[u8]); impl_array_arrow_get!(Decimal128Array, i128); impl_array_arrow_get!(DateArray, i32); +impl_array_arrow_get!(TimeArray, i64); impl_array_arrow_get!(DurationArray, i64); impl_array_arrow_get!(TimestampArray, i64); diff --git a/src/daft-core/src/array/ops/hash.rs b/src/daft-core/src/array/ops/hash.rs index 370c4208e6..f1a84529f3 100644 --- a/src/daft-core/src/array/ops/hash.rs +++ b/src/daft-core/src/array/ops/hash.rs @@ -1,7 +1,7 @@ use crate::{ array::DataArray, datatypes::{ - logical::{DateArray, Decimal128Array, TimestampArray}, + logical::{DateArray, Decimal128Array, TimeArray, TimestampArray}, BinaryArray, BooleanArray, DaftNumericType, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, Utf8Array, }, @@ -154,6 +154,15 @@ impl DateArray { } } +impl TimeArray { + pub fn murmur3_32(&self) -> DaftResult { + let us = self.cast(&crate::DataType::Time( + crate::datatypes::TimeUnit::Microseconds, + ))?; + us.time()?.physical.murmur3_32() + } +} + impl TimestampArray { pub fn murmur3_32(&self) -> DaftResult { let us = self.cast(&crate::DataType::Timestamp( diff --git a/src/daft-core/src/array/ops/repr.rs b/src/daft-core/src/array/ops/repr.rs index 2e50379473..e0d8c959d6 100644 --- a/src/daft-core/src/array/ops/repr.rs +++ b/src/daft-core/src/array/ops/repr.rs @@ -5,12 +5,12 @@ use crate::{ datatypes::{ logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray, + FixedShapeTensorArray, ImageArray, TensorArray, TimeArray, TimestampArray, }, BinaryArray, BooleanArray, DaftNumericType, ExtensionArray, ImageFormat, NullArray, UInt64Array, Utf8Array, }, - utils::display_table::{display_date32, display_timestamp}, + utils::display_table::{display_date32, display_time64, display_timestamp}, with_match_daft_types, DataType, Series, }; use common_error::DaftResult; @@ -149,6 +149,21 @@ impl DateArray { } } +impl TimeArray { + pub fn str_value(&self, idx: usize) -> DaftResult { + let res = self.get(idx).map_or_else( + || "None".to_string(), + |val| -> String { + let DataType::Time(unit) = &self.field.dtype else { + panic!("Wrong dtype for TimeArray: {}", self.field.dtype) + }; + display_time64(val, unit) + }, + ); + Ok(res) + } +} + impl TimestampArray { pub fn str_value(&self, idx: usize) -> DaftResult { let res = self.get(idx).map_or_else( @@ -328,6 +343,7 @@ impl_array_html_value!(StructArray); impl_array_html_value!(ExtensionArray); impl_array_html_value!(Decimal128Array); impl_array_html_value!(DateArray); +impl_array_html_value!(TimeArray); impl_array_html_value!(DurationArray); impl_array_html_value!(TimestampArray); impl_array_html_value!(EmbeddingArray); diff --git a/src/daft-core/src/array/ops/sort.rs b/src/daft-core/src/array/ops/sort.rs index f677037e62..f1813c7a4c 100644 --- a/src/daft-core/src/array/ops/sort.rs +++ b/src/daft-core/src/array/ops/sort.rs @@ -3,7 +3,7 @@ use crate::{ datatypes::{ logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray, + FixedShapeTensorArray, ImageArray, TensorArray, TimeArray, TimestampArray, }, BinaryArray, BooleanArray, DaftIntegerType, DaftNumericType, ExtensionArray, Float32Array, Float64Array, NullArray, Utf8Array, @@ -607,6 +607,13 @@ impl DateArray { } } +impl TimeArray { + pub fn sort(&self, descending: bool) -> DaftResult { + let new_array = self.physical.sort(descending)?; + Ok(Self::new(self.field.clone(), new_array)) + } +} + impl DurationArray { pub fn sort(&self, descending: bool) -> DaftResult { let new_array = self.physical.sort(descending)?; diff --git a/src/daft-core/src/array/ops/take.rs b/src/daft-core/src/array/ops/take.rs index de1b59b48b..e393d4342e 100644 --- a/src/daft-core/src/array/ops/take.rs +++ b/src/daft-core/src/array/ops/take.rs @@ -6,7 +6,7 @@ use crate::{ datatypes::{ logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray, + FixedShapeTensorArray, ImageArray, TensorArray, TimeArray, TimestampArray, }, BinaryArray, BooleanArray, DaftIntegerType, DaftNumericType, ExtensionArray, NullArray, Utf8Array, @@ -70,6 +70,7 @@ impl_dataarray_take!(NullArray); impl_dataarray_take!(ExtensionArray); impl_logicalarray_take!(Decimal128Array); impl_logicalarray_take!(DateArray); +impl_logicalarray_take!(TimeArray); impl_logicalarray_take!(DurationArray); impl_logicalarray_take!(TimestampArray); impl_logicalarray_take!(EmbeddingArray); diff --git a/src/daft-core/src/datatypes/dtype.rs b/src/daft-core/src/datatypes/dtype.rs index 37cdfd016a..65dbfe2b50 100644 --- a/src/daft-core/src/datatypes/dtype.rs +++ b/src/daft-core/src/datatypes/dtype.rs @@ -340,6 +340,7 @@ impl DataType { self, DataType::Decimal128(..) | DataType::Date + | DataType::Time(..) | DataType::Timestamp(..) | DataType::Duration(..) | DataType::Embedding(..) diff --git a/src/daft-core/src/datatypes/logical.rs b/src/daft-core/src/datatypes/logical.rs index 75c9d80bb2..ec00c609b7 100644 --- a/src/daft-core/src/datatypes/logical.rs +++ b/src/daft-core/src/datatypes/logical.rs @@ -9,7 +9,7 @@ use common_error::DaftResult; use super::{ DaftArrayType, DaftDataType, DataArray, DataType, Decimal128Type, DurationType, EmbeddingType, - FixedShapeImageType, FixedShapeTensorType, FixedSizeListArray, ImageType, TensorType, + FixedShapeImageType, FixedShapeTensorType, FixedSizeListArray, ImageType, TensorType, TimeType, TimestampType, }; @@ -94,7 +94,7 @@ impl LogicalArrayImpl> { use crate::datatypes::DataType::*; match daft_type { // For wrapped primitive types, switch the datatype label on the arrow2 Array. - Decimal128(..) | Date | Timestamp(..) | Duration(..) => { + Decimal128(..) | Date | Timestamp(..) | Duration(..) | Time(..) => { with_match_daft_logical_primitive_types!(daft_type, |$P| { use arrow2::array::Array; physical_arrow_array @@ -148,6 +148,7 @@ pub type LogicalArray = LogicalArrayImpl::PhysicalType as DaftDataType>::ArrayType>; pub type Decimal128Array = LogicalArray; pub type DateArray = LogicalArray; +pub type TimeArray = LogicalArray; pub type DurationArray = LogicalArray; pub type ImageArray = LogicalArray; pub type TimestampArray = LogicalArray; diff --git a/src/daft-core/src/datatypes/matching.rs b/src/daft-core/src/datatypes/matching.rs index 68a7ba0040..42778f8e85 100644 --- a/src/daft-core/src/datatypes/matching.rs +++ b/src/daft-core/src/datatypes/matching.rs @@ -23,6 +23,7 @@ macro_rules! with_match_daft_types {( Float64 => __with_ty__! { Float64Type }, Timestamp(_, _) => __with_ty__! { TimestampType }, Date => __with_ty__! { DateType }, + Time(_) => __with_ty__! { TimeType }, Duration(_) => __with_ty__! { DurationType }, Binary => __with_ty__! { BinaryType }, Utf8 => __with_ty__! { Utf8Type }, @@ -38,7 +39,6 @@ macro_rules! with_match_daft_types {( Tensor(..) => __with_ty__! { TensorType }, FixedShapeTensor(..) => __with_ty__! { FixedShapeTensorType }, Decimal128(..) => __with_ty__! { Decimal128Type }, - Time(_) => unimplemented!("Array for Time DataType not implemented"), // Float16 => unimplemented!("Array for Float16 DataType not implemented"), Unknown => unimplemented!("Array for Unknown DataType not implemented"), @@ -218,6 +218,7 @@ macro_rules! with_match_daft_logical_primitive_types {( Decimal128(..) => __with_ty__! { i128 }, Duration(..) => __with_ty__! { i64 }, Date => __with_ty__! { i32 }, + Time(..) => __with_ty__! { i64 }, Timestamp(..) => __with_ty__! { i64 }, _ => panic!("no logical -> primitive conversion available for {:?}", $key_type) } diff --git a/src/daft-core/src/datatypes/mod.rs b/src/daft-core/src/datatypes/mod.rs index efa9a34f09..1b91b499ca 100644 --- a/src/daft-core/src/datatypes/mod.rs +++ b/src/daft-core/src/datatypes/mod.rs @@ -174,7 +174,7 @@ impl_nested_datatype!(ListType, ListArray); impl_daft_logical_data_array_datatype!(Decimal128Type, Unknown, Int128Type); impl_daft_logical_data_array_datatype!(TimestampType, Unknown, Int64Type); impl_daft_logical_data_array_datatype!(DateType, Date, Int32Type); -// impl_daft_logical_data_array_datatype!(TimeType, Unknown, Int64Type); +impl_daft_logical_data_array_datatype!(TimeType, Unknown, Int64Type); impl_daft_logical_data_array_datatype!(DurationType, Unknown, Int64Type); impl_daft_logical_data_array_datatype!(ImageType, Unknown, StructType); impl_daft_logical_data_array_datatype!(TensorType, Unknown, StructType); diff --git a/src/daft-core/src/python/datatype.rs b/src/daft-core/src/python/datatype.rs index 09973227d9..aacdd647ad 100644 --- a/src/daft-core/src/python/datatype.rs +++ b/src/daft-core/src/python/datatype.rs @@ -160,6 +160,20 @@ impl PyDataType { Ok(DataType::Date.into()) } + #[staticmethod] + pub fn time(timeunit: PyTimeUnit) -> PyResult { + if !matches!( + timeunit.timeunit, + TimeUnit::Microseconds | TimeUnit::Nanoseconds + ) { + return Err(PyValueError::new_err(format!( + "The time unit for time types must be microseconds or nanoseconds, but got: {}", + timeunit.timeunit + ))); + } + Ok(DataType::Time(timeunit.timeunit).into()) + } + #[staticmethod] pub fn timestamp(timeunit: PyTimeUnit, timezone: Option) -> PyResult { Ok(DataType::Timestamp(timeunit.timeunit, timezone).into()) diff --git a/src/daft-core/src/series/array_impl/binary_ops.rs b/src/daft-core/src/series/array_impl/binary_ops.rs index b2a652d529..5fd46e106a 100644 --- a/src/daft-core/src/series/array_impl/binary_ops.rs +++ b/src/daft-core/src/series/array_impl/binary_ops.rs @@ -14,7 +14,7 @@ use crate::{ use crate::datatypes::logical::{ DateArray, DurationArray, EmbeddingArray, FixedShapeImageArray, FixedShapeTensorArray, - ImageArray, TensorArray, TimestampArray, + ImageArray, TensorArray, TimeArray, TimestampArray, }; use crate::datatypes::{ BinaryArray, BooleanArray, ExtensionArray, Float32Array, Float64Array, Int16Array, Int32Array, @@ -229,6 +229,7 @@ impl SeriesBinaryOps for ArrayWrapper {} impl SeriesBinaryOps for ArrayWrapper {} impl SeriesBinaryOps for ArrayWrapper {} impl SeriesBinaryOps for ArrayWrapper {} +impl SeriesBinaryOps for ArrayWrapper {} impl SeriesBinaryOps for ArrayWrapper { fn add(&self, rhs: &Series) -> DaftResult { use DataType::*; diff --git a/src/daft-core/src/series/array_impl/logical_array.rs b/src/daft-core/src/series/array_impl/logical_array.rs index 51a2ecd39d..8f4e82aab9 100644 --- a/src/daft-core/src/series/array_impl/logical_array.rs +++ b/src/daft-core/src/series/array_impl/logical_array.rs @@ -1,6 +1,6 @@ use crate::datatypes::logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, LogicalArray, TensorArray, TimestampArray, + FixedShapeTensorArray, ImageArray, LogicalArray, TensorArray, TimeArray, TimestampArray, }; use crate::datatypes::{BooleanArray, DaftLogicalType, Field}; @@ -225,6 +225,7 @@ macro_rules! impl_series_like_for_logical_array { impl_series_like_for_logical_array!(Decimal128Array); impl_series_like_for_logical_array!(DateArray); +impl_series_like_for_logical_array!(TimeArray); impl_series_like_for_logical_array!(DurationArray); impl_series_like_for_logical_array!(TimestampArray); impl_series_like_for_logical_array!(ImageArray); diff --git a/src/daft-core/src/series/ops/downcast.rs b/src/daft-core/src/series/ops/downcast.rs index 8d8469fe06..a9c5780fee 100644 --- a/src/daft-core/src/series/ops/downcast.rs +++ b/src/daft-core/src/series/ops/downcast.rs @@ -1,5 +1,7 @@ use crate::array::{FixedSizeListArray, ListArray, StructArray}; -use crate::datatypes::logical::{DateArray, Decimal128Array, FixedShapeImageArray, TimestampArray}; +use crate::datatypes::logical::{ + DateArray, Decimal128Array, FixedShapeImageArray, TimeArray, TimestampArray, +}; use crate::datatypes::*; use crate::series::array_impl::ArrayWrapper; use crate::series::Series; @@ -99,6 +101,10 @@ impl Series { self.downcast() } + pub fn time(&self) -> DaftResult<&TimeArray> { + self.downcast() + } + pub fn timestamp(&self) -> DaftResult<&TimestampArray> { self.downcast() } diff --git a/src/daft-core/src/series/ops/hash.rs b/src/daft-core/src/series/ops/hash.rs index 80359f06e5..f5196eeae4 100644 --- a/src/daft-core/src/series/ops/hash.rs +++ b/src/daft-core/src/series/ops/hash.rs @@ -28,6 +28,7 @@ impl Series { Utf8 => self.utf8()?.murmur3_32(), Binary => self.binary()?.murmur3_32(), Date => self.date()?.murmur3_32(), + Time(..) => self.time()?.murmur3_32(), Timestamp(..) => self.timestamp()?.murmur3_32(), Decimal128(..) => self.decimal128()?.murmur3_32(), v => panic!("murmur3 hash not implemented for datatype: {v}"), diff --git a/src/daft-core/src/series/serdes.rs b/src/daft-core/src/series/serdes.rs index 8d9370a7cf..6503a6ebae 100644 --- a/src/daft-core/src/series/serdes.rs +++ b/src/daft-core/src/series/serdes.rs @@ -10,7 +10,7 @@ use crate::{ }, datatypes::logical::{ DateArray, Decimal128Array, DurationArray, EmbeddingArray, FixedShapeImageArray, - FixedShapeTensorArray, ImageArray, TensorArray, TimestampArray, + FixedShapeTensorArray, ImageArray, TensorArray, TimeArray, TimestampArray, }, with_match_daft_types, DataType, IntoSeries, Series, }; @@ -227,7 +227,14 @@ impl<'d> serde::Deserialize<'d> for Series { .into_series(), ) } - Time(..) => panic!("Time Deserialization not implemented"), + Time(..) => { + type PType = <::PhysicalType as DaftDataType>::ArrayType; + let physical = map.next_value::()?; + Ok( + TimeArray::new(field, physical.downcast::().unwrap().clone()) + .into_series(), + ) + } Duration(..) => { type PType = <::PhysicalType as DaftDataType>::ArrayType; let physical = map.next_value::()?; diff --git a/src/daft-core/src/utils/display_table.rs b/src/daft-core/src/utils/display_table.rs index 5bb2a31482..ec63a0deda 100644 --- a/src/daft-core/src/utils/display_table.rs +++ b/src/daft-core/src/utils/display_table.rs @@ -15,6 +15,23 @@ pub fn display_date32(val: i32) -> String { format!("{date}") } +pub fn display_time64(val: i64, unit: &TimeUnit) -> String { + let time = match unit { + TimeUnit::Nanoseconds => chrono::NaiveTime::from_num_seconds_from_midnight_opt( + (val / 1_000_000_000) as u32, + (val % 1_000_000_000) as u32, + ) + .unwrap(), + TimeUnit::Microseconds => chrono::NaiveTime::from_num_seconds_from_midnight_opt( + (val / 1_000_000) as u32, + ((val % 1_000_000) * 1_000) as u32, + ) + .unwrap(), + _ => panic!("Unsupported time unit for time64: {unit}"), + }; + format!("{time}") +} + pub fn display_timestamp(val: i64, unit: &TimeUnit, timezone: &Option) -> String { use crate::array::ops::cast::{ timestamp_to_str_naive, timestamp_to_str_offset, timestamp_to_str_tz, diff --git a/src/daft-dsl/src/lib.rs b/src/daft-dsl/src/lib.rs index 78696a72e1..ea6cafc875 100644 --- a/src/daft-dsl/src/lib.rs +++ b/src/daft-dsl/src/lib.rs @@ -24,6 +24,7 @@ pub fn register_modules(_py: Python, parent: &PyModule) -> PyResult<()> { parent.add_wrapped(wrap_pyfunction!(python::col))?; parent.add_wrapped(wrap_pyfunction!(python::lit))?; parent.add_wrapped(wrap_pyfunction!(python::date_lit))?; + parent.add_wrapped(wrap_pyfunction!(python::time_lit))?; parent.add_wrapped(wrap_pyfunction!(python::timestamp_lit))?; parent.add_wrapped(wrap_pyfunction!(python::series_lit))?; parent.add_wrapped(wrap_pyfunction!(python::udf))?; diff --git a/src/daft-dsl/src/lit.rs b/src/daft-dsl/src/lit.rs index ec8923cff9..c0fc08ee21 100644 --- a/src/daft-dsl/src/lit.rs +++ b/src/daft-dsl/src/lit.rs @@ -1,5 +1,7 @@ use crate::expr::Expr; +use daft_core::datatypes::logical::TimeArray; +use daft_core::utils::display_table::display_time64; use daft_core::utils::hashable_float_wrapper::FloatWrapper; use daft_core::{array::ops::full::FullNull, datatypes::DataType}; use daft_core::{ @@ -55,6 +57,8 @@ pub enum LiteralValue { /// An [`i32`] representing the elapsed time since UNIX epoch (1970-01-01) /// in days. Date(i32), + /// An [`i64`] representing a time in microseconds or nanoseconds since midnight. + Time(i64, TimeUnit), /// A 64-bit floating point number. Float64(f64), /// A list @@ -81,6 +85,10 @@ impl Hash for LiteralValue { Int64(n) => n.hash(state), UInt64(n) => n.hash(state), Date(n) => n.hash(state), + Time(n, tu) => { + n.hash(state); + tu.hash(state); + } Timestamp(n, tu, tz) => { n.hash(state); tu.hash(state); @@ -115,6 +123,7 @@ impl Display for LiteralValue { Int64(val) => write!(f, "{val}"), UInt64(val) => write!(f, "{val}"), Date(val) => write!(f, "{}", display_date32(*val)), + Time(val, tu) => write!(f, "{}", display_time64(*val, tu)), Timestamp(val, tu, tz) => write!(f, "{}", display_timestamp(*val, tu, tz)), Float64(val) => write!(f, "{val:.1}"), Series(series) => write!(f, "{}", display_series_literal(series)), @@ -145,6 +154,7 @@ impl LiteralValue { Int64(_) => DataType::Int64, UInt64(_) => DataType::UInt64, Date(_) => DataType::Date, + Time(_, tu) => DataType::Time(*tu), Timestamp(_, tu, tz) => DataType::Timestamp(*tu, tz.clone()), Float64(_) => DataType::Float64, Series(series) => series.data_type().clone(), @@ -170,6 +180,10 @@ impl LiteralValue { let physical = Int32Array::from(("literal", [*val].as_slice())); DateArray::new(Field::new("literal", self.get_type()), physical).into_series() } + Time(val, ..) => { + let physical = Int64Array::from(("literal", [*val].as_slice())); + TimeArray::new(Field::new("literal", self.get_type()), physical).into_series() + } Timestamp(val, ..) => { let physical = Int64Array::from(("literal", [*val].as_slice())); TimestampArray::new(Field::new("literal", self.get_type()), physical).into_series() diff --git a/src/daft-dsl/src/python.rs b/src/daft-dsl/src/python.rs index 4dc8eeb74c..e5c26b7bed 100644 --- a/src/daft-dsl/src/python.rs +++ b/src/daft-dsl/src/python.rs @@ -34,6 +34,12 @@ pub fn date_lit(item: i32) -> PyResult { Ok(expr.into()) } +#[pyfunction] +pub fn time_lit(item: i64, tu: PyTimeUnit) -> PyResult { + let expr = Expr::Literal(LiteralValue::Time(item, tu.timeunit)); + Ok(expr.into()) +} + #[pyfunction] pub fn timestamp_lit(val: i64, tu: PyTimeUnit, tz: Option) -> PyResult { let expr = Expr::Literal(LiteralValue::Timestamp(val, tu.timeunit, tz)); diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index 84102ef625..92c80df665 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1,7 +1,7 @@ from __future__ import annotations import copy -from datetime import date, datetime +from datetime import date, datetime, time import pytest import pytz @@ -26,6 +26,7 @@ (None, DataType.null()), (Series.from_pylist([1, 2, 3]), DataType.int64()), (date(2023, 1, 1), DataType.date()), + (time(1, 2, 3, 4), DataType.time(timeunit=TimeUnit.from_str("us"))), (datetime(2023, 1, 1), DataType.timestamp(timeunit=TimeUnit.from_str("us"))), (datetime(2022, 1, 1, tzinfo=pytz.utc), DataType.timestamp(timeunit=TimeUnit.from_str("us"), timezone="UTC")), ], diff --git a/tests/expressions/typing/conftest.py b/tests/expressions/typing/conftest.py index 5c13c7e378..89afb366e6 100644 --- a/tests/expressions/typing/conftest.py +++ b/tests/expressions/typing/conftest.py @@ -226,6 +226,7 @@ def has_supertype(dt1: DataType, dt2: DataType) -> bool: # --- Across type hierarchies --- date_and_numeric = x == DataType.date() and is_numeric(y) + time_and_numeric = x == (DataType.time("us") or DataType.time("ns")) and is_numeric(y) timestamp_and_big_numeric = x._is_temporal_type() and is_numeric_bitwidth_gte_32(y) if ( @@ -234,6 +235,7 @@ def has_supertype(dt1: DataType, dt2: DataType) -> bool: or both_numeric or both_temporal or date_and_numeric + or time_and_numeric or timestamp_and_big_numeric ): return True diff --git a/tests/io/test_csv_roundtrip.py b/tests/io/test_csv_roundtrip.py index 1508376653..2043e30a1b 100644 --- a/tests/io/test_csv_roundtrip.py +++ b/tests/io/test_csv_roundtrip.py @@ -31,6 +31,18 @@ DataType.float64(), ), ([datetime.date(1994, 1, 1), datetime.date(1995, 1, 1), None], pa.date32(), DataType.date(), DataType.date()), + ( + [datetime.time(1, 2, 3, 4), datetime.time(5, 6, 7, 8), None], + pa.time64("us"), + DataType.time(TimeUnit.us()), + DataType.time(TimeUnit.us()), + ), + ( + [datetime.time(1, 2, 3, 4), datetime.time(5, 6, 7, 8), None], + pa.time64("ns"), + DataType.time(TimeUnit.ns()), + DataType.time(TimeUnit.us()), + ), ( [datetime.datetime(1994, 1, 1), datetime.datetime(1995, 1, 1), None], pa.timestamp("ms"), diff --git a/tests/io/test_parquet_roundtrip.py b/tests/io/test_parquet_roundtrip.py index 46fb92a661..79050f3fcc 100644 --- a/tests/io/test_parquet_roundtrip.py +++ b/tests/io/test_parquet_roundtrip.py @@ -27,6 +27,16 @@ ([None, None, None], pa.null(), DataType.null()), ([decimal.Decimal("1.23"), decimal.Decimal("1.24"), None], pa.decimal128(16, 8), DataType.decimal128(16, 8)), ([datetime.date(1994, 1, 1), datetime.date(1995, 1, 1), None], pa.date32(), DataType.date()), + ( + [datetime.time(12, 1, 22, 4), datetime.time(13, 8, 45, 34), None], + pa.time64("us"), + DataType.time(TimeUnit.us()), + ), + ( + [datetime.time(12, 1, 22, 4), datetime.time(13, 8, 45, 34), None], + pa.time64("ns"), + DataType.time(TimeUnit.ns()), + ), ( [datetime.datetime(1994, 1, 1), datetime.datetime(1995, 1, 1), None], pa.timestamp("ms"), diff --git a/tests/series/test_cast.py b/tests/series/test_cast.py index 9367ba131c..eb664e0118 100644 --- a/tests/series/test_cast.py +++ b/tests/series/test_cast.py @@ -773,3 +773,21 @@ def test_cast_date_to_timestamp(): back = casted.dt.date() assert (input == back).to_pylist() == [True] + + +@pytest.mark.parametrize("timeunit", ["us", "ns"]) +def test_cast_timestamp_to_time(timeunit): + from datetime import datetime, time + + input = Series.from_pylist([datetime(2022, 1, 6, 12, 34, 56, 78)]) + casted = input.cast(DataType.time(timeunit)) + assert casted.to_pylist() == [time(12, 34, 56, 78)] + + +@pytest.mark.parametrize("timeunit", ["s", "ms"]) +def test_cast_timestamp_to_time_unsupported_timeunit(timeunit): + from datetime import datetime + + input = Series.from_pylist([datetime(2022, 1, 6, 12, 34, 56, 78)]) + with pytest.raises(ValueError): + input.cast(DataType.time(timeunit)) diff --git a/tests/series/test_hash.py b/tests/series/test_hash.py index f2e3f8951a..444c69fb22 100644 --- a/tests/series/test_hash.py +++ b/tests/series/test_hash.py @@ -1,7 +1,7 @@ from __future__ import annotations import decimal -from datetime import date, datetime +from datetime import date, datetime, time import numpy as np import pytest @@ -176,6 +176,21 @@ def test_murmur3_32_hash_date(): assert hashes.to_pylist() == [-653330422, None] +def test_murmur3_32_hash_time(): + arr = Series.from_pylist([time(22, 31, 8, 0), None]) + assert arr.datatype() == DataType.time("us") + hashes = arr.murmur3_32() + assert hashes.to_pylist() == [-662762989, None] + + +def test_murmur3_32_hash_time_nanoseconds(): + arr = Series.from_pylist([time(22, 31, 8, 0), None]) + arr = arr.cast(DataType.time("ns")) + assert arr.datatype() == DataType.time("ns") + hashes = arr.murmur3_32() + assert hashes.to_pylist() == [-662762989, None] + + def test_murmur3_32_hash_timestamp(): arr = Series.from_pylist([datetime(2017, 11, 16, 22, 31, 8), None]) hashes = arr.murmur3_32() diff --git a/tests/series/test_size_bytes.py b/tests/series/test_size_bytes.py index 8bbd2ff883..3fe42b8662 100644 --- a/tests/series/test_size_bytes.py +++ b/tests/series/test_size_bytes.py @@ -88,6 +88,25 @@ def test_series_date_size_bytes(size, with_nulls) -> None: assert s.size_bytes() == get_total_buffer_size(data) +@pytest.mark.parametrize("size", [0, 1, 2, 8, 9, 16]) +@pytest.mark.parametrize("with_nulls", [True, False]) +@pytest.mark.parametrize("precision", ["us", "ns"]) +def test_series_time_size_bytes(size, with_nulls, precision) -> None: + from datetime import time + + pydata = [time(i, i, i, i) for i in range(size)] + + if with_nulls and size > 0: + data = pa.array(pydata[:-1] + [None], pa.time64(precision)) + else: + data = pa.array(pydata, pa.time64(precision)) + + s = Series.from_arrow(data) + + assert s.datatype() == DataType.time(precision) + assert s.size_bytes() == get_total_buffer_size(data) + + @pytest.mark.parametrize("size", [0, 1, 2, 8, 9, 16]) @pytest.mark.parametrize("with_nulls", [True, False]) def test_series_binary_size_bytes(size, with_nulls) -> None: diff --git a/tests/series/test_sort.py b/tests/series/test_sort.py index d9c67892ea..790aa4a1d1 100644 --- a/tests/series/test_sort.py +++ b/tests/series/test_sort.py @@ -112,6 +112,47 @@ def date_maker(d): assert taken.to_pylist() == sorted_order[::-1] +@pytest.mark.parametrize("timeunit", ["us", "ns"]) +def test_series_time_sorting(timeunit) -> None: + from datetime import time + + def time_maker(h, m, s, us): + if us is None: + return None + return time(h, m, s, us) + + times = list(map(time_maker, [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [5, 4, 1, None, 2, None])) + s = Series.from_pylist(times) + sorted_order = list( + map(time_maker, [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [1, 2, 4, 5, None, None]) + ) + s = s.cast(DataType.time(timeunit)) + s_sorted = s.sort() + assert len(s_sorted) == len(s) + assert s_sorted.datatype() == s.datatype() + assert s_sorted.to_pylist() == sorted_order + + s_argsorted = s.argsort() + assert len(s_argsorted) == len(s) + + taken = s.take(s_argsorted) + assert len(taken) == len(s) + assert taken.to_pylist() == sorted_order + + ## Descending + s_sorted = s.sort(descending=True) + assert len(s_sorted) == len(s) + assert s_sorted.datatype() == s.datatype() + assert s_sorted.to_pylist() == sorted_order[::-1] + + s_argsorted = s.argsort(descending=True) + assert len(s_argsorted) == len(s) + + taken = s.take(s_argsorted) + assert len(taken) == len(s) + assert taken.to_pylist() == sorted_order[::-1] + + def test_series_string_sorting() -> None: data = pa.array(["hi", "bye", "thai", None, "2", None, "h", "by"]) sorted_order = ["2", "by", "bye", "h", "hi", "thai", None, None] diff --git a/tests/series/test_take.py b/tests/series/test_take.py index b86015847f..ea68ff515e 100644 --- a/tests/series/test_take.py +++ b/tests/series/test_take.py @@ -45,6 +45,23 @@ def date_maker(d): assert taken.to_pylist() == days[::-1] +@pytest.mark.parametrize("time_unit", ["us", "ns"]) +def test_series_time_take(time_unit) -> None: + from datetime import time + + def time_maker(h, m, s, us): + if us is None: + return None + return time(h, m, s, us) + + times = list(map(time_maker, [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [5, 4, 1, None, 2, None])) + s = Series.from_pylist(times) + s = s.cast(DataType.time(time_unit)) + taken = s.take(Series.from_pylist([5, 4, 3, 2, 1, 0])) + assert taken.datatype() == DataType.time(time_unit) + assert taken.to_pylist() == times[::-1] + + def test_series_binary_take() -> None: data = pa.array([b"1", b"2", b"3", None, b"5", None]) diff --git a/tests/table/test_from_py.py b/tests/table/test_from_py.py index 8f7cf68c99..ec89cd87be 100644 --- a/tests/table/test_from_py.py +++ b/tests/table/test_from_py.py @@ -24,6 +24,7 @@ "str": ["foo", "bar"], "binary": [b"foo", b"bar"], "date": [datetime.date.today(), datetime.date.today()], + "time": [datetime.time(1, 2, 3, 4), datetime.time(5, 6, 7, 8)], "list": [[1, 2], [3]], "struct": [{"a": 1, "b": 2.0}, {"b": 3.0}], "empty_struct": [{}, {}], @@ -41,6 +42,7 @@ "str": DataType.string(), "binary": DataType.binary(), "date": DataType.date(), + "time": DataType.time(TimeUnit.us()), "list": DataType.list(DataType.int64()), "struct": DataType.struct({"a": DataType.int64(), "b": DataType.float64()}), "empty_struct": DataType.struct({"": DataType.null()}), @@ -65,6 +67,7 @@ "str": pa.large_string(), "binary": pa.large_binary(), "date": pa.date32(), + "time": pa.time64("us"), "list": pa.large_list(pa.int64()), "struct": pa.struct({"a": pa.int64(), "b": pa.float64()}), "empty_struct": pa.struct({}), @@ -91,6 +94,8 @@ "binary": pa.array(PYTHON_TYPE_ARRAYS["binary"], pa.binary()), "boolean": pa.array(PYTHON_TYPE_ARRAYS["bool"], pa.bool_()), "date32": pa.array(PYTHON_TYPE_ARRAYS["date"], pa.date32()), + "time64_microseconds": pa.array(PYTHON_TYPE_ARRAYS["time"], pa.time64("us")), + "time64_nanoseconds": pa.array(PYTHON_TYPE_ARRAYS["time"], pa.time64("ns")), "list": pa.array(PYTHON_TYPE_ARRAYS["list"], pa.list_(pa.int64())), "fixed_size_list": pa.array([[1, 2], [3, 4]], pa.list_(pa.int64(), 2)), "struct": pa.array(PYTHON_TYPE_ARRAYS["struct"], pa.struct([("a", pa.int64()), ("b", pa.float64())])), @@ -140,6 +145,8 @@ "binary": pa.large_binary(), "boolean": pa.bool_(), "date32": pa.date32(), + "time64_microseconds": pa.time64("us"), + "time64_nanoseconds": pa.time64("ns"), "list": pa.large_list(pa.int64()), "fixed_size_list": pa.list_(pa.int64(), 2), "struct": pa.struct([("a", pa.int64()), ("b", pa.float64())]),