Skip to content

Commit

Permalink
[FEAT] 1606 - Adding hour expression in date util (#1637)
Browse files Browse the repository at this point in the history
Adds `dt_hour()` Expression to fetch hour of the current day in a
column. Based on, #1606
  • Loading branch information
subygan authored Nov 21, 2023
1 parent f289da1 commit 55a95ab
Show file tree
Hide file tree
Showing 10 changed files with 152 additions and 3 deletions.
2 changes: 2 additions & 0 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ class PyExpr:
def is_nan(self) -> PyExpr: ...
def dt_date(self) -> PyExpr: ...
def dt_day(self) -> PyExpr: ...
def dt_hour(self) -> PyExpr: ...
def dt_month(self) -> PyExpr: ...
def dt_year(self) -> PyExpr: ...
def dt_day_of_week(self) -> PyExpr: ...
Expand Down Expand Up @@ -801,6 +802,7 @@ class PySeries:
def is_nan(self) -> PySeries: ...
def dt_date(self) -> PySeries: ...
def dt_day(self) -> PySeries: ...
def dt_hour(self) -> PySeries: ...
def dt_month(self) -> PySeries: ...
def dt_year(self) -> PySeries: ...
def dt_day_of_week(self) -> PySeries: ...
Expand Down
11 changes: 11 additions & 0 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,17 @@ def day(self) -> Expression:
"""
return Expression._from_pyexpr(self._expr.dt_day())

def hour(self) -> Expression:
"""Retrieves the day for a datetime column
Example:
>>> col("x").dt.day()
Returns:
Expression: a UInt32 expression with just the day extracted from a datetime column
"""
return Expression._from_pyexpr(self._expr.dt_hour())

def month(self) -> Expression:
"""Retrieves the month for a datetime column
Expand Down
3 changes: 3 additions & 0 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,9 @@ def date(self) -> Series:
def day(self) -> Series:
return Series._from_pyseries(self._series.dt_day())

def hour(self) -> Series:
return Series._from_pyseries(self._series.dt_hour())

def month(self) -> Series:
return Series._from_pyseries(self._series.dt_month())

Expand Down
37 changes: 36 additions & 1 deletion src/daft-core/src/array/ops/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{
DataType,
};
use arrow2::compute::arithmetics::ArraySub;
use chrono::NaiveDate;
use chrono::{NaiveDate, Timelike};
use common_error::{DaftError, DaftResult};

use super::as_arrow::AsArrow;
Expand Down Expand Up @@ -107,4 +107,39 @@ impl TimestampArray {
Int32Array::from((self.name(), Box::new(date_arrow))),
))
}

pub fn hour(&self) -> DaftResult<UInt32Array> {
let physical = self.physical.as_arrow();
let DataType::Timestamp(timeunit, tz) = self.data_type() else {
unreachable!("Timestamp array must have Timestamp datatype")
};
let tu = timeunit.to_arrow();
let date_arrow = match tz {
Some(tz) => match arrow2::temporal_conversions::parse_offset(tz) {
Ok(tz) => Ok(arrow2::array::UInt32Array::from_iter(physical.iter().map(
|ts| {
ts.map(|ts| {
arrow2::temporal_conversions::timestamp_to_datetime(*ts, tu, &tz).hour()
})
},
))),
Err(e) => Err(DaftError::TypeError(format!(
"Cannot parse timezone in Timestamp datatype: {}, error: {}",
tz, e
))),
},
None => Ok(arrow2::array::UInt32Array::from_iter(physical.iter().map(
|ts| {
ts.map(|ts| {
arrow2::temporal_conversions::timestamp_to_naive_datetime(*ts, tu).hour()
})
},
))),
}?;

UInt32Array::new(
std::sync::Arc::new(Field::new(self.name(), DataType::UInt32)),
Box::new(date_arrow),
)
}
}
4 changes: 4 additions & 0 deletions src/daft-core/src/python/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ impl PySeries {
Ok(self.series.dt_day()?.into())
}

pub fn dt_hour(&self) -> PyResult<Self> {
Ok(self.series.dt_hour()?.into())
}

pub fn dt_month(&self) -> PyResult<Self> {
Ok(self.series.dt_month()?.into())
}
Expand Down
13 changes: 13 additions & 0 deletions src/daft-core/src/series/ops/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,19 @@ impl Series {
}
}

pub fn dt_hour(&self) -> DaftResult<Self> {
match self.data_type() {
DataType::Timestamp(..) => {
let ts_array = self.downcast::<TimestampArray>()?;
Ok(ts_array.hour()?.into_series())
}
_ => Err(DaftError::ComputeError(format!(
"Can only run day() operation on temporal types, got {}",
self.data_type()
))),
}
}

pub fn dt_month(&self) -> DaftResult<Self> {
match self.data_type() {
DataType::Date => {
Expand Down
47 changes: 47 additions & 0 deletions src/daft-dsl/src/functions/temporal/hour.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use common_error::{DaftError, DaftResult};
use daft_core::{
datatypes::{DataType, Field},
schema::Schema,
series::Series,
};

use crate::Expr;

use super::super::FunctionEvaluator;

pub(super) struct HourEvaluator {}

impl FunctionEvaluator for HourEvaluator {
fn fn_name(&self) -> &'static str {
"hour"
}

fn to_field(&self, inputs: &[Expr], schema: &Schema, _: &Expr) -> DaftResult<Field> {
match inputs {
[input] => match input.to_field(schema) {
Ok(field) if field.dtype.is_temporal() => {
Ok(Field::new(field.name, DataType::UInt32))
}
Ok(field) => Err(DaftError::TypeError(format!(
"Expected input to hour to be temporal, got {}",
field.dtype
))),
Err(e) => Err(e),
},
_ => Err(DaftError::SchemaMismatch(format!(
"Expected 1 input arg, got {}",
inputs.len()
))),
}
}

fn evaluate(&self, inputs: &[Series], _: &Expr) -> DaftResult<Series> {
match inputs {
[input] => input.dt_hour(),
_ => Err(DaftError::ValueError(format!(
"Expected 1 input arg, got {}",
inputs.len()
))),
}
}
}
14 changes: 12 additions & 2 deletions src/daft-dsl/src/functions/temporal/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
mod date;
mod day;
mod day_of_week;
mod hour;
mod month;
mod year;

use serde::{Deserialize, Serialize};

use crate::functions::temporal::{
date::DateEvaluator, day::DayEvaluator, day_of_week::DayOfWeekEvaluator, month::MonthEvaluator,
year::YearEvaluator,
date::DateEvaluator, day::DayEvaluator, day_of_week::DayOfWeekEvaluator, hour::HourEvaluator,
month::MonthEvaluator, year::YearEvaluator,
};
use crate::Expr;

Expand All @@ -17,6 +18,7 @@ use super::FunctionEvaluator;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum TemporalExpr {
Day,
Hour,
Month,
Year,
DayOfWeek,
Expand All @@ -29,6 +31,7 @@ impl TemporalExpr {
use TemporalExpr::*;
match self {
Day => &DayEvaluator {},
Hour => &HourEvaluator {},
Month => &MonthEvaluator {},
Year => &YearEvaluator {},
DayOfWeek => &DayOfWeekEvaluator {},
Expand All @@ -51,6 +54,13 @@ pub fn day(input: &Expr) -> Expr {
}
}

pub fn hour(input: &Expr) -> Expr {
Expr::Function {
func: super::FunctionExpr::Temporal(TemporalExpr::Hour),
inputs: vec![input.clone()],
}
}

pub fn month(input: &Expr) -> Expr {
Expr::Function {
func: super::FunctionExpr::Temporal(TemporalExpr::Month),
Expand Down
5 changes: 5 additions & 0 deletions src/daft-dsl/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ impl PyExpr {
Ok(day(&self.expr).into())
}

pub fn dt_hour(&self) -> PyResult<Self> {
use functions::temporal::hour;
Ok(hour(&self.expr).into())
}

pub fn dt_month(&self) -> PyResult<Self> {
use functions::temporal::month;
Ok(month(&self.expr).into())
Expand Down
19 changes: 19 additions & 0 deletions tests/series/test_temporal_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,25 @@ def ts_maker(d):
assert expected == days.to_pylist()


def test_series_timestamp_hour() -> None:
from datetime import datetime

def ts_maker(h):
if h is None:
return None
return datetime(2023, 1, 26, h, 1, 1)

input = [1, 5, 14, None, 23, None, 21]

input_ts = list(map(ts_maker, input))
s = Series.from_pylist(input_ts).cast(DataType.timestamp(TimeUnit.ms()))
days = s.dt.hour()

assert days.datatype() == DataType.uint32()

assert input == days.to_pylist()


@pytest.mark.parametrize("tz", [None, "UTC", "+08:00", "Asia/Singapore"])
def test_series_timestamp_month_operation(tz) -> None:
from datetime import datetime
Expand Down

0 comments on commit 55a95ab

Please sign in to comment.