Skip to content

Commit

Permalink
Add Box2D
Browse files Browse the repository at this point in the history
  • Loading branch information
lewiszlw committed Mar 6, 2024
1 parent b660dfc commit 4ce618b
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 2 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ Add geo functionality extension to datafusion query engine.

**Goals**
1. Support multiple wkb dialects
2. Provide DataFusion user defined functions
3. Prefer using geos library if feature flag enabled
2. Provide DataFusion user defined functions similar with PostGIS
3. Prefer using geos library if possible

P.S. Please see each function unit test to know how to use them.

## Useful Links
1. Ewkb format: https://github.com/postgis/postgis/blob/master/doc/ZMSgeoms.txt
Expand Down
3 changes: 3 additions & 0 deletions src/function/make_envelope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ impl ScalarUDFImpl for MakeEnvelopeUdf {
} else {
None
};

let coords = CoordSeq::new_from_vec(&[
&[xmin, ymin],
&[xmin, ymax],
Expand All @@ -94,13 +95,15 @@ impl ScalarUDFImpl for MakeEnvelopeUdf {
.map_err(|_| DataFusionError::Internal("Failed to create exterior".to_string()))?;
let mut polygon = geos::Geometry::create_polygon(exterior, vec![])
.map_err(|_| DataFusionError::Internal("Failed to create polygon".to_string()))?;

let mut builder = if let Some(srid) = srid {
polygon.set_srid(srid as usize);
GeometryArrayBuilder::<i32>::new(WkbDialect::Ewkb, 1)
} else {
GeometryArrayBuilder::<i32>::new(WkbDialect::Wkb, 1)
};
builder.append_geos_geometry(&Some(polygon))?;

let wkb_arr = builder.build();
Ok(ColumnarValue::Array(Arc::new(wkb_arr)))
}
Expand Down
138 changes: 138 additions & 0 deletions src/geo/box.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
use crate::DFResult;
use arrow_array::cast::AsArray;
use arrow_array::types::Float64Type;
use arrow_array::{Array, Float64Array, StructArray};
use arrow_buffer::NullBuffer;
use arrow_schema::{DataType, Field};
use datafusion_common::{DataFusionError, ScalarValue};
use std::sync::Arc;

#[derive(Debug, Clone)]
pub struct Box2D {
pub(crate) xmin: f64,
pub(crate) ymin: f64,
pub(crate) xmax: f64,
pub(crate) ymax: f64,
}

impl Box2D {
pub fn fields() -> Vec<Field> {
vec![
Field::new("xmin", DataType::Float64, false),
Field::new("ymin", DataType::Float64, false),
Field::new("xmax", DataType::Float64, false),
Field::new("ymax", DataType::Float64, false),
]
}
pub fn data_type() -> DataType {
DataType::Struct(Self::fields().into())
}

pub fn value(arr: &StructArray, index: usize) -> DFResult<Option<Box2D>> {
if arr.data_type() != &Box2D::data_type() {
return Err(DataFusionError::Internal(
"StructArray data type is not matched".to_string(),
));
}
if index >= arr.len() || arr.is_null(index) {
return Ok(None);
}
let scalar = ScalarValue::Struct(Arc::new(arr.slice(index, 1)));
let box2d: Box2D = (&scalar).try_into()?;
Ok(Some(box2d))
}
}

impl TryFrom<&ScalarValue> for Box2D {
type Error = DataFusionError;

fn try_from(value: &ScalarValue) -> Result<Self, Self::Error> {
if let ScalarValue::Struct(arr) = value {
if arr.data_type() != &Box2D::data_type() {
return Err(DataFusionError::Internal(
"ScalarValue data type is not matched".to_string(),
));
}
let xmin = arr.column(0).as_primitive::<Float64Type>().value(0);
let ymin = arr.column(1).as_primitive::<Float64Type>().value(0);
let xmax = arr.column(2).as_primitive::<Float64Type>().value(0);
let ymax = arr.column(3).as_primitive::<Float64Type>().value(0);
Ok(Box2D {
xmin,
ymin,
xmax,
ymax,
})
} else {
Err(DataFusionError::Internal(
"ScalarValue is not struct".to_string(),
))
}
}
}

pub fn build_box2d_array(data: Vec<Option<Box2D>>) -> StructArray {
let xmin_arr = Arc::new(Float64Array::from(
data.iter()
.map(|b| b.clone().map(|b| b.xmin))
.collect::<Vec<_>>(),
));
let ymin_arr = Arc::new(Float64Array::from(
data.iter()
.map(|b| b.clone().map(|b| b.ymin))
.collect::<Vec<_>>(),
));
let xmax_arr = Arc::new(Float64Array::from(
data.iter()
.map(|b| b.clone().map(|b| b.xmax))
.collect::<Vec<_>>(),
));
let ymax_arr = Arc::new(Float64Array::from(
data.iter()
.map(|b| b.clone().map(|b| b.ymax))
.collect::<Vec<_>>(),
));
let nulls: NullBuffer = data.iter().map(|b| b.is_some()).collect::<Vec<_>>().into();
StructArray::try_new(
Box2D::fields().into(),
vec![xmin_arr, ymin_arr, xmax_arr, ymax_arr],
Some(nulls),
)
.expect("data is valid")
}

#[cfg(test)]
mod tests {
use crate::geo::r#box::{build_box2d_array, Box2D};
use arrow_array::{Array, StructArray};

#[test]
fn box2d() {
let box2d0 = Box2D {
xmin: 1.0,
ymin: 2.0,
xmax: 3.0,
ymax: 4.0,
};
let box2d2 = Box2D {
xmin: 5.0,
ymin: 6.0,
xmax: 7.0,
ymax: 8.0,
};
let arr: StructArray =
build_box2d_array(vec![Some(box2d0.clone()), None, Some(box2d2.clone())]);
assert_eq!(arr.len(), 3);

assert_eq!(
format!("{:?}", Box2D::value(&arr, 0).unwrap()),
"Some(Box2D { xmin: 1.0, ymin: 2.0, xmax: 3.0, ymax: 4.0 })"
);
assert_eq!(format!("{:?}", Box2D::value(&arr, 1).unwrap()), "None");
assert_eq!(
format!("{:?}", Box2D::value(&arr, 2).unwrap()),
"Some(Box2D { xmin: 5.0, ymin: 6.0, xmax: 7.0, ymax: 8.0 })"
);
assert_eq!(format!("{:?}", Box2D::value(&arr, 3).unwrap()), "None");
}
}
2 changes: 2 additions & 0 deletions src/geo/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
mod array;
mod r#box;
mod builder;
pub(crate) mod dialect;

pub use array::*;
pub use builder::*;
pub use r#box::*;

0 comments on commit 4ce618b

Please sign in to comment.