Skip to content

Commit

Permalink
Add rstar tree index
Browse files Browse the repository at this point in the history
  • Loading branch information
lewiszlw committed Mar 13, 2024
1 parent dd5e81e commit 9521855
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 15 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ datafusion-expr = "36"
geo = "0.28"
geos = { version = "8.3", features = ["v3_10_0", "geo"], optional = true }
#geozero = { version = "0.12", features = ["with-wkb"] }
geozero = { git = "https://github.com/lewiszlw/geozero.git", rev = "40e33a714b624b90497ec7032ac5be1d9f02aa13", features = ["with-wkb"] }
geozero = { git = "https://github.com/georust/geozero.git", rev = "3378dda305ec88cabb092d458f8a61a140f60827", features = ["with-wkb"] }
rayon = "1.9"
rstar = "0.12.0"

[dev-dependencies]
arrow = "50"
Expand Down
15 changes: 3 additions & 12 deletions src/function/extent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use datafusion_expr::{Accumulator, AggregateUDFImpl, Signature, Volatility};
use geo::BoundingRect;
use std::any::Any;

// TODO add aliases after datafusion 37.0 released
#[derive(Debug)]
pub struct ExtentUdaf {
signature: Signature,
Expand Down Expand Up @@ -66,12 +67,7 @@ pub struct ExtentAccumulator {
impl ExtentAccumulator {
pub fn new() -> Self {
Self {
box2d: Box2d {
xmin: f64::MAX,
ymin: f64::MAX,
xmax: f64::MIN,
ymax: f64::MIN,
},
box2d: Box2d::new(),
}
}
}
Expand Down Expand Up @@ -133,12 +129,7 @@ impl Accumulator for ExtentAccumulator {
}

fn compute_extent<O: OffsetSizeTrait>(arr: &GenericBinaryArray<O>) -> DFResult<Box2d> {
let mut box2d = Box2d {
xmin: f64::MAX,
ymin: f64::MAX,
xmax: f64::MIN,
ymax: f64::MIN,
};
let mut box2d = Box2d::new();
for i in 0..arr.geom_len() {
if let Some(value) = arr
.geo_value(i)?
Expand Down
3 changes: 2 additions & 1 deletion src/function/intersects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::DFResult;
use arrow_array::cast::AsArray;
use arrow_array::{Array, BooleanArray, GenericBinaryArray, OffsetSizeTrait};
use arrow_schema::DataType;
use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError};
use datafusion_common::{internal_err, DataFusionError};
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use rayon::prelude::*;
use std::any::Any;
Expand Down Expand Up @@ -106,6 +106,7 @@ fn intersects<O: OffsetSizeTrait, F: OffsetSizeTrait>(
.map(|geom_index| {
#[cfg(feature = "geos")]
{
use datafusion_common::internal_datafusion_err;
use geos::Geom;
match (arr0.geos_value(geom_index)?, arr1.geos_value(geom_index)?) {
(Some(geom0), Some(geom1)) => {
Expand Down
42 changes: 42 additions & 0 deletions src/geo/box.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ pub struct Box2d {
}

impl Box2d {
pub fn new() -> Self {
Self {
xmin: f64::MAX,
ymin: f64::MAX,
xmax: f64::MIN,
ymax: f64::MIN,
}
}
pub fn fields() -> Vec<Field> {
vec![
Field::new("xmin", DataType::Float64, false),
Expand All @@ -41,6 +49,12 @@ impl Box2d {
}
}

impl Default for Box2d {
fn default() -> Self {
Self::new()
}
}

impl TryFrom<&ScalarValue> for Box2d {
type Error = DataFusionError;

Expand Down Expand Up @@ -83,6 +97,34 @@ impl From<geo::Rect> for Box2d {
}
}

#[cfg(feature = "geos")]
impl TryFrom<geos::Geometry> for Box2d {
type Error = DataFusionError;

fn try_from(value: geos::Geometry) -> Result<Self, Self::Error> {
use datafusion_common::internal_datafusion_err;
use geos::Geom;
let xmin = value
.get_x_min()
.map_err(internal_datafusion_err!("geom get_x_min failed"))?;
let ymin = value
.get_y_min()
.map_err(internal_datafusion_err!("geom get_y_min failed"))?;
let xmax = value
.get_x_max()
.map_err(internal_datafusion_err!("geom get_x_max failed"))?;
let ymax = value
.get_y_max()
.map_err(internal_datafusion_err!("geom get_y_max failed"))?;
Ok(Box2d {
xmin,
ymin,
xmax,
ymax,
})
}
}

pub fn build_box2d_array(data: Vec<Option<Box2d>>) -> StructArray {
let xmin_arr = Arc::new(Float64Array::from(
data.iter()
Expand Down
66 changes: 66 additions & 0 deletions src/geo/index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
use crate::geo::{Box2d, GeometryArray};
use crate::DFResult;
use arrow_array::{GenericBinaryArray, OffsetSizeTrait};
use geo::BoundingRect;
use rstar::{RTree, RTreeObject, AABB};

#[derive(Clone, Debug)]
pub struct GeoGeometry(geo::Geometry);

impl RTreeObject for GeoGeometry {
type Envelope = AABB<[f64; 2]>;

fn envelope(&self) -> Self::Envelope {
let box2d: Box2d = if let Some(rect) = self.0.bounding_rect() {
rect.into()
} else {
Box2d::new()
};
AABB::from_corners([box2d.xmin, box2d.ymin], [box2d.xmax, box2d.ymax])
}
}

pub fn build_rtree_index<O: OffsetSizeTrait>(
wkb_arr: GenericBinaryArray<O>,
) -> DFResult<RTree<GeoGeometry>> {
let mut geom_vec = vec![];
for i in 0..wkb_arr.geom_len() {
if let Some(geom) = wkb_arr.geo_value(i)? {
geom_vec.push(GeoGeometry(geom));
}
}
Ok(RTree::bulk_load(geom_vec))
}

#[cfg(test)]
mod tests {
use crate::geo::index::build_rtree_index;
use crate::geo::GeometryArrayBuilder;
use geo::line_string;
use rstar::AABB;

#[test]
fn rtree_index() {
let ls0 = line_string![
(x: 0., y: 0.),
(x: 1., y: 1.)
];
let ls2 = line_string![
(x: 0., y: 0.),
(x: -1., y: -1.)
];
let builder: GeometryArrayBuilder<i32> = vec![Some(ls0), None, Some(ls2)].as_slice().into();
let wkb_arr = builder.build();

let index = build_rtree_index(wkb_arr).unwrap();

let elements = index.locate_in_envelope(&AABB::from_corners([0., 0.], [0.5, 0.5]));
assert_eq!(elements.count(), 0);
let elements = index.locate_in_envelope(&AABB::from_corners([0., 0.], [1., 1.]));
assert_eq!(elements.count(), 1);
let elements = index.locate_in_envelope(&AABB::from_corners([-1., -1.], [1., 1.]));
assert_eq!(elements.count(), 2);
let elements = index.locate_in_envelope(&AABB::from_corners([-2., -2.], [2., 2.]));
assert_eq!(elements.count(), 2);
}
}
2 changes: 2 additions & 0 deletions src/geo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ mod array;
mod r#box;
mod builder;
pub(crate) mod dialect;
mod index;

pub use array::*;
pub use builder::*;
pub use index::*;
pub use r#box::*;

0 comments on commit 9521855

Please sign in to comment.