Skip to content

Commit

Permalink
Add generic GeoArrowArrayReader to use any array with the visitor pattern (#56)
Browse files Browse the repository at this point in the history

* start on generic handler

* theoretical support for WKT/WKB visiting

* add array reader

* test some of the visiting

* with wkb test

* use array reader to simplify kernel

* run apt-get update in ubuntu job
  • Loading branch information
paleolimbot authored Sep 17, 2023
1 parent 0ba1870 commit 0b31fea
Show file tree
Hide file tree
Showing 9 changed files with 321 additions and 121 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:

- name: Install dependencies
run: |
sudo apt-get update
sudo apt install -y -V ca-certificates lsb-release wget cmake valgrind
wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt-get install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
Expand Down
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ add_library(
src/geoarrow/wkb_writer.c
src/geoarrow/wkt_reader.c
src/geoarrow/wkt_writer.c
src/geoarrow/array_reader.c
${GEOARROW_DOUBLE_PARSE_SOURCE}
${GEOARROW_DOUBLE_PRINT_SOURCE}
src/geoarrow/nanoarrow.c)
Expand Down Expand Up @@ -166,6 +167,7 @@ if(GEOARROW_BUILD_TESTS)
add_executable(wkb_writer_test src/geoarrow/wkb_writer_test.cc)
add_executable(wkt_reader_test src/geoarrow/wkt_reader_test.cc)
add_executable(wkt_writer_test src/geoarrow/wkt_writer_test.cc)
add_executable(array_reader_test src/geoarrow/array_reader_test.cc)
add_executable(wkx_files_test src/geoarrow/wkx_files_test.cc)
add_executable(geoarrow_arrow_test src/geoarrow/geoarrow_arrow_test.cc)

Expand All @@ -189,6 +191,7 @@ if(GEOARROW_BUILD_TESTS)
target_link_libraries(wkb_writer_test geoarrow gtest_main)
target_link_libraries(wkt_reader_test geoarrow gtest_main)
target_link_libraries(wkt_writer_test geoarrow gtest_main)
target_link_libraries(array_reader_test geoarrow gtest_main)
target_link_libraries(wkx_files_test geoarrow gtest_main)
target_link_libraries(geoarrow_arrow_test geoarrow arrow_shared gtest_main)

Expand All @@ -208,5 +211,6 @@ if(GEOARROW_BUILD_TESTS)
gtest_discover_tests(wkt_reader_test)
gtest_discover_tests(wkt_writer_test)
gtest_discover_tests(wkx_files_test)
gtest_discover_tests(array_reader_test)
gtest_discover_tests(geoarrow_arrow_test)
endif()
108 changes: 108 additions & 0 deletions src/geoarrow/array_reader.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@

#include "geoarrow.h"

#include "nanoarrow.h"

// Private state stored behind reader->private_data by GeoArrowArrayReaderInit().
// Holds one reader per serialized encoding that GeoArrowArrayReaderVisit()
// dispatches to.
struct GeoArrowArrayReaderPrivate {
// Reader used when the array view's type is GEOARROW_TYPE_WKT
struct GeoArrowWKTReader wkt_reader;
// Reader used when the array view's type is GEOARROW_TYPE_WKB
struct GeoArrowWKBReader wkb_reader;
};

static GeoArrowErrorCode GeoArrowArrayViewVisitWKT(struct GeoArrowArrayView* array_view,
int64_t offset, int64_t length,
struct GeoArrowWKTReader* reader,
struct GeoArrowVisitor* v) {
struct GeoArrowStringView item;
const int32_t* offset_begin = array_view->offsets[0] + array_view->offset[0] + offset;

for (int64_t i = 0; i < length; i++) {
if (!array_view->validity_bitmap ||
ArrowBitGet(array_view->validity_bitmap, array_view->offset[0] + offset + i)) {
item.data = (const char*)(array_view->data + offset_begin[i]);
item.size_bytes = offset_begin[i + 1] - offset_begin[i];
NANOARROW_RETURN_NOT_OK(GeoArrowWKTReaderVisit(reader, item, v));
} else {
NANOARROW_RETURN_NOT_OK(v->feat_start(v));
NANOARROW_RETURN_NOT_OK(v->null_feat(v));
NANOARROW_RETURN_NOT_OK(v->feat_end(v));
}
}

return GEOARROW_OK;
}

static GeoArrowErrorCode GeoArrowArrayViewVisitWKB(struct GeoArrowArrayView* array_view,
int64_t offset, int64_t length,
struct GeoArrowWKBReader* reader,
struct GeoArrowVisitor* v) {
struct GeoArrowBufferView item;
const int32_t* offset_begin = array_view->offsets[0] + array_view->offset[0] + offset;

for (int64_t i = 0; i < length; i++) {
if (!array_view->validity_bitmap ||
ArrowBitGet(array_view->validity_bitmap, array_view->offset[0] + offset + i)) {
item.data = array_view->data + offset_begin[i];
item.size_bytes = offset_begin[i + 1] - offset_begin[i];
NANOARROW_RETURN_NOT_OK(GeoArrowWKBReaderVisit(reader, item, v));
} else {
NANOARROW_RETURN_NOT_OK(v->feat_start(v));
NANOARROW_RETURN_NOT_OK(v->null_feat(v));
NANOARROW_RETURN_NOT_OK(v->feat_end(v));
}
}

return GEOARROW_OK;
}

// Initialize an array reader by allocating its private state and initializing
// the WKT and WKB readers it contains.
//
// Returns ENOMEM if the private state cannot be allocated, or the error code
// of whichever sub-reader failed to initialize. On any failure everything
// acquired so far is released and reader->private_data is not written. On
// success reader->private_data owns the allocation; release it with
// GeoArrowArrayReaderReset().
GeoArrowErrorCode GeoArrowArrayReaderInit(struct GeoArrowArrayReader* reader) {
  struct GeoArrowArrayReaderPrivate* impl =
      (struct GeoArrowArrayReaderPrivate*)ArrowMalloc(
          sizeof(struct GeoArrowArrayReaderPrivate));
  if (impl == NULL) {
    return ENOMEM;
  }

  int status = GeoArrowWKTReaderInit(&impl->wkt_reader);
  if (status == GEOARROW_OK) {
    status = GeoArrowWKBReaderInit(&impl->wkb_reader);
    if (status == GEOARROW_OK) {
      reader->private_data = impl;
      return GEOARROW_OK;
    }

    // The WKT reader was already initialized: undo it before bailing out
    GeoArrowWKTReaderReset(&impl->wkt_reader);
  }

  ArrowFree(impl);
  return status;
}

// Release the resources held by an array reader.
//
// Resets the contained WKB and WKT readers, frees the private state, and
// clears reader->private_data so that the freed pointer cannot be reused:
// calling this function again on the same reader (or on a reader whose
// private_data is NULL) is a no-op instead of a double free / NULL dereference.
void GeoArrowArrayReaderReset(struct GeoArrowArrayReader* reader) {
  struct GeoArrowArrayReaderPrivate* private_data =
      (struct GeoArrowArrayReaderPrivate*)reader->private_data;

  // Nothing to release (already reset)
  if (private_data == NULL) {
    return;
  }

  GeoArrowWKBReaderReset(&private_data->wkb_reader);
  GeoArrowWKTReaderReset(&private_data->wkt_reader);
  ArrowFree(private_data);

  // Do not leave a dangling pointer behind
  reader->private_data = NULL;
}

// Visit `length` elements of `array_view` starting at `offset`, whatever the
// view's encoding is.
//
// WKT and WKB views are routed through the readers held in the reader's
// private state; every other type is forwarded to GeoArrowArrayViewVisit().
// Returns whatever the selected visiting routine returns.
GeoArrowErrorCode GeoArrowArrayReaderVisit(struct GeoArrowArrayReader* reader,
                                           struct GeoArrowArrayView* array_view,
                                           int64_t offset, int64_t length,
                                           struct GeoArrowVisitor* v) {
  struct GeoArrowArrayReaderPrivate* impl =
      (struct GeoArrowArrayReaderPrivate*)reader->private_data;

  if (array_view->schema_view.type == GEOARROW_TYPE_WKT) {
    return GeoArrowArrayViewVisitWKT(array_view, offset, length, &impl->wkt_reader, v);
  }

  if (array_view->schema_view.type == GEOARROW_TYPE_WKB) {
    return GeoArrowArrayViewVisitWKB(array_view, offset, length, &impl->wkb_reader, v);
  }

  // Not a serialized type: no reader state is needed
  return GeoArrowArrayViewVisit(array_view, offset, length, v);
}
132 changes: 132 additions & 0 deletions src/geoarrow/array_reader_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@

#include <gtest/gtest.h>

#include "geoarrow.h"

#include "wkx_testing.hpp"

// Smoke test: a reader can be initialized and then released.
TEST(ArrayReaderTest, ArrayReaderTestBasic) {
  struct GeoArrowArrayReader array_reader;
  ASSERT_EQ(GeoArrowArrayReaderInit(&array_reader), GEOARROW_OK);
  GeoArrowArrayReaderReset(&array_reader);
}

// Visiting a WKT array through the generic reader yields each element's WKT
// and reports null elements as null features.
TEST(ArrayReaderTest, ArrayReaderTestVisitWKT) {
  enum GeoArrowType wkt_type = GEOARROW_TYPE_WKT;
  struct ArrowSchema wkt_schema;
  struct ArrowArray wkt_array;

  // Input array: [POINT (30 10), null]
  ASSERT_EQ(GeoArrowSchemaInit(&wkt_schema, wkt_type), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayInitFromSchema(&wkt_array, &wkt_schema, nullptr), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayStartAppending(&wkt_array), GEOARROW_OK);

  ASSERT_EQ(ArrowArrayAppendString(&wkt_array, ArrowCharView("POINT (30 10)")),
            GEOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendNull(&wkt_array, 1), GEOARROW_OK);

  ASSERT_EQ(ArrowArrayFinishBuildingDefault(&wkt_array, nullptr), GEOARROW_OK);

  // Point a view of the matching type at the array
  struct GeoArrowArrayView view;
  EXPECT_EQ(GeoArrowArrayViewInitFromType(&view, wkt_type), GEOARROW_OK);
  EXPECT_EQ(GeoArrowArrayViewSetArray(&view, &wkt_array, nullptr), GEOARROW_OK);

  // Round-trip every element through the reader into WKT
  WKXTester tester;
  struct GeoArrowArrayReader array_reader;
  ASSERT_EQ(GeoArrowArrayReaderInit(&array_reader), GEOARROW_OK);
  EXPECT_EQ(GeoArrowArrayReaderVisit(&array_reader, &view, 0, wkt_array.length,
                                     tester.WKTVisitor()),
            GEOARROW_OK);

  auto wkt_out = tester.WKTValues("<null value>");
  ASSERT_EQ(wkt_out.size(), 2);
  EXPECT_EQ(wkt_out[0], "POINT (30 10)");
  EXPECT_EQ(wkt_out[1], "<null value>");

  // Clean up
  wkt_schema.release(&wkt_schema);
  wkt_array.release(&wkt_array);
  GeoArrowArrayReaderReset(&array_reader);
}

// Visiting a WKB array through the generic reader yields each element's
// geometry (checked here as WKT) and reports null elements as null features.
TEST(ArrayReaderTest, ArrayReaderTestVisitWKB) {
  struct ArrowSchema schema;
  struct ArrowArray array;
  enum GeoArrowType type = GEOARROW_TYPE_WKB;

  // Little-endian WKB for POINT (30 10). Kept in a plain byte array rather than
  // std::basic_string<uint8_t>: the standard provides no
  // std::char_traits<uint8_t>, so that instantiation is not portable across
  // standard library implementations.
  const uint8_t point[] = {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
                           0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00,
                           0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40};
  struct ArrowBufferView point_view;
  point_view.data.as_uint8 = point;
  point_view.size_bytes = sizeof(point);

  // Build the array for [POINT (30 10), null]
  ASSERT_EQ(GeoArrowSchemaInit(&schema, type), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayStartAppending(&array), GEOARROW_OK);

  ASSERT_EQ(ArrowArrayAppendBytes(&array, point_view), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), GEOARROW_OK);

  ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), GEOARROW_OK);

  // Set the array view
  struct GeoArrowArrayView array_view;
  EXPECT_EQ(GeoArrowArrayViewInitFromType(&array_view, type), GEOARROW_OK);
  EXPECT_EQ(GeoArrowArrayViewSetArray(&array_view, &array, nullptr), GEOARROW_OK);

  // Check its contents
  WKXTester tester;
  struct GeoArrowArrayReader reader;
  ASSERT_EQ(GeoArrowArrayReaderInit(&reader), GEOARROW_OK);
  EXPECT_EQ(GeoArrowArrayReaderVisit(&reader, &array_view, 0, array.length,
                                     tester.WKTVisitor()),
            GEOARROW_OK);
  auto values = tester.WKTValues("<null value>");
  ASSERT_EQ(values.size(), 2);
  EXPECT_EQ(values[0], "POINT (30 10)");
  EXPECT_EQ(values[1], "<null value>");

  schema.release(&schema);
  array.release(&array);
  GeoArrowArrayReaderReset(&reader);
}

// Visiting a native (non-serialized) point array through the generic reader
// yields each element's geometry (checked here as WKT) and reports null
// elements as null features.
TEST(ArrayReaderTest, ArrayReaderTestVisitGeoArrow) {
  enum GeoArrowType point_type = GEOARROW_TYPE_POINT;
  struct ArrowSchema point_schema;
  struct ArrowArray point_array;

  // Input array: [POINT (30 10), null]
  ASSERT_EQ(GeoArrowSchemaInit(&point_schema, point_type), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayInitFromSchema(&point_array, &point_schema, nullptr),
            GEOARROW_OK);
  ASSERT_EQ(ArrowArrayStartAppending(&point_array), GEOARROW_OK);

  // One value per coordinate child, then close the element
  ASSERT_EQ(ArrowArrayAppendDouble(point_array.children[0], 30), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendDouble(point_array.children[1], 10), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayFinishElement(&point_array), GEOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendNull(&point_array, 1), GEOARROW_OK);

  ASSERT_EQ(ArrowArrayFinishBuildingDefault(&point_array, nullptr), GEOARROW_OK);

  // Point a view of the matching type at the array
  struct GeoArrowArrayView view;
  EXPECT_EQ(GeoArrowArrayViewInitFromType(&view, point_type), GEOARROW_OK);
  EXPECT_EQ(GeoArrowArrayViewSetArray(&view, &point_array, nullptr), GEOARROW_OK);

  // Round-trip every element through the reader into WKT
  WKXTester tester;
  struct GeoArrowArrayReader array_reader;
  ASSERT_EQ(GeoArrowArrayReaderInit(&array_reader), GEOARROW_OK);
  EXPECT_EQ(GeoArrowArrayReaderVisit(&array_reader, &view, 0, point_array.length,
                                     tester.WKTVisitor()),
            GEOARROW_OK);

  auto wkt_out = tester.WKTValues("<null value>");
  ASSERT_EQ(wkt_out.size(), 2);
  EXPECT_EQ(wkt_out[0], "POINT (30 10)");
  EXPECT_EQ(wkt_out[1], "<null value>");

  // Clean up
  point_schema.release(&point_schema);
  point_array.release(&point_array);
  GeoArrowArrayReaderReset(&array_reader);
}
40 changes: 33 additions & 7 deletions src/geoarrow/array_view.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view,
array_view->n_offsets = 3;
break;
default:
GeoArrowErrorSet(error, "Unsupported geometry type in GeoArrowArrayViewInit()");
return EINVAL;
// i.e., serialized type
array_view->n_offsets = 1;
break;
}

for (int i = 0; i < 4; i++) {
Expand All @@ -38,6 +39,7 @@ static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view,
for (int i = 0; i < 3; i++) {
array_view->offsets[i] = NULL;
}
array_view->data = NULL;

array_view->coords.n_coords = 0;
switch (array_view->schema_view.dimensions) {
Expand All @@ -52,8 +54,9 @@ static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view,
array_view->coords.n_values = 4;
break;
default:
GeoArrowErrorSet(error, "Unsupported dimensions in GeoArrowArrayViewInit()");
return EINVAL;
// i.e., serialized type
array_view->coords.n_coords = 0;
break;
}

switch (array_view->schema_view.coord_type) {
Expand All @@ -64,8 +67,9 @@ static int GeoArrowArrayViewInitInternal(struct GeoArrowArrayView* array_view,
array_view->coords.coords_stride = array_view->coords.n_values;
break;
default:
GeoArrowErrorSet(error, "Unsupported coord type in GeoArrowArrayViewInit()");
return EINVAL;
// i.e., serialized type
array_view->coords.coords_stride = 0;
break;
}

for (int i = 0; i < 4; i++) {
Expand Down Expand Up @@ -197,10 +201,32 @@ static int GeoArrowArrayViewSetArrayInternal(struct GeoArrowArrayView* array_vie
level + 1);
}

// Point a serialized (WKT/WKB) array view at `array`.
//
// Records the array's length and offset at level 0, uses buffers[1] as the
// 32-bit element offsets and buffers[2] as the data bytes. The validity buffer
// is not touched here.
//
// Returns EINVAL (and sets `error`) if the array does not have exactly three
// buffers, since buffers[1] and buffers[2] would otherwise be read out of
// bounds; returns GEOARROW_OK otherwise.
GeoArrowErrorCode GeoArrowArrayViewSetArraySerialized(
    struct GeoArrowArrayView* array_view, struct ArrowArray* array,
    struct GeoArrowError* error) {
  // Variable-length binary/string layout: validity, offsets, data
  if (array->n_buffers != 3) {
    GeoArrowErrorSet(
        error, "Expected array with 3 buffers for serialized type in GeoArrowArrayViewSetArray()");
    return EINVAL;
  }

  array_view->length[0] = array->length;
  array_view->offset[0] = array->offset;

  array_view->offsets[0] = (const int32_t*)array->buffers[1];
  array_view->data = (const uint8_t*)array->buffers[2];
  return GEOARROW_OK;
}

GeoArrowErrorCode GeoArrowArrayViewSetArray(struct GeoArrowArrayView* array_view,
struct ArrowArray* array,
struct GeoArrowError* error) {
NANOARROW_RETURN_NOT_OK(GeoArrowArrayViewSetArrayInternal(array_view, array, error, 0));
switch (array_view->schema_view.type) {
case GEOARROW_TYPE_WKT:
case GEOARROW_TYPE_WKB:
NANOARROW_RETURN_NOT_OK(
GeoArrowArrayViewSetArraySerialized(array_view, array, error));
break;
default:
NANOARROW_RETURN_NOT_OK(
GeoArrowArrayViewSetArrayInternal(array_view, array, error, 0));
break;
}

array_view->validity_bitmap = array->buffers[0];
return GEOARROW_OK;
}
Expand Down
11 changes: 0 additions & 11 deletions src/geoarrow/array_view_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,6 @@ INSTANTIATE_TEST_SUITE_P(
GEOARROW_TYPE_INTERLEAVED_MULTILINESTRING_ZM,
GEOARROW_TYPE_INTERLEAVED_MULTIPOLYGON_ZM));

TEST(ArrayViewTest, ArrayViewTestInitErrors) {
struct GeoArrowArrayView array_view;
struct GeoArrowError error;
struct ArrowSchema schema;

ASSERT_EQ(GeoArrowSchemaInitExtension(&schema, GEOARROW_TYPE_WKB), GEOARROW_OK);
EXPECT_EQ(GeoArrowArrayViewInitFromSchema(&array_view, &schema, &error), EINVAL);
EXPECT_STREQ(error.message, "Unsupported geometry type in GeoArrowArrayViewInit()");
schema.release(&schema);
}

TEST(ArrayViewTest, ArrayViewTestSetArrayErrors) {
struct GeoArrowArrayView array_view;
struct GeoArrowError error;
Expand Down
Loading

0 comments on commit 0b31fea

Please sign in to comment.