From 596673b842020218634242d35fe814f1073276ba Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 30 Aug 2023 22:55:15 -0300 Subject: [PATCH 1/5] unpin Cython --- .github/workflows/python.yaml | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 283c20ab..d0e9f2ac 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -54,7 +54,7 @@ jobs: run: | sudo apt-get install -y lcov pip uninstall --yes geoarrow - pip install pytest-cov "Cython <= 0.29.36" + pip install pytest-cov Cython pushd python # Build with Cython + gcc coverage options diff --git a/python/pyproject.toml b/python/pyproject.toml index 8d23ae84..7f57ff78 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -35,6 +35,6 @@ repository = "https://github.com/geoarrow/geoarrow-c" requires = [ "setuptools >= 61.0.0", "setuptools-scm", - "Cython <= 0.29.36" + "Cython" ] build-backend = "setuptools.build_meta" From 927afa16e5df714ea3ab80555d07ef8d4aaf0459 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 30 Aug 2023 23:26:35 -0300 Subject: [PATCH 2/5] better error reporting --- python/src/geoarrow/_lib.pyx | 92 +++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/python/src/geoarrow/_lib.pyx b/python/src/geoarrow/_lib.pyx index da96f734..5cc7056b 100644 --- a/python/src/geoarrow/_lib.pyx +++ b/python/src/geoarrow/_lib.pyx @@ -201,6 +201,33 @@ cdef extern from "geoarrow.hpp" namespace "geoarrow": const string& metadata) +class GeoArrowCException(RuntimeError): + + def __init__(self, what, code, message=""): + self.what = what + self.code = code + self.message = message + + if self.message == "": + super().__init__(f"{self.what} failed ({self.code})") + else: + super().__init__(f"{self.what} failed ({self.code}): {self.message}") + + +cdef class Error: + cdef GeoArrowError c_error + + def __cinit__(self): + self.c_error.message[0] = 0 + + def raise_message(self, what, code): + raise GeoArrowCException(what, code, self.c_error.message.decode("UTF-8")) + + @staticmethod + def raise_error(what, code): + raise GeoArrowCException(what, code, "") + + cdef class SchemaHolder: cdef ArrowSchema c_schema @@ -399,56 +426,57 @@ cdef class CKernel: cdef const char* cname = name cdef int result = GeoArrowKernelInit(&self.c_kernel, cname, NULL) if result != GEOARROW_OK: - raise ValueError('GeoArrowKernelInit() failed') + Error.raise_error("GeoArrowKernelInit()", result) def __dealloc__(self): if self.c_kernel.release != NULL: self.c_kernel.release(&self.c_kernel) def start(self, SchemaHolder schema, const char* options): - cdef GeoArrowError error + cdef Error error = Error() out = SchemaHolder() cdef int result = self.c_kernel.start(&self.c_kernel, &schema.c_schema, - options, &out.c_schema, &error) + options, &out.c_schema, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message) + error.raise_message("GeoArrowKernel::start()", result) return out def push_batch(self, ArrayHolder array): - cdef GeoArrowError error + cdef Error error = Error() out = ArrayHolder() cdef int result with nogil: result = self.c_kernel.push_batch(&self.c_kernel, &array.c_array, - &out.c_array, &error) + &out.c_array, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message) + error.raise_message("GeoArrowKernel::push_batch()", result) return out def finish(self): - cdef GeoArrowError error + cdef Error error = Error() out = ArrayHolder() cdef int result with nogil: - result = self.c_kernel.finish(&self.c_kernel, &out.c_array, &error) + result = self.c_kernel.finish(&self.c_kernel, &out.c_array, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message) + error.raise_message("GeoArrowKernel::finish()", result) def push_batch_agg(self, ArrayHolder array): - cdef GeoArrowError error + cdef Error error = Error() cdef int result = self.c_kernel.push_batch(&self.c_kernel, &array.c_array, - NULL, &error) + NULL, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message) + error.raise_message("GeoArrowKernel::push_batch()", result) def finish_agg(self): - cdef GeoArrowError error + cdef Error error = Error() out = ArrayHolder() - cdef int result = self.c_kernel.finish(&self.c_kernel, &out.c_array, &error) + cdef int result = self.c_kernel.finish(&self.c_kernel, &out.c_array, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message) + error.raise_message("GeoArrowKernel::finish()", result) + return out @@ -459,12 +487,12 @@ cdef class CArrayView: def __init__(self, ArrayHolder array, SchemaHolder schema): self._base = array - cdef GeoArrowError error - cdef int result = GeoArrowArrayViewInitFromSchema(&self.c_array_view, &schema.c_schema, &error) + cdef Error error = Error() + cdef int result = GeoArrowArrayViewInitFromSchema(&self.c_array_view, &schema.c_schema, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message.decode('UTF-8')) + error.raise_message("GeoArrowArrayViewInitFromSchema()", result) - result = GeoArrowArrayViewSetArray(&self.c_array_view, &array.c_array, &error) + result = GeoArrowArrayViewSetArray(&self.c_array_view, &array.c_array, &error.c_error) if result != GEOARROW_OK: raise ValueError(error.message.decode('UTF-8')) @@ -568,10 +596,10 @@ cdef class CBuilder: def __init__(self, SchemaHolder schema): self._schema = schema - cdef GeoArrowError error - cdef int result = GeoArrowBuilderInitFromSchema(&self.c_builder, &schema.c_schema, &error) + cdef Error error = Error() + cdef int result = GeoArrowBuilderInitFromSchema(&self.c_builder, &schema.c_schema, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.message.decode('UTF-8')) + error.raise_message("GeoArrowBuilderInitFromSchema()", result) def __dealloc__(self): GeoArrowBuilderReset(&self.c_builder) @@ -579,20 +607,20 @@ cdef class CBuilder: def set_buffer_uint8(self, int64_t i, object obj): cdef const unsigned char[:] view = memoryview(obj) cdef int result = GeoArrowBuilderSetPyBuffer(&self.c_builder, i, obj, &(view[0]), view.shape[0]) - if result != 0: - raise ValueError("GeoArrowBuilderSetPyBuffer() failed") + if result != GEOARROW_OK: + Error.raise_error("GeoArrowBuilderSetPyBuffer()", result) def set_buffer_int32(self, int64_t i, object obj): cdef const int32_t[:] view = memoryview(obj) cdef int result = GeoArrowBuilderSetPyBuffer(&self.c_builder, i, obj, &(view[0]), view.shape[0] * 4) - if result != 0: - raise ValueError("GeoArrowBuilderSetPyBuffer() failed") + if result != GEOARROW_OK: + Error.raise_error("GeoArrowBuilderSetPyBuffer()", result) def set_buffer_double(self, int64_t i, object obj): cdef const double[:] view = memoryview(obj) cdef int result = GeoArrowBuilderSetPyBuffer(&self.c_builder, i, obj, &(view[0]), view.shape[0] * 8) - if result != 0: - raise ValueError("GeoArrowBuilderSetPyBuffer() failed") + if result != GEOARROW_OK: + Error.raise_error("GeoArrowBuilderSetPyBuffer()", result) @property def schema(self): @@ -600,8 +628,8 @@ cdef class CBuilder: def finish(self): out = ArrayHolder() - cdef GeoArrowError error - cdef int result = GeoArrowBuilderFinish(&self.c_builder, &out.c_array, &error) + cdef Error error = Error() + cdef int result = GeoArrowBuilderFinish(&self.c_builder, &out.c_array, &error.c_error) if result != GEOARROW_OK: - raise ValueError(error.decode('UTF-8')) + error.raise_message("GeoArrowBuilderFinish()", result) return out From f9d2131b30477a1e31402cc0bd0a21f61b357291 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 30 Aug 2023 23:57:44 -0300 Subject: [PATCH 3/5] with passing tests --- python/setup.py | 22 +++++++++++-------- python/src/geoarrow/_lib.pyx | 14 +++++++----- python/src/geoarrow/lib.py | 10 ++++++++- python/src/geoarrow/pyarrow/_kernel.py | 2 +- python/tests/test_geoarrow_lib.py | 2 +- python/tests/test_geoarrow_pandas.py | 3 ++- python/tests/test_geoarrow_pyarrow.py | 4 ++-- python/tests/test_geoarrow_pyarrow_compute.py | 3 ++- 8 files changed, 38 insertions(+), 22 deletions(-) diff --git a/python/setup.py b/python/setup.py index 00e8a26c..37b6eabe 100644 --- a/python/setup.py +++ b/python/setup.py @@ -52,13 +52,17 @@ def new__compile(obj, src, ext, cc_args, extra_postargs, pp_opts): # Set some extra flags for compiling with coverage support if os.getenv('GEOARROW_COVERAGE') == "1": - coverage_compile_args = ['--coverage'] - coverage_link_args = ['--coverage'] - coverage_define_macros = [("CYTHON_TRACE", 1)] + extra_compile_args = ['--coverage'] + extra_link_args = ['--coverage'] + extra_define_macros = [("CYTHON_TRACE", 1)] +elif os.getenv('GEOARROW_DEBUG_EXTENSION') == "1": + extra_compile_args = ['-g', '-O0'] + extra_link_args = [] + extra_define_macros = [] else: - coverage_compile_args = [] - coverage_link_args = [] - coverage_define_macros = [] + extra_compile_args = [] + extra_link_args = [] + extra_define_macros = [] setup( ext_modules=[ @@ -67,9 +71,9 @@ def new__compile(obj, src, ext, cc_args, extra_postargs, pp_opts): include_dirs=['src/geoarrow/geoarrow', 'src/geoarrow/geoarrow_python'], language='c++', sources=['src/geoarrow/_lib.pyx'] + sources, - extra_compile_args = ['-std=c++11'] + coverage_compile_args, - extra_link_args = [] + coverage_link_args, - define_macros= [] + coverage_define_macros + extra_compile_args = ['-std=c++11'] + extra_compile_args, + extra_link_args = [] + extra_link_args, + define_macros= [] + extra_define_macros ) ], cmdclass = {"build_ext": build_ext_subclass} diff --git a/python/src/geoarrow/_lib.pyx b/python/src/geoarrow/_lib.pyx index 5cc7056b..0978bb62 100644 --- a/python/src/geoarrow/_lib.pyx +++ b/python/src/geoarrow/_lib.pyx @@ -421,12 +421,14 @@ cdef class CVectorType: cdef class CKernel: cdef GeoArrowKernel c_kernel + cdef object cname_str def __init__(self, const char* name): cdef const char* cname = name + self.cname_str = cname.decode("UTF-8") cdef int result = GeoArrowKernelInit(&self.c_kernel, cname, NULL) if result != GEOARROW_OK: - Error.raise_error("GeoArrowKernelInit()", result) + Error.raise_error("GeoArrowKernelInit('{self.cname_str}'>)", result) def __dealloc__(self): if self.c_kernel.release != NULL: @@ -438,7 +440,7 @@ cdef class CKernel: cdef int result = self.c_kernel.start(&self.c_kernel, &schema.c_schema, options, &out.c_schema, &error.c_error) if result != GEOARROW_OK: - error.raise_message("GeoArrowKernel::start()", result) + error.raise_message(f"GeoArrowKernel<{self.cname_str}>::start()", result) return out @@ -450,7 +452,7 @@ cdef class CKernel: result = self.c_kernel.push_batch(&self.c_kernel, &array.c_array, &out.c_array, &error.c_error) if result != GEOARROW_OK: - error.raise_message("GeoArrowKernel::push_batch()", result) + error.raise_message(f"GeoArrowKernel<{self.cname_str}>::push_batch()", result) return out @@ -461,21 +463,21 @@ cdef class CKernel: with nogil: result = self.c_kernel.finish(&self.c_kernel, &out.c_array, &error.c_error) if result != GEOARROW_OK: - error.raise_message("GeoArrowKernel::finish()", result) + error.raise_message(f"GeoArrowKernel<{self.cname_str}>::finish()", result) def push_batch_agg(self, ArrayHolder array): cdef Error error = Error() cdef int result = self.c_kernel.push_batch(&self.c_kernel, &array.c_array, NULL, &error.c_error) if result != GEOARROW_OK: - error.raise_message("GeoArrowKernel::push_batch()", result) + error.raise_message(f"GeoArrowKernel<{self.cname_str}>::push_batch()", result) def finish_agg(self): cdef Error error = Error() out = ArrayHolder() cdef int result = self.c_kernel.finish(&self.c_kernel, &out.c_array, &error.c_error) if result != GEOARROW_OK: - error.raise_message("GeoArrowKernel::finish()", result) + error.raise_message(f"GeoArrowKernel<{self.cname_str}>::finish()", result) return out diff --git a/python/src/geoarrow/lib.py b/python/src/geoarrow/lib.py index 85dc592f..dfad2133 100644 --- a/python/src/geoarrow/lib.py +++ b/python/src/geoarrow/lib.py @@ -1,5 +1,13 @@ from . import _lib -from ._lib import CKernel, SchemaHolder, ArrayHolder, CVectorType, CArrayView, CBuilder +from ._lib import ( + CKernel, + SchemaHolder, + ArrayHolder, + CVectorType, + CArrayView, + CBuilder, + GeoArrowCException, +) class GeometryType: diff --git a/python/src/geoarrow/pyarrow/_kernel.py b/python/src/geoarrow/pyarrow/_kernel.py index 950a0026..c011fe50 100644 --- a/python/src/geoarrow/pyarrow/_kernel.py +++ b/python/src/geoarrow/pyarrow/_kernel.py @@ -88,7 +88,7 @@ def format_wkt(type_in, significant_digits=None, max_element_size_bytes=None): @staticmethod def as_geoarrow(type_in, type_id): - return Kernel("as_geoarrow", type_in, type=type_id) + return Kernel("as_geoarrow", type_in, type=int(type_id)) @staticmethod def unique_geometry_types_agg(type_in): diff --git a/python/tests/test_geoarrow_lib.py b/python/tests/test_geoarrow_lib.py index d1f58844..533ab195 100644 --- a/python/tests/test_geoarrow_lib.py +++ b/python/tests/test_geoarrow_lib.py @@ -125,7 +125,7 @@ def test_kernel_void_agg(): def test_kernel_init_error(): - with pytest.raises(ValueError): + with pytest.raises(lib.GeoArrowCException): lib.CKernel(b"not_a_kernel") diff --git a/python/tests/test_geoarrow_pandas.py b/python/tests/test_geoarrow_pandas.py index afa3e695..ec3579ad 100644 --- a/python/tests/test_geoarrow_pandas.py +++ b/python/tests/test_geoarrow_pandas.py @@ -4,6 +4,7 @@ import pyarrow as pa import geoarrow.pandas as gapd import geoarrow.pyarrow as ga +import geoarrow.lib as lib import numpy as np @@ -156,7 +157,7 @@ def test_pyarrow_integration(): def test_accessor_parse_all(): series = pd.Series(["POINT (0 1)"]) assert series.geoarrow.parse_all() is series - with pytest.raises(ValueError): + with pytest.raises(lib.GeoArrowCException): pd.Series(["NOT WKT"]).geoarrow.parse_all() diff --git a/python/tests/test_geoarrow_pyarrow.py b/python/tests/test_geoarrow_pyarrow.py index d13bfca0..f11a9f86 100644 --- a/python/tests/test_geoarrow_pyarrow.py +++ b/python/tests/test_geoarrow_pyarrow.py @@ -297,8 +297,8 @@ def test_kernel_visit_void(): ["POINT (30 10)", "NOT VALID WKT AT ALL"], ga.wkt(), validate=False ) kernel = ga.Kernel.visit_void_agg(array.type) - with pytest.raises(ValueError): - kernel.push(array) is None + with pytest.raises(lib.GeoArrowCException): + kernel.push(array) out = kernel.finish() assert out.type == pa.null() assert len(out) == 1 diff --git a/python/tests/test_geoarrow_pyarrow_compute.py b/python/tests/test_geoarrow_pyarrow_compute.py index d1ff80b1..b6b22320 100644 --- a/python/tests/test_geoarrow_pyarrow_compute.py +++ b/python/tests/test_geoarrow_pyarrow_compute.py @@ -6,6 +6,7 @@ import geoarrow.pyarrow as ga import geoarrow.pyarrow._kernel as _kernel import geoarrow.pyarrow._compute as _compute +import geoarrow.lib as lib def test_set_max_workers(): @@ -60,7 +61,7 @@ def test_push_all(): def test_parse_all(): assert _compute.parse_all(["POINT (0 1)"]) is None - with pytest.raises(ValueError): + with pytest.raises(lib.GeoArrowCException): _compute.parse_all(["not valid wkt"]) geoarrow_array = ga.array(["POINT (0 1)"]).as_geoarrow(ga.point()) From 8efc2e2f40f63f2d11979d52ee44e26b5daa3de0 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 30 Aug 2023 23:57:58 -0300 Subject: [PATCH 4/5] format setup.py with black --- python/setup.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/python/setup.py b/python/setup.py index 37b6eabe..82f35b2b 100644 --- a/python/setup.py +++ b/python/setup.py @@ -27,36 +27,40 @@ # checkout or copy from ../dist if the caller doesn't have cmake available. # Note that bootstrap.py won't exist if building from sdist. this_dir = os.path.dirname(__file__) -bootstrap_py = os.path.join(this_dir, 'bootstrap.py') +bootstrap_py = os.path.join(this_dir, "bootstrap.py") if os.path.exists(bootstrap_py): subprocess.run([sys.executable, bootstrap_py]) -vendor_dir = os.path.join(this_dir, 'src', 'geoarrow', 'geoarrow') +vendor_dir = os.path.join(this_dir, "src", "geoarrow", "geoarrow") vendored_files = os.listdir(vendor_dir) -sources = [f'src/geoarrow/geoarrow/{f}' for f in vendored_files if f.endswith('.c')] +sources = [f"src/geoarrow/geoarrow/{f}" for f in vendored_files if f.endswith(".c")] + # Workaround because setuptools has no easy way to mix C and C++ sources # if extra flags are required (e.g., -std=c++11 like we need here). class build_ext_subclass(build_ext): def build_extensions(self): original__compile = self.compiler._compile + def new__compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - if src.endswith('.c'): + if src.endswith(".c"): extra_postargs = [s for s in extra_postargs if s != "-std=c++11"] return original__compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + self.compiler._compile = new__compile try: build_ext.build_extensions(self) finally: del self.compiler._compile + # Set some extra flags for compiling with coverage support -if os.getenv('GEOARROW_COVERAGE') == "1": - extra_compile_args = ['--coverage'] - extra_link_args = ['--coverage'] +if os.getenv("GEOARROW_COVERAGE") == "1": + extra_compile_args = ["--coverage"] + extra_link_args = ["--coverage"] extra_define_macros = [("CYTHON_TRACE", 1)] -elif os.getenv('GEOARROW_DEBUG_EXTENSION') == "1": - extra_compile_args = ['-g', '-O0'] +elif os.getenv("GEOARROW_DEBUG_EXTENSION") == "1": + extra_compile_args = ["-g", "-O0"] extra_link_args = [] extra_define_macros = [] else: @@ -67,14 +71,14 @@ def new__compile(obj, src, ext, cc_args, extra_postargs, pp_opts): setup( ext_modules=[ Extension( - name='geoarrow._lib', - include_dirs=['src/geoarrow/geoarrow', 'src/geoarrow/geoarrow_python'], - language='c++', - sources=['src/geoarrow/_lib.pyx'] + sources, - extra_compile_args = ['-std=c++11'] + extra_compile_args, - extra_link_args = [] + extra_link_args, - define_macros= [] + extra_define_macros + name="geoarrow._lib", + include_dirs=["src/geoarrow/geoarrow", "src/geoarrow/geoarrow_python"], + language="c++", + sources=["src/geoarrow/_lib.pyx"] + sources, + extra_compile_args=["-std=c++11"] + extra_compile_args, + extra_link_args=[] + extra_link_args, + define_macros=[] + extra_define_macros, ) ], - cmdclass = {"build_ext": build_ext_subclass} + cmdclass={"build_ext": build_ext_subclass}, ) From 365fb1942f3695bb80299851d478770170447cb8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 31 Aug 2023 00:05:02 -0300 Subject: [PATCH 5/5] fix doctests --- python/src/geoarrow/pyarrow/_compute.py | 4 ++-- python/src/geoarrow/pyarrow/_type.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/src/geoarrow/pyarrow/_compute.py b/python/src/geoarrow/pyarrow/_compute.py index 32382e93..b7141067 100644 --- a/python/src/geoarrow/pyarrow/_compute.py +++ b/python/src/geoarrow/pyarrow/_compute.py @@ -80,7 +80,7 @@ def parse_all(obj): >>> ga.parse_all(["POINT (0 1"]) Traceback (most recent call last): ... - ValueError: b"Expected ')' at byte 10" + geoarrow._lib.GeoArrowCException: GeoArrowKernel::push_batch() failed (22): Expected ')' at byte 10 """ obj = obj_as_array_or_chunked(obj) @@ -546,7 +546,7 @@ def with_geometry_type(obj, geometry_type): >>> ga.with_geometry_type(["MULTIPOINT (0 1, 2 3)"], ga.GeometryType.POINT) Traceback (most recent call last): ... - ValueError: b"Can't convert feature with >1 coordinate to POINT" + geoarrow._lib.GeoArrowCException: GeoArrowKernel::push_batch() failed (22): Can't convert feature with >1 coordinate to POINT """ obj = as_geoarrow(obj) if geometry_type == obj.type.geometry_type: diff --git a/python/src/geoarrow/pyarrow/_type.py b/python/src/geoarrow/pyarrow/_type.py index da7a2a59..145d1d88 100644 --- a/python/src/geoarrow/pyarrow/_type.py +++ b/python/src/geoarrow/pyarrow/_type.py @@ -118,7 +118,7 @@ def id(self): >>> import geoarrow.pyarrow as ga >>> ga.wkb().id - 100001 + """ return self._type.id @@ -156,7 +156,7 @@ def coord_type(self): >>> ga.linestring().coord_type == ga.CoordType.SEPARATE True >>> ga.linestring().with_coord_type(ga.CoordType.INTERLEAVED).coord_type - 2 + """ return self._type.coord_type @@ -168,7 +168,7 @@ def edge_type(self): >>> ga.linestring().edge_type == ga.EdgeType.PLANAR True >>> ga.linestring().with_edge_type(ga.EdgeType.SPHERICAL).edge_type - 1 + """ return self._type.edge_type @@ -180,7 +180,7 @@ def crs_type(self): >>> ga.point().crs_type == ga.CrsType.NONE True >>> ga.point().with_crs("EPSG:1234").crs_type - 1 + """ return self._type.crs_type