Skip to content

Commit

Permalink
improve safety of wkt writer
Browse files Browse the repository at this point in the history
  • Loading branch information
paleolimbot committed Oct 31, 2024
1 parent f7d858c commit 806e401
Showing 1 changed file with 31 additions and 16 deletions.
47 changes: 31 additions & 16 deletions src/geoarrow/wkt_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ static inline int WKTWriterWrite(struct WKTWriterPrivate* private, const char* v

static inline void WKTWriterWriteDoubleUnsafe(struct WKTWriterPrivate* private,
double value) {
// Always ensure that we have at least 40 writable bytes remaining before calling
// GeoArrowPrintDouble()
NANOARROW_DCHECK((private->values.capacity_bytes - private->values.size_bytes) >= 40);
private->values.size_bytes +=
GeoArrowPrintDouble(value, private->precision,
((char*)private->values.data) + private->values.size_bytes);
Expand Down Expand Up @@ -143,20 +146,26 @@ static int coords_wkt(struct GeoArrowVisitor* v, const struct GeoArrowCoordView*
struct WKTWriterPrivate* private = (struct WKTWriterPrivate*)v->private_data;
NANOARROW_RETURN_NOT_OK(WKTWriterCheckLevel(private));

int64_t max_chars_needed = (n_coords * 2) + // space + comma after coordinate
(n_coords * (n_dims - 1)) + // spaces between ordinates
((private->precision + 1 + 5) * n_coords *
n_dims); // significant digits + decimal + exponent
if (private->max_element_size_bytes >= 0 &&
max_chars_needed > private->max_element_size_bytes) {
// Because we write a coordinate before actually checking
max_chars_needed = private->max_element_size_bytes + 1024;
}

NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&private->values, max_chars_needed));
int64_t max_chars_per_coord_theoretical =
// space + comma after coordinate
(n_coords * 2) +
// spaces between ordinates
(n_coords * (n_dims - 1)) +
// GeoArrowPrintDouble might require up to 40 accessible bytes per call
(40 * n_dims);

// Use a heuristic to estimate the number of characters we are about to write
// to avoid more than one allocation for this call. This is normally substantially
// less than the theoretical amount.
int64_t max_chars_estimated = (n_coords * 2) + // space + comma after coordinate
(n_coords * (n_dims - 1)) + // spaces between ordinates
// precision + decimal + estimate of normal
// digits to the left of the decimal
((private->precision + 1 + 8) * n_coords * n_dims);
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(&private->values, max_chars_estimated));

// Write the first coordinate, possibly with a leading comma if there was
// a previous call to coords, or the opening ( if it wasn't. Special case
// a previous call to coords, or the opening "(" if there wasn't. Special case
// for the flat multipoint output MULTIPOINT (1 2, 3 4, ...) which doesn't
// have extra () for inner POINTs
if (private->i[private->level] != 0) {
Expand All @@ -167,25 +176,31 @@ static int coords_wkt(struct GeoArrowVisitor* v, const struct GeoArrowCoordView*
ArrowBufferAppendUnsafe(&private->values, "(", 1);
}

WKTWriterWriteDoubleUnsafe(private, coords->values[0][0]);
// Actually write the first coordinate (no leading comma)
NANOARROW_RETURN_NOT_OK(
ArrowBufferReserve(&private->values, max_chars_per_coord_theoretical));
WKTWriterWriteDoubleUnsafe(private, GEOARROW_COORD_VIEW_VALUE(coords, 0, 0));
for (int32_t j = 1; j < n_dims; j++) {
ArrowBufferAppendUnsafe(&private->values, " ", 1);
WKTWriterWriteDoubleUnsafe(private, coords->values[j][0]);
WKTWriterWriteDoubleUnsafe(private, GEOARROW_COORD_VIEW_VALUE(coords, 0, j));
}

// Write the remaining coordinates (which all have leading commas)
for (int64_t i = 1; i < n_coords; i++) {
// Check if we've hit our max number of bytes for this feature
if (private->max_element_size_bytes >= 0 &&
(private->values.size_bytes - private->values_feat_start) >=
private->max_element_size_bytes) {
return EAGAIN;
}

NANOARROW_RETURN_NOT_OK(
ArrowBufferReserve(&private->values, max_chars_per_coord_theoretical));
ArrowBufferAppendUnsafe(&private->values, ", ", 2);
WKTWriterWriteDoubleUnsafe(private, coords->values[0][i * coords->coords_stride]);
WKTWriterWriteDoubleUnsafe(private, GEOARROW_COORD_VIEW_VALUE(coords, i, 0));
for (int32_t j = 1; j < n_dims; j++) {
ArrowBufferAppendUnsafe(&private->values, " ", 1);
WKTWriterWriteDoubleUnsafe(private, coords->values[j][i * coords->coords_stride]);
WKTWriterWriteDoubleUnsafe(private, GEOARROW_COORD_VIEW_VALUE(coords, i, j));
}
}

Expand Down

0 comments on commit 806e401

Please sign in to comment.