From d44d862bd07f05328f5fcfd68a8f40357aa072fa Mon Sep 17 00:00:00 2001 From: Paul Nienaber Date: Mon, 8 Apr 2024 15:18:50 -0700 Subject: [PATCH 1/3] Fix for DX-86729 & DX-90059 DX-86729 (Windows-only issue, with other related bugs including DX-90059) fix applied to all platforms for behaviour consistency +Add DATE and TIMESTAMP accessor tests +Handle inconsistent implementation-defined negative mod +Fixed negative epoch time division; fixed wrong tests accordingly +Fixed (pending CI tests) handling of values near INT64_MIN +Added test for case near INT64_MIN nanoseconds. Change-Id: I16e4fe9befbff2327608bca92216a670492c8e44 --- .../accessors/date_array_accessor_test.cc | 50 +++++-- .../accessors/timestamp_array_accessor.cc | 54 ++++--- .../timestamp_array_accessor_test.cc | 141 +++++++++++------- odbcabstraction/calendar_utils.cc | 10 +- 4 files changed, 157 insertions(+), 98 deletions(-) diff --git a/flight_sql/accessors/date_array_accessor_test.cc b/flight_sql/accessors/date_array_accessor_test.cc index 13b9cada..e5543784 100644 --- a/flight_sql/accessors/date_array_accessor_test.cc +++ b/flight_sql/accessors/date_array_accessor_test.cc @@ -17,7 +17,15 @@ using namespace arrow; using namespace odbcabstraction; TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) { - std::vector values = {7589, 12320, 18980, 19095}; + std::vector values = {7589, 12320, 18980, 19095, -1, 0}; + std::vector expected = { + {1990, 10, 12}, + {2003, 9, 25}, + {2021, 12, 19}, + {2022, 4, 13}, + {1969, 12, 31}, + {1970, 1, 1}, + }; std::shared_ptr array; ArrayFromVector(values, &array); @@ -25,7 +33,7 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) { DateArrayFlightSqlAccessor accessor( dynamic_cast *>(array.get())); - std::vector buffer(values.size()); + std::vector buffer(values.size()); std::vector strlen_buffer(values.size()); ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data()); @@ -37,19 +45,31 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]); - tm date{}; - int64_t converted_time = values[i] * 86400; - GetTimeForSecondsSinceEpoch(date, converted_time); - ASSERT_EQ((date.tm_year + 1900), buffer[i].year); - ASSERT_EQ(date.tm_mon + 1, buffer[i].month); - ASSERT_EQ(date.tm_mday, buffer[i].day); + ASSERT_EQ(expected[i].year, buffer[i].year); + ASSERT_EQ(expected[i].month, buffer[i].month); + ASSERT_EQ(expected[i].day, buffer[i].day); } } TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) { - std::vector values = {86400000, 172800000, 259200000, 1649793238110, - 345600000, 432000000, 518400000}; + std::vector values = {86400000, 172800000, 259200000, 1649793238110, 0, + 345600000, 432000000, 518400000, -86400000, -17987443200000}; + std::vector expected = { + /* year(16), month(u16), day(u16) */ + {1970, 1, 2}, + {1970, 1, 3}, + {1970, 1, 4}, + {2022, 4, 12}, + {1970, 1, 1}, + {1970, 1, 5}, + {1970, 1, 6}, + {1970, 1, 7}, + {1969, 12, 31}, + // This is the documented lower limit of supported Gregorian dates for some parts of Boost, + // however boost::posix_time may go lower? + {1400, 1, 1}, + }; std::shared_ptr array; ArrayFromVector(values, &array); @@ -57,7 +77,7 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) { DateArrayFlightSqlAccessor accessor( dynamic_cast *>(array.get())); - std::vector buffer(values.size()); + std::vector buffer(values.size()); std::vector strlen_buffer(values.size()); ColumnBinding binding(CDataType_DATE, 0, 0, buffer.data(), 0, strlen_buffer.data()); @@ -71,11 +91,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) { ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]); tm date{}; - int64_t converted_time = values[i] / 1000; - GetTimeForSecondsSinceEpoch(date, converted_time); - ASSERT_EQ((date.tm_year + 1900), buffer[i].year); - ASSERT_EQ(date.tm_mon + 1, buffer[i].month); - ASSERT_EQ(date.tm_mday, buffer[i].day); + ASSERT_EQ(expected[i].year, buffer[i].year); + ASSERT_EQ(expected[i].month, buffer[i].month); + ASSERT_EQ(expected[i].day, buffer[i].day); } } diff --git a/flight_sql/accessors/timestamp_array_accessor.cc b/flight_sql/accessors/timestamp_array_accessor.cc index b74f3889..0d04b2ab 100644 --- a/flight_sql/accessors/timestamp_array_accessor.cc +++ b/flight_sql/accessors/timestamp_array_accessor.cc @@ -7,10 +7,13 @@ #include "timestamp_array_accessor.h" #include "odbcabstraction/calendar_utils.h" +#include +#include + using namespace arrow; namespace { -int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) { +inline int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) { int64_t divisor = 1; switch (unit) { case TimeUnit::SECOND: @@ -32,27 +35,25 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) { return divisor; } -uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) { +uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) { // Convert the given remainder and time unit to nanoseconds // since the fraction field on TIMESTAMP_STRUCT is in nanoseconds. - switch (unit) { - case TimeUnit::SECOND: + if (unit == TimeUnit::SECOND) return 0; - case TimeUnit::MILLI: - // 1000000 nanoseconds = 1 millisecond. - return (units_since_epoch % - driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) * - 1000000; - case TimeUnit::MICRO: - // 1000 nanoseconds = 1 microsecond. - return (units_since_epoch % - driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) * 1000; - case TimeUnit::NANO: - // 1000 nanoseconds = 1 microsecond. - return (units_since_epoch % - driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR); - } - return 0; + + const int64_t divisor = GetConversionToSecondsDivisor(unit); + const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO); + + if (units_since_epoch < 0) + if (units_since_epoch <= (std::numeric_limits::min() + divisor)) + // Prevent trying to derive and add a value larger than INT64_MAX (i.e. the time value at the start of + // the second which is used to shift the value positive before the modulo operation)) in next statement. + units_since_epoch += divisor; + // See below regarding floor division; here we want ceiling division. + // FIXME this goes poorly (trying to use a value > INT64_MAX when units_since_epoch is + // less than the smallest multiple of divisor greater than INT64_MIN. + units_since_epoch += divisor * std::abs((units_since_epoch - (divisor - 1)) / divisor); + return (units_since_epoch % divisor) * (nano_divisor / divisor); } } // namespace @@ -72,11 +73,24 @@ TimestampArrayFlightSqlAccessor::MoveSingleCell_impl( ColumnBinding *binding, int64_t arrow_row, int64_t cell_counter, int64_t &value_offset, bool update_value_offset, odbcabstraction::Diagnostics &diagnostics) { + // Times less than the minimum integer number of seconds that can be represented + // for each time unit will not convert correctly. This is mostly interesting for + // nanoseconds as timestamps in other units are outside of the accepted range of + // Gregorian dates. auto *buffer = static_cast(binding->buffer); int64_t value = this->GetArray()->Value(arrow_row); const auto divisor = GetConversionToSecondsDivisor(UNIT); - const auto converted_result_seconds = value / divisor; + const auto converted_result_seconds = + // We want floor division here; C++ will round towards zero + (value < 0) + // Floor division: Shift all "fractional" (not a multiple of divisor) values so they round towards + // zero (and to the same value) along with the "floor" less than them, then add 1 to get back to + // the floor. Althernative we could shift negatively by (divisor - 1) but this breaks near + // INT64_MIN causing underflow.. + ? ((value + 1) / divisor) - 1 + // Towards zero is already floor + : value / divisor; tm timestamp = {0}; GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds); diff --git a/flight_sql/accessors/timestamp_array_accessor_test.cc b/flight_sql/accessors/timestamp_array_accessor_test.cc index 5486799d..8b0f1911 100644 --- a/flight_sql/accessors/timestamp_array_accessor_test.cc +++ b/flight_sql/accessors/timestamp_array_accessor_test.cc @@ -17,9 +17,27 @@ using namespace arrow; using namespace odbcabstraction; TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) { - std::vector values = {86400370, 172800000, 259200000, 1649793238110LL, - 345600000, 432000000, 518400000}; - + std::vector values = {86400370, 172800000, 259200000, 1649793238110LL, 345600000, + 432000000, 518400000, -86399000, 0, -86399999, -86399001, + 86400001, 86400999}; + std::vector expected = { + /* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */ + {1970, 1, 2, 0, 0, 0, 370000000}, + {1970, 1, 3, 0, 0, 0, 0}, + {1970, 1, 4, 0, 0, 0, 0}, + {2022, 4, 12, 19, 53, 58, 110000000}, + {1970, 1, 5, 0, 0, 0, 0}, + {1970, 1, 6, 0, 0, 0, 0}, + {1970, 1, 7, 0, 0, 0, 0}, + {1969, 12, 31, 0, 0, 1, 0}, + {1970, 1, 1, 0, 0, 0, 0}, + /* Tests both ends of the fraction rounding range to ensure we don't tip the wrong way */ + {1969, 12, 31, 0, 0, 0, 1000000}, + {1969, 12, 31, 0, 0, 0, 999000000}, + {1970, 1, 2, 0, 0, 0, 1000000}, + {1970, 1, 2, 0, 0, 0, 999000000}, + }; + std::shared_ptr timestamp_array; auto timestamp_field = field("timestamp_field", timestamp(TimeUnit::MILLI)); @@ -40,26 +58,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]); - tm date{}; - - auto converted_time = values[i] / MILLI_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(date, converted_time); - - ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); - ASSERT_EQ(buffer[i].month, date.tm_mon + 1); - ASSERT_EQ(buffer[i].day, date.tm_mday); - ASSERT_EQ(buffer[i].hour, date.tm_hour); - ASSERT_EQ(buffer[i].minute, date.tm_min); - ASSERT_EQ(buffer[i].second, date.tm_sec); - - constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000; - ASSERT_EQ(buffer[i].fraction, (values[i] % MILLI_TO_SECONDS_DIVISOR) * NANOSECONDS_PER_MILLI); + ASSERT_EQ(buffer[i].year, expected[i].year); + ASSERT_EQ(buffer[i].month, expected[i].month); + ASSERT_EQ(buffer[i].day, expected[i].day); + ASSERT_EQ(buffer[i].hour, expected[i].hour); + ASSERT_EQ(buffer[i].minute, expected[i].minute); + ASSERT_EQ(buffer[i].second, expected[i].second); + ASSERT_EQ(buffer[i].fraction, expected[i].fraction); } } TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) { std::vector values = {86400, 172800, 259200, 1649793238, - 345600, 432000, 518400}; + 345600, 432000, 518400, -86399, 0}; + std::vector expected = { + /* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */ + {1970, 1, 2, 0, 0, 0, 0}, + {1970, 1, 3, 0, 0, 0, 0}, + {1970, 1, 4, 0, 0, 0, 0}, + {2022, 4, 12, 19, 53, 58, 0}, + {1970, 1, 5, 0, 0, 0, 0}, + {1970, 1, 6, 0, 0, 0, 0}, + {1970, 1, 7, 0, 0, 0, 0}, + {1969, 12, 31, 0, 0, 1, 0}, + {1970, 1, 1, 0, 0, 0, 0}, + }; std::shared_ptr timestamp_array; @@ -81,23 +104,27 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]); - tm date{}; - - auto converted_time = values[i]; - GetTimeForSecondsSinceEpoch(date, converted_time); - ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); - ASSERT_EQ(buffer[i].month, date.tm_mon + 1); - ASSERT_EQ(buffer[i].day, date.tm_mday); - ASSERT_EQ(buffer[i].hour, date.tm_hour); - ASSERT_EQ(buffer[i].minute, date.tm_min); - ASSERT_EQ(buffer[i].second, date.tm_sec); + ASSERT_EQ(buffer[i].year, expected[i].year); + ASSERT_EQ(buffer[i].month, expected[i].month); + ASSERT_EQ(buffer[i].day, expected[i].day); + ASSERT_EQ(buffer[i].hour, expected[i].hour); + ASSERT_EQ(buffer[i].minute, expected[i].minute); + ASSERT_EQ(buffer[i].second, expected[i].second); ASSERT_EQ(buffer[i].fraction, 0); } } TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) { - std::vector values = {86400000000, 1649793238000000}; + std::vector values = {0, 86400000000, 1649793238000000, -86399999999, -86399000001}; + std::vector expected = { + /* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */ + {1970, 1, 1, 0, 0, 0, 0}, + {1970, 1, 2, 0, 0, 0, 0}, + {2022, 4, 12, 19, 53, 58, 0}, + {1969, 12, 31, 0, 0, 0, 1000}, + {1969, 12, 31, 0, 0, 0, 999999000}, + }; std::shared_ptr timestamp_array; @@ -120,24 +147,31 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]); - tm date{}; - - auto converted_time = values[i] / MICRO_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(date, converted_time); - - ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); - ASSERT_EQ(buffer[i].month, date.tm_mon + 1); - ASSERT_EQ(buffer[i].day, date.tm_mday); - ASSERT_EQ(buffer[i].hour, date.tm_hour); - ASSERT_EQ(buffer[i].minute, date.tm_min); - ASSERT_EQ(buffer[i].second, date.tm_sec); - constexpr uint32_t MICROS_PER_NANO = 1000; - ASSERT_EQ(buffer[i].fraction, (values[i] % MICRO_TO_SECONDS_DIVISOR) * MICROS_PER_NANO); + ASSERT_EQ(buffer[i].year, expected[i].year); + ASSERT_EQ(buffer[i].month, expected[i].month); + ASSERT_EQ(buffer[i].day, expected[i].day); + ASSERT_EQ(buffer[i].hour, expected[i].hour); + ASSERT_EQ(buffer[i].minute, expected[i].minute); + ASSERT_EQ(buffer[i].second, expected[i].second); + ASSERT_EQ(buffer[i].fraction, expected[i].fraction); } } TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) { - std::vector values = {86400000010000, 1649793238000000000}; + std::vector values = {86400000010000, 1649793238000000000, -86399999999999, -86399000000001, + 86400000000001, 86400999999999, 0, -9223372036000000001}; + std::vector expected = { + /* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), fraction(u32) */ + {1970, 1, 2, 0, 0, 0, 10000}, + {2022, 4, 12, 19, 53, 58, 0}, + {1969, 12, 31, 0, 0, 0, 1}, + {1969, 12, 31, 0, 0, 0, 999999999}, + {1970, 1, 2, 0, 0, 0, 1}, + {1970, 1, 2, 0, 0, 0, 999999999}, + {1970, 1, 1, 0, 0, 0, 0}, + /* Test within range where floor (seconds) value is below INT64_MIN in nanoseconds */ + {1677, 9, 21, 0, 12, 43, 999999999}, + }; std::shared_ptr timestamp_array; @@ -159,19 +193,16 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]); - tm date{}; - - auto converted_time = values[i] / NANO_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(date, converted_time); - - ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); - ASSERT_EQ(buffer[i].month, date.tm_mon + 1); - ASSERT_EQ(buffer[i].day, date.tm_mday); - ASSERT_EQ(buffer[i].hour, date.tm_hour); - ASSERT_EQ(buffer[i].minute, date.tm_min); - ASSERT_EQ(buffer[i].second, date.tm_sec); - ASSERT_EQ(buffer[i].fraction, (values[i] % NANO_TO_SECONDS_DIVISOR)); + + ASSERT_EQ(buffer[i].year, expected[i].year); + ASSERT_EQ(buffer[i].month, expected[i].month); + ASSERT_EQ(buffer[i].day, expected[i].day); + ASSERT_EQ(buffer[i].hour, expected[i].hour); + ASSERT_EQ(buffer[i].minute, expected[i].minute); + ASSERT_EQ(buffer[i].second, expected[i].second); + ASSERT_EQ(buffer[i].fraction, expected[i].fraction); } } + } // namespace flight_sql } // namespace driver diff --git a/odbcabstraction/calendar_utils.cc b/odbcabstraction/calendar_utils.cc index 5de95860..d8c1606c 100644 --- a/odbcabstraction/calendar_utils.cc +++ b/odbcabstraction/calendar_utils.cc @@ -6,6 +6,7 @@ #include "odbcabstraction/calendar_utils.h" +#include #include #include @@ -29,12 +30,7 @@ int64_t GetTodayTimeFromEpoch() { } void GetTimeForSecondsSinceEpoch(tm& date, int64_t value) { - #if defined(_WIN32) - gmtime_s(&date, &value); - #else - time_t time_value = static_cast(value); - gmtime_r(&time_value, &date); - #endif - } + date = boost::posix_time::to_tm(boost::posix_time::from_time_t(value)); +} } // namespace odbcabstraction } // namespace driver From 02ee44986db3dcfc800a977f9385d74232d97cc4 Mon Sep 17 00:00:00 2001 From: Paul Nienaber Date: Wed, 17 Apr 2024 17:37:51 -0700 Subject: [PATCH 2/3] Fix DX-90153 CMake ApacheArrow configure error CMake no longer supports source subdirectory full path as a commandline argument and this must now be added to the ExternalProject_Add as SOURCE_SUBDIR (just the subpath rather than the full path). Change-Id: I8b711b9dc01331b025410c8e2bbc6ffdd202cce2 --- flight_sql/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flight_sql/CMakeLists.txt b/flight_sql/CMakeLists.txt index 44887430..a3f2660c 100644 --- a/flight_sql/CMakeLists.txt +++ b/flight_sql/CMakeLists.txt @@ -70,7 +70,6 @@ if (MSVC) -DCMAKE_DEPENDS_USE_COMPILER=FALSE -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp ) elseif(APPLE) set(ARROW_CMAKE_ARGS @@ -90,7 +89,6 @@ elseif(APPLE) -DCMAKE_DEPENDS_USE_COMPILER=FALSE -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp ) if (DEFINED CMAKE_TOOLCHAIN_FILE) list(APPEND ARROW_CMAKE_ARGS -DARROW_DEPENDENCY_SOURCE=VCPKG) @@ -109,7 +107,6 @@ else() -DCMAKE_DEPENDS_USE_COMPILER=FALSE -DOPENSSL_INCLUDE_DIR=${OPENSSL_INCLUDE_DIR} -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install - ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow/cpp ) endif() @@ -120,6 +117,7 @@ message("Using Arrow from ${ARROW_GIT_REPOSITORY} on tag ${ARROW_GIT_TAG}") ExternalProject_Add(ApacheArrow GIT_REPOSITORY ${ARROW_GIT_REPOSITORY} GIT_TAG ${ARROW_GIT_TAG} + SOURCE_SUBDIR "cpp" CMAKE_ARGS ${ARROW_CMAKE_ARGS}) include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/ApacheArrow-prefix/src/ApacheArrow-install/include) From 3e428b25aea4aabd93f64e135751792c420667ef Mon Sep 17 00:00:00 2001 From: Paul Nienaber Date: Wed, 10 Jul 2024 15:48:15 -0700 Subject: [PATCH 3/3] Update builtin-baseline from release source branch Release job was building from https://github.com/stevelorddremio/flightsql-odbc/tree/DX-59038, updating to this per work in DX-90582 Change-Id: I0e040fce63329244865e45f6a58a787fed4bee59 --- vcpkg.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcpkg.json b/vcpkg.json index 519d6441..3c5bd0b6 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -27,5 +27,5 @@ "zstd", "rapidjson" ], - "builtin-baseline": "4e485c34f5e056327ef00c14e2e3620bc50de098" + "builtin-baseline": "94ce0dab56f4d8ba6bd631ba59ed682b02d45c46" }