From da0c1ae8b27a5903cde235f38dad5a00a8ff969b Mon Sep 17 00:00:00 2001 From: Oleksandr Kozachuk Date: Mon, 6 Nov 2023 16:09:09 +0000 Subject: [PATCH] Prototype for binary (hashtype) integration. --- .../exaudflib/impl/swig/swig_table_iterator.h | 34 +++++++++++++++---- .../base/exaudflib/swig/swig_common.h | 3 +- .../base/exaudflib/zmqcontainer.proto | 3 ++ .../base/python/exascript_python_wrap.py | 2 ++ .../python/python3/python_ext_dataframe.cc | 6 +++- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/exaudfclient/base/exaudflib/impl/swig/swig_table_iterator.h b/exaudfclient/base/exaudflib/impl/swig/swig_table_iterator.h index bf5f014ba..e7b34ed1c 100644 --- a/exaudfclient/base/exaudflib/impl/swig/swig_table_iterator.h +++ b/exaudfclient/base/exaudflib/impl/swig/swig_table_iterator.h @@ -20,9 +20,9 @@ class SWIGTableIterator_Impl : public AbstractSWIGTableIterator, SWIGGeneralIter uint64_t m_rows_received; struct values_per_row_t { - uint64_t strings, bools, int32s, int64s, doubles; + uint64_t strings, bools, int32s, int64s, doubles, binaries; /* one counter per storage kind; each is bumped once per matching column when the column offsets are assigned */ - values_per_row_t(): strings(0), bools(0), int32s(0), int64s(0), doubles(0) {} + values_per_row_t(): strings(0), bools(0), int32s(0), int64s(0), doubles(0), binaries(0) {} /* zero every counter, binaries included, so a fresh object never holds an indeterminate value */ - void reset() { strings = bools = int32s = int64s = doubles = 0; } + void reset() { strings = bools = int32s = int64s = doubles = binaries = 0; } } m_values_per_row; uint64_t m_column_count; std::vector m_col_offsets; @@ -57,11 +57,12 @@ class SWIGTableIterator_Impl : public AbstractSWIGTableIterator, SWIGGeneralIter case INT32: m_col_offsets[current_column] = m_values_per_row.int32s++; break; case INT64: m_col_offsets[current_column] = m_values_per_row.int64s++; break; case NUMERIC: - case TIMESTAMP: - case DATE: - case STRING: m_col_offsets[current_column] = m_values_per_row.strings++; break; - case BOOLEAN: m_col_offsets[current_column] = m_values_per_row.bools++; break; - default: m_exch->setException("F-UDF-CL-LIB-1058: Unknown data type found, got "+it->type); return; + case TIMESTAMP: + case DATE: + case STRING: 
m_col_offsets[current_column] = m_values_per_row.strings++; break; + case HASHTYPE: m_col_offsets[current_column] = m_values_per_row.binaries++; break; + case BOOLEAN: m_col_offsets[current_column] = m_values_per_row.bools++; break; + default: m_exch->setException("F-UDF-CL-LIB-1058: Unknown data type found, got "+std::to_string(static_cast<int>(it->type))); return; /* format the numeric type id; adding the enum to the literal only shifted the char pointer */ } } } @@ -242,6 +243,23 @@ class SWIGTableIterator_Impl : public AbstractSWIGTableIterator, SWIGGeneralIter if (length != NULL) *length = s.length(); return s.c_str(); } + /* Returns a pointer to the bytes of binary (HASHTYPE) input column col and, when length is non-NULL, stores the byte count of that value there. On an out-of-range column, a column that is not HASHTYPE, or when m_was_null is set after check_value, it returns "" and leaves *length untouched. The value comes from a protobuf bytes field and may contain NUL bytes, so callers should rely on *length rather than strlen. NOTE(review): codes 1068 and 1069 sit directly below the 1070 used by getInt32; confirm they are not already taken by getString. */ inline const char *getBinary(unsigned int col, size_t *length = NULL) { + if (col >= m_types.size()) { + m_exch->setException("E-UDF-CL-LIB-1068: Input column "+std::to_string(col)+" does not exist"); + m_was_null = true; + return ""; + } + if (m_types[col].type != HASHTYPE) { + m_exch->setException("E-UDF-CL-LIB-1069: Wrong input column type, expected BINARY, got "+ + exaudflib::msg_conversion::convert_type_to_string(m_types[col].type)); + m_was_null = true; + return ""; + } + ssize_t index = check_value(col, m_next_response.next().table().data_binary_size(), "binary"); + if (m_was_null) return ""; + if (length != NULL) *length = m_next_response.next().table().data_binary(index).size(); /* byte length of this value, not the number of binary values in the batch */ + return m_next_response.next().table().data_binary(index).data(); + } inline int32_t getInt32(unsigned int col) { if (col >= m_types.size()) { m_exch->setException("E-UDF-CL-LIB-1070: Input column "+std::to_string(col)+" does not exist"); diff --git a/exaudfclient/base/exaudflib/swig/swig_common.h b/exaudfclient/base/exaudflib/swig/swig_common.h index 50f710963..c55df5535 100644 --- a/exaudfclient/base/exaudflib/swig/swig_common.h +++ b/exaudfclient/base/exaudflib/swig/swig_common.h @@ -46,7 +46,8 @@ enum SWIGVM_datatype_e { BOOLEAN = 8, INTERVALYM = 9, INTERVALDS = 10, - GEOMETRY = 11 + GEOMETRY = 11, + HASHTYPE = 12 }; diff --git a/exaudfclient/base/exaudflib/zmqcontainer.proto b/exaudfclient/base/exaudflib/zmqcontainer.proto index 7d7e08514..4f9707639 100644 --- 
a/exaudfclient/base/exaudflib/zmqcontainer.proto +++ b/exaudfclient/base/exaudflib/zmqcontainer.proto @@ -56,6 +56,9 @@ message exascript_table_data { // Storage for following types: NUMERIC, TIMESTAMP, DATE and STRING repeated string data_string = 2; + // Storage for binary data types (only HASHTYPE for now) + repeated bytes data_binary = 10; + repeated bool data_nulls = 3 [packed = true]; repeated bool data_bool = 4 [packed = true]; repeated int32 data_int32 = 5 [packed = true]; diff --git a/exaudfclient/base/python/exascript_python_wrap.py b/exaudfclient/base/python/exascript_python_wrap.py index f2d6b2da1..541c41221 100644 --- a/exaudfclient/base/python/exascript_python_wrap.py +++ b/exaudfclient/base/python/exascript_python_wrap.py @@ -61,6 +61,8 @@ def convert_timestamp(x): data[colname] = rd(inp.getDouble, inp.wasNull, col) elif self.__incoltypes[col] == STRING: data[colname] = rd(inp.getString, inp.wasNull, col, lambda x: decodeUTF8(x)) + elif self.__incoltypes[col] == HASHTYPE: + data[colname] = rd(inp.getBinary, inp.wasNull, col) elif self.__incoltypes[col] == INT32: data[colname] = rd(inp.getInt32, inp.wasNull, col) elif self.__incoltypes[col] == INT64: diff --git a/exaudfclient/base/python/python3/python_ext_dataframe.cc b/exaudfclient/base/python/python3/python_ext_dataframe.cc index 36f0fffdb..7f2ea7040 100644 --- a/exaudfclient/base/python/python3/python_ext_dataframe.cc +++ b/exaudfclient/base/python/python3/python_ext_dataframe.cc @@ -118,7 +118,8 @@ std::map emitTypeMap { {SWIGVMContainers::BOOLEAN, "BOOLEAN"}, {SWIGVMContainers::INTERVALYM, "INTERVALYM"}, {SWIGVMContainers::INTERVALDS, "INTERVALDS"}, - {SWIGVMContainers::GEOMETRY, "GEOMETRY"} + {SWIGVMContainers::GEOMETRY, "GEOMETRY"}, + {SWIGVMContainers::HASHTYPE, "HASHTYPE"} }; @@ -296,6 +297,9 @@ PyObject *getColumnData(std::vector& colInfo, PyObject *tableIter, l case SWIGVMContainers::STRING: methodName = "getString"; break; + case SWIGVMContainers::HASHTYPE: + methodName = "getBinary"; + 
break; case SWIGVMContainers::BOOLEAN: methodName = "getBoolean"; break;