Skip to content

Commit

Permalink
Prototype for binary (hashtype) integration.
Browse files Browse the repository at this point in the history
  • Loading branch information
ooke committed Nov 6, 2023
1 parent 209594c commit da0c1ae
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 9 deletions.
32 changes: 25 additions & 7 deletions exaudfclient/base/exaudflib/impl/swig/swig_table_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ class SWIGTableIterator_Impl : public AbstractSWIGTableIterator, SWIGGeneralIter

uint64_t m_rows_received;
// Per-row counters, one per storage category. While the column types are
// scanned, each column takes the current value of its category's counter as
// its offset and then increments that counter. NUMERIC, TIMESTAMP, DATE and
// STRING columns share `strings`; HASHTYPE columns use `binaries`.
struct values_per_row_t {
    uint64_t strings, bools, int32s, int64s, doubles, binaries;
    // Every counter, including `binaries`, starts at zero so that a freshly
    // constructed object is usable without a prior reset().
    values_per_row_t(): strings(0), bools(0), int32s(0), int64s(0), doubles(0), binaries(0) {}
    // Sets all counters back to zero.
    void reset() { strings = bools = int32s = int64s = doubles = binaries = 0; }
} m_values_per_row;
uint64_t m_column_count;
std::vector<uint64_t> m_col_offsets;
Expand Down Expand Up @@ -57,11 +57,12 @@ class SWIGTableIterator_Impl : public AbstractSWIGTableIterator, SWIGGeneralIter
case INT32: m_col_offsets[current_column] = m_values_per_row.int32s++; break;
case INT64: m_col_offsets[current_column] = m_values_per_row.int64s++; break;
case NUMERIC:
case TIMESTAMP:
case DATE:
case STRING: m_col_offsets[current_column] = m_values_per_row.strings++; break;
case BOOLEAN: m_col_offsets[current_column] = m_values_per_row.bools++; break;
default: m_exch->setException("F-UDF-CL-LIB-1058: Unknown data type found, got "+it->type); return;
case TIMESTAMP:
case DATE:
case STRING: m_col_offsets[current_column] = m_values_per_row.strings++; break;
case HASHTYPE: m_col_offsets[current_column] = m_values_per_row.binaries++; break;
case BOOLEAN: m_col_offsets[current_column] = m_values_per_row.bools++; break;
default: m_exch->setException("F-UDF-CL-LIB-1058: Unknown data type found, got "+it->type); return;
}
}
}
Expand Down Expand Up @@ -242,6 +243,23 @@ class SWIGTableIterator_Impl : public AbstractSWIGTableIterator, SWIGGeneralIter
if (length != NULL) *length = s.length();
return s.c_str();
}
/**
 * Returns the bytes of the binary (HASHTYPE) value for input column `col`.
 *
 * The value may contain embedded '\0' bytes, so callers should rely on
 * `length` rather than on NUL termination.
 *
 * @param col    Index of the input column; must be smaller than m_types.size()
 *               and refer to a HASHTYPE column.
 * @param length Optional out-parameter. If non-NULL it receives the number of
 *               bytes of the returned value, or 0 whenever "" is returned.
 * @return Pointer to the value's bytes. Returns "" (with m_was_null set and an
 *         exception registered on m_exch) if the column does not exist or has
 *         the wrong type, and "" if m_was_null is set after check_value().
 *         The pointer refers to storage inside m_next_response; presumably it
 *         is only valid until that message changes (confirm against callers).
 */
inline const char *getBinary(unsigned int col, size_t *length = NULL) {
    // Define *length up front so every early return below reports an empty value.
    if (length != NULL) *length = 0;
    if (col >= m_types.size()) {
        m_exch->setException("E-UDF-CL-LIB-1068: Input column "+std::to_string(col)+" does not exist");
        m_was_null = true;
        return "";
    }
    if (m_types[col].type != HASHTYPE) {
        m_exch->setException("E-UDF-CL-LIB-1069: Wrong input column type, expected BINARY, got "+
                             exaudflib::msg_conversion::convert_type_to_string(m_types[col].type));
        m_was_null = true;
        return "";
    }
    ssize_t index = check_value(col, m_next_response.next().table().data_binary_size(), "binary");
    if (m_was_null) return "";
    // Look the value up only after check_value(), as the original code did.
    const auto &value = m_next_response.next().table().data_binary(index);
    // Report the byte length of this value, not the number of entries in the
    // repeated data_binary field.
    if (length != NULL) *length = value.size();
    return value.data();
}
inline int32_t getInt32(unsigned int col) {
if (col >= m_types.size()) {
m_exch->setException("E-UDF-CL-LIB-1070: Input column "+std::to_string(col)+" does not exist");
Expand Down
3 changes: 2 additions & 1 deletion exaudfclient/base/exaudflib/swig/swig_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ enum SWIGVM_datatype_e {
BOOLEAN = 8,
INTERVALYM = 9,
INTERVALDS = 10,
GEOMETRY = 11
GEOMETRY = 11,
HASHTYPE = 12
};


Expand Down
3 changes: 3 additions & 0 deletions exaudfclient/base/exaudflib/zmqcontainer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ message exascript_table_data {
// Storage for following types: NUMERIC, TIMESTAMP, DATE and STRING
repeated string data_string = 2;

// Storage for binary data types (currently only HASHTYPE)
repeated bytes data_binary = 10;

repeated bool data_nulls = 3 [packed = true];
repeated bool data_bool = 4 [packed = true];
repeated int32 data_int32 = 5 [packed = true];
Expand Down
2 changes: 2 additions & 0 deletions exaudfclient/base/python/exascript_python_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def convert_timestamp(x):
data[colname] = rd(inp.getDouble, inp.wasNull, col)
elif self.__incoltypes[col] == STRING:
data[colname] = rd(inp.getString, inp.wasNull, col, lambda x: decodeUTF8(x))
elif self.__incoltypes[col] == HASHTYPE:
data[colname] = rd(inp.getBinary, inp.wasNull, col)
elif self.__incoltypes[col] == INT32:
data[colname] = rd(inp.getInt32, inp.wasNull, col)
elif self.__incoltypes[col] == INT64:
Expand Down
6 changes: 5 additions & 1 deletion exaudfclient/base/python/python3/python_ext_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ std::map<int, std::string> emitTypeMap {
{SWIGVMContainers::BOOLEAN, "BOOLEAN"},
{SWIGVMContainers::INTERVALYM, "INTERVALYM"},
{SWIGVMContainers::INTERVALDS, "INTERVALDS"},
{SWIGVMContainers::GEOMETRY, "GEOMETRY"}
{SWIGVMContainers::GEOMETRY, "GEOMETRY"},
{SWIGVMContainers::HASHTYPE, "HASHTYPE"}
};


Expand Down Expand Up @@ -296,6 +297,9 @@ PyObject *getColumnData(std::vector<ColumnInfo>& colInfo, PyObject *tableIter, l
case SWIGVMContainers::STRING:
methodName = "getString";
break;
case SWIGVMContainers::HASHTYPE:
methodName = "getBinary";
break;
case SWIGVMContainers::BOOLEAN:
methodName = "getBoolean";
break;
Expand Down

0 comments on commit da0c1ae

Please sign in to comment.