DataSet<T>: Add helpers, estimators, math functions, restructure Data…

…Set, and add tests - Introduced helper utilities for DataSet<T> access and manipulation. - Implemented simple estimators and basic mathematical functions. - Restructured DataSet<T>: - Added explicit Range<T>::min and Range<T>::max. - Removed `signal_error` in favor of `UncertainValue<T>`. - Enhanced UncertainValue<T>: - Added `isfinite` and `log10` functions. - Implemented the `<<` operator for easy formatting. - Added unit tests for DataSetEstimators, DataSetMath, DataSetTestFunctions, and DataSet functionalities. - Updated accessors and utility functions to support new features. Signed-off-by: Ralph J. Steinhagen <[email protected]>
fair-acc · Dec 29, 2024 · 0f7e5a1 · 0f7e5a1
1 parent 0da51c5
commit 0f7e5a1
Show file tree

Hide file tree

Showing 19 changed files with 1,787 additions and 63 deletions.
diff --git a/algorithm/include/gnuradio-4.0/algorithm/dataset/DataSetEstimators.hpp b/algorithm/include/gnuradio-4.0/algorithm/dataset/DataSetEstimators.hpp
diff --git a/algorithm/include/gnuradio-4.0/algorithm/dataset/DataSetHelper.hpp b/algorithm/include/gnuradio-4.0/algorithm/dataset/DataSetHelper.hpp
@@ -0,0 +1,224 @@
+#ifndef DATASETHELPER_HPP
+#define DATASETHELPER_HPP
+
+namespace gr::dataset {
+namespace dim {
+// N.B. explicitly and individually defined indices, rather than enum class
+// since dimensionality is a priori open
+inline static constexpr std::size_t X = 0UZ; /// X-axis index
+inline static constexpr std::size_t Y = 1UZ; /// Y-axis index
+inline static constexpr std::size_t Z = 2UZ; /// Z-axis index
+} // namespace dim
+
+namespace detail {
+inline void checkRangeIndex(std::size_t minIndex, std::size_t maxIndex, std::size_t dataCount, std::source_location location = std::source_location::current()) {
+    if (minIndex > maxIndex || maxIndex > dataCount) {
+        throw gr::exception(fmt::format("{} invalid range: [{},{}), dataCount={}", location, minIndex, maxIndex, dataCount));
+    }
+}
+
+template<typename T, typename U>
+[[nodiscard]] constexpr U linearInterpolate(T x0, T x1, U y0, U y1, U y) noexcept {
+    if (y1 == y0) {
+        return U(x0);
+    }
+    return x0 + (y - y0) * (x1 - x0) / (y1 - y0);
+}
+
+template<typename T>
+[[nodiscard]] constexpr T sign(T positiveFactor, int val) noexcept {
+    return (val >= 0) ? positiveFactor : -positiveFactor;
+}
+
+} // namespace detail
+
+template<typename T, typename TValue = gr::meta::fundamental_base_value_type_t<T>>
+[[nodiscard]] constexpr T getIndexValue(const DataSet<T>& dataSet, std::size_t dimIndex, std::size_t sampleIndex, std::size_t signalIndex = 0UZ) {
+    if (dimIndex == dim::X) {
+        if (dataSet.axisCount() == 0UZ || dataSet.axisValues(0UZ).size() <= sampleIndex) {
+            return std::numeric_limits<TValue>::quiet_NaN();
+        }
+        return static_cast<T>(dataSet.axisValues(dim::X)[sampleIndex]);
+    }
+
+    if (dimIndex == dim::Y) {
+        if (signalIndex >= dataSet.size()) {
+            return std::numeric_limits<TValue>::quiet_NaN();
+        }
+        std::span<const T> vals = dataSet.signalValues(signalIndex);
+        if (vals.size() <= sampleIndex) {
+            return std::numeric_limits<TValue>::quiet_NaN();
+        }
+        return static_cast<T>(vals[sampleIndex]);
+    }
+
+    throw gr::exception(fmt::format("axis dimIndex {} is out of range [0, {}] and", dimIndex, dataSet.axisCount()));
+}
+
+template<typename T>
+T getDistance(const DataSet<T>& dataSet, std::size_t dimIndex, std::size_t indexMin = 0UZ, std::size_t indexMax = 0UZ, std::size_t signalIndex = 0UZ) {
+    if (indexMax == 0UZ) { // renormalise default range
+        indexMax = dataSet.axisValues(dim::X).size() - 1UZ;
+    }
+    detail::checkRangeIndex(indexMin, indexMax, dataSet.axisValues(dim::X).size());
+
+    if (dimIndex == dim::X || dimIndex == dim::Y) {
+        return getIndexValue(dataSet, dimIndex, indexMax, signalIndex) - getIndexValue(dataSet, dimIndex, indexMin, signalIndex);
+    }
+
+    throw gr::exception(fmt::format("axis dimIndex {} is out of range [0, {}] and", dimIndex, dataSet.axisCount()));
+}
+
+/*!
+ * \brief Interpolate in Y dimension at a given X coordinate xValue.
+ *        If out of range, we clamp or return NaN.
+ */
+template<typename T, typename TValue = gr::meta::fundamental_base_value_type_t<T>, std::convertible_to<TValue> U>
+[[nodiscard]] constexpr T getValue(const DataSet<T>& dataSet, std::size_t dimIndex, U xValue, std::size_t signalIndex = 0) {
+    if (dimIndex == dim::X) {
+        return xValue; // if user requests X dimension at xValue, that’s basically identity
+    }
+
+    if (dimIndex == dim::Y) { // linear interpolation in X-axis=0
+        if (dataSet.axisCount() == 0) {
+            return std::numeric_limits<TValue>::quiet_NaN();
+        }
+        const auto& xAxis = dataSet.axisValues(dim::X);
+        const auto  ySpan = dataSet.signalValues(signalIndex);
+
+        if (xAxis.empty() || ySpan.empty()) {
+            return std::numeric_limits<TValue>::quiet_NaN();
+        }
+
+        // clamp
+        if (xValue <= static_cast<T>(xAxis.front())) {
+            return static_cast<T>(ySpan.front());
+        } else if (xValue >= static_cast<T>(xAxis.back())) {
+            return static_cast<T>(ySpan.back());
+        }
+
+        // binary-search for interval
+        auto it = std::lower_bound(xAxis.begin(), xAxis.end(), T(xValue));
+        if (it == xAxis.end()) {
+            return static_cast<T>(ySpan.back());
+        }
+        auto idxHigh = std::distance(xAxis.begin(), it);
+        if (idxHigh == 0) {
+            return static_cast<T>(ySpan.front());
+        }
+        std::size_t iHigh = static_cast<std::size_t>(idxHigh);
+        std::size_t iLow  = iHigh - 1;
+
+        T xLow  = static_cast<T>(xAxis[iLow]);
+        T xHigh = static_cast<T>(xAxis[iHigh]);
+        T yLow  = static_cast<T>(ySpan[iLow]);
+        T yHigh = static_cast<T>(ySpan[iHigh]);
+
+        // linear interpolation
+        T dx = xHigh - xLow;
+        if (std::abs(gr::value(dx)) == T(0)) {
+            return yLow;
+        }
+        const T t = (xValue - xLow) / dx;
+        return yLow + t * (yHigh - yLow);
+    }
+
+    throw gr::exception(fmt::format("axis dimIndex {} is out of range [0, {}] and", dimIndex, dataSet.axisCount()));
+}
+
+template<typename T>
+std::vector<T> getSubArrayCopy(const DataSet<T>& ds, std::size_t indexMin, std::size_t indexMax, std::size_t signalIndex = 0) {
+    if (indexMax <= indexMin) {
+        return {};
+    }
+    detail::checkRangeIndex(indexMin, indexMax, ds.axisValues(0).size());
+    std::span<const T> values = ds.signalValues(signalIndex);
+    return std::vector<T>(values.begin() + static_cast<std::ptrdiff_t>(indexMin), values.begin() + static_cast<std::ptrdiff_t>(indexMax));
+}
+
+template<typename T, typename TValue = gr::meta::fundamental_base_value_type_t<T>>
+[[nodiscard]] constexpr T tenLog10(T x) noexcept {
+    if (std::abs(gr::value(x)) <= T(0)) {
+        return -std::numeric_limits<TValue>::infinity();
+    }
+    return T(10) * gr::math::log10(x); // 10 * log10(x)
+}
+
+template<typename T, typename TValue = gr::meta::fundamental_base_value_type_t<T>>
+[[nodiscard]] constexpr T decibel(T x) noexcept {
+    if (std::abs(gr::value(x)) <= T(0)) {
+        return -std::numeric_limits<TValue>::infinity();
+    }
+    return T(20) * gr::math::log10(x); // 20 * log10(x)
+}
+
+template<bool ThrowOnFailure = true, typename T>
+[[maybe_unused]] constexpr bool verify(const DataSet<T>& dataSet, std::source_location location = std::source_location::current()) {
+    auto handleFailure = [&](const std::string& message) -> bool {
+        if constexpr (ThrowOnFailure) {
+            throw gr::exception(message, location);
+        }
+        return false;
+    };
+
+    // axes checks
+    if (dataSet.axisCount() == 0UZ) {
+        return handleFailure("DataSet has zero axes.");
+    }
+    if (dataSet.axis_names.size() != dataSet.axisCount()) {
+        return handleFailure("axis_names size does not match axisCount().");
+    }
+    if (dataSet.axis_units.size() != dataSet.axisCount()) {
+        return handleFailure("axis_units size does not match axisCount().");
+    }
+    if (dataSet.axis_values.size() != dataSet.axisCount()) {
+        return handleFailure("axis_values size does not match axisCount().");
+    }
+    for (std::size_t axis = 0; axis < dataSet.axisCount(); ++axis) {
+        if (dataSet.axis_values[axis].empty()) {
+            return handleFailure(fmt::format("axis_values[{}] is empty.", axis));
+        }
+    }
+
+    // verify extends
+    if (dataSet.extents.empty()) {
+        return handleFailure("DataSet has no extents defined.");
+    }
+    for (std::size_t dim = 0; dim < dataSet.extents.size(); ++dim) {
+        if (dataSet.extents[dim] <= 0) {
+            return handleFailure(fmt::format("Extent at dimension {} is non-positive.", dim));
+        }
+    }
+
+    // verify signal_names, signal_quantities, and signal_units sizes match extents[0]
+    std::size_t num_signals = static_cast<std::size_t>(dataSet.extents[0]);
+    if (dataSet.signal_names.size() != num_signals) {
+        return handleFailure("signal_names size does not match extents[0].");
+    }
+    if (dataSet.signal_quantities.size() != num_signals) {
+        return handleFailure("signal_quantities size does not match extents[0].");
+    }
+    if (dataSet.signal_units.size() != num_signals) {
+        return handleFailure("signal_units size does not match extents[0].");
+    }
+
+    std::size_t expected_signal_size = std::accumulate(dataSet.extents.begin(), dataSet.extents.end(), static_cast<std::size_t>(1), std::multiplies<>());
+    if (dataSet.signal_values.size() != expected_signal_size) {
+        return handleFailure("signal_values size does not match the product of extents.");
+    }
+    if (dataSet.signal_ranges.size() != num_signals) {
+        return handleFailure("signal_ranges size does not match the number of signals.");
+    }
+    if (dataSet.meta_information.size() != num_signals) { // Assuming meta_information per signal value
+        return handleFailure("meta_information size does not match the number of signals.");
+    }
+    if (dataSet.timing_events.size() != num_signals) { // Assuming timing_events per signal value
+        return handleFailure("timing_events size does not match the number of signals.");
+    }
+
+    return true; // all checks passed
+}
+
+} // namespace gr::dataset
+
+#endif // DATASETHELPER_HPP
diff --git a/algorithm/include/gnuradio-4.0/algorithm/dataset/DataSetMath.hpp b/algorithm/include/gnuradio-4.0/algorithm/dataset/DataSetMath.hpp
@@ -0,0 +1,130 @@
+#ifndef DATASETMATH_HPP
+#define DATASETMATH_HPP
+
+#include <fmt/format.h>
+
+#include <gnuradio-4.0/DataSet.hpp>
+#include <gnuradio-4.0/Message.hpp>
+#include <gnuradio-4.0/meta/UncertainValue.hpp>
+
+#include "DataSetHelper.hpp"
+
+namespace gr::dataset {
+
+enum class MathOp { ADD = 0, SUBTRACT, MULTIPLY, DIVIDE, SQR, SQRT, LOG10, DB, INV_DB, IDENTITY };
+
+template<typename T>
+[[nodiscard]] constexpr bool sameHorizontalBase(const DataSet<T>& ds1, const DataSet<T>& ds2) {
+    if (ds1.axisCount() == 0 || ds2.axisCount() == 0) {
+        return false;
+    }
+    const auto& x1 = ds1.axisValues(0);
+    const auto& x2 = ds2.axisValues(0);
+    if (x1.size() != x2.size()) {
+        return false;
+    }
+    for (std::size_t i = 0; i < x1.size(); i++) {
+        if (x1[i] != x2[i]) {
+            return false;
+        }
+    }
+    return true;
+}
+
+namespace detail {
+template<typename T, typename TValue = gr::meta::fundamental_base_value_type_t<T>>
+[[nodiscard]] constexpr T applyMathOperation(MathOp op, T y1, T y2) {
+    switch (op) {
+    case MathOp::ADD: return y1 + y2;
+    case MathOp::SUBTRACT: return y1 - y2;
+    case MathOp::MULTIPLY: return y1 * y2;
+    case MathOp::DIVIDE: return (y2 == TValue(0)) ? std::numeric_limits<TValue>::quiet_NaN() : (y1 / y2);
+    case MathOp::SQR: return (y1 + y2) * (y1 + y2);
+    case MathOp::SQRT: return (y1 + y2) > TValue(0) ? gr::math::sqrt(y1 + y2) : std::numeric_limits<TValue>::quiet_NaN();
+    case MathOp::LOG10: return tenLog10((y1 + y2));
+    case MathOp::DB: return decibel((y1 + y2));
+    case MathOp::INV_DB: return inverseDecibel(y1);
+    case MathOp::IDENTITY:
+    default: return (y1 + y2);
+    }
+}
+} // namespace detail
+
+/*!
+ * \brief mathFunction(DataSet, DataSet, MathOp) => merges or interpolates ds2 onto ds1’s base
+ */
+template<typename T, typename TValue = gr::meta::fundamental_base_value_type_t<T>>
+[[nodiscard]] constexpr DataSet<T> mathFunction(const DataSet<T>& ds1, const DataSet<T>& ds2, MathOp op) {
+
+    // Create new DataSet
+    DataSet<T> ret;
+    ret.axis_names        = ds1.axis_names;
+    ret.axis_units        = ds1.axis_units;
+    ret.axis_values       = ds1.axis_values; // or we do an interpolation base
+    ret.layout            = ds1.layout;
+    ret.extents           = ds1.extents;
+    ret.signal_names      = {"mathOp"};
+    ret.signal_quantities = {ds1.signal_quantities.empty() ? "quantity" : ds1.signal_quantities[0]};
+    ret.signal_units      = {ds1.signal_units.empty() ? "" : ds1.signal_units[0]};
+    ret.signal_ranges     = {{T(0.0), T(1.0)}};   // placeholder
+    ret.meta_information  = ds1.meta_information; // or merge
+    ret.timing_events     = ds1.timing_events;
+
+    std::size_t dataCount = 0U;
+    if (!ret.axis_values.empty()) {
+        dataCount = ret.axis_values[0].size();
+    }
+    ret.signal_values.resize(dataCount);
+
+    bool needsInterpolation = !sameHorizontalBase(ds1, ds2);
+
+    for (std::size_t i = 0; i < dataCount; i++) {
+        TValue x             = gr::value(getIndexValue(ds1, dim::X, i));
+        T      Y1            = getIndexValue(ds1, dim::Y, i);
+        T      Y2            = needsInterpolation ? getValue(ds2, dim::Y, x) : getIndexValue(ds2, dim::Y, i);
+        T      result        = detail::applyMathOperation<T>(op, Y1, Y2);
+        ret.signal_values[i] = static_cast<T>(result);
+    }
+    return ret;
+}
+
+template<typename T, std::convertible_to<T> U, typename TValue = gr::meta::fundamental_base_value_type_t<T>>
+[[nodiscard]] constexpr DataSet<T> mathFunction(const DataSet<T>& ds, U value, MathOp op) {
+
+    DataSet<T> ret       = ds; // copy
+    auto       dataCount = ds.axis_values.empty() ? 0UZ : ds.axis_values[0].size();
+    for (std::size_t i = 0; i < dataCount; i++) {
+        T Y1     = getIndexValue(ds, dim::Y, i);
+        T result = 0;
+        switch (op) {
+        case MathOp::ADD: result = Y1 + value; break;
+        case MathOp::SUBTRACT: result = Y1 - value; break;
+        case MathOp::MULTIPLY: result = Y1 * value; break;
+        case MathOp::DIVIDE: result = (value == TValue(0)) ? std::numeric_limits<TValue>::quiet_NaN() : (Y1 / value); break;
+        case MathOp::SQR: result = (Y1 + value) * (Y1 + value); break;
+        case MathOp::SQRT: result = (Y1 + value) > TValue(0) ? gr::math::sqrt(Y1 + value) : std::numeric_limits<TValue>::quiet_NaN(); break;
+        case MathOp::LOG10: result = tenLog10(Y1 + value); break;
+        case MathOp::DB: result = decibel(Y1 + value); break;
+        case MathOp::INV_DB: result = inverseDecibel(Y1); break;
+        case MathOp::IDENTITY:
+        default: result = Y1; break;
+        }
+        ret.signal_values[i] = static_cast<T>(result);
+    }
+    return ret;
+}
+
+template<typename T, std::convertible_to<T> U>
+[[nodiscard]] constexpr DataSet<T> addFunction(const DataSet<T>& ds, U value) {
+    return mathFunction(ds, value, MathOp::ADD);
+}
+template<typename T>
+[[nodiscard]] constexpr DataSet<T> addFunction(const DataSet<T>& ds1, const DataSet<T>& ds2) {
+    return mathFunction(ds1, ds2, MathOp::ADD);
+}
+
+// WIP complete other convenience and missing math functions
+
+} // namespace gr::dataset
+
+#endif // DATASETMATH_HPP