From e4618d8b0b74c0458281994f66314228de2ac9d9 Mon Sep 17 00:00:00 2001 From: "Arseny V. Povolotsky" Date: Tue, 28 May 2019 17:00:37 +0200 Subject: [PATCH] Moved edge index implementation to a separate namespace (#27) --- CMakeLists.txt | 1 + extlib/HighFive | 2 +- src/edge_index.cpp | 111 +++++++++++++++++++++++++++++++++++++++++++++ src/edge_index.h | 29 ++++++++++++ src/edges.cpp | 89 +++++++++--------------------------- src/population.cpp | 21 ++++++++- src/population.hpp | 26 ----------- 7 files changed, 184 insertions(+), 95 deletions(-) create mode 100644 src/edge_index.cpp create mode 100644 src/edge_index.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b26ef74..1d9d94b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,6 +50,7 @@ endif() set(SONATA_SRC src/common.cpp + src/edge_index.cpp src/edges.cpp src/hdf5_mutex.cpp src/nodes.cpp diff --git a/extlib/HighFive b/extlib/HighFive index e472a7f6..e88b5b1e 160000 --- a/extlib/HighFive +++ b/extlib/HighFive @@ -1 +1 @@ -Subproject commit e472a7f618a551f8c315ed91130b1ad2b212af29 +Subproject commit e88b5b1e7dd46caf539e2ac13bb76525506af07c diff --git a/src/edge_index.cpp b/src/edge_index.cpp new file mode 100644 index 00000000..7ff26920 --- /dev/null +++ b/src/edge_index.cpp @@ -0,0 +1,111 @@ +/************************************************************************* + * Copyright (C) 2018-2019 Blue Brain Project + * + * This file is part of 'libsonata', distributed under the terms + * of the GNU Lesser General Public License. + * + * See top-level LICENSE.txt file for details. + *************************************************************************/ + +#include "edge_index.h" + +#include + +#include +#include +#include + + +namespace bbp { +namespace sonata { +namespace edge_index { + +namespace { + +typedef std::vector> RawIndex; + +const char* SOURCE_INDEX_GROUP = "indices/source_to_target"; +const char* TARGET_INDEX_GROUP = "indices/target_to_source"; +const char* NODE_ID_TO_RANGES_DSET = "node_id_to_ranges"; +const char* RANGE_TO_EDGE_ID_DSET = "range_to_edge_id"; + +} // unnamed namespace + + +const HighFive::Group sourceIndex(const HighFive::Group& h5Root) +{ + if (!h5Root.exist(SOURCE_INDEX_GROUP)) { + throw SonataError("No source index group found"); + } + return h5Root.getGroup(SOURCE_INDEX_GROUP); +} + + +const HighFive::Group targetIndex(const HighFive::Group& h5Root) +{ + if (!h5Root.exist(TARGET_INDEX_GROUP)) { + throw SonataError("No target index group found"); + } + return h5Root.getGroup(TARGET_INDEX_GROUP); +} + + +Selection resolve(const HighFive::Group& indexGroup, const NodeID nodeID) +{ + if (nodeID >= indexGroup.getDataSet(NODE_ID_TO_RANGES_DSET).getSpace().getDimensions()[0]) { + // Returning empty set for out-of-range node IDs, to be aligned with SYN2 reader implementation + // TODO: throw a SonataError instead + return Selection({}); + } + + RawIndex primaryRange; + indexGroup + .getDataSet(NODE_ID_TO_RANGES_DSET) + .select({ nodeID, 0 }, { 1, 2 }) + .read(primaryRange); + + const uint64_t primaryRangeBegin = primaryRange[0][0]; + const uint64_t primaryRangeEnd = primaryRange[0][1]; + + if (primaryRangeBegin >= primaryRangeEnd) { + return Selection({}); + } + + RawIndex secondaryRange; + indexGroup + .getDataSet(RANGE_TO_EDGE_ID_DSET) + .select({ primaryRangeBegin, 0 }, { primaryRangeEnd - primaryRangeBegin, 2 }) + .read(secondaryRange); + + Selection::Ranges ranges; + ranges.reserve(secondaryRange.size()); + + for (const auto& row: secondaryRange) { + ranges.emplace_back(row[0], row[1]); + } + + return Selection(std::move(ranges)); +} + + +Selection resolve(const HighFive::Group& indexGroup, const std::vector& nodeIDs) +{ + if (nodeIDs.size() == 1) { + return resolve(indexGroup, nodeIDs[0]); + } + // TODO optimize: bulk read for primary index + // TODO optimize: range merging + std::set merged; + for (NodeID nodeID : nodeIDs) { + const auto ids = resolve(indexGroup, nodeID).flatten(); + merged.insert(ids.begin(), ids.end()); + } + std::vector result; + result.reserve(merged.size()); + std::copy(merged.begin(), merged.end(), std::back_inserter(result)); + return Selection::fromValues(result); +} + +} +} +} // namespace bbp::sonata::edge_index \ No newline at end of file diff --git a/src/edge_index.h b/src/edge_index.h new file mode 100644 index 00000000..35a36c32 --- /dev/null +++ b/src/edge_index.h @@ -0,0 +1,29 @@ +/************************************************************************* + * Copyright (C) 2018-2019 Blue Brain Project + * + * This file is part of 'libsonata', distributed under the terms + * of the GNU Lesser General Public License. + * + * See top-level LICENSE.txt file for details. + *************************************************************************/ + +#pragma once + +#include + +#include +#include + +namespace bbp { +namespace sonata { +namespace edge_index { + +const HighFive::Group sourceIndex(const HighFive::Group& h5Root); +const HighFive::Group targetIndex(const HighFive::Group& h5Root); + +Selection resolve(const HighFive::Group& indexGroup, NodeID nodeID); +Selection resolve(const HighFive::Group& indexGroup, const std::vector& nodeIDs); + +} +} +} // namespace bbp::sonata::edge_index \ No newline at end of file diff --git a/src/edges.cpp b/src/edges.cpp index 47a716ea..678295ef 100644 --- a/src/edges.cpp +++ b/src/edges.cpp @@ -9,6 +9,7 @@ #include "population.hpp" #include "hdf5_mutex.hpp" +#include "edge_index.h" #include #include @@ -19,6 +20,15 @@ #include +namespace { + +const char* SOURCE_NODE_ID_DSET = "source_node_id"; +const char* TARGET_NODE_ID_DSET = "target_node_id"; +const char* NODE_POPULATION_ATTR = "node_population"; + +} // unnamed namespace + + namespace bbp { namespace sonata { @@ -36,7 +46,7 @@ std::string EdgePopulation::source() const { HDF5_LOCK_GUARD std::string result; - impl_->h5Root.getDataSet("source_node_id").getAttribute("node_population").read(result); + impl_->h5Root.getDataSet(SOURCE_NODE_ID_DSET).getAttribute(NODE_POPULATION_ATTR).read(result); return result; } @@ -45,7 +55,7 @@ std::string EdgePopulation::target() const { HDF5_LOCK_GUARD std::string result; - impl_->h5Root.getDataSet("target_node_id").getAttribute("node_population").read(result); + impl_->h5Root.getDataSet(TARGET_NODE_ID_DSET).getAttribute(NODE_POPULATION_ATTR).read(result); return result; } @@ -53,7 +63,7 @@ std::string EdgePopulation::target() const std::vector EdgePopulation::sourceNodeIDs(const Selection& selection) const { HDF5_LOCK_GUARD - const auto dset = impl_->h5Root.getDataSet("source_node_id"); + const auto dset = impl_->h5Root.getDataSet(SOURCE_NODE_ID_DSET); return _readSelection(dset, selection); } @@ -61,84 +71,29 @@ std::vector EdgePopulation::sourceNodeIDs(const Selection& selection) co std::vector EdgePopulation::targetNodeIDs(const Selection& selection) const { HDF5_LOCK_GUARD - const auto dset = impl_->h5Root.getDataSet("target_node_id"); + const auto dset = impl_->h5Root.getDataSet(TARGET_NODE_ID_DSET); return _readSelection(dset, selection); } -namespace { - -Selection _resolveIndex(const HighFive::Group& indexGroup, const NodeID nodeID) -{ - typedef std::vector> RawIndex; - - if (nodeID >= indexGroup.getDataSet("node_id_to_ranges").getSpace().getDimensions()[0]) { - // Returning empty set for out-of-range node IDs, to be aligned with SYN2 reader implementation - // TODO: throw a SonataError instead - return Selection({}); - } - - RawIndex primaryRange; - indexGroup - .getDataSet("node_id_to_ranges") - .select({ nodeID, 0 }, { 1, 2 }) - .read(primaryRange); - - const uint64_t primaryRangeBegin = primaryRange[0][0]; - const uint64_t primaryRangeEnd = primaryRange[0][1]; - - if (primaryRangeBegin >= primaryRangeEnd) { - return Selection({}); - } - - RawIndex secondaryRange; - indexGroup - .getDataSet("range_to_edge_id") - .select({ primaryRangeBegin, 0 }, { primaryRangeEnd - primaryRangeBegin, 2 }) - .read(secondaryRange); - - Selection::Ranges ranges; - ranges.reserve(secondaryRange.size()); - - for (const auto& row: secondaryRange) { - ranges.emplace_back(row[0], row[1]); - } - - return Selection(std::move(ranges)); -} - - -Selection _resolveIndex(const HighFive::Group& indexGroup, const std::vector& nodeIDs) -{ - if (nodeIDs.size() == 1) { - return _resolveIndex(indexGroup, nodeIDs[0]); - } - // TODO optimize: bulk read for primary index - // TODO optimize: range merging - std::set result; - for (NodeID nodeID : nodeIDs) { - const auto ids = _resolveIndex(indexGroup, nodeID).flatten(); - result.insert(ids.begin(), ids.end()); - } - return _selectionFromValues(result.begin(), result.end()); -} - -} // unnamed namespace - Selection EdgePopulation::afferentEdges(const std::vector& target) const { HDF5_LOCK_GUARD - const auto& indexGroup = impl_->h5Root.getGroup("indices/target_to_source"); - return _resolveIndex(indexGroup, target); + return edge_index::resolve( + edge_index::targetIndex(impl_->h5Root), + target + ); } Selection EdgePopulation::efferentEdges(const std::vector& source) const { HDF5_LOCK_GUARD - const auto& indexGroup = impl_->h5Root.getGroup("indices/source_to_target"); - return _resolveIndex(indexGroup, source); + return edge_index::resolve( + edge_index::sourceIndex(impl_->h5Root), + source + ); } diff --git a/src/population.cpp b/src/population.cpp index 9ade7df2..2869c722 100644 --- a/src/population.cpp +++ b/src/population.cpp @@ -49,7 +49,26 @@ Selection::Selection(const Selection::Ranges& ranges) Selection Selection::fromValues(const Selection::Values& values) { - return _selectionFromValues(values.begin(), values.end()); + Selection::Ranges ranges; + + Selection::Range range{ 0, 0 }; + for (const auto v: values) { + if (v == range.second) { + ++range.second; + } else { + if (range.first < range.second) { + ranges.push_back(range); + } + range.first = v; + range.second = v + 1; + } + } + + if (range.first < range.second) { + ranges.push_back(range); + } + + return Selection(std::move(ranges)); } diff --git a/src/population.hpp b/src/population.hpp index e8de92b9..f52664b0 100644 --- a/src/population.hpp +++ b/src/population.hpp @@ -39,32 +39,6 @@ std::set _listChildren(const HighFive::Group& group, const std::set } -template -Selection _selectionFromValues(Iterator first, Iterator last) -{ - Selection::Ranges ranges; - - Selection::Range range{ 0, 0 }; - for (Iterator it = first; it != last; ++it) { - if (*it == range.second) { - ++range.second; - } else { - if (range.first < range.second) { - ranges.push_back(range); - } - range.first = *it; - range.second = *it + 1; - } - } - - if (range.first < range.second) { - ranges.push_back(range); - } - - return Selection(std::move(ranges)); -} - - template std::vector _readChunk(const HighFive::DataSet& dset, const Selection::Range& range) {