Skip to content

Commit

Permalink
TabletReader::getStreamSizeSum
Browse files Browse the repository at this point in the history
Summary: We want to be able to know stream sizes without actually loading the stripe.

Differential Revision: D56738694
  • Loading branch information
Daniel Munoz authored and facebook-github-bot committed May 1, 2024
1 parent fcf98e1 commit 0034666
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 1 deletion.
17 changes: 17 additions & 0 deletions dwio/nimble/tablet/TabletReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,23 @@ std::vector<std::unique_ptr<StreamLoader>> TabletReader::load(
return streams;
}

// Computes the combined size of the requested streams within one stripe, as
// reported by the stripe group's streamSizes() for that stripe.
//
// stripe: identifies the stripe; its stripeId_ must be below stripeCount_,
//   otherwise NIMBLE_CHECK fails with "Stripe is out of range.".
// streamIdentifiers: stream ids to include. Ids at or beyond the stripe
//   group's streamCount() contribute nothing to the result.
//
// Returns the sum of the per-stream sizes over all in-range ids. An id that
// appears more than once in streamIdentifiers is counted each time.
uint64_t TabletReader::getStreamSizeSum(
    const StripeIdentifier& stripe,
    std::span<const uint32_t> streamIdentifiers) const {
  NIMBLE_CHECK(stripe.stripeId_ < stripeCount_, "Stripe is out of range.");

  const auto& group = stripe.stripeGroup_;
  const auto sizes = group->streamSizes(stripe.stripeId_);

  uint64_t total = 0;
  for (auto id : streamIdentifiers) {
    // Only ids below the group's stream count are looked up in `sizes`.
    if (id < group->streamCount()) {
      total += sizes[id];
    }
  }
  return total;
}

std::optional<Section> TabletReader::loadOptionalSection(
const std::string& name,
bool keepCache) const {
Expand Down
4 changes: 4 additions & 0 deletions dwio/nimble/tablet/TabletReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ class TabletReader {
return std::string_view{};
}) const;

// Returns the sum of the sizes of the streams named by `streamIdentifiers`
// in `stripe`, so callers can learn stream sizes without loading the stripe.
// Per the definition in TabletReader.cpp: the stripe id is checked against
// the tablet's stripe count, and identifiers that are not below the stripe
// group's stream count are skipped rather than treated as errors.
uint64_t getStreamSizeSum(
const StripeIdentifier& stripe,
std::span<const uint32_t> streamIdentifiers) const;

std::optional<Section> loadOptionalSection(
const std::string& name,
bool keepCache = false) const;
Expand Down
25 changes: 24 additions & 1 deletion dwio/nimble/tablet/tests/TabletTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,24 @@ void parameterizedTest(
VLOG(1) << "Output Tablet -> StripeCount: " << tablet.stripeCount()
<< ", RowCount: " << tablet.tabletRowCount();

// Now, read all stripes and verify results
uint32_t maxIdentifiers = 0;
for (auto stripe = 0; stripe < stripesData.size(); ++stripe) {
auto stripeIdentifier = tablet.getStripeIdentifier(stripe);
maxIdentifiers =
std::max(maxIdentifiers, tablet.streamCount(stripeIdentifier));
}
std::vector<uint32_t> allIdentifiers(maxIdentifiers);
std::iota(allIdentifiers.begin(), allIdentifiers.end(), 0);
std::span<const uint32_t> allIdentifiersSpan{
allIdentifiers.cbegin(), allIdentifiers.cend()};
size_t extraReads = 0;
std::vector<uint64_t> streamSizeSum;
for (auto stripe = 0; stripe < stripesData.size(); ++stripe) {
auto stripeIdentifier = tablet.getStripeIdentifier(stripe);
streamSizeSum.push_back(
tablet.getStreamSizeSum(stripeIdentifier, allIdentifiersSpan));
}
// Now, read all stripes and verify results
for (auto stripe = 0; stripe < stripesData.size(); ++stripe) {
EXPECT_EQ(stripesData[stripe].rowCount, tablet.stripeRowCount(stripe));

Expand All @@ -159,6 +175,13 @@ void parameterizedTest(
std::iota(identifiers.begin(), identifiers.end(), 0);
auto serializedStreams = tablet.load(
stripeIdentifier, {identifiers.cbegin(), identifiers.cend()});
uint64_t streamSizeSumExpected = 0;
for (const auto& stream : serializedStreams) {
if (stream) {
streamSizeSumExpected += stream->getStream().size();
}
}
EXPECT_EQ(streamSizeSum[stripe], streamSizeSumExpected);
auto chunks = readFile.chunks();
auto expectedReads = stripesData[stripe].streams.size();
auto diff = chunks.size() - expectedReads;
Expand Down

0 comments on commit 0034666

Please sign in to comment.