Skip to content

Commit

Permalink
Verax Demo
Browse files Browse the repository at this point in the history
Plan generation demo.
  • Loading branch information
Orri Erling committed Dec 4, 2024
1 parent 86d6f33 commit 0c0f801
Show file tree
Hide file tree
Showing 51 changed files with 10,250 additions and 29 deletions.
6 changes: 2 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -506,11 +506,9 @@ if(${VELOX_BUILD_TESTING})

# 'gRPC_CARES_PROVIDER' is set as 'package', which means c-ares library needs
# to be installed on the system, instead of being built by gRPC.
velox_set_source(c-ares)
velox_resolve_dependency(c-ares)
# set_source(c-ares) resolve_dependency(c-ares)

velox_set_source(gRPC)
velox_resolve_dependency(gRPC)
# set_source(gRPC) resolve_dependency(gRPC)
endif()

if(VELOX_ENABLE_REMOTE_FUNCTIONS)
Expand Down
4 changes: 4 additions & 0 deletions velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ if(${VELOX_ENABLE_SUBSTRAIT})
add_subdirectory(substrait)
endif()

if(VELOX_ENABLE_PARQUET)
add_subdirectory(experimental/query)
endif()

if(${VELOX_BUILD_TESTING})
add_subdirectory(tool)
endif()
16 changes: 16 additions & 0 deletions velox/common/base/tests/RawVectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,19 @@ TEST(RawVectorTest, toStdVector) {
;
}
}

template <class _Tp, class... _Args>
inline _Tp* make(_Args&&... __args) {
return (new _Tp(std::forward<_Args>(__args)...));
}

class Pfaal {
public:
Pfaal(int x, int y) : n(x + y){};
int n;
};

TEST(RawVectorTest, pfaal) {
auto x = make<Pfaal>(1, 2);
printf("%d", x->n);
}
14 changes: 12 additions & 2 deletions velox/dwio/common/Statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,13 @@ class ColumnStatistics {
std::optional<uint64_t> valueCount,
std::optional<bool> hasNull,
std::optional<uint64_t> rawSize,
std::optional<uint64_t> size)
std::optional<uint64_t> size,
std::optional<int64_t> numDistinct = std::nullopt)
: valueCount_(valueCount),
hasNull_(hasNull),
rawSize_(rawSize),
size_(size) {}
size_(size),
numDistinct_(numDistinct) {}

virtual ~ColumnStatistics() = default;

Expand Down Expand Up @@ -123,6 +125,13 @@ class ColumnStatistics {
return size_;
}

std::optional<uint64_t> numDistinct() const {
return numDistinct_;
}
void setNumDistinct(int64_t count) {
numDistinct_ = count;
}

/**
* return string representation of this stats object
*/
Expand All @@ -145,6 +154,7 @@ class ColumnStatistics {
std::optional<bool> hasNull_;
std::optional<uint64_t> rawSize_;
std::optional<uint64_t> size_;
std::optional<uint64_t> numDistinct_;
};

/**
Expand Down
1 change: 1 addition & 0 deletions velox/dwio/dwrf/writer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ velox_add_library(

velox_link_libraries(
velox_dwio_dwrf_writer
velox_common_hyperloglog
velox_dwio_common
velox_dwio_dwrf_common
velox_dwio_dwrf_utils
Expand Down
29 changes: 27 additions & 2 deletions velox/dwio/dwrf/writer/StatisticsBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <velox/common/base/Exceptions.h>
#include <velox/common/hyperloglog/SparseHll.h>
#include "velox/dwio/dwrf/common/Config.h"
#include "velox/dwio/dwrf/common/Statistics.h"
#include "velox/dwio/dwrf/common/wrap/dwrf-proto-wrapper.h"
Expand Down Expand Up @@ -76,11 +77,18 @@ inline dwio::common::KeyInfo constructKey(const dwrf::proto::KeyInfo& keyInfo) {
struct StatisticsBuilderOptions {
explicit StatisticsBuilderOptions(
uint32_t stringLengthLimit,
std::optional<uint64_t> initialSize = std::nullopt)
: stringLengthLimit{stringLengthLimit}, initialSize{initialSize} {}
std::optional<uint64_t> initialSize = std::nullopt,
bool countDistincts = false,
HashStringAllocator* allocator = nullptr)
: stringLengthLimit{stringLengthLimit},
initialSize{initialSize},
countDistincts(countDistincts),
allocator(allocator) {}

uint32_t stringLengthLimit;
std::optional<uint64_t> initialSize;
bool countDistincts{false};
HashStringAllocator* allocator;

static StatisticsBuilderOptions fromConfig(const Config& config) {
return StatisticsBuilderOptions{config.get(Config::STRING_STATS_LIMIT)};
Expand Down Expand Up @@ -132,6 +140,18 @@ class StatisticsBuilder : public virtual dwio::common::ColumnStatistics {
}
}

template <typename T>
void addHash(const T& data) {
if (hll_) {
hll_->insertHash(folly::hasher<T>()(data));
}
}

int64_t cardinality() {
VELOX_CHECK(hll_);
return hll_->cardinality();
}

/*
* Merge stats of same type. This is used in writer to aggregate file level
* stats.
Expand Down Expand Up @@ -170,10 +190,14 @@ class StatisticsBuilder : public virtual dwio::common::ColumnStatistics {
hasNull_ = false;
rawSize_ = 0;
size_ = options_.initialSize;
if (options_.countDistincts) {
hll_ = std::make_shared<common::hll::SparseHll>(options_.allocator);
}
}

protected:
StatisticsBuilderOptions options_;
std::shared_ptr<common::hll::SparseHll> hll_;
};

class BooleanStatisticsBuilder : public StatisticsBuilder,
Expand Down Expand Up @@ -229,6 +253,7 @@ class IntegerStatisticsBuilder : public StatisticsBuilder,
max_ = value;
}
addWithOverflowCheck(sum_, value, count);
addHash(value);
}

void merge(
Expand Down
6 changes: 5 additions & 1 deletion velox/exec/PlanNodeStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ void printCustomStats(
std::string printPlanWithStats(
const core::PlanNode& plan,
const TaskStats& taskStats,
bool includeCustomStats) {
bool includeCustomStats,
std::function<std::string(const core::PlanNodeId& planNodeId)> annotation) {
auto planStats = toPlanStats(taskStats);
auto leafPlanNodes = plan.leafPlanNodeIds();

Expand Down Expand Up @@ -267,6 +268,9 @@ std::string printPlanWithStats(
printCustomStats(stats.customStats, indentation + " ", stream);
}
}
if (annotation) {
stream << indentation << annotation(planNodeId) << std::endl;
}
});
}
} // namespace facebook::velox::exec
4 changes: 3 additions & 1 deletion velox/exec/PlanNodeStats.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,5 +171,7 @@ folly::dynamic toPlanStatsJson(const facebook::velox::exec::TaskStats& stats);
std::string printPlanWithStats(
const core::PlanNode& plan,
const TaskStats& taskStats,
bool includeCustomStats = false);
bool includeCustomStats = false,
std::function<std::string(const core::PlanNodeId& planNodeId)> annotation =
nullptr);
} // namespace facebook::velox::exec
12 changes: 12 additions & 0 deletions velox/exec/tests/utils/QueryAssertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,18 @@ std::pair<std::unique_ptr<TaskCursor>, std::vector<RowVectorPtr>> readCursor(
return {std::move(cursor), std::move(result)};
}

// static
std::vector<RowVectorPtr> readCursor(
std::shared_ptr<runner::LocalRunner> runner) {
// 'result' borrows memory from cursor so the life cycle must be shorter.
std::vector<RowVectorPtr> result;

while (auto rows = runner->next()) {
result.push_back(rows);
}
return result;
}

bool waitForTaskFinish(
exec::Task* task,
TaskState expectedState,
Expand Down
5 changes: 5 additions & 0 deletions velox/exec/tests/utils/QueryAssertions.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "velox/vector/ComplexVector.h"

#include <duckdb.hpp> // @manual
#include "velox/runner/LocalRunner.h"

namespace facebook::velox::exec::test {

Expand Down Expand Up @@ -179,6 +180,10 @@ std::pair<std::unique_ptr<TaskCursor>, std::vector<RowVectorPtr>> readCursor(
std::function<void(exec::Task*)> addSplits,
uint64_t maxWaitMicros = 5'000'000);

/// Reads all results from 'runner'.
std::vector<RowVectorPtr> readCursor(
std::shared_ptr<runner::LocalRunner> runner);

/// The Task can return results before the Driver is finished executing.
/// Wait upto maxWaitMicros for the Task to finish as 'expectedState' before
/// returning to ensure it's stable e.g. the Driver isn't updating it anymore.
Expand Down
63 changes: 63 additions & 0 deletions velox/experimental/query/ArenaCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <vector>
#include "velox/common/base/BitUtil.h"
#include "velox/common/memory/HashStringAllocator.h"

namespace facebook::velox::optimizer {

class ArenaCache {
static constexpr int32_t kMaxSize = 512;
static constexpr int32_t kGranularity = 16;

public:
explicit ArenaCache(velox::HashStringAllocator& allocator)
: allocator_(allocator), allocated_(kMaxSize / kGranularity) {}

void* allocate(size_t size) {
auto sizeClass = velox::bits::roundUp(size, kGranularity) / kGranularity;
if (sizeClass < kMaxSize / kGranularity) {
if (!allocated_[sizeClass].empty()) {
void* result = allocated_[sizeClass].back();
allocated_[sizeClass].pop_back();
totalSize_ -= sizeClass;
return result;
}
}
return allocator_.allocate(sizeClass * kGranularity)->begin();
}

void free(void* ptr) {
auto header = velox::HashStringAllocator::headerOf(ptr);
int32_t sizeClass = header->size() / kGranularity;
if (sizeClass < kMaxSize / kGranularity) {
totalSize_ += sizeClass;
allocated_[sizeClass].push_back(ptr);
return;
}
allocator_.free(header);
}

private:
velox::HashStringAllocator& allocator_;
std::vector<std::vector<void*>> allocated_;
uint64_t totalSize_{0};
};

} // namespace facebook::velox::optimizer
35 changes: 35 additions & 0 deletions velox/experimental/query/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

add_subdirectory(tests)

add_library(
velox_verax
ToGraph.cpp
Plan.cpp
PlanObject.cpp
Schema.cpp
QueryGraph.cpp
QueryGraphContext.cpp
Filters.cpp
Cost.cpp
PlanUtils.cpp
RelationOp.cpp
ToVelox.cpp
VeloxHistory.cpp)

add_dependencies(velox_verax velox_hive_connector)

target_link_libraries(
velox_verax velox_core velox_hive_connector velox_dwio_dwrf_proto)
Loading

0 comments on commit 0c0f801

Please sign in to comment.