From 4815c45416fbeef8e95a11dadcd8fa0baa0b1b8c Mon Sep 17 00:00:00 2001 From: yingsu00 Date: Thu, 21 Mar 2024 19:32:53 +0800 Subject: [PATCH] Reorder SplitReader class fields --- velox/connectors/hive/HiveDataSource.cpp | 10 ++-- velox/connectors/hive/SplitReader.cpp | 57 +++++++++---------- velox/connectors/hive/SplitReader.h | 24 ++++---- .../hive/iceberg/IcebergSplitReader.cpp | 22 +++---- .../hive/iceberg/IcebergSplitReader.h | 12 ++-- 5 files changed, 61 insertions(+), 64 deletions(-) diff --git a/velox/connectors/hive/HiveDataSource.cpp b/velox/connectors/hive/HiveDataSource.cpp index 817de3b9bd9a1..99694aba34e83 100644 --- a/velox/connectors/hive/HiveDataSource.cpp +++ b/velox/connectors/hive/HiveDataSource.cpp @@ -167,14 +167,14 @@ std::unique_ptr HiveDataSource::createSplitReader() { return SplitReader::create( split_, hiveTableHandle_, - scanSpec_, - readerOutputType_, &partitionKeys_, - fileHandleFactory_, - executor_, connectorQueryCtx_, hiveConfig_, - ioStats_); + readerOutputType_, + ioStats_, + fileHandleFactory_, + executor_, + scanSpec_); } void HiveDataSource::addSplit(std::shared_ptr split) { diff --git a/velox/connectors/hive/SplitReader.cpp b/velox/connectors/hive/SplitReader.cpp index f8adf92082125..1af2b444d4da3 100644 --- a/velox/connectors/hive/SplitReader.cpp +++ b/velox/connectors/hive/SplitReader.cpp @@ -62,69 +62,69 @@ VectorPtr newConstantFromString( std::unique_ptr SplitReader::create( const std::shared_ptr& hiveSplit, const std::shared_ptr& hiveTableHandle, - const std::shared_ptr& scanSpec, - const RowTypePtr& readerOutputType, const std::unordered_map< std::string, std::shared_ptr>* partitionKeys, - FileHandleFactory* fileHandleFactory, - folly::Executor* executor, const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, - const std::shared_ptr& ioStats) { + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec) { // Create the SplitReader based on hiveSplit->customSplitInfo["table_format"] if (hiveSplit->customSplitInfo.count("table_format") > 0 && hiveSplit->customSplitInfo["table_format"] == "hive-iceberg") { return std::make_unique( hiveSplit, hiveTableHandle, - scanSpec, - readerOutputType, partitionKeys, - fileHandleFactory, - executor, connectorQueryCtx, hiveConfig, - ioStats); + readerOutputType, + ioStats, + fileHandleFactory, + executor, + scanSpec); } else { return std::make_unique( hiveSplit, hiveTableHandle, - scanSpec, - readerOutputType, partitionKeys, - fileHandleFactory, - executor, connectorQueryCtx, hiveConfig, - ioStats); + readerOutputType, + ioStats, + fileHandleFactory, + executor, + scanSpec); } } SplitReader::SplitReader( const std::shared_ptr& hiveSplit, const std::shared_ptr& hiveTableHandle, - const std::shared_ptr& scanSpec, - const RowTypePtr& readerOutputType, const std::unordered_map< std::string, std::shared_ptr>* partitionKeys, - FileHandleFactory* fileHandleFactory, - folly::Executor* executor, const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, - const std::shared_ptr& ioStats) + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec) : hiveSplit_(hiveSplit), hiveTableHandle_(hiveTableHandle), - scanSpec_(scanSpec), - readerOutputType_(readerOutputType), partitionKeys_(partitionKeys), - pool_(connectorQueryCtx->memoryPool()), - fileHandleFactory_(fileHandleFactory), - executor_(executor), connectorQueryCtx_(connectorQueryCtx), hiveConfig_(hiveConfig), + readerOutputType_(readerOutputType), ioStats_(ioStats), + fileHandleFactory_(fileHandleFactory), + executor_(executor), + pool_(connectorQueryCtx->memoryPool()), + scanSpec_(scanSpec), baseReaderOpts_(connectorQueryCtx->memoryPool()), emptySplit_(false) {} @@ -180,11 +180,8 @@ int64_t SplitReader::estimatedRowSize() const { return DataSource::kUnknownRowSize; } - auto size = baseRowReader_->estimatedRowSize(); - if (size.has_value()) { - return size.value(); - } - return DataSource::kUnknownRowSize; + const auto size = baseRowReader_->estimatedRowSize(); + return size.value_or(DataSource::kUnknownRowSize); } void SplitReader::updateRuntimeStats( diff --git a/velox/connectors/hive/SplitReader.h b/velox/connectors/hive/SplitReader.h index 4596c618a32f3..e642d24afbcef 100644 --- a/velox/connectors/hive/SplitReader.h +++ b/velox/connectors/hive/SplitReader.h @@ -57,30 +57,30 @@ class SplitReader { static std::unique_ptr create( const std::shared_ptr& hiveSplit, const std::shared_ptr& hiveTableHandle, - const std::shared_ptr& scanSpec, - const RowTypePtr& readerOutputType, const std::unordered_map< std::string, std::shared_ptr>* partitionKeys, - FileHandleFactory* fileHandleFactory, - folly::Executor* executor, const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, - const std::shared_ptr& ioStats); + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec); SplitReader( const std::shared_ptr& hiveSplit, const std::shared_ptr& hiveTableHandle, - const std::shared_ptr& scanSpec, - const RowTypePtr& readerOutputType, const std::unordered_map< std::string, std::shared_ptr>* partitionKeys, - FileHandleFactory* fileHandleFactory, - folly::Executor* executor, const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, - const std::shared_ptr& ioStats); + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec); virtual ~SplitReader() = default; @@ -139,6 +139,7 @@ class SplitReader { const std::string& partitionKey, const std::optional& value) const; + std::shared_ptr hiveSplit_; const std::shared_ptr hiveTableHandle_; const std::unordered_map< std::string, @@ -146,12 +147,11 @@ class SplitReader { const ConnectorQueryCtx* const connectorQueryCtx_; const std::shared_ptr hiveConfig_; - std::shared_ptr hiveSplit_; const RowTypePtr readerOutputType_; const std::shared_ptr ioStats_; - memory::MemoryPool* const pool_; FileHandleFactory* const fileHandleFactory_; folly::Executor* const executor_; + memory::MemoryPool* const pool_; std::shared_ptr scanSpec_; std::unique_ptr baseReader_; diff --git a/velox/connectors/hive/iceberg/IcebergSplitReader.cpp b/velox/connectors/hive/iceberg/IcebergSplitReader.cpp index 54c53c9b5275c..d7af5a8955708 100644 --- a/velox/connectors/hive/iceberg/IcebergSplitReader.cpp +++ b/velox/connectors/hive/iceberg/IcebergSplitReader.cpp @@ -25,29 +25,29 @@ using namespace facebook::velox::dwio::common; namespace facebook::velox::connector::hive::iceberg { IcebergSplitReader::IcebergSplitReader( - const std::shared_ptr& hiveSplit, + const std::shared_ptr& hiveSplit, const std::shared_ptr& hiveTableHandle, - const std::shared_ptr& scanSpec, - const RowTypePtr& readerOutputType, const std::unordered_map< std::string, std::shared_ptr>* partitionKeys, - FileHandleFactory* fileHandleFactory, - folly::Executor* executor, const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, - std::shared_ptr ioStats) + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + FileHandleFactory* const fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec) : SplitReader( hiveSplit, hiveTableHandle, - scanSpec, - readerOutputType, partitionKeys, - fileHandleFactory, - executor, connectorQueryCtx, hiveConfig, - ioStats), + readerOutputType, + ioStats, + fileHandleFactory, + executor, + scanSpec), baseReadOffset_(0), splitOffset_(0) {} diff --git a/velox/connectors/hive/iceberg/IcebergSplitReader.h b/velox/connectors/hive/iceberg/IcebergSplitReader.h index a9b5e1c707b84..c476354a9afb6 100644 --- a/velox/connectors/hive/iceberg/IcebergSplitReader.h +++ b/velox/connectors/hive/iceberg/IcebergSplitReader.h @@ -27,18 +27,18 @@ struct IcebergDeleteFile; class IcebergSplitReader : public SplitReader { public: IcebergSplitReader( - const std::shared_ptr& hiveSplit, + const std::shared_ptr& hiveSplit, const std::shared_ptr& hiveTableHandle, - const std::shared_ptr& scanSpec, - const RowTypePtr& readerOutputType, const std::unordered_map< std::string, std::shared_ptr>* partitionKeys, - FileHandleFactory* fileHandleFactory, - folly::Executor* executor, const ConnectorQueryCtx* connectorQueryCtx, const std::shared_ptr& hiveConfig, - std::shared_ptr ioStats); + const RowTypePtr& readerOutputType, + const std::shared_ptr& ioStats, + FileHandleFactory* fileHandleFactory, + folly::Executor* executor, + const std::shared_ptr& scanSpec); ~IcebergSplitReader() override = default;