Skip to content

Commit

Permalink
Reorder SplitReader class fields
Browse files: browse the repository at this point in the history
  • Loading branch information
yingsu00 committed Apr 2, 2024
1 parent 4d2ed59 commit 1f88646
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 70 deletions.
10 changes: 5 additions & 5 deletions velox/connectors/hive/HiveDataSource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,14 @@ std::unique_ptr<SplitReader> HiveDataSource::createSplitReader() {
return SplitReader::create(
split_,
hiveTableHandle_,
scanSpec_,
readerOutputType_,
&partitionKeys_,
fileHandleFactory_,
executor_,
connectorQueryCtx_,
hiveConfig_,
ioStats_);
readerOutputType_,
ioStats_,
fileHandleFactory_,
executor_,
scanSpec_);
}

void HiveDataSource::addSplit(std::shared_ptr<ConnectorSplit> split) {
Expand Down
60 changes: 28 additions & 32 deletions velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,68 +62,67 @@ VectorPtr newConstantFromString(
std::unique_ptr<SplitReader> SplitReader::create(
const std::shared_ptr<hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<common::ScanSpec>& scanSpec,
const RowTypePtr& readerOutputType,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>*
partitionKeys,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const ConnectorQueryCtx* connectorQueryCtx,
const std::shared_ptr<const HiveConfig>& hiveConfig,
const std::shared_ptr<io::IoStatistics>& ioStats) {
const RowTypePtr& readerOutputType,
const std::shared_ptr<io::IoStatistics>& ioStats,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const std::shared_ptr<common::ScanSpec>& scanSpec) {
// Create the SplitReader based on hiveSplit->customSplitInfo["table_format"]
if (hiveSplit->customSplitInfo.count("table_format") > 0 &&
hiveSplit->customSplitInfo["table_format"] == "hive-iceberg") {
return std::make_unique<iceberg::IcebergSplitReader>(
hiveSplit,
hiveTableHandle,
scanSpec,
readerOutputType,
partitionKeys,
fileHandleFactory,
executor,
connectorQueryCtx,
hiveConfig,
ioStats);
readerOutputType,
ioStats,
fileHandleFactory,
executor,
scanSpec);
} else {
return std::make_unique<SplitReader>(
hiveSplit,
hiveTableHandle,
scanSpec,
readerOutputType,
partitionKeys,
fileHandleFactory,
executor,
connectorQueryCtx,
hiveConfig,
ioStats);
readerOutputType,
ioStats,
fileHandleFactory,
executor,
scanSpec);
}
}

SplitReader::SplitReader(
const std::shared_ptr<const hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<common::ScanSpec>& scanSpec,
const RowTypePtr& readerOutputType,
const std::unordered_map<
std::string, std::shared_ptr<HiveColumnHandle>>*
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>*
partitionKeys,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const ConnectorQueryCtx* connectorQueryCtx,
const std::shared_ptr<const HiveConfig>& hiveConfig,
const std::shared_ptr<io::IoStatistics>& ioStats)
const RowTypePtr& readerOutputType,
const std::shared_ptr<io::IoStatistics>& ioStats,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const std::shared_ptr<common::ScanSpec>& scanSpec)
: hiveSplit_(hiveSplit),
hiveTableHandle_(hiveTableHandle),
scanSpec_(scanSpec),
readerOutputType_(readerOutputType),
partitionKeys_(partitionKeys),
pool_(connectorQueryCtx->memoryPool()),
fileHandleFactory_(fileHandleFactory),
executor_(executor),
connectorQueryCtx_(connectorQueryCtx),
hiveConfig_(hiveConfig),
readerOutputType_(readerOutputType),
ioStats_(ioStats),
fileHandleFactory_(fileHandleFactory),
executor_(executor),
pool_(connectorQueryCtx->memoryPool()),
scanSpec_(scanSpec),
baseReaderOpts_(connectorQueryCtx->memoryPool()),
emptySplit_(false) {}

Expand Down Expand Up @@ -179,11 +178,8 @@ int64_t SplitReader::estimatedRowSize() const {
return DataSource::kUnknownRowSize;
}

auto size = baseRowReader_->estimatedRowSize();
if (size.has_value()) {
return size.value();
}
return DataSource::kUnknownRowSize;
const auto size = baseRowReader_->estimatedRowSize();
return size.value_or(DataSource::kUnknownRowSize);
}

void SplitReader::updateRuntimeStats(
Expand Down
30 changes: 14 additions & 16 deletions velox/connectors/hive/SplitReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,30 +57,28 @@ class SplitReader {
static std::unique_ptr<SplitReader> create(
const std::shared_ptr<hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<common::ScanSpec>& scanSpec,
const RowTypePtr& readerOutputType,
const std::unordered_map<
std::string, std::shared_ptr<HiveColumnHandle>>*
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>*
partitionKeys,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const ConnectorQueryCtx* connectorQueryCtx,
const std::shared_ptr<const HiveConfig>& hiveConfig,
const std::shared_ptr<io::IoStatistics>& ioStats);
const RowTypePtr& readerOutputType,
const std::shared_ptr<io::IoStatistics>& ioStats,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const std::shared_ptr<common::ScanSpec>& scanSpec);

SplitReader(
const std::shared_ptr<const hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<common::ScanSpec>& scanSpec,
const RowTypePtr& readerOutputType,
const std::unordered_map<
std::string, std::shared_ptr<HiveColumnHandle>>*
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>*
partitionKeys,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const ConnectorQueryCtx* connectorQueryCtx,
const std::shared_ptr<const HiveConfig>& hiveConfig,
const std::shared_ptr<io::IoStatistics>& ioStats);
const RowTypePtr& readerOutputType,
const std::shared_ptr<io::IoStatistics>& ioStats,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const std::shared_ptr<common::ScanSpec>& scanSpec);

virtual ~SplitReader() = default;

Expand Down Expand Up @@ -139,19 +137,19 @@ class SplitReader {
const std::string& partitionKey,
const std::optional<std::string>& value) const;

std::shared_ptr<const HiveConnectorSplit> hiveSplit_;
const std::shared_ptr<const HiveTableHandle> hiveTableHandle_;
const std::unordered_map<
std::string,
std::shared_ptr<HiveColumnHandle>>* const partitionKeys_;
const ConnectorQueryCtx* const connectorQueryCtx_;
const std::shared_ptr<const HiveConfig> hiveConfig_;

std::shared_ptr<const HiveConnectorSplit> hiveSplit_;
const RowTypePtr readerOutputType_;
const std::shared_ptr<io::IoStatistics> ioStats_;
memory::MemoryPool* const pool_;
FileHandleFactory* const fileHandleFactory_;
folly::Executor* const executor_;
memory::MemoryPool* const pool_;

std::shared_ptr<common::ScanSpec> scanSpec_;
std::unique_ptr<dwio::common::Reader> baseReader_;
Expand Down
22 changes: 11 additions & 11 deletions velox/connectors/hive/iceberg/IcebergSplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,28 @@ using namespace facebook::velox::dwio::common;
namespace facebook::velox::connector::hive::iceberg {

IcebergSplitReader::IcebergSplitReader(
const std::shared_ptr<hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<common::ScanSpec>& scanSpec,
const RowTypePtr& readerOutputType,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>*
partitionKeys,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const ConnectorQueryCtx* connectorQueryCtx,
const std::shared_ptr<const HiveConfig>& hiveConfig,
std::shared_ptr<io::IoStatistics> ioStats)
const RowTypePtr& readerOutputType,
const std::shared_ptr<io::IoStatistics>& ioStats,
FileHandleFactory* const fileHandleFactory,
folly::Executor* executor,
const std::shared_ptr<common::ScanSpec>& scanSpec)
: SplitReader(
hiveSplit,
hiveTableHandle,
scanSpec,
readerOutputType,
partitionKeys,
fileHandleFactory,
executor,
connectorQueryCtx,
hiveConfig,
ioStats),
readerOutputType,
ioStats,
fileHandleFactory,
executor,
scanSpec),
baseReadOffset_(0),
splitOffset_(0) {}

Expand Down
12 changes: 6 additions & 6 deletions velox/connectors/hive/iceberg/IcebergSplitReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ struct IcebergDeleteFile;
class IcebergSplitReader : public SplitReader {
public:
IcebergSplitReader(
const std::shared_ptr<hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const hive::HiveConnectorSplit>& hiveSplit,
const std::shared_ptr<const HiveTableHandle>& hiveTableHandle,
const std::shared_ptr<common::ScanSpec>& scanSpec,
const RowTypePtr& readerOutputType,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>*
partitionKeys,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const ConnectorQueryCtx* connectorQueryCtx,
const std::shared_ptr<const HiveConfig>& hiveConfig,
std::shared_ptr<io::IoStatistics> ioStats);
const RowTypePtr& readerOutputType,
const std::shared_ptr<io::IoStatistics>& ioStats,
FileHandleFactory* fileHandleFactory,
folly::Executor* executor,
const std::shared_ptr<common::ScanSpec>& scanSpec);

~IcebergSplitReader() override = default;

Expand Down

0 comments on commit 1f88646

Please sign in to comment.