From 937968811591741b0bda19a94cdd4a39b4afcad6 Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Fri, 25 Oct 2024 11:22:28 -0400 Subject: [PATCH] more cleanup --- .../gcs/tests/GCSFileSystemTest.cpp | 72 +++++++------------ .../gcs/tests/GCSInsertTest.cpp | 21 ++---- .../storage_adapters/gcs/tests/GcsTestbench.h | 28 +++++--- 3 files changed, 47 insertions(+), 74 deletions(-) diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp index cd52f927aeb60..c0e233ff3a6bc 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp @@ -27,33 +27,20 @@ namespace facebook::velox::filesystems { namespace { class GCSFileSystemTest : public testing::Test { - protected: - static void SetUpTestSuite() { - if (testbench_ == nullptr) { - testbench_ = std::make_shared(); - testbench_->bootstrap(); - } + public: + void SetUp() { + testbench_ = std::make_shared(); + testbench_->bootstrap(); } - std::shared_ptr testGcsOptions() const { - std::unordered_map configOverride = {}; - - configOverride["hive.gcs.scheme"] = "http"; - configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); - return std::make_shared( - std::move(configOverride)); - } - - static std::shared_ptr testbench_; + std::shared_ptr testbench_; }; -std::shared_ptr GCSFileSystemTest::testbench_ = nullptr; - TEST_F(GCSFileSystemTest, readFile) { const auto gcsFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); auto readFile = gcfs.openFileForRead(gcsFile); std::int64_t size = readFile->size(); @@ -90,7 +77,7 @@ TEST_F(GCSFileSystemTest, writeAndReadFile) { const std::string_view newFile = "readWriteFile.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), newFile); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); auto writeFile = gcfs.openFileForWrite(gcsFile); std::string dataContent = @@ -120,7 +107,7 @@ TEST_F(GCSFileSystemTest, openExistingFileForWrite) { const std::string_view newFile = "readWriteFile.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), newFile); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW(gcfs.openFileForWrite(gcsFile), "File already exists"); } @@ -130,7 +117,7 @@ TEST_F(GCSFileSystemTest, renameNotImplemented) { const auto gcsExistingFile = gcsURI( testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); const auto gcsNewFile = gcsURI(testbench_->preexistingBucketName(), file); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); gcfs.openFileForRead(gcsExistingFile); VELOX_ASSERT_THROW( @@ -141,7 +128,7 @@ TEST_F(GCSFileSystemTest, renameNotImplemented) { TEST_F(GCSFileSystemTest, mkdirNotImplemented) { const std::string_view dir = "newDirectory"; const auto gcsNewDirectory = gcsURI(testbench_->preexistingBucketName(), dir); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW( gcfs.mkdir(gcsNewDirectory), "mkdir for GCS not implemented"); @@ -150,7 +137,7 @@ TEST_F(GCSFileSystemTest, mkdirNotImplemented) { TEST_F(GCSFileSystemTest, rmdirNotImplemented) { const std::string_view dir = "Directory"; const auto gcsDirectory = gcsURI(testbench_->preexistingBucketName(), dir); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_THROW(gcfs.rmdir(gcsDirectory), "rmdir for GCS not implemented"); } @@ -158,7 +145,7 @@ TEST_F(GCSFileSystemTest, rmdirNotImplemented) { TEST_F(GCSFileSystemTest, missingFile) { const std::string_view file = "newTest.txt"; const auto gcsFile = gcsURI(testbench_->preexistingBucketName(), file); - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); VELOX_ASSERT_RUNTIME_THROW_CODE( gcfs.openFileForRead(gcsFile), @@ -167,7 +154,7 @@ TEST_F(GCSFileSystemTest, missingFile) { } TEST_F(GCSFileSystemTest, missingBucket) { - filesystems::GCSFileSystem gcfs(testGcsOptions()); + filesystems::GCSFileSystem gcfs(testbench_->hiveConfig()); gcfs.initializeClient(); const std::string_view gcsFile = "gs://dummy/foo.txt"; VELOX_ASSERT_RUNTIME_THROW_CODE( @@ -177,14 +164,12 @@ TEST_F(GCSFileSystemTest, missingBucket) { } TEST_F(GCSFileSystemTest, credentialsConfig) { - std::unordered_map configOverride = {}; - // credentials from arrow gcsfs test case // While this service account key has the correct format, it cannot be used // for authentication because the key has been deactivated on the server-side, // *and* the account(s) involved are deleted *and* they are not the accounts // or projects do not match its contents. - auto creds = R"""({ + const std::string_view kCreds = R"""({ "type": "service_account", "project_id": "foo-project", "private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111", @@ -220,28 +205,19 @@ TEST_F(GCSFileSystemTest, credentialsConfig) { })"""; auto jsonFile = exec::test::TempFilePath::create(); std::ofstream credsOut(jsonFile->getPath()); - credsOut << creds; + credsOut << kCreds; credsOut.close(); - configOverride["hive.gcs.json-key-file-path"] = jsonFile->getPath(); - configOverride["hive.gcs.scheme"] = "http"; - configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); - std::shared_ptr conf = - std::make_shared(std::move(configOverride)); - filesystems::GCSFileSystem gcfs(conf); + std::unordered_map configOverride = { + {"hive.gcs.json-key-file-path", jsonFile->getPath()}}; + auto hiveConfig = testbench_->hiveConfig(configOverride); + + filesystems::GCSFileSystem gcfs(hiveConfig); gcfs.initializeClient(); - try { - const std::string gcsFile = gcsURI( - testbench_->preexistingBucketName(), - testbench_->preexistingObjectName()); - gcfs.openFileForRead(gcsFile); - FAIL() << "Expected VeloxException"; - } catch (VeloxException const& err) { - EXPECT_THAT( - err.message(), testing::HasSubstr("gs://test1-gcs/test-object-name")); - EXPECT_THAT( - err.message(), testing::HasSubstr("Invalid ServiceAccountCredentials")); - } + const auto gcsFile = gcsURI( + testbench_->preexistingBucketName(), testbench_->preexistingObjectName()); + VELOX_ASSERT_THROW( + gcfs.openFileForRead(gcsFile), "Invalid ServiceAccountCredentials"); } } // namespace } // namespace facebook::velox::filesystems diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp index f9db097b100bc..41c5b1c3a8846 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSInsertTest.cpp @@ -31,10 +31,6 @@ class GCSInsertTest : public testing::Test, public test::InsertTest { static void SetUpTestSuite() { registerGCSFileSystem(); memory::MemoryManager::testingSetInstance({}); - if (testbench_ == nullptr) { - testbench_ = std::make_shared(); - testbench_->bootstrap(); - } } void SetUp() override { @@ -44,11 +40,15 @@ class GCSInsertTest : public testing::Test, public test::InsertTest { connector::getConnectorFactory( connector::hive::HiveConnectorFactory::kHiveConnectorName) ->newConnector( - exec::test::kHiveConnectorId, gcsOptions(), ioExecutor_.get()); + exec::test::kHiveConnectorId, + testbench_->hiveConfig(), + ioExecutor_.get()); connector::registerConnector(hiveConnector); parquet::registerParquetReaderFactory(); parquet::registerParquetWriterFactory(); ioExecutor_ = std::make_unique(3); + testbench_ = std::make_shared(); + testbench_->bootstrap(); } void TearDown() override { @@ -59,16 +59,7 @@ class GCSInsertTest : public testing::Test, public test::InsertTest { connector::unregisterConnector(exec::test::kHiveConnectorId); } - std::shared_ptr gcsOptions() const { - static std::unordered_map configOverride = {}; - - configOverride["hive.gcs.scheme"] = "http"; - configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port(); - return std::make_shared( - std::move(configOverride)); - } - - static std::shared_ptr testbench_; + std::shared_ptr testbench_; std::unique_ptr ioExecutor_; }; diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h b/velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h index 38e5bd561bf90..689e44b39b232 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GcsTestbench.h @@ -33,8 +33,8 @@ namespace facebook::velox::filesystems { class GcsTestbench : public testing::Environment { public: GcsTestbench() { - auto port = facebook::velox::exec::test::getFreePorts(1); - port_ = std::to_string(port[0]); + auto port = std::to_string(facebook::velox::exec::test::getFreePorts(1)[0]); + endpoint_ = "http://localhost:" + port; std::vector names{"python3", "python"}; // If the build script or application developer provides a value in the // PYTHON environment variable, then just use that. @@ -58,7 +58,7 @@ class GcsTestbench : public testing::Environment { "-m", "testbench", "--port", - port_, + port, group_); if (serverProcess_.valid() && serverProcess_.running()) break; @@ -80,12 +80,18 @@ class GcsTestbench : public testing::Environment { } } - const std::string& port() const { - return port_; - } + std::shared_ptr hiveConfig( + const std::unordered_map configOverride = {}) + const { + std::unordered_map config( + {{"hive.gcs.endpoint", endpoint_}}); + + // Update the default config map with the supplied configOverride map + for (const auto& [configName, configValue] : configOverride) { + config[configName] = configValue; + } - const std::string& error() const { - return error_; + return std::make_shared(std::move(config)); } std::string_view preexistingBucketName() { @@ -98,13 +104,13 @@ class GcsTestbench : public testing::Environment { void bootstrap() { ASSERT_THAT(this, ::testing::NotNull()); - ASSERT_THAT(this->error(), ::testing::IsEmpty()); + ASSERT_THAT(this->error_, ::testing::IsEmpty()); // Create a bucket and a small file in the testbench. This makes it easier // to bootstrap GcsFileSystem and its tests. auto client = gcs::Client( google::cloud::Options{} - .set("http://localhost:" + this->port()) + .set(this->endpoint_) .set(gc::MakeInsecureCredentials())); bucketName_ = "test1-gcs"; @@ -122,7 +128,7 @@ class GcsTestbench : public testing::Environment { } private: - std::string port_; + std::string endpoint_; bp::child serverProcess_; bp::group group_; std::string error_;