Skip to content

Commit

Permalink
Add GCS filesystem
Browse files Browse the repository at this point in the history
  • Loading branch information
elonazoulay authored and electrum committed Nov 7, 2023
1 parent c0d03b6 commit 881cc39
Show file tree
Hide file tree
Showing 24 changed files with 2,187 additions and 2 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ jobs:
!:trino-faulttolerant-tests,
!:trino-filesystem,
!:trino-filesystem-azure,
!:trino-filesystem-gcs,
!:trino-filesystem-manager,
!:trino-filesystem-s3,
!:trino-google-sheets,
Expand Down Expand Up @@ -574,6 +575,7 @@ jobs:
- { modules: lib/trino-filesystem-s3, profile: cloud-tests }
- { modules: lib/trino-filesystem-azure, profile: cloud-tests }
- { modules: lib/trino-hdfs, profile: cloud-tests }
- { modules: lib/trino-filesystem-gcs, profile: cloud-tests }
- { modules: plugin/trino-accumulo }
- { modules: plugin/trino-bigquery }
- { modules: plugin/trino-bigquery, profile: cloud-tests-arrow }
Expand Down Expand Up @@ -659,6 +661,7 @@ jobs:
&& ! (contains(matrix.modules, 'trino-filesystem-s3') && contains(matrix.profile, 'cloud-tests'))
&& ! (contains(matrix.modules, 'trino-filesystem-azure') && contains(matrix.profile, 'cloud-tests'))
&& ! (contains(matrix.modules, 'trino-hdfs') && contains(matrix.profile, 'cloud-tests'))
&& ! (contains(matrix.modules, 'trino-filesystem-gcs') && contains(matrix.profile, 'cloud-tests'))
run: $MAVEN test ${MAVEN_TEST} -pl ${{ matrix.modules }} ${{ matrix.profile != '' && format('-P {0}', matrix.profile) || '' }}
# Additional tests for selected modules
- name: HDFS file system cache isolated JVM tests
Expand Down Expand Up @@ -714,6 +717,14 @@ jobs:
(env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.ABFS_BLOB_ACCOUNT != '' || env.ABFS_BLOB_ACCESS_KEY != '' || env.ABFS_FLAT_ACCOUNT != '' || env.ABFS_FLAT_ACCESS_KEY != '' || env.ABFS_ACCOUNT != '' || env.ABFS_ACCESS_KEY != '')
run: |
$MAVEN test ${MAVEN_TEST} -pl ${{ matrix.modules }} ${{ format('-P {0}', matrix.profile) }}
- name: GCS FileSystem Cloud Tests
env:
GCP_CREDENTIALS_KEY: ${{ secrets.GCP_CREDENTIALS_KEY }}
if: >-
contains(matrix.modules, 'trino-filesystem-gcs') && contains(matrix.profile, 'cloud-tests') &&
(env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.GCP_CREDENTIALS_KEY != '')
run: |
$MAVEN test ${MAVEN_TEST} -pl ${{ matrix.modules }} ${{ format('-P {0}', matrix.profile) }}
- name: Cloud Delta Lake Tests
# Cloud tests are separate because they are time intensive, requiring cross-cloud network communication
env:
Expand Down
203 changes: 203 additions & 0 deletions lib/trino-filesystem-gcs/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>io.trino</groupId>
<artifactId>trino-root</artifactId>
<version>433-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

<artifactId>trino-filesystem-gcs</artifactId>
<name>trino-filesystem-gcs</name>
<description>Trino Filesystem - GCS</description>

<properties>
<air.main.basedir>${project.parent.basedir}</air.main.basedir>
</properties>

<dependencies>
<dependency>
<groupId>com.google.api</groupId>
<artifactId>gax</artifactId>
<exclusions>
<exclusion>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-credentials</artifactId>
</dependency>

<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-oauth2-http</artifactId>
</dependency>

<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-core</artifactId>
</dependency>

<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
<exclusions>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>listenablefuture</artifactId>
</exclusion>
<exclusion>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>

<dependency>
<groupId>com.google.http-client</groupId>
<artifactId>google-http-client</artifactId>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>concurrent</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>configuration</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>units</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-filesystem</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-memory-context</artifactId>
</dependency>

<dependency>
<groupId>jakarta.annotation</groupId>
<artifactId>jakarta.annotation-api</artifactId>
</dependency>

<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-spi</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>testing</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-filesystem</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<profiles>
<profile>
<id>default</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/TestDefaultGcsFileSystem.java</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<profile>
<id>cloud-tests</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>**/TestDefaultGcsFileSystem.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.filesystem.gcs;

import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableList;
import com.google.inject.Inject;
import io.trino.spi.security.ConnectorIdentity;

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.Optional;

import static com.google.common.base.Strings.nullToEmpty;
import static java.nio.charset.StandardCharsets.UTF_8;

public class DefaultGcsStorageFactory
implements GcsStorageFactory
{
public static final String GCS_OAUTH_KEY = "gcs.oauth";
public static final List<String> DEFAULT_SCOPES = ImmutableList.of("https://www.googleapis.com/auth/cloud-platform");
private final String projectId;
private final boolean useGcsAccessToken;
private final Optional<GoogleCredentials> jsonGoogleCredential;

@Inject
public DefaultGcsStorageFactory(GcsFileSystemConfig config)
throws IOException
{
config.validate();
projectId = config.getProjectId();
useGcsAccessToken = config.isUseGcsAccessToken();
String jsonKey = config.getJsonKey();
String jsonKeyFilePath = config.getJsonKeyFilePath();
if (jsonKey != null) {
try (InputStream inputStream = new ByteArrayInputStream(jsonKey.getBytes(UTF_8))) {
jsonGoogleCredential = Optional.of(GoogleCredentials.fromStream(inputStream).createScoped(DEFAULT_SCOPES));
}
}
else if (jsonKeyFilePath != null) {
try (FileInputStream inputStream = new FileInputStream(jsonKeyFilePath)) {
jsonGoogleCredential = Optional.of(GoogleCredentials.fromStream(inputStream).createScoped(DEFAULT_SCOPES));
}
}
else {
jsonGoogleCredential = Optional.empty();
}
}

@Override
public Storage create(ConnectorIdentity identity)
{
try {
GoogleCredentials credentials;
if (useGcsAccessToken) {
String accessToken = nullToEmpty(identity.getExtraCredentials().get(GCS_OAUTH_KEY));
try (ByteArrayInputStream inputStream = new ByteArrayInputStream(accessToken.getBytes(UTF_8))) {
credentials = GoogleCredentials.fromStream(inputStream).createScoped(DEFAULT_SCOPES);
}
}
else {
credentials = jsonGoogleCredential.orElseThrow(() -> new VerifyException("GCS credentials not configured"));
}
StorageOptions.Builder storageOptionsBuilder = StorageOptions.newBuilder();
if (projectId != null) {
storageOptionsBuilder.setProjectId(projectId);
}
return storageOptionsBuilder.setCredentials(credentials).build().getService();
}
catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}
Loading

0 comments on commit 881cc39

Please sign in to comment.