From 65e9d4364fdca562e1f1aa12d532d9497af4dc5e Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 30 Dec 2021 11:16:55 +0000 Subject: [PATCH 1/6] Fix CSV type inference in `sgr csv import` When using `sgr csv import`, we load data using `COPY FROM STDIN` which doesn't let us treat empty strings as NULLs, whereas with the CSV FDW we can do that. To get around this, we use the empty strings in the type inference in the former case (so that integer columns with empty strings still end up as VARCHARs). --- splitgraph/commandline/ingestion.py | 4 +++- splitgraph/ingestion/inference.py | 9 ++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/splitgraph/commandline/ingestion.py b/splitgraph/commandline/ingestion.py index 9c436165..f45d81fd 100644 --- a/splitgraph/commandline/ingestion.py +++ b/splitgraph/commandline/ingestion.py @@ -142,7 +142,9 @@ def csv_import( sample = [[str(i) for i in range(len(sample))]] + sample type_overrides = dict(override_type or []) - sg_schema = infer_sg_schema(sample, override_types=type_overrides, primary_keys=primary_key) + sg_schema = infer_sg_schema( + sample, override_types=type_overrides, primary_keys=primary_key, ignore_empty_strings=False + ) logging.debug("Using Splitgraph schema: %r", sg_schema) # Reset the stream and pass it to COPY FROM STDIN diff --git a/splitgraph/ingestion/inference.py b/splitgraph/ingestion/inference.py index 812dba96..bb758b16 100644 --- a/splitgraph/ingestion/inference.py +++ b/splitgraph/ingestion/inference.py @@ -48,12 +48,12 @@ def parse_json(json_s: str): ] -def _infer_column_schema(column_sample: Sequence[str]) -> str: +def _infer_column_schema(column_sample: Sequence[str], ignore_empty_strings=True) -> str: for candidate, converter in _CONVERTERS: try: seen_value = False for c in column_sample: - if c == "" or c is None: + if (c == "" and ignore_empty_strings) or c is None: continue seen_value = True @@ -73,6 +73,7 @@ def infer_sg_schema( sample: Sequence[List[str]], override_types: Optional[Dict[str, str]] = None, primary_keys: Optional[List[str]] = None, + ignore_empty_strings: bool = True, ): override_types = override_types or {} primary_keys = primary_keys or [] @@ -92,7 +93,9 @@ def infer_sg_schema( ) for i, (c_name, c_sample) in enumerate(zip(header, columns)): - pg_type = override_types.get(c_name, _infer_column_schema(c_sample)) + pg_type = override_types.get( + c_name, _infer_column_schema(c_sample, ignore_empty_strings=ignore_empty_strings) + ) result.append( TableColumn( From 7e861408f00b05bc6bae32dd1887d2e4945c5261 Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 30 Dec 2021 12:35:21 +0000 Subject: [PATCH 2/6] Fix autogenerated README for the project Elaborate on needing to edit the `splitgraph.yml` file --- splitgraph/cloud/project/templates.py | 112 ++++++++++++++++++ .../generate_project_dbt/README.md | 112 ++++++++++++++++++ 2 files changed, 224 insertions(+) diff --git a/splitgraph/cloud/project/templates.py b/splitgraph/cloud/project/templates.py index 383ced2b..7c75863f 100644 --- a/splitgraph/cloud/project/templates.py +++ b/splitgraph/cloud/project/templates.py @@ -132,6 +132,118 @@ "SQL credentials"). You can get them at https://www.splitgraph.com/settings/sql-credentials (or your deployment URL if you're on a private deployment). +### Edit `splitgraph.yml` + +We generated a [`splitgraph.yml`](./splitgraph.yml) file from your chosen plugins' +parameters JSONSchema. 
You should review it and add suitable plugin settings: + + - set `tables` to `tables: {}` to let the plugin automatically infer the schema and the + options of the data source (by default, it adds a sample table into the project file) + - change and customize the `metadata` block + - set up the plugin parameters in `external.params`. Where the comment says `CHOOSE ONE` + and offers a list of alternative subobjects, choose one entry from the list and delete + the list itself, leaving the object at the top level. + +Example: + +```yaml +- namespace: my_namespace + repository: csv + # Catalog-specific metadata for the repository. Optional. + metadata: + readme: + text: Readme + description: Description of the repository + topics: + - sample_topic + # Data source settings for the repository. Optional. + external: + # Name of the credential that the plugin uses. This can also be a credential_id if the + # credential is already registered on Splitgraph. + credential: csv + plugin: csv + # Plugin-specific parameters matching the plugin's parameters schema + params: + connection: # Choose one of: + - connection_type: http # REQUIRED. Constant + url: '' # REQUIRED. HTTP URL to the CSV file + - connection_type: s3 # REQUIRED. Constant + s3_endpoint: '' # REQUIRED. S3 endpoint (including port if required) + s3_bucket: '' # REQUIRED. Bucket the object is in + s3_region: '' # Region of the S3 bucket + s3_secure: false # Whether to use HTTPS for S3 access + s3_object: '' # Limit the import to a single object + s3_object_prefix: '' # Prefix for object in S3 bucket + autodetect_header: true # Detect whether the CSV file has a header automatically + autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc) automatically + autodetect_encoding: true # Detect the CSV file's encoding automatically + autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection + schema_inference_rows: 100000 # Number of rows to use for schema inference + encoding: utf-8 # Encoding of the CSV file + ignore_decode_errors: false # Ignore errors when decoding the file + header: true # First line of the CSV file is its header + delimiter: ',' # Character used to separate fields in the file + quotechar: '"' # Character used to quote fields + tables: + sample_table: + # Plugin-specific table parameters matching the plugin's schema + options: + url: '' # HTTP URL to the CSV file + s3_object: '' # S3 object of the CSV file + autodetect_header: true # Detect whether the CSV file has a header automatically + autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc) automatically + autodetect_encoding: true # Detect the CSV file's encoding automatically + autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection + schema_inference_rows: 100000 # Number of rows to use for schema inference + encoding: utf-8 # Encoding of the CSV file + ignore_decode_errors: false # Ignore errors when decoding the file + header: true # First line of the CSV file is its header + delimiter: ',' # Character used to separate fields in the file + quotechar: '"' # Character used to quote fields + # Schema of the table, a list of objects with `name` and `type`. If set to `[]`, will infer. + schema: [] + # Whether live querying is enabled for the plugin (creates a "live" tag in the + # repository proxying to the data source). The plugin must support live querying. + is_live: true + # Ingestion schedule settings. 
Disable this if you're using GitHub Actions or other methods + # to trigger ingestion. + schedule: +``` + +becomes: + +```yaml +- namespace: my_namespace + repository: csv + metadata: + readme: + text: Readme + description: Description of the repository + topics: + - sample_topic + external: + # No credential required since we're querying a CSV file over HTTP + plugin: csv + # Plugin-specific parameters matching the plugin's parameters schema + params: + connection: + connection_type: http # REQUIRED. Constant + url: 'https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv' # REQUIRED. HTTP URL to the CSV file + autodetect_header: true # Detect whether the CSV file has a header automatically + autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc) automatically + autodetect_encoding: true # Detect the CSV file's encoding automatically + autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection + schema_inference_rows: 100000 # Number of rows to use for schema inference + encoding: utf-8 # Encoding of the CSV file + ignore_decode_errors: false # Ignore errors when decoding the file + header: true # First line of the CSV file is its header + delimiter: ',' # Character used to separate fields in the file + quotechar: '"' # Character used to quote fields + # Automatically infer table parameters + tables: {} + is_live: true +``` + ### Set up GitHub Actions Because this repository was itself generated by a GitHub Actions job, we can't edit the workflow diff --git a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/README.md b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/README.md index 09edab64..d53cb52a 100644 --- a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/README.md +++ b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/README.md @@ -39,6 +39,118 @@ repository and create the following secrets: "SQL credentials"). You can get them at https://www.splitgraph.com/settings/sql-credentials (or your deployment URL if you're on a private deployment). +### Edit `splitgraph.yml` + +We generated a [`splitgraph.yml`](./splitgraph.yml) file from your chosen plugins' +parameters JSONSchema. You should review it and add suitable plugin settings: + + - set `tables` to `tables: {}` to let the plugin automatically infer the schema and the + options of the data source (by default, it adds a sample table into the project file) + - change and customize the `metadata` block + - set up the plugin parameters in `external.params`. Where the comment says `CHOOSE ONE` + and offers a list of alternative subobjects, choose one entry from the list and delete + the list itself, leaving the object at the top level. + +Example: + +```yaml +- namespace: my_namespace + repository: csv + # Catalog-specific metadata for the repository. Optional. + metadata: + readme: + text: Readme + description: Description of the repository + topics: + - sample_topic + # Data source settings for the repository. Optional. + external: + # Name of the credential that the plugin uses. This can also be a credential_id if the + # credential is already registered on Splitgraph. 
+ credential: csv + plugin: csv + # Plugin-specific parameters matching the plugin's parameters schema + params: + connection: # Choose one of: + - connection_type: http # REQUIRED. Constant + url: '' # REQUIRED. HTTP URL to the CSV file + - connection_type: s3 # REQUIRED. Constant + s3_endpoint: '' # REQUIRED. S3 endpoint (including port if required) + s3_bucket: '' # REQUIRED. Bucket the object is in + s3_region: '' # Region of the S3 bucket + s3_secure: false # Whether to use HTTPS for S3 access + s3_object: '' # Limit the import to a single object + s3_object_prefix: '' # Prefix for object in S3 bucket + autodetect_header: true # Detect whether the CSV file has a header automatically + autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc) automatically + autodetect_encoding: true # Detect the CSV file's encoding automatically + autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection + schema_inference_rows: 100000 # Number of rows to use for schema inference + encoding: utf-8 # Encoding of the CSV file + ignore_decode_errors: false # Ignore errors when decoding the file + header: true # First line of the CSV file is its header + delimiter: ',' # Character used to separate fields in the file + quotechar: '"' # Character used to quote fields + tables: + sample_table: + # Plugin-specific table parameters matching the plugin's schema + options: + url: '' # HTTP URL to the CSV file + s3_object: '' # S3 object of the CSV file + autodetect_header: true # Detect whether the CSV file has a header automatically + autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc) automatically + autodetect_encoding: true # Detect the CSV file's encoding automatically + autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection + schema_inference_rows: 100000 # Number of rows to use for schema inference + encoding: utf-8 # Encoding of the CSV file + ignore_decode_errors: false # Ignore errors when decoding the file + header: true # First line of the CSV file is its header + delimiter: ',' # Character used to separate fields in the file + quotechar: '"' # Character used to quote fields + # Schema of the table, a list of objects with `name` and `type`. If set to `[]`, will infer. + schema: [] + # Whether live querying is enabled for the plugin (creates a "live" tag in the + # repository proxying to the data source). The plugin must support live querying. + is_live: true + # Ingestion schedule settings. Disable this if you're using GitHub Actions or other methods + # to trigger ingestion. + schedule: +``` + +becomes: + +```yaml +- namespace: my_namespace + repository: csv + metadata: + readme: + text: Readme + description: Description of the repository + topics: + - sample_topic + external: + # No credential required since we're querying a CSV file over HTTP + plugin: csv + # Plugin-specific parameters matching the plugin's parameters schema + params: + connection: + connection_type: http # REQUIRED. Constant + url: 'https://people.sc.fsu.edu/~jburkardt/data/csv/airtravel.csv' # REQUIRED. 
HTTP URL to the CSV file + autodetect_header: true # Detect whether the CSV file has a header automatically + autodetect_dialect: true # Detect the CSV file's dialect (separator, quoting characters etc) automatically + autodetect_encoding: true # Detect the CSV file's encoding automatically + autodetect_sample_size: 65536 # Sample size, in bytes, for encoding/dialect/header detection + schema_inference_rows: 100000 # Number of rows to use for schema inference + encoding: utf-8 # Encoding of the CSV file + ignore_decode_errors: false # Ignore errors when decoding the file + header: true # First line of the CSV file is its header + delimiter: ',' # Character used to separate fields in the file + quotechar: '"' # Character used to quote fields + # Automatically infer table parameters + tables: {} + is_live: true +``` + ### Set up GitHub Actions Because this repository was itself generated by a GitHub Actions job, we can't edit the workflow From 943e60ef87020aa28ac1c0aef41a70796ba33945 Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 30 Dec 2021 12:37:07 +0000 Subject: [PATCH 3/6] Default to `--initial-private` for live data sources --- splitgraph/cloud/project/github_actions.py | 2 +- .../generate_project/.github/workflows/build.yml | 4 ++-- .../generate_project_dbt/.github/workflows/build.yml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/splitgraph/cloud/project/github_actions.py b/splitgraph/cloud/project/github_actions.py index bbd55b0a..7d5e14c6 100644 --- a/splitgraph/cloud/project/github_actions.py +++ b/splitgraph/cloud/project/github_actions.py @@ -54,7 +54,7 @@ def generate_job( steps.append( { "name": "Run sgr cloud load to set up metadata and data source settings", - "run": "sgr cloud load --remote splitgraph " + "run": "sgr cloud load --remote splitgraph --initial-private " f"-f splitgraph.yml -f splitgraph.credentials.yml {repository}", "shell": "bash", } diff --git a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_no_dbt/generate_project/.github/workflows/build.yml b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_no_dbt/generate_project/.github/workflows/build.yml index 5e452cd3..705d2dde 100644 --- a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_no_dbt/generate_project/.github/workflows/build.yml +++ b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_no_dbt/generate_project/.github/workflows/build.yml @@ -18,8 +18,8 @@ jobs: env: CREDENTIALS_YML: ${{secrets.SPLITGRAPH_CREDENTIALS_YML}} - name: Run sgr cloud load to set up metadata and data source settings - run: sgr cloud load --remote splitgraph -f splitgraph.yml -f splitgraph.credentials.yml - myns/postgres_fdw + run: sgr cloud load --remote splitgraph --initial-private -f splitgraph.yml + -f splitgraph.credentials.yml myns/postgres_fdw shell: bash myns_airbyte_postgres: name: Build myns/airbyte-postgres diff --git a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml index acecc238..9e36f79e 100644 --- a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml +++ 
b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml @@ -18,8 +18,8 @@ jobs: env: CREDENTIALS_YML: ${{secrets.SPLITGRAPH_CREDENTIALS_YML}} - name: Run sgr cloud load to set up metadata and data source settings - run: sgr cloud load --remote splitgraph -f splitgraph.yml -f splitgraph.credentials.yml - myns/postgres_fdw + run: sgr cloud load --remote splitgraph --initial-private -f splitgraph.yml + -f splitgraph.credentials.yml myns/postgres_fdw shell: bash myns_airbyte_postgres: name: Build myns/airbyte-postgres From df090f3ecab471706a128a5443a995bf560d5570 Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 30 Dec 2021 14:41:51 +0000 Subject: [PATCH 4/6] Fix `sgr cloud load` with `--skip-external` It would ignore the list of repos to limit the load to; fix by filtering the repos in any case. --- splitgraph/commandline/cloud.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/splitgraph/commandline/cloud.py b/splitgraph/commandline/cloud.py index ea9db815..a51cae65 100644 --- a/splitgraph/commandline/cloud.py +++ b/splitgraph/commandline/cloud.py @@ -632,15 +632,15 @@ def load_c( repo_yaml = load_project(repositories_file) repositories = repo_yaml.repositories + if limit_repositories: + repositories = [ + r for r in repositories if f"{r.namespace}/{r.repository}" in limit_repositories + ] gql_client = GQLAPIClient(remote) if not skip_external: rest_client = RESTAPIClient(remote) - if limit_repositories: - repositories = [ - r for r in repositories if f"{r.namespace}/{r.repository}" in limit_repositories - ] filter_credential_names = [ r.external.credential for r in repositories if r.external and r.external.credential From 9c773eddcd7a4128d9d7a7d91203d2e0f2d5f4c2 Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 30 Dec 2021 14:52:23 +0000 Subject: [PATCH 5/6] Use the current Git SHA for the dbt project --- splitgraph/cloud/project/dbt.py | 3 ++- splitgraph/cloud/project/github_actions.py | 3 ++- .../generate_project_dbt/.github/workflows/build.yml | 2 +- .../generate_project_dbt/splitgraph.yml | 1 + test/splitgraph/cloud/project/test_dbt.py | 3 ++- 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/splitgraph/cloud/project/dbt.py b/splitgraph/cloud/project/dbt.py index 624cb03c..72d6eca2 100644 --- a/splitgraph/cloud/project/dbt.py +++ b/splitgraph/cloud/project/dbt.py @@ -78,6 +78,7 @@ def generate_dbt_plugin_params(repositories: List[str]) -> Tuple[Dict[str, Any], # the Git pull URL at action runtime (using GITHUB_TOKEN). credentials = {"git_url": "$THIS_REPO_URL"} - params = {"sources": [_make_source(r) for r in repositories]} + # Same with the branch: we want to inject the current SHA we're running the action for. 
+ params = {"sources": [_make_source(r) for r in repositories], "git_branch": "$THIS_SHA"} return params, credentials diff --git a/splitgraph/cloud/project/github_actions.py b/splitgraph/cloud/project/github_actions.py index 7d5e14c6..df4fedb8 100644 --- a/splitgraph/cloud/project/github_actions.py +++ b/splitgraph/cloud/project/github_actions.py @@ -30,7 +30,8 @@ def generate_job( { "name": "Set up dbt Git URL", "run": 'echo "$CREDENTIALS_YML" > splitgraph.credentials.yml && ' - 'sed -i "s|\\$THIS_REPO_URL|https://$GITHUB_ACTOR:$GITHUB_TOKEN@github.com/$GITHUB_REPOSITORY|g" splitgraph.credentials.yml', + 'sed -i "s|\\$THIS_REPO_URL|https://$GITHUB_ACTOR:$GITHUB_TOKEN@github.com/$GITHUB_REPOSITORY|g" splitgraph.credentials.yml && ' + 'sed -i "s|\\$THIS_SHA|$GITHUB_SHA|g" splitgraph.yml', "shell": "bash", "env": { "CREDENTIALS_YML": "${{secrets.SPLITGRAPH_CREDENTIALS_YML}}", diff --git a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml index 9e36f79e..5eaa71d2 100644 --- a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml +++ b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/.github/workflows/build.yml @@ -58,7 +58,7 @@ jobs: splitgraph_api_secret: ${{ secrets.SPLITGRAPH_API_SECRET }} - name: Set up dbt Git URL run: echo "$CREDENTIALS_YML" > splitgraph.credentials.yml && sed -i "s|\$THIS_REPO_URL|https://$GITHUB_ACTOR:$GITHUB_TOKEN@github.com/$GITHUB_REPOSITORY|g" - splitgraph.credentials.yml + splitgraph.credentials.yml && sed -i "s|\$THIS_SHA|$GITHUB_SHA|g" splitgraph.yml shell: bash env: CREDENTIALS_YML: ${{secrets.SPLITGRAPH_CREDENTIALS_YML}} diff --git a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/splitgraph.yml b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/splitgraph.yml index 71678f35..056bcce9 100644 --- a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/splitgraph.yml +++ b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/splitgraph.yml @@ -96,6 +96,7 @@ repositories: namespace: myns repository: airbyte-postgres hash_or_tag: latest + git_branch: $THIS_SHA is_live: false tables: {} metadata: diff --git a/test/splitgraph/cloud/project/test_dbt.py b/test/splitgraph/cloud/project/test_dbt.py index 059485ec..59ba84fe 100644 --- a/test/splitgraph/cloud/project/test_dbt.py +++ b/test/splitgraph/cloud/project/test_dbt.py @@ -12,6 +12,7 @@ def test_generate_dbt_plugin_params(): ["some-data/source", "some-other/data-raw", "and-third/data"] ) == ( { + "git_branch": "$THIS_SHA", "sources": [ { "dbt_source_name": "some_data_source", @@ -31,7 +32,7 @@ def test_generate_dbt_plugin_params(): "repository": "data", "hash_or_tag": "latest", }, - ] + ], }, {"git_url": "$THIS_REPO_URL"}, ) From 85ae2c763537bf76caf9b5a5d6e1a01e24491a75 Mon Sep 17 00:00:00 2001 From: Artjoms Iskovs Date: Thu, 30 Dec 2021 15:04:40 +0000 Subject: [PATCH 6/6] Fix generated dbt project - use "ephemeral" instead of cte for materialization The sample job in the suite at 
https://github.com/mildbyte/template-test-2/runs/4667663552?check_suite_focus=true now actually completes (but doesn't do anything) because it just makes an unchecked CTE (the source tables don't exist). Fails as expected (https://github.com/mildbyte/template-test-2/runs/4667687824?check_suite_focus=true) if we materialize as tables since then the relation indeed doesn't exist. --- splitgraph/cloud/project/templates.py | 2 +- .../splitgraph_template/dbt_project.yml | 2 +- .../generate_project_dbt/dbt_project.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/splitgraph/cloud/project/templates.py b/splitgraph/cloud/project/templates.py index 7c75863f..899da0fd 100644 --- a/splitgraph/cloud/project/templates.py +++ b/splitgraph/cloud/project/templates.py @@ -67,7 +67,7 @@ # Here as a starting point. You can reference these models downstream in models that actually # materialize as tables. staging: - +materialized: cte + +materialized: ephemeral """ SOURCES_YML_TEMPLATE = """# This file defines all data sources referenced by this model. The mapping diff --git a/test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/dbt_project.yml b/test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/dbt_project.yml index 82da3bc1..5b62a246 100644 --- a/test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/dbt_project.yml +++ b/test/splitgraph/cloud/project/snapshots/test_dbt/test_generate_dbt_project/splitgraph_template/dbt_project.yml @@ -30,4 +30,4 @@ models: # Here as a starting point. You can reference these models downstream in models that actually # materialize as tables. staging: - +materialized: cte + +materialized: ephemeral diff --git a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/dbt_project.yml b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/dbt_project.yml index 82da3bc1..5b62a246 100644 --- a/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/dbt_project.yml +++ b/test/splitgraph/cloud/project/snapshots/test_generation/test_generate_project_with_dbt/generate_project_dbt/dbt_project.yml @@ -30,4 +30,4 @@ models: # Here as a starting point. You can reference these models downstream in models that actually # materialize as tables. staging: - +materialized: cte + +materialized: ephemeral
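
For reference on patch 1, below is a minimal standalone sketch of the inference behaviour that the new `ignore_empty_strings` flag toggles. It is not the splitgraph implementation itself: the candidate type list and the `infer_column_type` helper are illustrative only. With empty strings ignored (the CSV FDW path, where `""` becomes NULL), a column such as `["1", "", "3"]` still infers as an integer; with empty strings included (the `COPY FROM STDIN` path used by `sgr csv import`), the `""` value takes part in inference and the column falls back to a text type, so the subsequent load does not fail.

```python
# Minimal standalone sketch of the empty-string handling in column type inference.
# This is NOT the splitgraph code: the candidate list, type names and the
# infer_column_type helper are illustrative only.
from typing import Optional, Sequence

_CANDIDATES = [("integer", int), ("numeric", float)]


def infer_column_type(
    column_sample: Sequence[Optional[str]], ignore_empty_strings: bool = True
) -> str:
    for type_name, converter in _CANDIDATES:
        try:
            seen_value = False
            for value in column_sample:
                # The CSV FDW turns "" into NULL, so it can be skipped during inference;
                # COPY FROM STDIN keeps "" as a literal value, so it must be considered.
                if value is None or (value == "" and ignore_empty_strings):
                    continue
                seen_value = True
                converter(value)
            if seen_value:
                return type_name
        except ValueError:
            continue
    return "character varying"


print(infer_column_type(["1", "", "3"]))                              # integer
print(infer_column_type(["1", "", "3"], ignore_empty_strings=False))  # character varying
```

In the patch itself, only `sgr csv import` passes `ignore_empty_strings=False`; `infer_sg_schema` keeps `True` as the default, so the FDW-based code paths are unchanged.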