diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 397a3a8a..6bae11d6 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,63 +1,63 @@ -name-template: 'v$RESOLVED_VERSION' -tag-template: 'v$RESOLVED_VERSION' +name-template: "v$RESOLVED_VERSION" +tag-template: "v$RESOLVED_VERSION" categories: - - title: 'New Features ✨' + - title: "New Features ✨" labels: - - 'feature' - - 'enhancement' - - title: 'Bug Fixes 🐛' + - "feature" + - "enhancement" + - title: "Bug Fixes 🐛" labels: - - 'fix' - - 'bugfix' - - 'bug' - - title: 'Under the Hood ⚙️' + - "fix" + - "bugfix" + - "bug" + - title: "Under the Hood ⚙️" labels: - - 'chore' - - 'ci' - - 'refactor' - - title: 'Documentation 📖' - label: 'docs' -change-template: '- $TITLE (#$NUMBER)' + - "chore" + - "ci" + - "refactor" + - title: "Documentation 📖" + label: "docs" +change-template: "- $TITLE (#$NUMBER)" change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 
version-resolver: major: labels: - - 'major' + - "major" minor: labels: - - 'minor' + - "minor" patch: labels: - - 'patch' + - "patch" default: patch template: | ## Changes $CHANGES autolabeler: - - label: 'chore' + - label: "chore" title: - '/^chore(\(.*\))?\:/i' - - label: 'ci' + - label: "ci" title: - '/^ci(\(.*\))?\:/i' - - label: 'bug' + - label: "bug" title: - '/^fix(\(.*\))?\:/i' - - label: 'enhancement' + - label: "enhancement" title: - '/^feat(\(.*\))?/i' - - label: 'docs' + - label: "docs" title: - '/^docs(\(.*\))?\:/i' - - label: 'security' + - label: "security" title: - '/^security(\(.*\))?\:/i' - '/^fix(\(security\))?\:/i' - - label: 'dependencies' + - label: "dependencies" title: - '/^chore\(deps\)\:/i' - '/^build\(deps\)\:/i' - - label: 'breaking' + - label: "breaking" title: - '/!:\s*$/i' diff --git a/.github/workflows/autofix-command.yml b/.github/workflows/autofix-command.yml index d01d24e2..03ec62c0 100644 --- a/.github/workflows/autofix-command.yml +++ b/.github/workflows/autofix-command.yml @@ -4,11 +4,11 @@ on: workflow_dispatch: inputs: pr: - description: 'PR Number' + description: "PR Number" type: string required: true comment-id: - description: 'Comment ID (Optional)' + description: "Comment ID (Optional)" type: string required: false @@ -20,152 +20,147 @@ jobs: # Don't run on forks. Run on pushes to main, and on PRs that are not from forks. 
strategy: matrix: - python-version: [ - '3.10', - ] - os: [ - Ubuntu, - ] + python-version: ["3.10"] + os: [Ubuntu] fail-fast: false runs-on: "${{ matrix.os }}-latest" steps: - - # Custom steps to fetch the PR and checkout the code: - - name: Checkout Airbyte - uses: actions/checkout@v4 - with: - # Important that this is set so that CI checks are triggered again - # Without this we would be forever waiting on required checks to pass - token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} - - - name: Checkout PR (${{ github.event.inputs.pr }}) - uses: dawidd6/action-checkout-pr@v1 - with: - pr: ${{ github.event.inputs.pr }} - - - name: Get PR info - id: pr-info - run: | - PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }}) - echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT - echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - - - name: Create URL to the run output - id: vars - run: echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT - - - name: Append comment with job run link - id: first-comment-action - uses: peter-evans/create-or-update-comment@v4 - with: - comment-id: ${{ github.event.inputs.comment-id }} - issue-number: ${{ github.event.inputs.pr }} - body: | - > **Auto-Fix Job Info** - > - > This job attempts to auto-fix any linting or formating issues. If any fixes are made, - > those changes will be automatically committed and pushed back to the PR. - > - > Note: This job can only be run by maintainers. On PRs from forks, this command requires - > that the PR author has enabled the `Allow edits from maintainers` option. - - > PR auto-fix job started... 
[Check job output.][1] - - [1]: ${{ steps.vars.outputs.run-url }} - - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras - - # Fix any lint or format issues - - - name: Auto-Fix Ruff Lint Issues - run: poetry run ruff check --fix . || true - - name: Auto-Fix Ruff Format Issues - run: poetry run ruff format . || true - - # Check for changes in git - - - name: Check for changes - id: git-diff - run: | - git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT - shell: bash - - # Commit changes (if any) - - - name: Commit changes - if: steps.git-diff.outputs.changes == 'true' - run: | - git config --global user.name "octavia-squidington-iii" - git config --global user.email "contact@airbyte.com" - git add . - git commit -m "Auto-fix lint and format issues" - - # Fix any further 'unsafe' lint issues in a separate commit - - - name: Auto-Fix Ruff Lint Issues (Unsafe) - run: poetry run ruff check --fix --unsafe-fixes . || true - - name: Auto-Fix Ruff Format Issues - run: poetry run ruff format . || true - - # Check for changes in git (2nd time, for 'unsafe' lint fixes) - - - name: Check for changes ('unsafe' fixes) - id: git-diff-2 - run: | - git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT - shell: bash - - - name: Commit 'unsafe' lint fixes - if: steps.git-diff-2.outputs.changes == 'true' - run: | - git config --global user.name "octavia-squidington-iii" - git config --global user.email "contact@airbyte.com" - git add . 
- git commit -m "Auto-fix lint issues (unsafe)" - - - name: Push changes to '(${{ steps.pr-info.outputs.repo }})' - if: steps.git-diff.outputs.changes == 'true' || steps.git-diff-2.outputs.changes == 'true' - run: | - git remote add contributor https://github.com/${{ steps.pr-info.outputs.repo }}.git - git push contributor HEAD:${{ steps.pr-info.outputs.branch }} - - - name: Append success comment - uses: peter-evans/create-or-update-comment@v4 - if: steps.git-diff.outputs.changes == 'true' || steps.git-diff-2.outputs.changes == 'true' - with: - comment-id: ${{ steps.first-comment-action.outputs.comment-id }} - reactions: hooray - body: | - > ✅ Changes applied successfully. - - - name: Append success comment (no-op) - uses: peter-evans/create-or-update-comment@v4 - if: steps.git-diff.outputs.changes != 'true' && steps.git-diff-2.outputs.changes != 'true' - with: - comment-id: ${{ steps.first-comment-action.outputs.comment-id }} - reactions: "+1" - body: | - > 🟦 Job completed successfully (no changes). - - - name: Append failure comment - uses: peter-evans/create-or-update-comment@v4 - if: failure() - with: - comment-id: ${{ steps.first-comment-action.outputs.comment-id }} - reactions: confused - body: | - > ❌ Job failed. 
+ # Custom steps to fetch the PR and checkout the code: + - name: Checkout Airbyte + uses: actions/checkout@v4 + with: + # Important that this is set so that CI checks are triggered again + # Without this we would be forever waiting on required checks to pass + token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + + - name: Checkout PR (${{ github.event.inputs.pr }}) + uses: dawidd6/action-checkout-pr@v1 + with: + pr: ${{ github.event.inputs.pr }} + + - name: Get PR info + id: pr-info + run: | + PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }}) + echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT + echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + + - name: Create URL to the run output + id: vars + run: echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT + + - name: Append comment with job run link + id: first-comment-action + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ github.event.inputs.comment-id }} + issue-number: ${{ github.event.inputs.pr }} + body: | + > **Auto-Fix Job Info** + > + > This job attempts to auto-fix any linting or formatting issues. If any fixes are made, + > those changes will be automatically committed and pushed back to the PR. + > + > Note: This job can only be run by maintainers. On PRs from forks, this command requires + > that the PR author has enabled the `Allow edits from maintainers` option. + + > PR auto-fix job started... 
[Check job output.][1] + + [1]: ${{ steps.vars.outputs.run-url }} + + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras + + # Fix any lint or format issues + + - name: Auto-Fix Ruff Lint Issues + run: poetry run ruff check --fix . || true + - name: Auto-Fix Ruff Format Issues + run: poetry run ruff format . || true + + # Check for changes in git + + - name: Check for changes + id: git-diff + run: | + git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT + shell: bash + + # Commit changes (if any) + + - name: Commit changes + if: steps.git-diff.outputs.changes == 'true' + run: | + git config --global user.name "octavia-squidington-iii" + git config --global user.email "contact@airbyte.com" + git add . + git commit -m "Auto-fix lint and format issues" + + # Fix any further 'unsafe' lint issues in a separate commit + + - name: Auto-Fix Ruff Lint Issues (Unsafe) + run: poetry run ruff check --fix --unsafe-fixes . || true + - name: Auto-Fix Ruff Format Issues + run: poetry run ruff format . || true + + # Check for changes in git (2nd time, for 'unsafe' lint fixes) + + - name: Check for changes ('unsafe' fixes) + id: git-diff-2 + run: | + git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT + shell: bash + + - name: Commit 'unsafe' lint fixes + if: steps.git-diff-2.outputs.changes == 'true' + run: | + git config --global user.name "octavia-squidington-iii" + git config --global user.email "contact@airbyte.com" + git add . 
+ git commit -m "Auto-fix lint issues (unsafe)" + + - name: Push changes to '(${{ steps.pr-info.outputs.repo }})' + if: steps.git-diff.outputs.changes == 'true' || steps.git-diff-2.outputs.changes == 'true' + run: | + git remote add contributor https://github.com/${{ steps.pr-info.outputs.repo }}.git + git push contributor HEAD:${{ steps.pr-info.outputs.branch }} + + - name: Append success comment + uses: peter-evans/create-or-update-comment@v4 + if: steps.git-diff.outputs.changes == 'true' || steps.git-diff-2.outputs.changes == 'true' + with: + comment-id: ${{ steps.first-comment-action.outputs.comment-id }} + reactions: hooray + body: | + > ✅ Changes applied successfully. + + - name: Append success comment (no-op) + uses: peter-evans/create-or-update-comment@v4 + if: steps.git-diff.outputs.changes != 'true' && steps.git-diff-2.outputs.changes != 'true' + with: + comment-id: ${{ steps.first-comment-action.outputs.comment-id }} + reactions: "+1" + body: | + > 🟦 Job completed successfully (no changes). + + - name: Append failure comment + uses: peter-evans/create-or-update-comment@v4 + if: failure() + with: + comment-id: ${{ steps.first-comment-action.outputs.comment-id }} + reactions: confused + body: | + > ❌ Job failed. 
diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml index 227d5b75..9f2e7ca9 100644 --- a/.github/workflows/connector-tests.yml +++ b/.github/workflows/connector-tests.yml @@ -51,7 +51,6 @@ jobs: vector-db-based: ${{ steps.changes.outputs.vector-db-based }} sql: ${{ steps.changes.outputs.sql }} - # # The Connector CI Tests is a status check emitted by airbyte-ci # # We make it pass once we have determined that there are no changes to the connectors # - name: "Skip Connectors CI tests" @@ -76,7 +75,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 360 # 6 hours strategy: - fail-fast: true # Save resources by aborting if one connector fails + fail-fast: true # Save resources by aborting if one connector fails matrix: include: - connector: source-shopify @@ -98,9 +97,9 @@ jobs: id: no_changes if: ${{ matrix.cdk_extra != 'n/a' && needs.cdk_changes.outputs[matrix.cdk_extra] == 'false' }} run: | - echo "Aborting job as specified extra not changed: ${{matrix.cdk_extra}} = ${{ needs.cdk_changes.outputs[matrix.cdk_extra] }}" - echo "status=cancelled" >> $GITHUB_OUTPUT - exit 1 + echo "Aborting job as specified extra not changed: ${{matrix.cdk_extra}} = ${{ needs.cdk_changes.outputs[matrix.cdk_extra] }}" + echo "status=cancelled" >> $GITHUB_OUTPUT + exit 1 continue-on-error: true # Get the monorepo so we can test the connectors - name: Checkout Airbyte Monorepo diff --git a/.github/workflows/pdoc_preview.yml b/.github/workflows/pdoc_preview.yml index cf8a4b31..c260e458 100644 --- a/.github/workflows/pdoc_preview.yml +++ b/.github/workflows/pdoc_preview.yml @@ -3,7 +3,7 @@ name: Generate Docs on: push: branches: - - main + - main pull_request: {} jobs: @@ -11,32 +11,32 @@ jobs: runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - 
cache: 'poetry' + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" - - name: Install dependencies - run: poetry install --all-extras + - name: Install dependencies + run: poetry install --all-extras - - name: Generate documentation - run: | - set -e - poetry run poe docs-generate - if [ ! -d "docs/generated" ]; then - echo "Error: Documentation generation failed - docs/generated directory not found" - exit 1 - fi + - name: Generate documentation + run: | + set -e + poetry run poe docs-generate + if [ ! -d "docs/generated" ]; then + echo "Error: Documentation generation failed - docs/generated directory not found" + exit 1 + fi - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - # Upload generated documentation - path: 'docs/generated' + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload generated documentation + path: "docs/generated" diff --git a/.github/workflows/pdoc_publish.yml b/.github/workflows/pdoc_publish.yml index dbd35dc5..fc4107b3 100644 --- a/.github/workflows/pdoc_publish.yml +++ b/.github/workflows/pdoc_publish.yml @@ -3,7 +3,7 @@ name: Publish Docs on: push: branches: - - main + - main # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -28,33 +28,33 @@ jobs: url: ${{ steps.deployment.outputs.page_url }} steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'poetry' - - name: Setup Pages - uses: actions/configure-pages@v5 - - - name: Install dependencies - run: poetry install --all-extras - - - name: Generate documentation - run: | - poetry run poe docs-generate - - - name: Upload 
artifact - uses: actions/upload-pages-artifact@v3 - with: - # Upload entire repository - path: 'docs/generated' - - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Install dependencies + run: poetry install --all-extras + + - name: Generate documentation + run: | + poetry run poe docs-generate + + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload entire repository + path: "docs/generated" + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/poetry-lock-command.yml b/.github/workflows/poetry-lock-command.yml index 6a616b8b..6a968874 100644 --- a/.github/workflows/poetry-lock-command.yml +++ b/.github/workflows/poetry-lock-command.yml @@ -4,11 +4,11 @@ on: workflow_dispatch: inputs: pr: - description: 'PR Number' + description: "PR Number" type: string required: true comment-id: - description: 'Comment ID (Optional)' + description: "Comment ID (Optional)" type: string required: false @@ -17,125 +17,120 @@ jobs: name: On-Demand Poetry Lock strategy: matrix: - python-version: [ - '3.10', - ] - os: [ - Ubuntu, - ] + python-version: ["3.10"] + os: [Ubuntu] fail-fast: false runs-on: "${{ matrix.os }}-latest" steps: - - # Custom steps to fetch the PR and checkout the code: - - name: Checkout Airbyte - uses: actions/checkout@v4 - with: - # Important that this is set so that CI checks are triggered again - # Without this we would be forever waiting on required checks to pass - token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} - - - name: Checkout PR (${{ github.event.inputs.pr }}) - uses: dawidd6/action-checkout-pr@v1 - with: - pr: ${{ 
github.event.inputs.pr }} - - - name: Get PR info - id: pr-info - run: | - PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }}) - echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT - echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - - - name: Create URL to the run output - id: vars - run: echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT - - - name: Append comment with job run link - id: first-comment-action - uses: peter-evans/create-or-update-comment@v4 - with: - comment-id: ${{ github.event.inputs.comment-id }} - issue-number: ${{ github.event.inputs.pr }} - body: | - > **Poetry-Lock Job Info** - > - > This job attempts to re-lock dependencies using `poetry lock` command. If any changes - > are made, those changes will be automatically committed and pushed back to the PR. - > - > Note: This job can only be run by maintainers. On PRs from forks, this command requires - > that the PR author has enabled the `Allow edits from maintainers` option. - > - > `poetry lock` job started... [Check job output.][1] - - [1]: ${{ steps.vars.outputs.run-url }} - - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' - - # Run `poetry lock` - - - name: Run `poetry lock` - run: poetry lock - - # Check for changes in git - - - name: Check for changes - id: git-diff - run: | - git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT - shell: bash - - # Commit changes (if any) - - - name: Commit changes - if: steps.git-diff.outputs.changes == 'true' - run: | - git config --global user.name "octavia-squidington-iii" - git config --global user.email "contact@airbyte.com" - git add . 
- git commit -m "Auto-commit `poetry lock` changes" - - - name: Push changes to '(${{ steps.pr-info.outputs.repo }})' - if: steps.git-diff.outputs.changes == 'true' - run: | - git remote add contributor https://github.com/${{ steps.pr-info.outputs.repo }}.git - git push contributor HEAD:${{ steps.pr-info.outputs.branch }} - - - name: Append success comment - uses: peter-evans/create-or-update-comment@v4 - if: steps.git-diff.outputs.changes == 'true' - with: - comment-id: ${{ steps.first-comment-action.outputs.comment-id }} - reactions: hooray - body: | - > ✅ `poetry lock` applied successfully. - - - name: Append success comment (no-op) - uses: peter-evans/create-or-update-comment@v4 - if: steps.git-diff.outputs.changes != 'true' - with: - comment-id: ${{ steps.first-comment-action.outputs.comment-id }} - reactions: "+1" - body: | - > 🟦 Job completed successfully (no changes). - - - name: Append failure comment - uses: peter-evans/create-or-update-comment@v4 - if: failure() - with: - comment-id: ${{ steps.first-comment-action.outputs.comment-id }} - reactions: confused - body: | - > ❌ Job failed. 
+ # Custom steps to fetch the PR and checkout the code: + - name: Checkout Airbyte + uses: actions/checkout@v4 + with: + # Important that this is set so that CI checks are triggered again + # Without this we would be forever waiting on required checks to pass + token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + + - name: Checkout PR (${{ github.event.inputs.pr }}) + uses: dawidd6/action-checkout-pr@v1 + with: + pr: ${{ github.event.inputs.pr }} + + - name: Get PR info + id: pr-info + run: | + PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }}) + echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT + echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + + - name: Create URL to the run output + id: vars + run: echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT + + - name: Append comment with job run link + id: first-comment-action + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ github.event.inputs.comment-id }} + issue-number: ${{ github.event.inputs.pr }} + body: | + > **Poetry-Lock Job Info** + > + > This job attempts to re-lock dependencies using `poetry lock` command. If any changes + > are made, those changes will be automatically committed and pushed back to the PR. + > + > Note: This job can only be run by maintainers. On PRs from forks, this command requires + > that the PR author has enabled the `Allow edits from maintainers` option. + > + > `poetry lock` job started... 
[Check job output.][1] + + [1]: ${{ steps.vars.outputs.run-url }} + + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "poetry" + + # Run `poetry lock` + + - name: Run `poetry lock` + run: poetry lock + + # Check for changes in git + + - name: Check for changes + id: git-diff + run: | + git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT + shell: bash + + # Commit changes (if any) + + - name: Commit changes + if: steps.git-diff.outputs.changes == 'true' + run: | + git config --global user.name "octavia-squidington-iii" + git config --global user.email "contact@airbyte.com" + git add . + git commit -m 'Auto-commit `poetry lock` changes' + + - name: Push changes to '(${{ steps.pr-info.outputs.repo }})' + if: steps.git-diff.outputs.changes == 'true' + run: | + git remote add contributor https://github.com/${{ steps.pr-info.outputs.repo }}.git + git push contributor HEAD:${{ steps.pr-info.outputs.branch }} + + - name: Append success comment + uses: peter-evans/create-or-update-comment@v4 + if: steps.git-diff.outputs.changes == 'true' + with: + comment-id: ${{ steps.first-comment-action.outputs.comment-id }} + reactions: hooray + body: | + > ✅ `poetry lock` applied successfully. + + - name: Append success comment (no-op) + uses: peter-evans/create-or-update-comment@v4 + if: steps.git-diff.outputs.changes != 'true' + with: + comment-id: ${{ steps.first-comment-action.outputs.comment-id }} + reactions: "+1" + body: | + > 🟦 Job completed successfully (no changes). + + - name: Append failure comment + uses: peter-evans/create-or-update-comment@v4 + if: failure() + with: + comment-id: ${{ steps.first-comment-action.outputs.comment-id }} + reactions: confused + body: | + > ❌ Job failed. 
diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml index 97c316d0..55d44750 100644 --- a/.github/workflows/pypi_publish.yml +++ b/.github/workflows/pypi_publish.yml @@ -9,35 +9,35 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - uses: hynek/build-and-inspect-python-package@v2 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: hynek/build-and-inspect-python-package@v2 publish: name: Publish to PyPI runs-on: ubuntu-latest needs: [build] permissions: - id-token: write # IMPORTANT: this permission is mandatory for trusted publishing - contents: write # Needed to upload artifacts to the release + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + contents: write # Needed to upload artifacts to the release environment: name: PyPi url: https://pypi.org/p/airbyte if: startsWith(github.ref, 'refs/tags/') steps: - - uses: actions/download-artifact@v4 - with: - name: Packages - path: dist - - name: Upload wheel to release - uses: svenstaro/upload-release-action@v2 - with: - repo_token: ${{ secrets.GITHUB_TOKEN }} - file: dist/*.whl - tag: ${{ github.ref }} - overwrite: true - file_glob: true + - uses: actions/download-artifact@v4 + with: + name: Packages + path: dist + - name: Upload wheel to release + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: dist/*.whl + tag: ${{ github.ref }} + overwrite: true + file_glob: true - - name: Publish - uses: pypa/gh-action-pypi-publish@v1.10.3 + - name: Publish + uses: pypa/gh-action-pypi-publish@v1.10.3 diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml index 01c0b0e1..f437f3b7 100644 --- a/.github/workflows/python_lint.yml +++ b/.github/workflows/python_lint.yml @@ -1,85 +1,85 @@ name: Linters on: - push: - branches: + push: + branches: - main - pull_request: {} + pull_request: {} jobs: ruff-lint-check: name: Ruff Lint 
Check runs-on: ubuntu-latest steps: - # Common steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras + # Common steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras - # Job-specifc step(s): - - name: Run lint check - run: poetry run ruff check . + # Job-specific step(s): + - name: Run lint check + run: poetry run ruff check . ruff-format-check: name: Ruff Format Check runs-on: ubuntu-latest steps: - # Common steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras + # Common steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras - # Job-specifc step(s): - - name: Check code format - run: poetry run ruff format --check . + # Job-specific step(s): + - name: Check code format + run: poetry run ruff format --check . 
mypy-check: name: MyPy Check runs-on: ubuntu-latest steps: - # Common steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras + # Common steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras - # Job-specifc step(s): + # Job-specific step(s): - # For now, we run mypy only on modified files - - name: Get changed Python files - id: changed-py-files - uses: tj-actions/changed-files@v43 - with: - files: "airbyte_cdk/**/*.py" - - name: Run mypy on changed files - if: steps.changed-py-files.outputs.any_changed == 'true' - run: poetry run mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --config-file mypy.ini --install-types --non-interactive + # For now, we run mypy only on modified files + - name: Get changed Python files + id: changed-py-files + uses: tj-actions/changed-files@v43 + with: + files: "airbyte_cdk/**/*.py" + - name: Run mypy on changed files + if: steps.changed-py-files.outputs.any_changed == 'true' + run: poetry run mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --config-file mypy.ini --install-types --non-interactive diff --git a/.github/workflows/python_pytest.yml b/.github/workflows/python_pytest.yml index 81b4c27c..ce5edbdc 100644 --- a/.github/workflows/python_pytest.yml +++ b/.github/workflows/python_pytest.yml @@ -8,66 +8,66 @@ name: PyTest on: - push: - branches: + push: + branches: - main - pull_request: {} + pull_request: {} jobs: pytest-fast: name: Pytest (Fast) runs-on: ubuntu-latest steps: - # Common 
steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras + # Common steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras - - name: Run Pytest with Coverage (Fast Tests Only) - timeout-minutes: 60 - env: - GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} - run: > - poetry run coverage run -m pytest - --durations=5 --exitfirst - -m "not slow and not requires_creds and not linting and not flaky" + - name: Run Pytest with Coverage (Fast Tests Only) + timeout-minutes: 60 + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run coverage run -m pytest + --durations=5 --exitfirst + -m "not slow and not requires_creds and not linting and not flaky" - - name: Run Pytest with Coverage (Flaky Tests Only) - timeout-minutes: 60 - continue-on-error: true - env: - GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} - run: > - poetry run coverage run -m pytest - --durations=5 --exitfirst - -m "flaky and not slow and not requires_creds" + - name: Run Pytest with Coverage (Flaky Tests Only) + timeout-minutes: 60 + continue-on-error: true + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run coverage run -m pytest + --durations=5 --exitfirst + -m "flaky and not slow and not requires_creds" - - name: Print Coverage Report - if: always() - run: poetry run coverage report + - name: Print Coverage Report + if: always() + run: poetry run coverage report - - name: Create Coverage Artifacts - if: 
always() - run: | - poetry run coverage html -d htmlcov - poetry run coverage xml -o htmlcov/coverage.xml + - name: Create Coverage Artifacts + if: always() + run: | + poetry run coverage html -d htmlcov + poetry run coverage xml -o htmlcov/coverage.xml - - name: Upload coverage to GitHub Artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: fasttest-coverage - path: htmlcov/ + - name: Upload coverage to GitHub Artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: fasttest-coverage + path: htmlcov/ pytest: name: Pytest (All, Python ${{ matrix.python-version }}, ${{ matrix.os }}) @@ -78,14 +78,14 @@ jobs: strategy: matrix: python-version: [ - '3.10', - '3.11', - #'3.12', # Currently blocked by Pendulum - ] + "3.10", + "3.11", + #'3.12', # Currently blocked by Pendulum + ] os: [ - Ubuntu, - # Windows, # For now, we don't include Windows in the test matrix. - ] + Ubuntu, + # Windows, # For now, we don't include Windows in the test matrix. + ] fail-fast: false runs-on: "${{ matrix.os }}-latest" @@ -94,44 +94,44 @@ jobs: # TODO: See if we can fully enforce this within PyAirbyte itself. 
PYTHONIOENCODING: utf-8 steps: - # Common steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras + # Common steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras - # Job-specific step(s): - - name: Run Pytest - timeout-minutes: 60 - env: - GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} - run: > - poetry run coverage run -m pytest - --durations=10 - -m "not linting and not super_slow and not flaky" + # Job-specific step(s): + - name: Run Pytest + timeout-minutes: 60 + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run coverage run -m pytest + --durations=10 + -m "not linting and not super_slow and not flaky" - - name: Print Coverage Report - if: always() - run: poetry run coverage report + - name: Print Coverage Report + if: always() + run: poetry run coverage report - - name: Create Coverage Artifacts - if: always() - run: | - poetry run coverage html -d htmlcov - poetry run coverage xml -o htmlcov/coverage.xml + - name: Create Coverage Artifacts + if: always() + run: | + poetry run coverage html -d htmlcov + poetry run coverage xml -o htmlcov/coverage.xml - - name: Upload coverage to GitHub Artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: py${{ matrix.python-version }}-${{ matrix.os }}-test-coverage - path: htmlcov/ + - name: Upload coverage to GitHub Artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: py${{ 
matrix.python-version }}-${{ matrix.os }}-test-coverage + path: htmlcov/ diff --git a/.github/workflows/slash_command_dispatch.yml b/.github/workflows/slash_command_dispatch.yml index 8318d34b..cb889cda 100644 --- a/.github/workflows/slash_command_dispatch.yml +++ b/.github/workflows/slash_command_dispatch.yml @@ -10,7 +10,6 @@ jobs: if: ${{ github.event.issue.pull_request }} runs-on: ubuntu-latest steps: - - name: Slash Command Dispatch id: dispatch uses: peter-evans/slash-command-dispatch@v4 diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index cb2e2a75..ed469441 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -4,11 +4,11 @@ on: workflow_dispatch: inputs: pr: - description: 'PR Number' + description: "PR Number" type: string required: true comment-id: - description: 'Comment ID (Optional)' + description: "Comment ID (Optional)" type: string required: false @@ -17,31 +17,31 @@ jobs: name: Append 'Starting' Comment runs-on: ubuntu-latest steps: - - name: Get PR JSON - id: pr-info - env: - GH_TOKEN: ${{ github.token }} - run: | - PR_JSON=$(gh api "repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }}") - echo "$PR_JSON" > pr-info.json - echo "sha=$(jq -r .head.sha < pr-info.json)" >> "$GITHUB_OUTPUT" - echo "run-url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> "$GITHUB_OUTPUT" - - name: Upload PR details as artifact - uses: actions/upload-artifact@v4 - with: - name: pr-info - path: pr-info.json - - name: Append comment with job run link - id: first-comment-action - uses: peter-evans/create-or-update-comment@v4 - with: - comment-id: ${{ github.event.inputs.comment-id }} - issue-number: ${{ github.event.inputs.pr }} - body: | + - name: Get PR JSON + id: pr-info + env: + GH_TOKEN: ${{ github.token }} + run: | + PR_JSON=$(gh api "repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }}") + echo "$PR_JSON" > pr-info.json + echo 
"sha=$(jq -r .head.sha < pr-info.json)" >> "$GITHUB_OUTPUT" + echo "run-url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" >> "$GITHUB_OUTPUT" + - name: Upload PR details as artifact + uses: actions/upload-artifact@v4 + with: + name: pr-info + path: pr-info.json + - name: Append comment with job run link + id: first-comment-action + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ github.event.inputs.comment-id }} + issue-number: ${{ github.event.inputs.pr }} + body: | - > PR test job started... [Check job output.][1] + > PR test job started... [Check job output.][1] - [1]: ${{ steps.pr-info.outputs.run-url }} + [1]: ${{ steps.pr-info.outputs.run-url }} # This is copied from the `python_pytest.yml` file. # Only the first two steps of the job are different, and they check out the PR's branch. @@ -50,14 +50,11 @@ jobs: needs: [start-workflow] strategy: matrix: - python-version: [ - '3.10', - '3.11', - ] + python-version: ["3.10", "3.11"] os: [ - Ubuntu, - # Windows, # For now, we don't include Windows in the test matrix. - ] + Ubuntu, + # Windows, # For now, we don't include Windows in the test matrix. + ] fail-fast: false runs-on: "${{ matrix.os }}-latest" env: @@ -65,82 +62,81 @@ jobs: # TODO: See if we can fully enforce this within PyAirbyte itself. PYTHONIOENCODING: utf-8 steps: + # Custom steps to fetch the PR and checkout the code: - # Custom steps to fetch the PR and checkout the code: + - name: Download PR info + # This puts the `pr-info.json` file in the current directory. + # We need this to get the PR's SHA at the time of the workflow run. + uses: actions/download-artifact@v4 + with: + name: pr-info - - name: Download PR info - # This puts the `pr-info.json` file in the current directory. - # We need this to get the PR's SHA at the time of the workflow run. 
- uses: actions/download-artifact@v4 - with: - name: pr-info + - name: Checkout PR + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: Checkout PR (${{ github.event.inputs.pr }}) + uses: dawidd6/action-checkout-pr@v1 + with: + pr: ${{ github.event.inputs.pr }} - - name: Checkout PR - uses: actions/checkout@v4 - with: - token: ${{ secrets.GITHUB_TOKEN }} - - name: Checkout PR (${{ github.event.inputs.pr }}) - uses: dawidd6/action-checkout-pr@v1 - with: - pr: ${{ github.event.inputs.pr }} + # Same as the `python_pytest.yml` file: - # Same as the `python_pytest.yml` file: + - name: Set up Poetry + uses: Gr1N/setup-poetry@v9 + with: + poetry-version: "1.7.1" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "poetry" + - name: Install dependencies + run: poetry install --all-extras - - name: Set up Poetry - uses: Gr1N/setup-poetry@v9 - with: - poetry-version: "1.7.1" - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' - - name: Install dependencies - run: poetry install --all-extras + - name: Run Pytest + timeout-minutes: 60 + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run pytest + --verbose + -m "not super_slow and not flaky" - - name: Run Pytest - timeout-minutes: 60 - env: - GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} - run: > - poetry run pytest - --verbose - -m "not super_slow and not flaky" + - name: Run Pytest (Flaky Only) + continue-on-error: true + timeout-minutes: 60 + env: + GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} + run: > + poetry run pytest + --verbose + -m "flaky and not super_slow" - - name: Run Pytest (Flaky Only) - continue-on-error: true - timeout-minutes: 60 - env: - GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }} - run: > - poetry run pytest - --verbose - -m "flaky and not super_slow" - - - name: Post 
CI Success to GitHub - run: | - curl --request POST \ - --url "https://api.github.com/repos/${{ github.repository }}/statuses/$(jq -r .head.sha < pr-info.json)" \ - --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ - --header 'content-type: application/json' \ - --data '{ - "state": "success", - "context": "Pytest (All, Python ${{ matrix.python-version }}, ${{ matrix.os }})", - "target_url": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}", - }' \ + - name: Post CI Success to GitHub + run: | + curl --request POST \ + --url "https://api.github.com/repos/${{ github.repository }}/statuses/$(jq -r .head.sha < pr-info.json)" \ + --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ + --header 'content-type: application/json' \ + --data '{ + "state": "success", + "context": "Pytest (All, Python ${{ matrix.python-version }}, ${{ matrix.os }})", + "target_url": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}", + }' \ log-success-comment: name: Append 'Success' Comment needs: [pytest-on-demand] runs-on: ubuntu-latest steps: - - name: Append success comment - uses: peter-evans/create-or-update-comment@v4 - with: - issue-number: ${{ github.event.inputs.pr }} - comment-id: ${{ github.event.inputs.comment-id }} - reactions: hooray - body: | - > ✅ Tests passed. + - name: Append success comment + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.inputs.pr }} + comment-id: ${{ github.event.inputs.comment-id }} + reactions: hooray + body: | + > ✅ Tests passed. log-failure-comment: name: Append 'Failure' Comment @@ -149,11 +145,11 @@ jobs: if: always() && needs.pytest-on-demand.result == 'failure' runs-on: ubuntu-latest steps: - - name: Append failure comment - uses: peter-evans/create-or-update-comment@v4 - with: - issue-number: ${{ github.event.inputs.pr }} - comment-id: ${{ github.event.inputs.comment-id }} - reactions: confused - body: | - > ❌ Tests failed. 
+ - name: Append failure comment + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.inputs.pr }} + comment-id: ${{ github.event.inputs.comment-id }} + reactions: confused + body: | + > ❌ Tests failed. diff --git a/CHANGELOG.md b/CHANGELOG.md index 68f33c15..7f3c0bca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,383 +1,506 @@ # Changelog ## 6.5.2 + bugfix: Ensure that streams with partition router are not executed concurrently ## 6.5.1 + Add state migration workaround for legacy substreams ## 6.5.0 + Low-code: Add jinja macros today_with_timezone ## 6.4.0 + Adding parity to concurrent CDK from declarative and fixing composite primary key ## 6.3.0 - add clear method to HttpMocker + +add clear method to HttpMocker ## 6.2.0 + Support multiple input datetime formats as part of the concurrent cursor ## 6.1.1 + fix streams discover ## 6.1.0 + Add option to File Transfer for File-Bases sources ## 6.0.0 + Introduce support for low-code incremental streams to be run within the concurrent CDK framework ## 5.17.0 -Add Per Partition with Global fallback Cursor + +Add Per Partition with Global fallback Cursor ## 5.16.0 + Better structured error log messages in connector_builder module, with message / internal_message / stacktrace split into separate fields ## 5.15.0 + Add new Error: No DBURL given sql [-hnr] [--table-size] [--db-size] [-p pass-through] [-s string] dburl [command] module for SQL-based destinations ## 5.14.1 + HttpClient: Fixes issue where oauth authenticators would not refresh on backed off retries. 
## 5.14.0 + Fix yielding parent records in SubstreamPartitionRouter ## 5.13.0 + Add extra fields to StreamSlice ## 5.12.1 + Low Code: Removes deprecated `class_types_registry` and `default_implementation_registry` ## 5.12.0 + Low Code: Adds `XmlDecoder` component ## 5.11.1 + Low Code: Consolidate manifest decoder selection under `SimpleRetriever`, `AsyncRetriever`, and `SessionTokenAuthenticator` ## 5.11.0 + concurrent-cdk: add per slice tracking of the most recent cursor ## 5.10.3 + do not raise exception on missing stream by default ## 5.10.2 + Remove PrintBuffer optimization due to dropped records ## 5.10.1 + Async job component: improve memory usage ## 5.10.0 + concurrent-cdk: add cursor partition generator ## 5.9.0 + concurrent-cdk: change stream availability strategy to always available ## 5.8.1 + concurrent-cdk: fix convert_to_concurrent_stream to use state from state manager ## 5.8.0 + Async job component: support Salesforce ## 5.7.5 + Have headers match during HTTP cache hit ## 5.7.4 + Always return a connection status even if an exception was raised ## 5.7.3 + fix connector builder output serialization ## 5.7.2 + add transform_record() to class DefaultFileBasedStream ## 5.7.1 + add python-snappy to file-based dependencies ## 5.7.0 + concurrent-cdk: add cursor definition based on sync mode to ConcurrentSourceAdapter ## 5.6.0 + Decouple low-code request_parameter_provider from cursor, add optional cursor_granularity to ConcurrentCursor ## 5.5.2 + Fix pandas missing dependency ## 5.5.1 + Bug fix: Return a connection status failure on an expected check failure ## 5.5.0 + Declarative async job components ## 5.4.0 + add migration of global stream_state to per_partition format ## 5.3.0 + Connector builder: add flag to disable cache ## 5.2.1 + Fix error in incremental sync docs ## 5.2.0 + Add Global Parent State Cursor ## 5.1.0 + Add limitation for number of partitions to PerPartitionCursor ## 5.0.1 + Fix source-declarative-manifest ## 5.0.0 + Replace pydantic 
BaseModel with dataclass ## 4.6.2 + use orjson instead of json to speed up JSON parsing ## 4.6.1 + Update json error message parser to identify additional error message fields in response bodies ## 4.6.0 + Raise exceptions in file-based check, improve UI errors ## 4.5.4 + add codeflash to dev environment ## 4.5.3 + Cache the result of interpolated strings when the evaluated value is equal to its raw representation ## 4.5.2 + CDK: refactor error handling in abstract source ## 4.5.1 + Added support for RFR for Full-Refresh Substreams ## 4.5.0 + Stop support for incoming legacy state message format ## 4.4.2 + Move the @deprecated decorator to the class level. ## 4.4.1 + Added test utils for integration tests ## 4.4.0 + file-based cdk: add excel file type support ## 4.3.3 + Have better fallback error message on HTTP error ## 4.3.2 + Ensure at least one element returned by decoder ## 4.3.1 + resumable full refresh: fix issue when live traffic regression tests pass state to connector ## 4.3.0 + Add PrintBuffer to emit records in batches ## 4.2.0 + Resumable full refresh: Add SubstreamResumableFullRefreshCursor to Python CDK to allow connectors to allow checkpointing on parent records ## 4.1.0 + Align BackoffStrategy interfaces to take attempt_count as a full-fledge parameter ## 4.0.2 + Add ability to stop stream when retry-after is greater than a duration ## 4.0.1 + Fix case where stream wont have a state attribute and needs to resolve get_updated_state ## 4.0.0 + - General performance enhancement - Dropping Python 3.9 support ## 3.9.6 + fix declarative schema refs for Decoder ## 3.9.5 + Fixed: Resolved an issue in HttpClient that prevented correct error messages from being presented. ## 3.9.4 -Adding text field to declarative manifest schema for general connector description. + +Adding text field to declarative manifest schema for general connector description. 
## 3.9.3 + add name property to http_client for convenience ## 3.9.2 + low-code: fix record selector factory when using custom components ## 3.9.1 - fix OOM on predicate for streamable responses + +fix OOM on predicate for streamable responses ## 3.9.0 + low code: add new Decoders: JsonlDecoder, IterableDecoder ## 3.8.2 + low-code: fix overwrite for default backoff strategy ## 3.8.1 + CDK: fix handling for rate limit errors when checking connection ## 3.8.0 + resumable full refresh: Automatically apply RFR to streams (not including substreams) that are not incremental and implement next_page_token ## 3.7.0 + Deprecate AvailabilityStrategy ## 3.6.0 + CDK: add not exiting when rate limited ## 3.5.3 + Add failure_type to HttpResponseFilter(retry after pypi read error) ## 3.5.2 + Add failure_type to HttpResponseFilter ## 3.5.1 + Remove 3.11-style union ## 3.5.0 + Clean invalid fields from configured catalog ## 3.4.1 + resumable full refresh: Fix bug where checkpoint reader stops syncing too early if first partition is complete ## 3.4.0 + file-based cdk: add config option to limit number of files for schema discover resumable full refresh: Fix bug for substreams depending on RFR parent stream would not paginate over parent ## 3.3.0 + CDK: add incomplete status to availability check during read ## 3.2.1 + CDK: flush buffer for each RATE_LIMITED message print ## 3.2.0 + CDK: add running stream status with rate limit reason to backoff approach ## 3.1.0 + CDK: add incomplete stream status to nonexistent stream handling ## 3.0.0 + Integrate HttpClient into HttpStream class. See [migration guide](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/cdk-migrations.md) for more details. 
## 2.4.0 + CDK: Add support for input format parsing at jinja macro format_datetime ## 2.3.1 - ## 2.3.0 + Add with_json_schema method to ConfiguredAirbyteStreamBuilder ## 2.2.0 + Add with_json_schema method to ConfiguredAirbyteStreamBuilder ## 2.1.0 + Add with_json_schema method to ConfiguredAirbyteStreamBuilder ## 2.0.0 -Update dependency to pydantic v2, and dependency to pydantic v2 models. See [migration guide](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/cdk-migrations.md) for more details. + +Update dependency to pydantic v2, and dependency to pydantic v2 models. See [migration guide](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/cdk-migrations.md) for more details. ## 1.8.0 + low-code: Add is_compare_strictly flag to DatetimeBasedCursor ## 1.7.3 + Exclude airbyte-cdk modules from schema discovery (retry after pypi read error - take 2) ## 1.7.2 + Exclude airbyte-cdk modules from schema discovery (retry after pypi read error) ## 1.7.1 + Exclude airbyte-cdk modules from schema discovery ## 1.7.0 -add from to + +add from to ## 1.6.0 -add from to + +add from to ## 1.5.6 -Jinja interpolation - Allow access to _partition for source-jira (re-release after pypi timeout take 2) + +Jinja interpolation - Allow access to \_partition for source-jira (re-release after pypi timeout take 2) ## 1.5.5 -Jinja interpolation - Allow access to _partition for source-jira (re-release after pypi timeout) + +Jinja interpolation - Allow access to \_partition for source-jira (re-release after pypi timeout) ## 1.5.4 -Jinja interpolation - Allow access to _partition for source-jira + +Jinja interpolation - Allow access to \_partition for source-jira ## 1.5.3 + Ensure error message is the same after migration to HttpClient ## 1.5.2 + PerPartitionState - setting invalid initial state should trigger a config error ## 1.5.1 + Fix client_side_incremental end_datetime comparison ## 1.5.0 + Python/Low Code: Updates ErroHandler, BackoffStrategy, 
HttpClient. Integrates HttpClient into low-code CDK. ## 1.4.0 + low-code: Add Incremental Parent State Handling to SubstreamPartitionRouter ## 1.3.3 + Mock server tests: adding 'discover' as part of the entrypoint_wrapper ## 1.3.2 + low-code: Added retriever type filter to stream slicer merge ## 1.3.1 -Use for Jinja interpolations + +Use for Jinja interpolations ## 1.3.0 + Added new datetime format: %s_as_float ## 1.2.1 + Python 3.11 compatibility bugfixes ## 1.2.0 + add client side incremental sync ## 1.1.3 + Removed experimental suffix for unstructured file type ## 1.1.2 + CDK: upgrade dpath ## 1.1.1 + Fix bug so that RFR streams don't resync successful streams on subsequent attempts ## 1.1.0 + low-code: Add RFR support automatically for non-substreams ## 1.0.1 + File-based CDK: avoid error on empty stream when running discover ## 1.0.0 + Delete deprecated AirbyteLogger, AirbyteSpec, and Authenticators + move public classes to the top level init file. See [migration guide](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/cdk-migrations.md) for more details. ## 0.90.0 + Python CDK: Adds HttpClient, ErrorHandler, and related interfaces. ## 0.89.0 + low-code: Remove support for last_records and improve memory usage ## 0.88.4 + HttpMocker, Adding the delete method. ## 0.88.2 + Fix dependency for pytz ## 0.88.1 + Fix timestamp formatting in low-code macros ## 0.88.0 + file-based: Increase the maximum parseable field size for CSV files ## 0.87.0 + Python CDK: Allow for configuring resumable full refresh for streams (excluding substreams) ## 0.86.3 diff --git a/cdk-migrations.md b/cdk-migrations.md index d9472626..ba953085 100644 --- a/cdk-migrations.md +++ b/cdk-migrations.md @@ -10,12 +10,14 @@ Version 6.x.x of the CDK introduces concurrent processing of low-code incrementa > It also does not include processing of full-refresh streams in parallel. 
Low-code incremental streams that match any of the following criteria are not supported by concurrent as of this version: + - Uses a custom implementation of the `DatetimeBasedCursor` component -- The `DatetimeBasedCursor` defines a `step` which will partition a stream's request into time intervals AND a +- The `DatetimeBasedCursor` defines a `step` which will partition a stream's request into time intervals AND a `AddedField` / `HttpRequester` / `RecordFilter` that relies on interpolation of the `stream_state` value. See below for the complete list In order to enable concurrency for a low-code connector, the following changes must be made: + - In the connector's `source.py`, change the method signature to accept catalog, config, and state parameters. Change the invocation of `super()` to pass in those new parameters ```python3 @@ -23,6 +25,7 @@ class SourceName(YamlDeclarativeSource): def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: TState, **kwargs): super().__init__(catalog=catalog, config=config, state=state, **{"path_to_yaml": "manifest.yaml"}) ``` + - In the connector's `run.py`, update it to pass variables ```python3 @@ -76,6 +79,7 @@ concurrency_level: ### Connectors that have streams that cannot be processed concurrently Connectors that have streams that use `stream_state` during interpolation and must be run synchronously until they are fixed or updated: + - Http Requester - `source-insightly`: Uses an DatetimeBasedCursor with a step interval and the HttpRequester has request_parameters relying on `stream_state`. This should be replaced by `step_interval` - `source-intercom`: Uses a custom `incremental_sync` component and `stream_state` used as part of the HttpRequester request_body_json. 
However, because this processed on a single slice, `stream_interval` can be used @@ -94,6 +98,7 @@ by a thread safe interpolation context like `stream_interval` or `stream_partiti All manifest-only sources are run using the `source-declarative-manifest` which serves as the base image with the common code and flows for connectors that only define a `manifest.yaml` file. Within this package, to enable concurrent processing: + - Modify `airbyte-cdk` package in `pyproject.toml` to the current version - In `run.py`, parse all entrypoint arguments into the respective config, catalog, and state objects - In `run.py`, modify the flow that instantiates a `ManifestDeclarativeSource` from the `__injected_declarative_manifest` to instantiate a `ConcurrentDeclarativeSource` @@ -105,6 +110,7 @@ Version 5.0.0 of the CDK updates the `airbyte_cdk.models` dependency to replace updates the `airbyte-protocol-models` dependency to a version that uses dataclasses models. The changes to Airbyte CDK itself are backwards-compatible, but some changes are required if the connector: + - uses the `airbyte_protocol` models directly, or `airbyte_cdk.models`, which points to `airbyte_protocol` models - uses third-party libraries, such as `pandas`, to read data from sources, which output non-native Python objects that cannot be serialized by the [orjson](https://github.com/ijl/orjson) library. @@ -116,7 +122,7 @@ The changes to Airbyte CDK itself are backwards-compatible, but some changes are - If the connector uses Pydantic based Airbyte Protocol Models, the code will need to be updated to reflect the changes `pydantic`. - It is recommended to import protocol classes not directly by `import airbyte_protocol` statement, but from `airbyte_cdk.models` package. -- It is also recommended to use *-`Serializer` from `airbyte_cdk.models` to manipulate the data or convert to/from JSON. 
+- It is also recommended to use \*-`Serializer` from `airbyte_cdk.models` to manipulate the data or convert to/from JSON. These are based on the [serpyco-rs](https://pypi.org/project/serpyco-rs/) library. - These classes have a `dump` method that converts the model to a dictionary and a `load` method that converts a dictionary to a model. - The recommended serialization strategy is to pass the dictionary to the `orjson` library when serializing as a JSON string. @@ -150,7 +156,6 @@ yield orjson.loads(df.to_json(orient="records", date_format="iso", date_unit="us ``` - ## Upgrading to 4.5.0 In this release, we are no longer supporting the legacy state format in favor of the current per-stream state @@ -159,16 +164,18 @@ the small number of connectors that instantiate their own `ConnectorStateManager version of the CDK is to stop passing the `stream_instance_map` parameter to the `ConnectorStateManager` constructor. ## Upgrading to 4.1.0 + We are unifying the `BackoffStrategy` interface as it currently differs from the Python CDK package to the declarative one. The different is that the interface will require the attempt_count to be passed. Main impact: This change is mostly internal but we spotted a couple of tests that expect `backoff_time` to not have the `attempt_count` parameter so these tests would fail ([example](https://github.com/airbytehq/airbyte/blob/c9f45a0b85735f58102fcd78385f6f673e731aa6/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py#L99)). -This change should not impact the following classes even though they have a different interface as they accept `kwargs` and `attempt_count` is currently passed as a keyword argument within the CDK. 
However, once there is a CDK change where `backoff_time` is called not as a keyword argument, they will fail: -* Zendesk Support: ZendeskSupportBackoffStrategy (this one will be updated shortly after as it is used for CI to validate CDK changes) -* Klaviyo: KlaviyoBackoffStrategy (the logic has been generified so we will remove this custom component shortly after this update) -* GitHub: GithubStreamABCBackoffStrategy and ContributorActivityBackoffStrategy -* Airtable: AirtableBackoffStrategy -* Slack: SlackBackoffStrategy +This change should not impact the following classes even though they have a different interface as they accept `kwargs` and `attempt_count` is currently passed as a keyword argument within the CDK. However, once there is a CDK change where `backoff_time` is called not as a keyword argument, they will fail: + +- Zendesk Support: ZendeskSupportBackoffStrategy (this one will be updated shortly after as it is used for CI to validate CDK changes) +- Klaviyo: KlaviyoBackoffStrategy (the logic has been generified so we will remove this custom component shortly after this update) +- GitHub: GithubStreamABCBackoffStrategy and ContributorActivityBackoffStrategy +- Airtable: AirtableBackoffStrategy +- Slack: SlackBackoffStrategy This change should not impact `WaitUntilMidnightBackoffStrategy` from source-gnews as well but it is interesting to note that its interface is also wrong as it considers the first parameter as a `requests.Response` instead of a `Optional[Union[requests.Response, requests.RequestException]]`. @@ -177,6 +184,7 @@ This change should not impact `WaitUntilMidnightBackoffStrategy` from source-gne Updated the codebase to utilize new Python syntax features. As a result, support for Python 3.9 has been dropped. The minimum required Python version is now 3.10. ## Upgrading to 3.0.0 + Version 3.0.0 of the CDK updates the `HTTPStream` class by reusing the `HTTPClient` under the hood. 
- `backoff_time` and `should_retry` methods are removed from HttpStream @@ -188,18 +196,22 @@ Therefore, catching exceptions should be updated, and error messages might chang See [Migration of Source Zendesk Support](https://github.com/airbytehq/airbyte/pull/41032/commits/4d3a247f36b9826dcea4b98d30fc19802b03d014) as an example. ### Migration of `should_retry` method + In case the connector uses custom logic for backoff based on the response from the server, a new method `get_error_handler` should be implemented. This method should return instance of [`ErrorHandler`](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py). ### Migration of `backoff_time` method + In case the connector uses custom logic for backoff time calculation, a new method `get_backoff_strategy` should be implemented. This method should return instance(s) of [`BackoffStrategy`](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py). ## Upgrading to 2.0.0 + Version 2.0.0 of the CDK updates the `pydantic` dependency to from Pydantic v1 to Pydantic v2. It also updates the `airbyte-protocol-models` dependency to a version that uses Pydantic V2 models. The changes to Airbyte CDK itself are backwards-compatible, but some changes are required if the connector: + - uses Pydantic directly, e.g. for its own custom models, or - uses the `airbyte_protocol` models directly, or `airbyte_cdk.models`, which points to `airbyte_protocol` models, or - customizes HashableStreamDescriptor, which inherits from a protocol model and has therefore been updated to use Pydantic V2 models. @@ -213,19 +225,23 @@ The Pydantic [migration guide](https://docs.pydantic.dev/latest/migration/) is a might arise around upgrade behavior. 
#### Using Pydantic V1 models with Pydantic V2 + The easiest way to update the code to be compatible without major changes is to update the import statements from `from pydantic` to `from pydantic.v1`, as Pydantic has kept the v1 module for backwards compatibility. Some potential gotchas: - - `ValidationError` must be imported from `pydantic.v1.error_wrappers` instead of `pydantic.v1` - - `ModelMetaclass` must be imported from `pydantic.v1.main` instead of `pydantic.v1` - - `resolve_annotations` must be imported from `pydantic.v1.typing` instead of `pydantic.v1` + +- `ValidationError` must be imported from `pydantic.v1.error_wrappers` instead of `pydantic.v1` +- `ModelMetaclass` must be imported from `pydantic.v1.main` instead of `pydantic.v1` +- `resolve_annotations` must be imported from `pydantic.v1.typing` instead of `pydantic.v1` #### Upgrading to Pydantic V2 + To upgrade all the way to V2 proper, Pydantic also offers a [migration tool](https://docs.pydantic.dev/latest/migration/#code-transformation-tool) to automatically update the code to be compatible with Pydantic V2. #### Updating assertions + It's possible that a connector might make assertions against protocol models without actually importing them - for example when testing methods which return `AirbyteStateBlob` or `AnyUrl`. @@ -245,27 +261,27 @@ assert stream_read.slices[1].state[0].stream.stream_state == AirbyteStateBlob(a_ assert stream_read.slices[1].state[0].stream.stream_state.dict() == {"a_timestamp": 123} ``` - ## Upgrading to 1.0.0 + Starting from 1.0.0, CDK classes and functions should be imported directly from `airbyte_cdk` (example: `from airbyte_cdk import HttpStream`). Lower-level `__init__` files are not considered stable, and will be modified without introducing a major release. Introducing breaking changes to a class or function exported from the top level `__init__.py` will require a major version bump and a migration note to help developer upgrade. 
Note that the following packages are not part of the top level init because they require extras dependencies, but are still considered stable: + - `destination.vector_db_based` - `source.file_based` The `test` package is not included in the top level init either. The `test` package is still evolving and isn't considered stable. - A few classes were deleted from the Airbyte CDK in version 1.0.0: + - AirbyteLogger - AirbyteSpec - Authenticators in the `sources.streams.http.auth` module - - ### Migrating off AirbyteLogger + No connectors should still be using `AirbyteLogger` directly, but the class is still used in some interfaces. The only required change is to update the type annotation from `AirbyteLogger` to `logging.Logger`. For example: ``` @@ -277,12 +293,15 @@ to ``` def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, any]: ``` + Don't forget to also update the imports. You can delete `from airbyte_cdk import AirbyteLogger` and replace it with `import logging`. ### Migrating off AirbyteSpec + AirbyteSpec isn't used by any connectors in the repository, and I don't expect any custom connectors to use the class either. This should be a no-op. ### Migrating off Authenticators + Replace usage of authenticators in the `airbyte_cdk.sources.streams.http.auth` module with their sister classes in the `airbyte_cdk.sources.streams.http.requests_native_auth` module. If any of your streams reference `self.authenticator`, you'll also need to update these references to `self._session.auth` as the authenticator is embedded in the session object. 
diff --git a/sphinx-docs.md b/sphinx-docs.md index 055055cf..4b2e2be3 100644 --- a/sphinx-docs.md +++ b/sphinx-docs.md @@ -20,6 +20,7 @@ Let's dive into using an example: - Assuming we're going to add a new package `airbyte_cdk/new_package`; - Let this file contain a few modules: `airbyte_cdk/new_package/module1.py` and `airbyte_cdk/new_package/module2.py`; - The above structure should be in `rst` config as: + - Add this block directly into `index.rst`: ```