diff --git a/.github/workflows/builds.yaml b/.github/workflows/builds.yaml index 1c7f171..a152862 100644 --- a/.github/workflows/builds.yaml +++ b/.github/workflows/builds.yaml @@ -10,22 +10,34 @@ on: - "*" - "!update-devops-tooling" +env: + package-path: "dist/" + jobs: - parse-project-metadata: - name: "Determine Python versions" - # yamllint disable-line rule:line-length - uses: os-climate/devops-reusable-workflows/.github/workflows/pyproject-toml-fetch-matrix.yaml@main + get-python-versions: + name: "Validate Python project" + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.parse-project-metadata.outputs.python-matrix-versions }} + + steps: + - uses: actions/checkout@v4 - test-builds: - name: "Build: Python" - needs: [parse-project-metadata] + - name: "Parse: pyproject.toml" + id: parse-project-metadata + # yamllint disable-line rule:line-length + uses: os-climate/devops-reusable-workflows/.github/actions/python-versions-matrix@main + + builds: + name: "Python builds" + needs: [get-python-versions] runs-on: "ubuntu-latest" continue-on-error: true # Don't run when pull request is merged if: github.event.pull_request.merged == false strategy: fail-fast: false - matrix: ${{ fromJson(needs.parse-project-metadata.outputs.matrix) }} + matrix: ${{ fromJson(needs.get-python-versions.outputs.matrix) }} steps: - name: "Populate environment variables" @@ -81,8 +93,8 @@ jobs: python -m build fi - - name: "Validating Artefacts with Twine" - run: | - echo "Validating artefacts with: twine check dist/*" - pip install --upgrade twine - twine check dist/* + - name: "Validate Artefacts with Twine" + id: twine-check-artefacts + env: + package-path: ${{ env.package-path }} + uses: os-climate/devops-reusable-workflows/.github/actions/twine-check-artefacts@main diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..876fb54 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,83 @@ +# For most projects, this workflow 
file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "🔐 CodeQL" + +on: + push: + branches: [ "main", "gh-pages", "master" ] + pull_request: + branches: [ "main", "gh-pages", "master" ] + schedule: + - cron: '39 20 * * 6' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': + # 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. 
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/notebooks.yaml b/.github/workflows/notebooks.yaml index 0fad0cc..c10efca 100644 --- a/.github/workflows/notebooks.yaml +++ b/.github/workflows/notebooks.yaml @@ -33,7 +33,7 @@ jobs: needs: [validate-notebook-tests] if: needs.validate-notebook-tests.outputs.proceed == 'true' # yamllint disable-line rule:line-length - uses: os-climate/devops-reusable-workflows/.github/workflows/pyproject-toml-fetch-matrix.yaml@main + uses: os-climate/devops-reusable-workflows/.github/workflows/python-versions-matrix.yaml@main notebook-tests: name: "Test Jupyter Notebooks" diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ac254c4..6fe13e0 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,17 +1,13 @@ --- -name: "🐍📦 Production build and release" +name: "🐍📦 Old Production build and release" # GitHub/PyPI trusted publisher 
documentation: # https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ # yamllint disable-line rule:truthy on: - # workflow_dispatch: push: # Only invoked on release tag pushes - branches: - - 'main' - - 'master' tags: - 'v*.*.*' @@ -28,8 +24,9 @@ jobs: if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest permissions: - # IMPORTANT: mandatory for Sigstore + contents: write id-token: write + steps: ### BUILDING ### @@ -44,10 +41,20 @@ - name: "Setup PDM for build commands" uses: pdm-project/setup-pdm@v4 + - name: "Fetch current semantic tag" + id: fetch-tags + # yamllint disable-line rule:line-length + uses: os-climate/devops-reusable-workflows/.github/actions/latest-semantic-tag@main + - name: "Update version from tags for production release" run: | - echo "Github versioning: ${{ github.ref_name }}" - scripts/release-versioning.sh + echo "Github tag/versioning: ${{ github.ref_name }}" + if (grep 'dynamic = \[\"version\"\]' pyproject.toml > /dev/null); then + echo "Proceeding build with dynamic versioning" + else + echo "Using legacy script to bump release version" + scripts/release-versioning.sh + fi - name: "Build with PDM backend" run: | ### SIGNING ### - name: "Sign packages with Sigstore" - uses: sigstore/gh-action-sigstore-python@v2 + # Use new action + uses: sigstore/gh-action-sigstore-python@v3.0.0 with: inputs: >- ./dist/*.tar.gz @@ -72,8 +80,6 @@ github: name: "📦 Publish to GitHub" - # Only publish on tag pushes - if: startsWith(github.ref, 'refs/tags/') needs: - build runs-on: ubuntu-latest @@ -94,20 +100,17 @@ token: ${{ secrets.GITHUB_TOKEN }} prerelease: false tag_name: ${{ github.ref_name }} - name: "Test/Development Build \ - ${{ github.ref_name }}" + name: "${{ github.ref_name }}" # body_path: ${{ github.workspace }}/CHANGELOG.rst files: | dist/*.tar.gz dist/*.whl - dist/*.sigstore + dist/*.sigstore* ### PUBLISH PYPI TEST ###
testpypi: - name: "📦 Publish to PyPi Test" - # Only publish on tag pushes - if: startsWith(github.ref, 'refs/tags/') + name: "📦 Test publishing to PyPI" needs: - build runs-on: ubuntu-latest @@ -128,9 +131,9 @@ jobs: if [ -f dist/buildvars.txt ]; then rm dist/buildvars.txt fi - rm dist/*.sigstore + rm dist/*.sigstore* - - name: Publish distribution to Test PyPI + - name: "Test publishing to PyPI" uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ @@ -140,8 +143,6 @@ jobs: pypi: name: "📦 Publish to PyPi" - # Only publish on tag pushes - if: startsWith(github.ref, 'refs/tags/') needs: - testpypi runs-on: ubuntu-latest @@ -162,7 +163,7 @@ jobs: if [ -f dist/buildvars.txt ]; then rm dist/buildvars.txt fi - rm dist/*.sigstore + rm dist/*.sigstore* - name: "Setup PDM for build commands" uses: pdm-project/setup-pdm@v4 diff --git a/.github/workflows/security.yaml b/.github/workflows/security.yaml index 26251da..89c57f1 100644 --- a/.github/workflows/security.yaml +++ b/.github/workflows/security.yaml @@ -16,21 +16,29 @@ on: - "!update-devops-tooling" jobs: + get-python-versions: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.parse-project-metadata.outputs.python-matrix-versions }} + + steps: + - uses: actions/checkout@v4 - parse-project-metadata: - name: "Determine Python versions" - # yamllint disable-line rule:line-length - uses: os-climate/devops-reusable-workflows/.github/workflows/pyproject-toml-fetch-matrix.yaml@main + - name: "Populate environment variables" + id: parse-project-metadata + # yamllint disable-line rule:line-length + uses: os-climate/devops-reusable-workflows/.github/actions/python-versions-matrix@main - build: - name: "Audit Python dependencies" - needs: [parse-project-metadata] - runs-on: ubuntu-latest + builds: + name: "Python builds" + needs: [get-python-versions] + runs-on: "ubuntu-latest" + continue-on-error: true # Don't run when pull request is merged if: github.event.pull_request.merged == 
false strategy: fail-fast: false - matrix: ${{ fromJson(needs.parse-project-metadata.outputs.matrix) }} + matrix: ${{ fromJson(needs.get-python-versions.outputs.matrix) }} steps: - name: "Checkout repository" diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml index aebd8a9..125e06e 100644 --- a/.github/workflows/testing.yaml +++ b/.github/workflows/testing.yaml @@ -11,21 +11,28 @@ on: - "!update-devops-tooling" jobs: + get-python-versions: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.parse-project-metadata.outputs.python-matrix-versions }} + + steps: + - uses: actions/checkout@v4 - parse-project-metadata: - name: "Determine Python versions" - # yamllint disable-line rule:line-length - uses: os-climate/devops-reusable-workflows/.github/workflows/pyproject-toml-fetch-matrix.yaml@main + - name: "Populate environment variables" + id: parse-project-metadata + # yamllint disable-line rule:line-length + uses: os-climate/devops-reusable-workflows/.github/actions/python-versions-matrix@main testing: name: "Run unit tests" - needs: [parse-project-metadata] + needs: [get-python-versions] runs-on: ubuntu-latest # Don't run when pull request is merged if: github.event.pull_request.merged == false strategy: fail-fast: false - matrix: ${{ fromJson(needs.parse-project-metadata.outputs.matrix) }} + matrix: ${{ fromJson(needs.get-python-versions.outputs.matrix) }} steps: - name: "Checkout repository" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4af1c71..1e7725f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,8 +26,8 @@ repos: - id: check-yaml - id: detect-private-key - id: end-of-file-fixer - - id: mixed-line-ending - args: ["--fix=lf"] + # - id: mixed-line-ending + # args: ["--fix=lf"] - id: name-tests-test args: ["--pytest-test-first"] - id: no-commit-to-branch @@ -73,13 +73,15 @@ repos: rev: v1.35.1 hooks: - id: yamllint - args: [ "-d", "{rules: {line-length: {max: 120}}, ignore-from-file: 
[.gitignore],}", ] + args: + ["-d", "{rules: {line-length: {max: 120}}, + ignore-from-file: [.gitignore],}"] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.5.5 hooks: - id: ruff - args: [--fix, --exit-non-zero-on-fix, --config=ruff.toml] + args: [--fix, --exit-non-zero-on-fix, --config=pyproject.toml] - id: ruff-format - repo: local @@ -88,7 +90,8 @@ repos: name: "create mypy cache" language: system pass_filenames: false - entry: bash -c 'if [ ! -d .mypy_cache ]; then /bin/mkdir .mypy_cache; fi; exit 0' + entry: bash -c 'if [ ! -d .mypy_cache ]; + then /bin/mkdir .mypy_cache; fi; exit 0' - repo: https://github.com/pre-commit/mirrors-mypy rev: "v1.11.0" @@ -98,11 +101,12 @@ repos: args: ["--show-error-codes", "--install-types", "--non-interactive"] additional_dependencies: ["pytest", "types-requests"] +# yamllint disable rule:comments-indentation # Check for misspellings in documentation files # - repo: https://github.com/codespell-project/codespell # rev: v2.2.2 # hooks: - # - id: codespell + # - id: codespell # Automatically upgrade Python syntax for newer versions # - repo: https://github.com/asottile/pyupgrade @@ -110,3 +114,4 @@ repos: # hooks: # - id: pyupgrade # args: ['--py37-plus'] +# yamllint enable rule:comments-indentation diff --git a/README.md b/README.md index 7540415..4075373 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,27 @@ # ITR Data Pipeline -The OS-Climate project is building a suite of Portfolio Alignment tools, the first of which is an [Implied Temperature Rise (ITR)](https://github.com/os-climate/ITR) tool. The need for climate-aligned investment tools has been obvious for some time and is articulated clearly in this [Manifesto](https://www.bankofengland.co.uk/news/2019/april/open-letter-on-climate-related-financial-risks) published by Governor of Bank of England Mark Carney, Governor of Banque de France François Villeroy de Galhau and Chair of the Network for Greening the Financial Services Frank Elderson. 
Subsequently a working group developed methodology for creating a Temperature Alignment score based on a company's usage of their fair (or not) share of the remaining global carbon budget. The ITR tool calculates those scores based on sector benchmarks, corporate production, emissions, emissions targets, and other factors. +The OS-Climate project is building a suite of Portfolio Alignment tools, the +first of which is an [Implied Temperature Rise (ITR)](https://github.com/os-climate/ITR) +tool. The need for climate-aligned investment tools has been obvious for some time +and is articulated clearly in this +[Manifesto](https://www.bankofengland.co.uk/news/2019/april/open-letter-on-climate-related-financial-risks) +published by Governor of Bank of England Mark Carney, Governor of Banque de France +François Villeroy de Galhau and Chair of the Network for Greening the Financial Services +Frank Elderson. Subsequently a working group developed methodology for creating a +Temperature Alignment score based on a company's usage of their fair (or not) share +of the remaining global carbon budget. The ITR tool calculates those scores based +on sector benchmarks, corporate production, emissions, emissions targets, and other +factors. -This repository contains code for generating data that can be used to demonstrate and evaluate the ITR tool using public data. (The tool is designed so that users can use their own data, including privately developed and proprietary data.) +This repository contains code for generating data that can be used to demonstrate +and evaluate the ITR tool using public data. (The tool is designed so that users +can use their own data, including privately developed and proprietary data.) -The notebook [OECM-benchmark-ingest](notebooks/OECM-benchmark-ingest.ipynb) transforms source data from the [OECM version 2 benchmark](https://www.oneearth.org/updated-one-earth-climate-model/) into JSON source that can be ingested by the ITR tool.
Presently 14 sectors are supported (with more coming): +The notebook [OECM-benchmark-ingest](notebooks/OECM-benchmark-ingest.ipynb) transforms +source data from the +[OECM version 2 benchmark](https://www.oneearth.org/updated-one-earth-climate-model/) +into JSON source that can be ingested by the ITR tool. Presently 14 sectors are supported (with more coming): - Energy (Oil & Gas) - Utilities (Power, Gas, Combined) @@ -22,13 +38,31 @@ The notebook [OECM-benchmark-ingest](notebooks/OECM-benchmark-ingest.ipynb) tran - Chemical Industry - Textiles and Leather -The notebook [osc-ingest-rmi_utility_transition_hub](osc-ingest-rmi_utility_transition_hub.ipynb) follows the [Data Mesh pattern](https://github.com/opendatahub-io-contrib/datamesh-platform) to ingest and transform RMI utility data published in 2022, 2023, and 2024 (with data dating back to before 2019) into reference corporate data. See instructions in the `dbt/rmi_transform/README.md` file for more information on activating the Data Mesh pattern. +The notebook +[osc-ingest-rmi_utility_transition_hub](osc-ingest-rmi_utility_transition_hub.ipynb) +follows the [Data Mesh pattern](https://github.com/opendatahub-io-contrib/datamesh-platform) +to ingest and transform RMI utility data published in 2022, 2023, and 2024 +(with data dating back to before 2019) into reference corporate data. +See instructions in the `dbt/rmi_transform/README.md` file for more information on activating the Data Mesh pattern. -The notebook [ITR-data-production](ITR-data-production.ipynb) synthesizes a set of corporate data from a variety of public sources, including [GLEIF](https://www.gleif.org/en) legal entity identifiers, [SEC financial disclosures](https://www.sec.gov/edgar/searchedgar/companysearch), [US Census data](https://www.census.gov/data.html), [RMI-curated production data](https://utilitytransitionhub.rmi.org/data-download/), and some hand-curated sources as well.
+The notebook [ITR-data-production](ITR-data-production.ipynb) synthesizes a set of +corporate data from a variety of public sources, including +[GLEIF](https://www.gleif.org/en) legal entity identifiers, +[SEC financial disclosures](https://www.sec.gov/edgar/searchedgar/companysearch), +[US Census data](https://www.census.gov/data.html), +[RMI-curated production data](https://utilitytransitionhub.rmi.org/data-download/), +and some hand-curated sources as well. -Most importantly, this pipeline puts the financial, production, emissions, and other data into Trino so that the ITR can access it via the [Data Commons](https://github.com/os-climate/os_c_data_commons). +Most importantly, this pipeline puts the financial, production, emissions, and +other data into Trino so that the ITR can access it via the +[Data Commons](https://github.com/os-climate/os_c_data_commons). -A logical (and welcomed) next step would be to curate this data within our Trino database (with proper metedata descriptions for all data, not just RMI Utility Transition Hub data). +A logical (and welcomed) next step would be to curate this data within our Trino +database (with proper metadata descriptions for all data, not just RMI Utility +Transition Hub data). -If you have questions, please file [Issues](https://github.com/os-climate/itr-data-pipeline/issues). If you have answers, please contribute [Pull +If you have questions, please file +[Issues](https://github.com/os-climate/itr-data-pipeline/issues). +If you have answers, please contribute [Pull Requests](https://github.com/os-climate/itr-data-pipeline/pulls)!