diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4dcc1c554..05fef9964 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -16,6 +16,6 @@ updates: # Maintain dependencies for Python scripts - package-ecosystem: "pip" - directory: "/.github/scripts" + directory: "/scripts" schedule: interval: "daily" diff --git a/.github/labeler.yml b/.github/labeler.yml index 7a82a10fe..5a1e64933 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -3,7 +3,7 @@ ci: - changed-files: - any-glob-to-any-file: - - .github/scripts/** + - scripts/** - .github/workflows/** - .github/*.yml diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml index 630756145..f846ef39a 100644 --- a/.github/workflows/actionlint.yml +++ b/.github/workflows/actionlint.yml @@ -38,7 +38,6 @@ jobs: uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: fetch-depth: 0 - submodules: true - name: "Download actionlint" run: | diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0a91e48da..9af23d21f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -40,7 +40,6 @@ jobs: uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: fetch-depth: 0 - submodules: true - name: "Check Markdown documents" uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0 with: diff --git a/.github/workflows/insight.yml b/.github/workflows/insight.yml index da19e93b2..9d909914e 100644 --- a/.github/workflows/insight.yml +++ b/.github/workflows/insight.yml @@ -7,7 +7,7 @@ run-name: ${{ github.event.workflow_run.display_title || github.workflow }} on: workflow_run: workflows: - - Lint + - "Lint compositional_skills and knowledge" types: - completed diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a26b4d45a..a02e02e4b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -name: Lint +name: Lint compositional_skills and knowledge on: workflow_dispatch: @@ -11,7 +11,7 @@ on: - compositional_skills/**/qna.yaml - knowledge/**/qna.yaml - '.github/workflows/lint.yml' # This workflow - - '.github/scripts/**' # Scripts used by this workflow + - 'scripts/**' # Scripts used by this workflow pull_request: branches: @@ -22,7 +22,7 @@ on: - knowledge/**/*.yaml - knowledge/**/*.yml - '.github/workflows/lint.yml' # This workflow - - '.github/scripts/**' # Scripts used by this workflow + - 'scripts/**' # Scripts used by this workflow env: LC_ALL: en_US.UTF-8 @@ -35,7 +35,7 @@ permissions: contents: read jobs: - lint: + yamllint: runs-on: ubuntu-latest steps: - name: "Harden Runner" @@ -47,7 +47,6 @@ jobs: uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: fetch-depth: 0 - submodules: true - name: "Setup Python" uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 @@ -56,7 +55,7 @@ jobs: - name: "Install Python Packages" run: | - pip install -r .github/scripts/requirements.txt + pip install -r scripts/requirements.txt - name: "Find changed skills and knowledge files" id: changed-files @@ -69,22 +68,34 @@ jobs: knowledge/**/*.yml - name: "Check changed YAML file contents" - if: ${{ steps.changed-files.outputs.any_changed == 'true' }} + if: ${{ fromJSON(steps.changed-files.outputs.any_changed) }} run: | - .github/scripts/check-yaml.py ${{ steps.changed-files.outputs.all_changed_files }} + scripts/check-yaml.py ${{ steps.changed-files.outputs.all_changed_files }} env: - SCHEMA_BASE: schema YAMLLINT_CONFIG: "{extends: relaxed, rules: {line-length: {max: 120}}}" TAXONOMY_FOLDERS: >- compositional_skills knowledge + - name: "Check all YAML file contents" + if: ${{ !fromJSON(steps.changed-files.outputs.any_changed) }} + run: | + read -ra folders <<< "${TAXONOMY_FOLDERS}" + # shellcheck disable=SC2046 + scripts/check-yaml.py $(find "${folders[@]}" -name "qna.yaml" -print) + env: + YAMLLINT_CONFIG: "{}" # No lint rules + SCHEMA_VERSION: 0 # use the schema version specified in the "version" key + TAXONOMY_FOLDERS: >- + compositional_skills + knowledge + - name: "Save Pull Request number" if: ${{ (github.event_name == 'pull_request') && (github.repository == 'instructlab/taxonomy') }} run: | echo "${PULL_REQUEST_NUMBER}" > pull_request_number.txt env: - PULL_REQUEST_NUMBER: ${{ steps.changed-files.outputs.any_changed == 'true' && github.event.number || '0' }} + PULL_REQUEST_NUMBER: ${{ fromJSON(steps.changed-files.outputs.any_changed) && github.event.number || '0' }} - name: "Upload Pull Request number" if: ${{ (github.event_name == 'pull_request') && (github.repository == 'instructlab/taxonomy') }} diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index d78b2cbdd..000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "schema"] - path = schema - url = https://github.com/instructlab/schema.git diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml index cb08cca48..7f06c29ca 100644 --- a/.markdownlint-cli2.yaml +++ b/.markdownlint-cli2.yaml @@ -11,7 +11,6 @@ globs: ignores: - ".github/**" - ".tox/**" - - "schema/**" - "venv/**" - ".venv/**" - "knowledge/knowledge_domains.md" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9be6bc30e..5018dc377 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,15 +62,6 @@ To contribute to this repo, you'll use the *Fork and Pull* model common in many - For details on the local process, check out the [GitHub flow](https://docs.github.com/en/get-started/using-github/github-flow) documentation from GitHub and [The GitHub Workflow Guide](https://github.com/kubernetes/community/blob/master/contributors/guide/github-workflow.md) documentation from Kubernetes. - For details on contributing using the GitHub webpage UI, see [Contributing using the GH UI](docs/contributing_via_GH_UI.md). -This repository uses [submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) to incorporate the [taxonomy schema](https://github.com/instructlab/schema.git). -So, when using a local clone of this Git repository, be sure to use the [`--recurse-submodules`](https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---recurse-submodulesltpathspecgt) option on the `git clone` command and the `git pull` command when pulling updates from the remote repository. -For example: - -```shell -git clone --recurse-submodules https://github.com/instructlab/taxonomy.git -git pull --recurse-submodules -``` - > [!IMPORTANT] > For all contributions to InstructLab 🐶, you want to become familiar with the workflow described in the [InstructLab 🐶 CLI > `ilab`](https://github.com/instructlab/instructlab) documentation. It would be best to understand how to test diff --git a/schema b/schema deleted file mode 160000 index cf56c4317..000000000 --- a/schema +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cf56c4317ac4590744c0ca677a03458d8df4b920 diff --git a/.github/scripts/check-yaml.py b/scripts/check-yaml.py similarity index 90% rename from .github/scripts/check-yaml.py rename to scripts/check-yaml.py index 2cef6f4fd..fa546ecf0 100755 --- a/.github/scripts/check-yaml.py +++ b/scripts/check-yaml.py @@ -8,12 +8,14 @@ import subprocess import sys from functools import cache, partial +from importlib import resources from importlib.resources.abc import Traversable from pathlib import Path from typing import List, Mapping, Optional, Union # Third Party import yaml +from instructlab.schema import schema_versions from jsonschema.protocols import Validator from jsonschema.validators import validator_for from referencing import Registry, Resource @@ -29,24 +31,19 @@ def __init__( self, *, yaml_files: List[Path], - schema_base: Path, taxonomy_folders: List[str], yamllint_config: YamlLintConfig, schema_version: Optional[int] = None, message_format: Optional[str] = None, ) -> None: self.yaml_files = yaml_files - self.schema_base = schema_base self.taxonomy_folders = taxonomy_folders self.yamllint_config = yamllint_config + self.schema_base = resources.files("instructlab.schema") if schema_version is None: - schema_versions = sorted( - int(v.name[1:]) - for v in self.schema_base.glob("v*") - if v.name[1:].isdigit() - ) - if schema_versions: - schema_version = schema_versions[-1] + versions = schema_versions() + if versions: + schema_version = int(versions[-1].name[1:]) self.schema_version = schema_version if message_format is None or message_format == "auto": message_format = ( @@ -58,7 +55,7 @@ def __init__( self.exit_code: int = 0 @cache - def _load_schema(self, path: Union[Path, Traversable]) -> Resource: + def _load_schema(self, path: Traversable) -> Resource: try: contents = json.loads(path.read_text(encoding="utf-8")) resource = Resource.from_contents( @@ -68,7 +65,7 @@ def _load_schema(self, path: Union[Path, Traversable]) -> Resource: raise NoSuchResource(ref=str(path)) from e return resource - def _retrieve(self, schemas_path: Union[Path, Traversable], uri: URI) -> Resource: + def _retrieve(self, schemas_path: Traversable, uri: URI) -> Resource: path = schemas_path.joinpath(uri) return self._load_schema(path) @@ -279,17 +276,6 @@ def cli() -> int: "TAXONOMY_FOLDERS", "compositional_skills knowledge" ).split(), ) - parser.add_argument( - "-s", - "--schema-base", - help=""" - The base directory of the Taxonomy schema files. - Alternately, the SCHEMA_BASE environment variable can be used - to specify the base directory. - """, - default=os.environ.get("SCHEMA_BASE", _find_schema_base()), - type=Path, - ) parser.add_argument( "-v", "--schema-version", @@ -337,7 +323,6 @@ def cli() -> int: yaml_files=args.yaml_file, taxonomy_folders=args.taxonomy_folders, yamllint_config=args.yamllint_config, - schema_base=args.schema_base, schema_version=args.schema_version, message_format=args.message_format, ) @@ -345,15 +330,5 @@ def cli() -> int: return exit_code -def _find_schema_base() -> Path: - for parent in Path(sys.argv[0]).parents: - candidate = parent.joinpath("schema") - if os.path.isdir(candidate): - return candidate - if os.path.exists(parent.joinpath(".git")): - break - return Path.cwd().joinpath("schema") - - if __name__ == "__main__": sys.exit(cli()) diff --git a/.github/scripts/requirements.txt b/scripts/requirements.txt similarity index 80% rename from .github/scripts/requirements.txt rename to scripts/requirements.txt index fd3c7e09f..5294567fa 100644 --- a/.github/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +instructlab-schema>=0.2.0 jsonschema>=4.21.1,<5.0.0 PyYAML>=6.0.1,<7.0.0 yamllint>=1.35.1,<2.0.0