diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..9bdcc2e
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,26 @@
+name: build
+on:
+  push:
+    branches: [ "main" ]
+  workflow_dispatch:
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+  call-workflow1:
+    name: tests
+    uses: ./.github/workflows/test.yml
+  call-workflow2:
+    needs: call-workflow1
+    name: docs
+    uses: ./.github/workflows/docs.yml
+  print:
+    needs: [call-workflow1]
+    runs-on: ubuntu-latest
+    steps:
+      # A bare `needs.<job>` object does not interpolate usefully; dump it as
+      # JSON through an env var so shell quoting cannot mangle the output.
+      - run: echo "$TESTS_JOB"
+        env:
+          TESTS_JOB: ${{ toJSON(needs.call-workflow1) }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index e012ac2..b01d680 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -1,13 +1,6 @@
-name: website
+name: docs
 
-# build the documentation whenever there are new commits on main
-on:
-  push:
-    branches:
-      - main
-    # Alternative: only build for tags.
-    # tags:
-    #   - '*'
+on: workflow_call
 
 # security: restrict permissions for CI jobs.
 permissions:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5720f0b..ca8ccfb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,6 +1,6 @@
 name: tests
 
-on: [push]
+on: workflow_call
 
 jobs:
   build:
diff --git a/src/sciterra/vectorization/preprocessing.py b/src/sciterra/vectorization/preprocessing.py
index 764dbc6..f211c6a 100644
--- a/src/sciterra/vectorization/preprocessing.py
+++ b/src/sciterra/vectorization/preprocessing.py
@@ -2,7 +2,14 @@
 
 import spacy
 
-nlp = spacy.load("en_core_web_sm")
+# Fail at import time with an actionable hint when the spaCy model is missing.
+model = "en_core_web_sm"
+try:
+    nlp = spacy.load(model)
+except OSError as err:
+    raise OSError(
+        f"Can't find model '{model}'; make sure you have run 'python3 -m spacy download {model}'!"
+    ) from err
 
 # Another off the shelf simple tokenizer
 from gensim.utils import simple_preprocess