diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b9d7a0ee1..cc7135258 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,20 +11,18 @@ jobs:
           python-version: '3.10'
           cache: pip
           cache-dependency-path: '**/requirements*.txt'
-      # Don't install editable projects in the current working directory.
-      # https://pip.pypa.io/en/latest/reference/pip_install/#install-src
-      - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt
+      - run: pip install -r requirements_dev.txt
       - env:
           KINGFISHER_COLLECT_DATABASE_URL: postgresql://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres
           # Use 127.0.0.1 to avoid log messages about IPv6.
           RABBIT_URL: amqp://127.0.0.1:${{ job.services.rabbitmq.ports[5672] }}
           KINGFISHER_API2_TEST_URL: http://localhost:${{ job.services.httpbin.ports[8080] }}/anything/
         # For requests.post() in KingfisherProcessAPI2._post_synchronous().
-        run: pytest -W error -W ignore::ResourceWarning -rs --cov kingfisher_scrapy
+        # https://github.com/pytest-dev/pytest-twisted/issues/183
+        # https://github.com/scrapy/scrapy/issues/6450
+        run: pytest --cov-report=lcov:coverage/lcov.info -W error -W ignore::ResourceWarning -W ignore::DeprecationWarning:pytest_twisted -W ignore::DeprecationWarning:scrapy.core.downloader.webclient -rs --cov kingfisher_scrapy
       - run: python test_delayed_request_middleware.py
-      - env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: coveralls --service=github
+      - uses: coverallsapp/github-action@v2
     services:
       postgres:
         image: postgres:15
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 452ccc3a9..5cee382f3 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -6,22 +6,34 @@ jobs:
   build:
     if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
     runs-on: ubuntu-latest
+    env:
+      PAT: ${{ secrets.PAT }}
     steps:
       - uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.PAT || github.token }}
       - uses: actions/setup-python@v5
         with:
           python-version: '3.10'
           cache: pip
           cache-dependency-path: '**/requirements*.txt'
+      - run: shasum -c requirements.txt.sha256
+      - id: changed-files
+        uses: tj-actions/changed-files@v45
+      - uses: pre-commit/action@v3.0.1
+        continue-on-error: true
+        with:
+          extra_args: pip-compile --files ${{ steps.changed-files.outputs.all_changed_files }}
+      - if: ${{ env.PAT }}
+        uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          commit_message: '[github-actions] pre-commit autoupdate'
       - shell: bash
         run: curl -s -S --retry 3 $BASEDIR/tests/install.sh | bash -
       - shell: bash
         run: curl -s -S --retry 3 $BASEDIR/tests/script.sh | bash -
-      # Don't install editable projects in the current working directory.
-      # https://pip.pypa.io/en/latest/reference/pip_install/#install-src
-      - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt
+      - run: pip install -r requirements_dev.txt
       - env:
           # scrapyd is run as a command in production. scrapyd-client is run as a command for deployment.
           STANDARD_MAINTENANCE_SCRIPTS_IGNORE: scrapyd,scrapyd-client
         run: pytest /tmp/test_requirements.py
-      - run: shasum -c requirements.txt.sha256
diff --git a/.github/workflows/nonlinux.yml b/.github/workflows/nonlinux.yml
index 4fa870480..f7bc37b4e 100644
--- a/.github/workflows/nonlinux.yml
+++ b/.github/workflows/nonlinux.yml
@@ -17,10 +17,9 @@ jobs:
       - name: Install postgresql (macOS)
        if: matrix.os == 'macos-latest'
        run: brew install postgresql
-      # Don't install editable projects in the current working directory.
-      # https://pip.pypa.io/en/latest/reference/pip_install/#install-src
-      - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt
+      - run: pip install -r requirements_dev.txt
       - env:
           CI_SKIP: true
-        run: pytest -W error -rs --cov kingfisher_scrapy
+        # https://github.com/pytest-dev/pytest-twisted/issues/183
+        run: pytest -W error -W ignore::DeprecationWarning:pytest_twisted -rs --cov kingfisher_scrapy
       - run: python test_delayed_request_middleware.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..eeca254ed
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+ci:
+  autoupdate_schedule: quarterly
+  skip: [pip-compile]
+repos:
+  - repo: https://github.com/pycqa/flake8
+    rev: 7.1.0
+    hooks:
+      - id: flake8
+        additional_dependencies: [flake8-comprehensions]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    rev: 0.4.4
+    hooks:
+      - id: pip-compile
+        name: pip-compile requirements.in
+        args: [requirements.in, -o, requirements.txt]
+      - id: pip-compile
+        name: pip-compile requirements_dev.in
+        args: [requirements_dev.in, -o, requirements_dev.txt]
+        files: ^requirements(_dev)?\.(in|txt)$
diff --git a/.python-version b/.python-version
new file mode 100644
index 000000000..c8cfe3959
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.10
diff --git a/kingfisher_scrapy/base_spiders/compressed_file_spider.py b/kingfisher_scrapy/base_spiders/compressed_file_spider.py
index 212427d86..fe3149fb9 100644
--- a/kingfisher_scrapy/base_spiders/compressed_file_spider.py
+++ b/kingfisher_scrapy/base_spiders/compressed_file_spider.py
@@ -79,6 +79,9 @@ def parse(self, response):
             if self.sample and number > self.sample:
                 break
 
+            if not file_info.file_size:
+                continue
+
             filename = file_info.filename
             basename = os.path.basename(filename)
             if (
diff --git a/kingfisher_scrapy/extensions/files_store.py b/kingfisher_scrapy/extensions/files_store.py
index 694cee176..8d0789fd1 100644
--- a/kingfisher_scrapy/extensions/files_store.py
+++ b/kingfisher_scrapy/extensions/files_store.py
@@ -59,7 +59,7 @@ def spider_closed(self, spider, reason):
         message_length = math.ceil(len(message) / 2) * 2
         title_length = message_length // 2 - 8
 
-        spider.logger.info(f"+-{'-' * title_length } DATA DIRECTORY {'-' * title_length }-+")
+        spider.logger.info(f"+-{'-' * title_length} DATA DIRECTORY {'-' * title_length}-+")
         spider.logger.info(f"| {' ' * message_length} |")
         spider.logger.info(f"| {message.ljust(message_length)} |")
         spider.logger.info(f"| {' ' * message_length} |")
diff --git a/kingfisher_scrapy/items.py b/kingfisher_scrapy/items.py
index f79a03482..9e35d3f17 100644
--- a/kingfisher_scrapy/items.py
+++ b/kingfisher_scrapy/items.py
@@ -47,7 +47,7 @@ class DataResource(Resource, arbitrary_types_allowed=True, use_enum_values=True)
     @pydantic.validator('data', pre=True)  # `pre` is needed to prevent pydantic from type casting
     def check_data(cls, v):
         # pydantic has no `condict()` to set `strict=True` or `min_properties=1`. pydantic/pydantic#1277
-        assert isinstance(v, (Data, bytes)), f'{v.__class__.__name__} is not a valid type'
+        assert isinstance(v, (Data, bytes)), f'{type(v).__name__} is not a valid type'
         assert v, 'ensure this value is non-empty'
         return v
diff --git a/kingfisher_scrapy/pipelines.py b/kingfisher_scrapy/pipelines.py
index cb3c6fde9..8697d858b 100644
--- a/kingfisher_scrapy/pipelines.py
+++ b/kingfisher_scrapy/pipelines.py
@@ -9,6 +9,7 @@
 import ijson
 import jsonpointer
 from flattentool import unflatten
+from flattentool.exceptions import FlattenToolWarning
 from scrapy.exceptions import DropItem, NotSupported
 
 from kingfisher_scrapy.items import File, FileItem, PluckedItem
@@ -56,7 +57,7 @@ def process_item(self, item, spider):
 
         # Drop FileError items, so that we keep trying to get data.
         if not isinstance(item, (File, FileItem)):
-            raise DropItem(f'Sample: Item is a {item.__class__.__name__}, not a File or FileItem')
+            raise DropItem(f'Sample: Item is a {type(item).__name__}, not a File or FileItem')
         if self.item_count >= spider.sample:
             spider.crawler.engine.close_spider(spider, 'sample')
             raise DropItem('Sample: Maximum sample size reached')
@@ -172,7 +173,7 @@ def process_item(self, item, spider):
             f.write(pkgutil.get_data('kingfisher_scrapy', f'schema/{spider.ocds_version}.json'))
 
         with warnings.catch_warnings():
-            warnings.filterwarnings('ignore')  # flattentool uses UserWarning, so we can't set a specific category
+            warnings.filterwarnings('ignore', category=FlattenToolWarning)
             unflatten(
                 input_name,
diff --git a/kingfisher_scrapy/spiders/chile_compra_api_base.py b/kingfisher_scrapy/spiders/chile_compra_api_base.py
index 2bfc82865..879b3ebdc 100644
--- a/kingfisher_scrapy/spiders/chile_compra_api_base.py
+++ b/kingfisher_scrapy/spiders/chile_compra_api_base.py
@@ -1,5 +1,6 @@
 from abc import abstractmethod
 from datetime import date
+from json import JSONDecodeError
 
 from kingfisher_scrapy.base_spiders import IndexSpider, PeriodicSpider
 from kingfisher_scrapy.exceptions import SpiderArgumentError
@@ -82,6 +83,27 @@ def parse_page(self, response):
     def handle_item(self, item):
         pass
 
+    # from IndexSpider
+    def parse_list_loader(self, response):
+        try:
+            data = response.json()
+        except JSONDecodeError:
+            yield self.build_file_error_from_response(
+                response, errors={'http_code': response.status, 'text': response.text}
+            )
+            return
+
+        # Some files contain invalid packages, e.g.:
+        # {
+        #     "detail": "Error en la generación. ",
+        #     "status": 500
+        # }
+        if set(data) == {'detail', 'status'}:
+            data['http_code'] = data['status']
+            return self.build_file_error_from_response(response, errors=data)
+
+        return data
+
     # from IndexSpider
     def url_builder(self, value, data, response):
         # URL looks like http://api.mercadopublico.cl/APISOCDS/OCDS/listaOCDSAgnoMesTratoDirecto/2021/03/31500/100
diff --git a/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py b/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py
index 1b254e238..5aebb5a20 100644
--- a/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py
+++ b/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py
@@ -8,7 +8,7 @@ class MexicoNuevoLeonRecords(SimpleSpider):
     Domain
       Secretaría de Movilidad y Planeación Urbana de Nuevo León
     Bulk download documentation
-      http://si.nl.gob.mx/transparencia/publicaciones
+      https://smpu.nl.gob.mx/transparencia/publicaciones
     """
     name = 'mexico_nuevo_leon_records'
 
@@ -17,6 +17,6 @@ class MexicoNuevoLeonRecords(SimpleSpider):
 
     def start_requests(self):
         yield scrapy.Request(
-            'http://si.nl.gob.mx/siasi_ws/api/ocds/DescargarRecordPackage',
+            'https://smpu.nl.gob.mx/siasi_ws/api/ocds/DescargarRecordPackage',
             meta={'file_name': 'records.json'}
         )
diff --git a/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py b/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py
index 0af0fff8f..72d516da9 100644
--- a/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py
+++ b/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py
@@ -8,7 +8,7 @@ class MexicoNuevoLeonReleases(CompressedFileSpider):
     Domain
       Secretaría de Movilidad y Planeación Urbana de Nuevo León
     Bulk download documentation
-      http://si.nl.gob.mx/transparencia/acerca-del-proyecto
+      https://smpu.nl.gob.mx/transparencia/acerca-del-proyecto
     """
     name = 'mexico_nuevo_leon_releases'
 
@@ -22,5 +22,5 @@ class MexicoNuevoLeonReleases(CompressedFileSpider):
     file_name_must_contain = 'ReleasePackage'
 
     def start_requests(self):
-        url = 'http://si.nl.gob.mx/acceso/DatosAbiertos/JSONsInfraestructuraAbierta.rar'
+        url = 'https://smpu.nl.gob.mx/acceso/DatosAbiertos/JSONsInfraestructuraAbierta.rar'
         yield scrapy.Request(url, meta={'file_name': 'all.rar'})
diff --git a/pyproject.toml b/pyproject.toml
index c7fe433e4..bd3b2c298 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,23 @@
+[project]
+name = "kingfisher-collect"
+version = "0.0.0"
+
+[project.entry-points.scrapy]
+settings = "kingfisher_scrapy.settings"
+
+[tool.setuptools.packages.find]
+exclude = ["tests", "tests.*"]
+
+[tool.setuptools.package-data]
+kingfisher_scrapy = ["schema/*.json"]
+
+[tool.isort]
+profile = "black"
+line_length = 119
+
 [tool.pytest.ini_options]
-addopts = '--doctest-modules'
-asyncio_mode = 'auto'
+addopts = "--doctest-modules"
+asyncio_mode = "auto"
 
 [tool.coverage.run]
-omit = ['*/kingfisher_scrapy/spiders/*']
+omit = ["*/kingfisher_scrapy/spiders/*"]
diff --git a/requirements.txt b/requirements.txt
index 7a382fef1..097495967 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,5 @@
-#
-# This file is autogenerated by pip-compile with Python 3.10
-# by the following command:
-#
-#    pip-compile
-#
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements.in -o requirements.txt
 attrs==22.2.0
     # via
     #   automat
@@ -46,7 +42,7 @@ et-xmlfile==1.0.1
     # via openpyxl
 filelock==3.4.1
     # via tldextract
-flattentool==0.24.0
+flattentool==0.26.0
     # via -r requirements.in
 hyperlink==21.0.0
     # via twisted
@@ -60,7 +56,7 @@ ijson==3.2.3
     # via
     #   -r requirements.in
     #   flattentool
     #   ocdskit
-incremental==22.10.0
+incremental==24.7.2
     # via twisted
 itemadapter==0.7.0
     # via
@@ -84,14 +80,14 @@ jsonref==1.0.1
     # via
     #   ocdsextensionregistry
     #   ocdskit
     #   ocdsmerge
-lxml==4.9.2
+lxml==5.3.0
     # via
     #   flattentool
     #   parsel
     #   scrapy
 ocdsextensionregistry==0.2.2
     # via ocdskit
-ocdskit[perf]==1.1.13
+ocdskit==1.1.13
     # via -r requirements.in
 ocdsmerge==0.7.0
     # via ocdskit
@@ -161,14 +157,23 @@ scrapy==2.11.2
     # via
     #   -r requirements.in
     #   scrapyd
     #   scrapyd-client
-scrapyd==1.4.3
+scrapyd==1.5.0b1
     # via -r requirements.in
 scrapyd-client==1.2.3
     # via -r requirements.in
-sentry-sdk==1.19.0
+sentry-sdk==2.10.0
     # via -r requirements.in
 service-identity==24.1.0
     # via scrapy
+setuptools==74.1.1
+    # via
+    #   incremental
+    #   scrapy
+    #   scrapyd
+    #   zc-lockfile
+    #   zc-zlibstorage
+    #   zodbpickle
+    #   zope-interface
 six==1.16.0
     # via
     #   automat
@@ -181,7 +186,7 @@ tldextract==3.1.2
     # via scrapy
 transaction==3.1.0
     # via zodb
-twisted==24.3.0
+twisted==24.7.0rc1
     # via
     #   -r requirements.in
     #   scrapy
@@ -209,7 +214,7 @@ w3lib==2.1.1
     #   scrapyd-client
 xmltodict==0.12.0
     # via flattentool
-yapw[perf]==0.1.4
+yapw==0.1.4
     # via -r requirements.in
 zc-lockfile==3.0.post1
     # via zodb
@@ -233,6 +238,3 @@ zope-interface==6.0
     #   twisted
     #   zc-zlibstorage
     #   zodb
-
-# The following packages are considered to be unsafe in a requirements file:
-# setuptools
diff --git a/requirements.txt.sha256 b/requirements.txt.sha256
index 44a5f76f9..eaf35ac18 100644
--- a/requirements.txt.sha256
+++ b/requirements.txt.sha256
@@ -1 +1 @@
-7c48c01584024f7ea7bd8b3ad63b6c15f64f76dddb0d271007f45b451af22cc2  requirements.txt
+3f0f25b383baca3102034710cdf6abdc96d1efd027f9e83ede7781287d057d3f  requirements.txt
diff --git a/requirements_dev.in b/requirements_dev.in
index a65633431..b92f0e2ef 100644
--- a/requirements_dev.in
+++ b/requirements_dev.in
@@ -1,11 +1,7 @@
 -r requirements.txt
 coverage[toml]
-coveralls
-flake8
-isort
 ocdsmerge
 openpyxl
-pip-tools
 pika
 psycopg2-binary
 pytest
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 40da16af5..dd36975ca 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,9 +1,5 @@
-#
-# This file is autogenerated by pip-compile with Python 3.10
-# by the following command:
-#
-#    pip-compile requirements_dev.in
-#
+# This file was autogenerated by uv via the following command:
+#    uv pip compile requirements_dev.in -o requirements_dev.txt
 attrs==22.2.0
     # via
     #   -r requirements.txt
@@ -22,8 +18,6 @@ btrees==4.11.3
     # via
     #   -r requirements.txt
     #   zodb
-build==0.10.0
-    # via pip-tools
 certifi==2024.7.4
     # via
     #   -r requirements.txt
@@ -38,8 +32,6 @@ charset-normalizer==3.1.0
     # via
     #   -r requirements.txt
     #   requests
-click==8.1.3
-    # via pip-tools
 constantly==15.1.0
     # via
     #   -r requirements.txt
@@ -48,13 +40,10 @@ contextlib2==0.6.0.post1
     # via
     #   -r requirements.txt
     #   schema
-coverage[toml]==5.5
+coverage==7.6.1
     # via
     #   -r requirements_dev.in
-    #   coveralls
     #   pytest-cov
-coveralls==4.0.1
-    # via -r requirements_dev.in
 cryptography==42.0.4
     # via
     #   -r requirements.txt
@@ -73,23 +62,15 @@ defusedxml==0.7.1
     #   -r requirements.txt
     #   odfpy
     #   scrapy
-docopt==0.6.2
-    # via coveralls
-entrypoints==0.3
-    # via flake8
 et-xmlfile==1.0.1
     # via
     #   -r requirements.txt
     #   openpyxl
-exceptiongroup==1.2.1
-    # via pytest
 filelock==3.4.1
     # via
     #   -r requirements.txt
     #   tldextract
-flake8==3.7.9
-    # via -r requirements_dev.in
-flattentool==0.24.0
+flattentool==0.26.0
     # via -r requirements.txt
 greenlet==3.0.3
     # via pytest-twisted
@@ -108,14 +89,12 @@ ijson==3.2.3
     #   -r requirements.txt
     #   flattentool
     #   ocdskit
-incremental==22.10.0
+incremental==24.7.2
     # via
     #   -r requirements.txt
     #   twisted
 iniconfig==1.1.1
     # via pytest
-isort==5.7.0
-    # via -r requirements_dev.in
 itemadapter==0.7.0
     # via
     #   -r requirements.txt
@@ -150,19 +129,17 @@ jsonref==1.0.1
     #   ocdsextensionregistry
     #   ocdskit
     #   ocdsmerge
-lxml==4.9.2
+lxml==5.3.0
     # via
     #   -r requirements.txt
     #   flattentool
     #   parsel
     #   scrapy
-mccabe==0.6.1
-    # via flake8
 ocdsextensionregistry==0.2.2
     # via
     #   -r requirements.txt
     #   ocdskit
-ocdskit[perf]==1.1.13
+ocdskit==1.1.13
     # via -r requirements.txt
 ocdsmerge==0.7.0
     # via
@@ -179,14 +156,10 @@ openpyxl==3.0.5
     #   -r requirements_dev.in
     #   flattentool
 orjson==3.9.15
-    # via
-    #   -r requirements.txt
-    #   ocdskit
-    #   yapw
+    # via -r requirements.txt
 packaging==23.1
     # via
     #   -r requirements.txt
-    #   build
     #   pytest
     #   scrapy
     #   scrapyd
@@ -205,8 +178,6 @@ pika==1.2.0
     #   -r requirements.txt
     #   -r requirements_dev.in
     #   yapw
-pip-tools==7.3.0
-    # via -r requirements_dev.in
 pluggy==1.3.0
     # via pytest
 protego==0.2.1
@@ -226,8 +197,6 @@ pyasn1-modules==0.2.7
     # via
     #   -r requirements.txt
     #   service-identity
-pycodestyle==2.5.0
-    # via flake8
 pycparser==2.19
     # via
     #   -r requirements.txt
@@ -238,14 +207,10 @@ pydispatcher==2.0.6
     # via
     #   -r requirements.txt
     #   scrapy
-pyflakes==2.1.1
-    # via flake8
 pyopenssl==24.0.0
     # via
     #   -r requirements.txt
     #   scrapy
-pyproject-hooks==1.0.0
-    # via build
 pytest==7.4.4
     # via
     #   -r requirements_dev.in
@@ -274,7 +239,6 @@ rarfile==3.1
 requests==2.32.2
     # via
     #   -r requirements.txt
-    #   coveralls
     #   ocdsextensionregistry
     #   ocdsmerge
     #   requests-cache
@@ -298,16 +262,26 @@ scrapy==2.11.2
     #   -r requirements.txt
     #   scrapyd
     #   scrapyd-client
-scrapyd==1.4.3
+scrapyd==1.5.0b1
     # via -r requirements.txt
 scrapyd-client==1.2.3
     # via -r requirements.txt
-sentry-sdk==1.19.0
+sentry-sdk==2.10.0
     # via -r requirements.txt
 service-identity==24.1.0
     # via
     #   -r requirements.txt
     #   scrapy
+setuptools==74.1.1
+    # via
+    #   -r requirements.txt
+    #   incremental
+    #   scrapy
+    #   scrapyd
+    #   zc-lockfile
+    #   zc-zlibstorage
+    #   zodbpickle
+    #   zope-interface
 six==1.16.0
     # via
     #   -r requirements.txt
@@ -321,19 +295,11 @@ tldextract==3.1.2
     # via
     #   -r requirements.txt
     #   scrapy
-toml==0.10.2
-    # via coverage
-tomli==2.0.1
-    # via
-    #   build
-    #   pip-tools
-    #   pyproject-hooks
-    #   pytest
 transaction==3.1.0
     # via
     #   -r requirements.txt
     #   zodb
-twisted==24.3.0
+twisted==24.7.0rc1
     # via
     #   -r requirements.txt
     #   scrapy
@@ -366,13 +332,11 @@ w3lib==2.1.1
     #   scrapy
     #   scrapyd
     #   scrapyd-client
-wheel==0.40.0
-    # via pip-tools
 xmltodict==0.12.0
     # via
     #   -r requirements.txt
     #   flattentool
-yapw[perf]==0.1.4
+yapw==0.1.4
     # via -r requirements.txt
 zc-lockfile==3.0.post1
     # via
@@ -406,7 +370,3 @@ zope-interface==6.0
     #   twisted
     #   zc-zlibstorage
     #   zodb
-
-# The following packages are considered to be unsafe in a requirements file:
-# pip
-# setuptools
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 6e771a733..000000000
--- a/setup.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from setuptools import find_packages, setup
-
-# This project is not intended to be released or used as a Python package. This file only exists for scrapyd-client.
-# https://github.com/scrapy/scrapyd-client/blob/v1.1.0/README.rst
-
-setup(
-    packages=find_packages(exclude=['tests', 'tests.*']),
-    package_data={
-        'kingfisher_scrapy': ['schema/*.json'],
-    },
-    include_package_data=True,
-    entry_points={
-        'scrapy': [
-            'settings = kingfisher_scrapy.settings',
-        ],
-    },
-)
diff --git a/tests/fixtures/test.rar b/tests/fixtures/test.rar
index a307587e9..86f6987c6 100644
Binary files a/tests/fixtures/test.rar and b/tests/fixtures/test.rar differ
diff --git a/tests/test_items.py b/tests/test_items.py
index bd9b38941..8e42d1b6d 100644
--- a/tests/test_items.py
+++ b/tests/test_items.py
@@ -9,17 +9,17 @@
 FILE_ERROR = {**ITEM | {'errors': {'http_code': 500, 'detail': 'timeout'}}}
 
 
-def check_required(klass, base, pop):
+def check_required(cls, base, pop):
     data = base.copy()
     data.pop(pop)
 
     with pytest.raises(pydantic.ValidationError):
-        klass(**data)
+        cls(**data)
 
 
-@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)])
-def test_valid(klass, base):
-    klass(**base)  # no exception raised
+@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)])
+def test_valid(cls, base):
+    cls(**base)  # no exception raised
 
 
 @pytest.mark.parametrize('pop', ['file_name', 'url', 'data_type'])
@@ -38,39 +38,39 @@ def test_file_error_required(pop):
 
 
 @pytest.mark.parametrize('invalid', ['path/test', 'path\\test'])
-@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)])
-def test_file_name(klass, base, invalid):
+@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)])
+def test_file_name(cls, base, invalid):
     with pytest.raises(pydantic.ValidationError):
-        klass(**base | {'file_name': invalid})
+        cls(**base | {'file_name': invalid})
 
 
 @pytest.mark.parametrize('invalid', ['://example.com', 'scheme://example.com', 'http://example'])
-@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)])
-def test_url(klass, base, invalid):
+@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)])
+def test_url(cls, base, invalid):
     with pytest.raises(pydantic.ValidationError):
-        klass(**base | {'url': invalid})
+        cls(**base | {'url': invalid})
 
 
-@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM)])
-def test_data_type(klass, base):
+@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM)])
+def test_data_type(cls, base):
     with pytest.raises(pydantic.ValidationError):
-        klass(**base | {'data_type': 'invalid'})
+        cls(**base | {'data_type': 'invalid'})
 
 
 @pytest.mark.parametrize('invalid', [
     None, True, 1, 1.0, 'data', [{'ocid': 'x'}], ({'ocid': 'x'},), {('ocid', 'x')}, frozenset((('ocid', 'x'),))
 ])
-@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM)])
-def test_data(klass, base, invalid):
+@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM)])
+def test_data(cls, base, invalid):
     with pytest.raises(pydantic.ValidationError):
-        klass(**base | {'data': invalid})
+        cls(**base | {'data': invalid})
 
 
 @pytest.mark.parametrize('invalid', [b'', {}])
-@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM)])
-def test_data_length(klass, base, invalid):
+@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM)])
+def test_data_length(cls, base, invalid):
     with pytest.raises(pydantic.ValidationError):
-        klass(**base | {'data': invalid})
+        cls(**base | {'data': invalid})
 
 
 @pytest.mark.parametrize('invalid', [-1, 0, '1'])
diff --git a/tests/test_spidermiddlewares.py b/tests/test_spidermiddlewares.py
index 4f4b47040..4cced69ce 100644
--- a/tests/test_spidermiddlewares.py
+++ b/tests/test_spidermiddlewares.py
@@ -437,16 +437,16 @@ def test_retry_data_error_middleware(exception):
      'release', {'x': {'a': 'b'}},
      'release', {'a': 'b'}),
 ])
-@pytest.mark.parametrize('klass', [File, FileItem])
-async def test_root_path_middleware(root_path, data_type, data, expected_data_type, expected_data, klass):
+@pytest.mark.parametrize('cls', [File, FileItem])
+async def test_root_path_middleware(root_path, data_type, data, expected_data_type, expected_data, cls):
     spider = spider_with_crawler()
     middleware = RootPathMiddleware()
     spider.data_type = data_type
     spider.root_path = root_path
 
-    kwargs = {'number': 1} if klass is FileItem else {}
+    kwargs = {'number': 1} if cls is FileItem else {}
 
-    item = klass(
+    item = cls(
         file_name='test.json',
         url='http://test.com',
         data_type=data_type,
@@ -459,7 +459,7 @@ async def test_root_path_middleware(root_path, data_type, data, expected_data_ty
     assert len(transformed_items) == int(expected_data is not None)
 
     for transformed_item in transformed_items:
-        assert isinstance(transformed_item, klass)
+        assert isinstance(transformed_item, cls)
         assert transformed_item.file_name == 'test.json'
         assert transformed_item.data == expected_data
         assert transformed_item.data_type == expected_data_type
@@ -488,17 +488,17 @@ async def test_root_path_middleware(root_path, data_type, data, expected_data_ty
      'release_package', b'[{"releases": [{"a": "b"}, {"c": "d"}], "x": "y"}, {"releases": [{"e": "f"}, {"g": "h"}]}]',
      'release_package', {'releases': [{'a': 'b'}, {'c': 'd'}, {'e': 'f'}, {'g': 'h'}], 'x': 'y'}),
 ])
-@pytest.mark.parametrize('klass', [File, FileItem])
-async def test_root_path_middleware_item(root_path, sample, data_type, data, expected_data_type, expected_data, klass):
+@pytest.mark.parametrize('cls', [File, FileItem])
+async def test_root_path_middleware_item(root_path, sample, data_type, data, expected_data_type, expected_data, cls):
     spider = spider_with_crawler()
     middleware = RootPathMiddleware()
     spider.data_type = data_type
     spider.root_path = root_path
     spider.sample = sample
 
-    kwargs = {'number': 1} if klass is FileItem else {}
+    kwargs = {'number': 1} if cls is FileItem else {}
 
-    item = klass(
+    item = cls(
         file_name='test.json',
         url='http://test.com',
         data_type=data_type,
@@ -520,15 +520,15 @@ async def test_root_path_middleware_item(root_path, sample, data_type, data, exp
 
 
 @pytest.mark.parametrize('valid', [True, False])
-@pytest.mark.parametrize('klass', [File, FileItem])
-async def test_validate_json_middleware(valid, klass, caplog):
+@pytest.mark.parametrize('cls', [File, FileItem])
+async def test_validate_json_middleware(valid, cls, caplog):
     spider = spider_with_crawler()
     middleware = ValidateJSONMiddleware()
     spider.validate_json = True
 
-    kwargs = {'number': 1} if klass is FileItem else {}
+    kwargs = {'number': 1} if cls is FileItem else {}
 
-    item = klass(
+    item = cls(
         file_name='test.json',
         url='http://test.com',
         data_type='release_package',
@@ -547,7 +547,7 @@ async def test_validate_json_middleware(valid, klass, caplog):
         assert invalid_json_count == 0
         assert messages == []
     else:
-        number = ", 'number': 1" if klass is FileItem else ''
+        number = ", 'number': 1" if cls is FileItem else ''
         assert invalid_json_count == 1
         assert [message.splitlines() for message in messages] == [[
             "Dropped: Invalid JSON",
@@ -556,15 +556,15 @@ async def test_validate_json_middleware(valid, klass, caplog):
 
 
 @pytest.mark.parametrize('data', [b'[{"ocid": "abc"}]', {'ocid': 'abc'}])
-@pytest.mark.parametrize('klass', [File, FileItem])
-async def test_validate_json_middleware_already_parsed(data, klass, caplog):
+@pytest.mark.parametrize('cls', [File, FileItem])
+async def test_validate_json_middleware_already_parsed(data, cls, caplog):
     spider = spider_with_crawler()
     middleware = ValidateJSONMiddleware()
     spider.validate_json = True
 
-    kwargs = {'number': 1} if klass is FileItem else {}
+    kwargs = {'number': 1} if cls is FileItem else {}
 
-    item = klass(
+    item = cls(
         file_name='test.json',
         url='http://test.com',
         data_type='release_package',