From 80f85f9c85619be3968301448402335f0b1d1cbd Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Mon, 15 Jul 2024 22:59:19 -0400 Subject: [PATCH 01/20] chore: Prefer type() to __class__ --- kingfisher_scrapy/items.py | 2 +- kingfisher_scrapy/pipelines.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kingfisher_scrapy/items.py b/kingfisher_scrapy/items.py index f79a03482..9e35d3f17 100644 --- a/kingfisher_scrapy/items.py +++ b/kingfisher_scrapy/items.py @@ -47,7 +47,7 @@ class DataResource(Resource, arbitrary_types_allowed=True, use_enum_values=True) @pydantic.validator('data', pre=True) # `pre` is needed to prevent pydantic from type casting def check_data(cls, v): # pydantic has no `condict()` to set `strict=True` or `min_properties=1`. pydantic/pydantic#1277 - assert isinstance(v, (Data, bytes)), f'{v.__class__.__name__} is not a valid type' + assert isinstance(v, (Data, bytes)), f'{type(v).__name__} is not a valid type' assert v, 'ensure this value is non-empty' return v diff --git a/kingfisher_scrapy/pipelines.py b/kingfisher_scrapy/pipelines.py index cb3c6fde9..828a2df08 100644 --- a/kingfisher_scrapy/pipelines.py +++ b/kingfisher_scrapy/pipelines.py @@ -56,7 +56,7 @@ def process_item(self, item, spider): # Drop FileError items, so that we keep trying to get data. if not isinstance(item, (File, FileItem)): - raise DropItem(f'Sample: Item is a {item.__class__.__name__}, not a File or FileItem') + raise DropItem(f'Sample: Item is a {type(item).__name__}, not a File or FileItem') if self.item_count >= spider.sample: spider.crawler.engine.close_spider(spider, 'sample') raise DropItem('Sample: Maximum sample size reached') From 84953289d8f67bb240a72122c4d885bc7dc386d8 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Mon, 15 Jul 2024 23:03:48 -0400 Subject: [PATCH 02/20] chore: Prefer cls to klass --- tests/test_items.py | 40 ++++++++++++++++----------------- tests/test_spidermiddlewares.py | 36 ++++++++++++++--------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tests/test_items.py b/tests/test_items.py index bd9b38941..8e42d1b6d 100644 --- a/tests/test_items.py +++ b/tests/test_items.py @@ -9,17 +9,17 @@ FILE_ERROR = {**ITEM | {'errors': {'http_code': 500, 'detail': 'timeout'}}} -def check_required(klass, base, pop): +def check_required(cls, base, pop): data = base.copy() data.pop(pop) with pytest.raises(pydantic.ValidationError): - klass(**data) + cls(**data) -@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)]) -def test_valid(klass, base): - klass(**base) # no exception raised +@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)]) +def test_valid(cls, base): + cls(**base) # no exception raised @pytest.mark.parametrize('pop', ['file_name', 'url', 'data_type']) @@ -38,39 +38,39 @@ def test_file_error_required(pop): @pytest.mark.parametrize('invalid', ['path/test', 'path\\test']) -@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)]) -def test_file_name(klass, base, invalid): +@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)]) +def test_file_name(cls, base, invalid): with pytest.raises(pydantic.ValidationError): - klass(**base | {'file_name': invalid}) + cls(**base | {'file_name': invalid}) @pytest.mark.parametrize('invalid', ['://example.com', 
'scheme://example.com', 'http://example']) -@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)]) -def test_url(klass, base, invalid): +@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM), (FileError, FILE_ERROR)]) +def test_url(cls, base, invalid): with pytest.raises(pydantic.ValidationError): - klass(**base | {'url': invalid}) + cls(**base | {'url': invalid}) -@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM)]) -def test_data_type(klass, base): +@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM)]) +def test_data_type(cls, base): with pytest.raises(pydantic.ValidationError): - klass(**base | {'data_type': 'invalid'}) + cls(**base | {'data_type': 'invalid'}) @pytest.mark.parametrize('invalid', [ None, True, 1, 1.0, 'data', [{'ocid': 'x'}], ({'ocid': 'x'},), {('ocid', 'x')}, frozenset((('ocid', 'x'),)) ]) -@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM)]) -def test_data(klass, base, invalid): +@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM)]) +def test_data(cls, base, invalid): with pytest.raises(pydantic.ValidationError): - klass(**base | {'data': invalid}) + cls(**base | {'data': invalid}) @pytest.mark.parametrize('invalid', [b'', {}]) -@pytest.mark.parametrize('klass,base', [(File, FILE), (FileItem, FILE_ITEM)]) -def test_data_length(klass, base, invalid): +@pytest.mark.parametrize('cls,base', [(File, FILE), (FileItem, FILE_ITEM)]) +def test_data_length(cls, base, invalid): with pytest.raises(pydantic.ValidationError): - klass(**base | {'data': invalid}) + cls(**base | {'data': invalid}) @pytest.mark.parametrize('invalid', [-1, 0, '1']) diff --git a/tests/test_spidermiddlewares.py b/tests/test_spidermiddlewares.py index 4f4b47040..4cced69ce 100644 --- a/tests/test_spidermiddlewares.py +++ b/tests/test_spidermiddlewares.py @@ -437,16 +437,16 @@ def test_retry_data_error_middleware(exception): 'release', {'x': {'a': 'b'}}, 'release', {'a': 'b'}), ]) -@pytest.mark.parametrize('klass', [File, FileItem]) -async def test_root_path_middleware(root_path, data_type, data, expected_data_type, expected_data, klass): +@pytest.mark.parametrize('cls', [File, FileItem]) +async def test_root_path_middleware(root_path, data_type, data, expected_data_type, expected_data, cls): spider = spider_with_crawler() middleware = RootPathMiddleware() spider.data_type = data_type spider.root_path = root_path - kwargs = {'number': 1} if klass is FileItem else {} + kwargs = {'number': 1} if cls is FileItem else {} - item = klass( + item = cls( file_name='test.json', url='http://test.com', data_type=data_type, @@ -459,7 +459,7 @@ async def test_root_path_middleware(root_path, data_type, data, expected_data_ty assert len(transformed_items) == int(expected_data is not None) for transformed_item in transformed_items: - assert isinstance(transformed_item, klass) + assert isinstance(transformed_item, cls) assert transformed_item.file_name == 'test.json' assert transformed_item.data == expected_data assert transformed_item.data_type == expected_data_type @@ -488,17 +488,17 @@ async def test_root_path_middleware(root_path, data_type, data, expected_data_ty 'release_package', b'[{"releases": [{"a": "b"}, {"c": "d"}], "x": "y"}, {"releases": [{"e": "f"}, {"g": "h"}]}]', 'release_package', {'releases': [{'a': 'b'}, {'c': 'd'}, {'e': 'f'}, {'g': 'h'}], 'x': 'y'}), ]) -@pytest.mark.parametrize('klass', [File, FileItem]) -async def 
test_root_path_middleware_item(root_path, sample, data_type, data, expected_data_type, expected_data, klass): +@pytest.mark.parametrize('cls', [File, FileItem]) +async def test_root_path_middleware_item(root_path, sample, data_type, data, expected_data_type, expected_data, cls): spider = spider_with_crawler() middleware = RootPathMiddleware() spider.data_type = data_type spider.root_path = root_path spider.sample = sample - kwargs = {'number': 1} if klass is FileItem else {} + kwargs = {'number': 1} if cls is FileItem else {} - item = klass( + item = cls( file_name='test.json', url='http://test.com', data_type=data_type, @@ -520,15 +520,15 @@ async def test_root_path_middleware_item(root_path, sample, data_type, data, exp @pytest.mark.parametrize('valid', [True, False]) -@pytest.mark.parametrize('klass', [File, FileItem]) -async def test_validate_json_middleware(valid, klass, caplog): +@pytest.mark.parametrize('cls', [File, FileItem]) +async def test_validate_json_middleware(valid, cls, caplog): spider = spider_with_crawler() middleware = ValidateJSONMiddleware() spider.validate_json = True - kwargs = {'number': 1} if klass is FileItem else {} + kwargs = {'number': 1} if cls is FileItem else {} - item = klass( + item = cls( file_name='test.json', url='http://test.com', data_type='release_package', @@ -547,7 +547,7 @@ async def test_validate_json_middleware(valid, klass, caplog): assert invalid_json_count == 0 assert messages == [] else: - number = ", 'number': 1" if klass is FileItem else '' + number = ", 'number': 1" if cls is FileItem else '' assert invalid_json_count == 1 assert [message.splitlines() for message in messages] == [[ "Dropped: Invalid JSON", @@ -556,15 +556,15 @@ async def test_validate_json_middleware(valid, klass, caplog): @pytest.mark.parametrize('data', [b'[{"ocid": "abc"}]', {'ocid': 'abc'}]) -@pytest.mark.parametrize('klass', [File, FileItem]) -async def test_validate_json_middleware_already_parsed(data, klass, caplog): +@pytest.mark.parametrize('cls', [File, FileItem]) +async def test_validate_json_middleware_already_parsed(data, cls, caplog): spider = spider_with_crawler() middleware = ValidateJSONMiddleware() spider.validate_json = True - kwargs = {'number': 1} if klass is FileItem else {} + kwargs = {'number': 1} if cls is FileItem else {} - item = klass( + item = cls( file_name='test.json', url='http://test.com', data_type='release_package', From 294c4efd9eac6417fa4f59c90826b6404e4b2251 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:04:01 -0400 Subject: [PATCH 03/20] build: Upgrade vulnerable dependencies --- requirements.txt | 2 +- requirements.txt.sha256 | 2 +- requirements_dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7a382fef1..6c8baa654 100644 --- a/requirements.txt +++ b/requirements.txt @@ -165,7 +165,7 @@ scrapyd==1.4.3 # via -r requirements.in scrapyd-client==1.2.3 # via -r requirements.in -sentry-sdk==1.19.0 +sentry-sdk==2.10.0 # via -r requirements.in service-identity==24.1.0 # via scrapy diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index 44a5f76f9..a80b51475 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -7c48c01584024f7ea7bd8b3ad63b6c15f64f76dddb0d271007f45b451af22cc2 requirements.txt +a8631d98a0344b2548ccafaeb98aa97bf36fe03a12bfd1eb3610c4796a55b2f8 requirements.txt diff --git a/requirements_dev.txt b/requirements_dev.txt index 40da16af5..2d9f185d7 100644 --- 
a/requirements_dev.txt +++ b/requirements_dev.txt @@ -302,7 +302,7 @@ scrapyd==1.4.3 # via -r requirements.txt scrapyd-client==1.2.3 # via -r requirements.txt -sentry-sdk==1.19.0 +sentry-sdk==2.10.0 # via -r requirements.txt service-identity==24.1.0 # via From 63137d5e7066d04fb66659622d1fc8a4ca5f1006 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Thu, 25 Jul 2024 21:22:20 -0400 Subject: [PATCH 04/20] build: Use scrapyd prerelease --- requirements.txt | 4 ++-- requirements.txt.sha256 | 2 +- requirements_dev.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6c8baa654..6ee4013be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile +# pip-compile --pre # attrs==22.2.0 # via @@ -161,7 +161,7 @@ scrapy==2.11.2 # -r requirements.in # scrapyd # scrapyd-client -scrapyd==1.4.3 +scrapyd==1.5.0b1 # via -r requirements.in scrapyd-client==1.2.3 # via -r requirements.in diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index a80b51475..b8dc2cc0c 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -a8631d98a0344b2548ccafaeb98aa97bf36fe03a12bfd1eb3610c4796a55b2f8 requirements.txt +225466f64e5a9a9905eb8f77903c7999fa91f43c7f3bd7156bd0238e79f716aa requirements.txt diff --git a/requirements_dev.txt b/requirements_dev.txt index 2d9f185d7..bd68f37b5 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -298,7 +298,7 @@ scrapy==2.11.2 # -r requirements.txt # scrapyd # scrapyd-client -scrapyd==1.4.3 +scrapyd==1.5.0b1 # via -r requirements.txt scrapyd-client==1.2.3 # via -r requirements.txt From bc143b86708c6b19bcac5da38e3d898ee455dea5 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 31 Jul 2024 10:34:32 -0400 Subject: [PATCH 05/20] build: Upgrade vulnerable dependencies --- requirements.txt | 8 +++++--- requirements.txt.sha256 | 2 +- requirements_dev.txt | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6ee4013be..91aa37d68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --pre +# pip-compile # attrs==22.2.0 # via @@ -60,7 +60,7 @@ ijson==3.2.3 # -r requirements.in # flattentool # ocdskit -incremental==22.10.0 +incremental==24.7.2 # via twisted itemadapter==0.7.0 # via @@ -179,9 +179,11 @@ six==1.16.0 # zodb tldextract==3.1.2 # via scrapy +tomli==2.0.1 + # via incremental transaction==3.1.0 # via zodb -twisted==24.3.0 +twisted==24.7.0rc1 # via # -r requirements.in # scrapy diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index b8dc2cc0c..08bc9a61d 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -225466f64e5a9a9905eb8f77903c7999fa91f43c7f3bd7156bd0238e79f716aa requirements.txt +c2bbeeaee2ac654d26c0b3a557708544e21d1f4606d070fd9ee803b6f26c489f requirements.txt diff --git a/requirements_dev.txt b/requirements_dev.txt index bd68f37b5..94c6a84ba 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -108,7 +108,7 @@ ijson==3.2.3 # -r requirements.txt # flattentool # ocdskit -incremental==22.10.0 +incremental==24.7.2 # via # -r requirements.txt # twisted @@ -325,7 +325,9 @@ toml==0.10.2 # via coverage tomli==2.0.1 # via + # -r requirements.txt # 
build + # incremental # pip-tools # pyproject-hooks # pytest @@ -333,7 +335,7 @@ transaction==3.1.0 # via # -r requirements.txt # zodb -twisted==24.3.0 +twisted==24.7.0rc1 # via # -r requirements.txt # scrapy From c09acd50550dffc349c2263067a2c7674c769c4b Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:44:15 -0400 Subject: [PATCH 06/20] ci: Ignore deprecation warning from pytest-twisted, pytest-dev/pytest-twisted#183 --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9d7a0ee1..d28684e1f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,8 @@ jobs: RABBIT_URL: amqp://127.0.0.1:${{ job.services.rabbitmq.ports[5672] }} KINGFISHER_API2_TEST_URL: http://localhost:${{ job.services.httpbin.ports[8080] }}/anything/ # For requests.post() in KingfisherProcessAPI2._post_synchronous(). - run: pytest -W error -W ignore::ResourceWarning -rs --cov kingfisher_scrapy + # https://github.com/pytest-dev/pytest-twisted/issues/183 + run: pytest -W error -W ignore::ResourceWarning -W ignore::DeprecationWarning:pytest_twisted -rs --cov kingfisher_scrapy - run: python test_delayed_request_middleware.py - env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 13b1b7e3962348c5715345873bdca2a0917361e0 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:13:28 -0400 Subject: [PATCH 07/20] ci: Ignore deprecation warning from scrapy --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d28684e1f..e6d663d20 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,8 @@ jobs: KINGFISHER_API2_TEST_URL: http://localhost:${{ job.services.httpbin.ports[8080] }}/anything/ # For requests.post() in KingfisherProcessAPI2._post_synchronous(). 
# https://github.com/pytest-dev/pytest-twisted/issues/183 - run: pytest -W error -W ignore::ResourceWarning -W ignore::DeprecationWarning:pytest_twisted -rs --cov kingfisher_scrapy + # https://github.com/scrapy/scrapy/issues/6450 + run: pytest -W error -W ignore::ResourceWarning -W ignore::DeprecationWarning:pytest_twisted -W ignore::DeprecationWarning:scrapy.core.downloader.webclient -rs --cov kingfisher_scrapy - run: python test_delayed_request_middleware.py - env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From e243931e2f8e93cb79857435b54aaa7cc3c58f2c Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:18:26 -0400 Subject: [PATCH 08/20] ci: Ignore deprecation warning from pytest-twisted in nonlinux workflow --- .github/workflows/nonlinux.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nonlinux.yml b/.github/workflows/nonlinux.yml index 4fa870480..754a99012 100644 --- a/.github/workflows/nonlinux.yml +++ b/.github/workflows/nonlinux.yml @@ -22,5 +22,6 @@ jobs: - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt - env: CI_SKIP: true - run: pytest -W error -rs --cov kingfisher_scrapy + # https://github.com/pytest-dev/pytest-twisted/issues/183 + run: pytest -W error -W ignore::DeprecationWarning:pytest_twisted -rs --cov kingfisher_scrapy - run: python test_delayed_request_middleware.py From 6626b362d12bac3c6296eabb1ebe7e0c2d5a1934 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:11:26 -0400 Subject: [PATCH 09/20] fix(chile_compra_api_base): Partially revert 0a88662 ("status" is set on errors) --- .../spiders/chile_compra_api_base.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kingfisher_scrapy/spiders/chile_compra_api_base.py b/kingfisher_scrapy/spiders/chile_compra_api_base.py index 2bfc82865..4e6b41cb2 100644 --- a/kingfisher_scrapy/spiders/chile_compra_api_base.py +++ b/kingfisher_scrapy/spiders/chile_compra_api_base.py @@ -82,6 +82,21 @@ def parse_page(self, response): def handle_item(self, item): pass + # from IndexSpider + def parse_list_loader(self, response): + data = response.json() + + # Some files contain invalid packages, e.g.: + # { + # "detail": "Error en la generación. 
", + # "status": 500 + # } + if set(data) == {'detail', 'status'}: + data['http_code'] = data['status'] + return self.build_file_error_from_response(response, errors=data) + + return data + # from IndexSpider def url_builder(self, value, data, response): # URL looks like http://api.mercadopublico.cl/APISOCDS/OCDS/listaOCDSAgnoMesTratoDirecto/2021/03/31500/100 From 5048b04d1d441f5523b786ee04557b76bb85bd7f Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:17:16 -0400 Subject: [PATCH 10/20] fix(chile_compra_api_base): Observed a JSONDecodeError (not easily reproducible) --- kingfisher_scrapy/spiders/chile_compra_api_base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kingfisher_scrapy/spiders/chile_compra_api_base.py b/kingfisher_scrapy/spiders/chile_compra_api_base.py index 4e6b41cb2..879b3ebdc 100644 --- a/kingfisher_scrapy/spiders/chile_compra_api_base.py +++ b/kingfisher_scrapy/spiders/chile_compra_api_base.py @@ -1,5 +1,6 @@ from abc import abstractmethod from datetime import date +from json import JSONDecodeError from kingfisher_scrapy.base_spiders import IndexSpider, PeriodicSpider from kingfisher_scrapy.exceptions import SpiderArgumentError @@ -84,7 +85,13 @@ def handle_item(self, item): # from IndexSpider def parse_list_loader(self, response): - data = response.json() + try: + data = response.json() + except JSONDecodeError: + yield self.build_file_error_from_response( + response, errors={'http_code': response.status, 'text': response.text} + ) + return # Some files contain invalid packages, e.g.: # { From d89b4ca49051e43cc7c7becc53792401d1b1efbd Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Sat, 17 Aug 2024 01:59:38 -0400 Subject: [PATCH 11/20] fix: Skip files with zero size (like chile_compra_bulk with 2009-3-R109.json in https://ocds.blob.core.windows.net/ocds/200901.zip) --- kingfisher_scrapy/base_spiders/compressed_file_spider.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kingfisher_scrapy/base_spiders/compressed_file_spider.py b/kingfisher_scrapy/base_spiders/compressed_file_spider.py index 212427d86..fe3149fb9 100644 --- a/kingfisher_scrapy/base_spiders/compressed_file_spider.py +++ b/kingfisher_scrapy/base_spiders/compressed_file_spider.py @@ -79,6 +79,9 @@ def parse(self, response): if self.sample and number > self.sample: break + if not file_info.file_size: + continue + filename = file_info.filename basename = os.path.basename(filename) if ( From 56803dda95ed35b4ee9eaa163f39e85c0ae35b7c Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Sat, 17 Aug 2024 02:22:26 -0400 Subject: [PATCH 12/20] test: Add non-empty file to test.rar --- tests/fixtures/test.rar | Bin 112 -> 115 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/fixtures/test.rar b/tests/fixtures/test.rar index a307587e913023c865bdec1269d82b923e65e6b2..86f6987c6ba992780d1fb29232e2eb79d0f904de 100644 GIT binary patch delta 55 zcmXRYo*=F-w|<+aIukQ@GXqOA!~bR`whasKHZU-9mZTP!=w%h>=W#I$^E)0$`~CWu LW_2ysL}y(9`bQHV delta 52 zcmXRem>{mhzWh+TIukQ@0|QF~!~bR`1|Vo)VB{=GEiTc^D$dX2VixWR7l?Owdq-)a Gn=Sx?d=EeX From 350dca7e2996e5695ef65987965a0f4c9e65185c Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:42:56 -0400 Subject: [PATCH 13/20] chore: Ignore FlattenToolWarning specifically --- kingfisher_scrapy/pipelines.py | 3 ++- requirements.txt | 2 +- 
requirements.txt.sha256 | 2 +- requirements_dev.txt | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kingfisher_scrapy/pipelines.py b/kingfisher_scrapy/pipelines.py index 828a2df08..8697d858b 100644 --- a/kingfisher_scrapy/pipelines.py +++ b/kingfisher_scrapy/pipelines.py @@ -9,6 +9,7 @@ import ijson import jsonpointer from flattentool import unflatten +from flattentool.exceptions import FlattenToolWarning from scrapy.exceptions import DropItem, NotSupported from kingfisher_scrapy.items import File, FileItem, PluckedItem @@ -172,7 +173,7 @@ def process_item(self, item, spider): f.write(pkgutil.get_data('kingfisher_scrapy', f'schema/{spider.ocds_version}.json')) with warnings.catch_warnings(): - warnings.filterwarnings('ignore') # flattentool uses UserWarning, so we can't set a specific category + warnings.filterwarnings('ignore', category=FlattenToolWarning) unflatten( input_name, diff --git a/requirements.txt b/requirements.txt index 91aa37d68..8fcea3e97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -46,7 +46,7 @@ et-xmlfile==1.0.1 # via openpyxl filelock==3.4.1 # via tldextract -flattentool==0.24.0 +flattentool==0.26.0 # via -r requirements.in hyperlink==21.0.0 # via twisted diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index 08bc9a61d..3c3a6c115 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -c2bbeeaee2ac654d26c0b3a557708544e21d1f4606d070fd9ee803b6f26c489f requirements.txt +37b8bc0bb0d2802afb8d856378c890b6acb7cbba26aa7a15167775aeacf3a6b2 requirements.txt diff --git a/requirements_dev.txt b/requirements_dev.txt index 94c6a84ba..a84df1f96 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -89,7 +89,7 @@ filelock==3.4.1 # tldextract flake8==3.7.9 # via -r requirements_dev.in -flattentool==0.24.0 +flattentool==0.26.0 # via -r requirements.txt greenlet==3.0.3 # via pytest-twisted From 46f7567a726b6acce3d4b46d059e0a79d48a9b98 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 3 Sep 2024 13:46:08 -0400 Subject: [PATCH 14/20] build: Upgrade lxml for Python 3.12 --- requirements.txt | 2 +- requirements.txt.sha256 | 2 +- requirements_dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8fcea3e97..4b7134624 100644 --- a/requirements.txt +++ b/requirements.txt @@ -84,7 +84,7 @@ jsonref==1.0.1 # ocdsextensionregistry # ocdskit # ocdsmerge -lxml==4.9.2 +lxml==5.3.0 # via # flattentool # parsel diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index 3c3a6c115..b7b7cdcfe 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -37b8bc0bb0d2802afb8d856378c890b6acb7cbba26aa7a15167775aeacf3a6b2 requirements.txt +931f6a5de1720df67b3fd6176f96157518da5f07862a631962a3f87d52852963 requirements.txt diff --git a/requirements_dev.txt b/requirements_dev.txt index a84df1f96..3dde31bf7 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -150,7 +150,7 @@ jsonref==1.0.1 # ocdsextensionregistry # ocdskit # ocdsmerge -lxml==4.9.2 +lxml==5.3.0 # via # -r requirements.txt # flattentool From 22c84468af3da31e0dfb883b95990d628ac5c424 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:29:35 -0400 Subject: [PATCH 15/20] mexico_nuevo_leon: Update URLs --- kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py | 4 ++-- kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py | 4 ++-- 2 files changed, 4 insertions(+), 
4 deletions(-) diff --git a/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py b/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py index 1b254e238..5aebb5a20 100644 --- a/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py +++ b/kingfisher_scrapy/spiders/mexico_nuevo_leon_records.py @@ -8,7 +8,7 @@ class MexicoNuevoLeonRecords(SimpleSpider): Domain Secretaría de Movilidad y Planeación Urbana de Nuevo León Bulk download documentation - http://si.nl.gob.mx/transparencia/publicaciones + https://smpu.nl.gob.mx/transparencia/publicaciones """ name = 'mexico_nuevo_leon_records' @@ -17,6 +17,6 @@ class MexicoNuevoLeonRecords(SimpleSpider): def start_requests(self): yield scrapy.Request( - 'http://si.nl.gob.mx/siasi_ws/api/ocds/DescargarRecordPackage', + 'https://smpu.nl.gob.mx/siasi_ws/api/ocds/DescargarRecordPackage', meta={'file_name': 'records.json'} ) diff --git a/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py b/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py index 0af0fff8f..72d516da9 100644 --- a/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py +++ b/kingfisher_scrapy/spiders/mexico_nuevo_leon_releases.py @@ -8,7 +8,7 @@ class MexicoNuevoLeonReleases(CompressedFileSpider): Domain Secretaría de Movilidad y Planeación Urbana de Nuevo León Bulk download documentation - http://si.nl.gob.mx/transparencia/acerca-del-proyecto + https://smpu.nl.gob.mx/transparencia/acerca-del-proyecto """ name = 'mexico_nuevo_leon_releases' @@ -22,5 +22,5 @@ class MexicoNuevoLeonReleases(CompressedFileSpider): file_name_must_contain = 'ReleasePackage' def start_requests(self): - url = 'http://si.nl.gob.mx/acceso/DatosAbiertos/JSONsInfraestructuraAbierta.rar' + url = 'https://smpu.nl.gob.mx/acceso/DatosAbiertos/JSONsInfraestructuraAbierta.rar' yield scrapy.Request(url, meta={'file_name': 'all.rar'}) From b58764fb3cd515e04ca71846d95b1c3525795693 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 4 Sep 2024 01:25:54 -0400 Subject: [PATCH 16/20] build: Remove pip-tools. Add .python-version. 
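
The diff below replaces setup.py with pyproject.toml metadata so that
scrapyd-client can still build a deployable egg. As a minimal sketch
(assuming Python 3.10+ and an installed project; note the entry-point table
syntax introduced here is corrected in PATCH 18/20), a registered "scrapy"
entry point can be inspected at run time like this:

    from importlib.metadata import entry_points

    # List any "settings" entry points in the "scrapy" group; for this
    # project the value should be "kingfisher_scrapy.settings".
    for ep in entry_points(group="scrapy", name="settings"):
        print(ep.value)
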
--- .pre-commit-config.yaml | 23 ++++++++ .python-version | 1 + kingfisher_scrapy/extensions/files_store.py | 2 +- pyproject.toml | 29 ++++++++-- requirements.txt | 22 ++++---- requirements_dev.in | 2 +- requirements_dev.txt | 60 ++++++++++----------- setup.py | 17 ------ 8 files changed, 92 insertions(+), 64 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 .python-version delete mode 100644 setup.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..234f16c82 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +ci: + autoupdate_schedule: quarterly + skip: [pip-compile] +repos: + - repo: https://github.com/pycqa/flake8 + rev: 7.1.0 + hooks: + - id: flake8 + additional_dependencies: [flake8-comprehensions] + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + - repo: https://github.com/astral-sh/uv-pre-commit + rev: 0.4.4 + hooks: + - id: pip-compile + name: pip-compile requirements.in + args: [requirements.in, -o, requirements.txt, --no-strip-extras] + - id: pip-compile + name: pip-compile requirements_dev.in + args: [requirements_dev.in, -o, requirements_dev.txt, --no-strip-extras] + files: ^requirements_dev\.(in|txt)$ diff --git a/.python-version b/.python-version new file mode 100644 index 000000000..c8cfe3959 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/kingfisher_scrapy/extensions/files_store.py b/kingfisher_scrapy/extensions/files_store.py index 694cee176..8d0789fd1 100644 --- a/kingfisher_scrapy/extensions/files_store.py +++ b/kingfisher_scrapy/extensions/files_store.py @@ -59,7 +59,7 @@ def spider_closed(self, spider, reason): message_length = math.ceil(len(message) / 2) * 2 title_length = message_length // 2 - 8 - spider.logger.info(f"+-{'-' * title_length } DATA DIRECTORY {'-' * title_length }-+") + spider.logger.info(f"+-{'-' * title_length} DATA DIRECTORY {'-' * title_length}-+") spider.logger.info(f"| {' ' * message_length} |") spider.logger.info(f"| {message.ljust(message_length)} |") spider.logger.info(f"| {' ' * message_length} |") diff --git a/pyproject.toml b/pyproject.toml index c7fe433e4..7db4c9483 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,29 @@ +[build-system] +requires = ["setuptools>=61.2"] +build-backend = "setuptools.build_meta" + +# This project is not intended to be released or used as a Python package. This file only exists for scrapyd-client. 
+[project] +name = "kingfisher-collect" +version = "0.0.0" +license = {text = "BSD"} + +[tool.setuptools.packages.find] +exclude = ["tests", "tests.*"] + +[tool.setuptools.package-data] +kingfisher_scrapy = ["schema/*.json"] + +[options.entry_points] +scrapy = {settings = "kingfisher_scrapy.settings"} + +[tool.isort] +profile = "black" +line_length = 119 + [tool.pytest.ini_options] -addopts = '--doctest-modules' -asyncio_mode = 'auto' +addopts = "--doctest-modules" +asyncio_mode = "auto" [tool.coverage.run] -omit = ['*/kingfisher_scrapy/spiders/*'] +omit = ["*/kingfisher_scrapy/spiders/*"] diff --git a/requirements.txt b/requirements.txt index 4b7134624..e9de94013 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile -# +# This file was autogenerated by uv via the following command: +# uv pip compile requirements.in -o requirements.txt --no-strip-extras attrs==22.2.0 # via # automat @@ -169,6 +165,15 @@ sentry-sdk==2.10.0 # via -r requirements.in service-identity==24.1.0 # via scrapy +setuptools==74.1.1 + # via + # incremental + # scrapy + # scrapyd + # zc-lockfile + # zc-zlibstorage + # zodbpickle + # zope-interface six==1.16.0 # via # automat @@ -179,8 +184,6 @@ six==1.16.0 # zodb tldextract==3.1.2 # via scrapy -tomli==2.0.1 - # via incremental transaction==3.1.0 # via zodb twisted==24.7.0rc1 @@ -235,6 +238,3 @@ zope-interface==6.0 # twisted # zc-zlibstorage # zodb - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements_dev.in b/requirements_dev.in index a65633431..dd4197d69 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -5,8 +5,8 @@ flake8 isort ocdsmerge openpyxl -pip-tools pika +pre-commit psycopg2-binary pytest pytest-asyncio diff --git a/requirements_dev.txt b/requirements_dev.txt index 3dde31bf7..e8cc0e73e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,9 +1,5 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile requirements_dev.in -# +# This file was autogenerated by uv via the following command: +# uv pip compile requirements_dev.in -o requirements_dev.txt --no-strip-extras attrs==22.2.0 # via # -r requirements.txt @@ -22,8 +18,6 @@ btrees==4.11.3 # via # -r requirements.txt # zodb -build==0.10.0 - # via pip-tools certifi==2024.7.4 # via # -r requirements.txt @@ -34,12 +28,12 @@ cffi==1.15.0 # -r requirements.txt # cryptography # persistent +cfgv==3.4.0 + # via pre-commit charset-normalizer==3.1.0 # via # -r requirements.txt # requests -click==8.1.3 - # via pip-tools constantly==15.1.0 # via # -r requirements.txt @@ -73,6 +67,8 @@ defusedxml==0.7.1 # -r requirements.txt # odfpy # scrapy +distlib==0.3.8 + # via virtualenv docopt==0.6.2 # via coveralls entrypoints==0.3 @@ -81,12 +77,11 @@ et-xmlfile==1.0.1 # via # -r requirements.txt # openpyxl -exceptiongroup==1.2.1 - # via pytest filelock==3.4.1 # via # -r requirements.txt # tldextract + # virtualenv flake8==3.7.9 # via -r requirements_dev.in flattentool==0.26.0 @@ -97,6 +92,8 @@ hyperlink==21.0.0 # via # -r requirements.txt # twisted +identify==2.6.0 + # via pre-commit idna==3.7 # via # -r requirements.txt @@ -158,6 +155,8 @@ lxml==5.3.0 # scrapy mccabe==0.6.1 # via flake8 +nodeenv==1.9.1 + # via pre-commit ocdsextensionregistry==0.2.2 # via # -r requirements.txt @@ -186,7 +185,6 @@ orjson==3.9.15 packaging==23.1 # via # -r requirements.txt - # 
build # pytest # scrapy # scrapyd @@ -205,10 +203,12 @@ pika==1.2.0 # -r requirements.txt # -r requirements_dev.in # yapw -pip-tools==7.3.0 - # via -r requirements_dev.in +platformdirs==3.11.0 + # via virtualenv pluggy==1.3.0 # via pytest +pre-commit==3.8.0 + # via -r requirements_dev.in protego==0.2.1 # via # -r requirements.txt @@ -244,8 +244,6 @@ pyopenssl==24.0.0 # via # -r requirements.txt # scrapy -pyproject-hooks==1.0.0 - # via build pytest==7.4.4 # via # -r requirements_dev.in @@ -265,6 +263,8 @@ pytz==2020.1 # via # -r requirements.txt # flattentool +pyyaml==6.0.2 + # via pre-commit queuelib==1.6.2 # via # -r requirements.txt @@ -308,6 +308,16 @@ service-identity==24.1.0 # via # -r requirements.txt # scrapy +setuptools==74.1.1 + # via + # -r requirements.txt + # incremental + # scrapy + # scrapyd + # zc-lockfile + # zc-zlibstorage + # zodbpickle + # zope-interface six==1.16.0 # via # -r requirements.txt @@ -323,14 +333,6 @@ tldextract==3.1.2 # scrapy toml==0.10.2 # via coverage -tomli==2.0.1 - # via - # -r requirements.txt - # build - # incremental - # pip-tools - # pyproject-hooks - # pytest transaction==3.1.0 # via # -r requirements.txt @@ -360,6 +362,8 @@ urllib3==1.26.19 # requests # scrapyd-client # sentry-sdk +virtualenv==20.21.1 + # via pre-commit w3lib==2.1.1 # via # -r requirements.txt @@ -368,8 +372,6 @@ w3lib==2.1.1 # scrapy # scrapyd # scrapyd-client -wheel==0.40.0 - # via pip-tools xmltodict==0.12.0 # via # -r requirements.txt @@ -408,7 +410,3 @@ zope-interface==6.0 # twisted # zc-zlibstorage # zodb - -# The following packages are considered to be unsafe in a requirements file: -# pip -# setuptools diff --git a/setup.py b/setup.py deleted file mode 100644 index 6e771a733..000000000 --- a/setup.py +++ /dev/null @@ -1,17 +0,0 @@ -from setuptools import find_packages, setup - -# This project is not intended to be released or used as a Python package. This file only exists for scrapyd-client. 
-# https://github.com/scrapy/scrapyd-client/blob/v1.1.0/README.rst - -setup( - packages=find_packages(exclude=['tests', 'tests.*']), - package_data={ - 'kingfisher_scrapy': ['schema/*.json'], - }, - include_package_data=True, - entry_points={ - 'scrapy': [ - 'settings = kingfisher_scrapy.settings', - ], - }, -) From 82c752dbed32d67d0c558ce91a886dad334adbdc Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 4 Sep 2024 01:43:37 -0400 Subject: [PATCH 17/20] build: Update shasum --- requirements.txt.sha256 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index b7b7cdcfe..c3b6c087e 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -931f6a5de1720df67b3fd6176f96157518da5f07862a631962a3f87d52852963 requirements.txt +76d192345f84cc7fddb062a2c8907d8266445b853d48c734725a13a34be88c7e requirements.txt From b8b07de34ed9de4be69dfb87e4c3b5d315f57524 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 4 Sep 2024 13:13:33 -0400 Subject: [PATCH 18/20] build: Fix entry points --- .pre-commit-config.yaml | 4 ++-- pyproject.toml | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 234f16c82..6208e2082 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,8 +16,8 @@ repos: hooks: - id: pip-compile name: pip-compile requirements.in - args: [requirements.in, -o, requirements.txt, --no-strip-extras] + args: [requirements.in, -o, requirements.txt] - id: pip-compile name: pip-compile requirements_dev.in - args: [requirements_dev.in, -o, requirements_dev.txt, --no-strip-extras] + args: [requirements_dev.in, -o, requirements_dev.txt] files: ^requirements_dev\.(in|txt)$ diff --git a/pyproject.toml b/pyproject.toml index 7db4c9483..bd3b2c298 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,9 @@ -[build-system] -requires = ["setuptools>=61.2"] -build-backend = "setuptools.build_meta" - -# This project is not intended to be released or used as a Python package. This file only exists for scrapyd-client. [project] name = "kingfisher-collect" version = "0.0.0" -license = {text = "BSD"} + +[project.entry-points.scrapy] +settings = "kingfisher_scrapy.settings" [tool.setuptools.packages.find] exclude = ["tests", "tests.*"] @@ -14,9 +11,6 @@ exclude = ["tests", "tests.*"] [tool.setuptools.package-data] kingfisher_scrapy = ["schema/*.json"] -[options.entry_points] -scrapy = {settings = "kingfisher_scrapy.settings"} - [tool.isort] profile = "black" line_length = 119 From f3d00c13ffdaa74dda47c7240faafe46538b4a7a Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Fri, 6 Sep 2024 00:37:16 -0400 Subject: [PATCH 19/20] ci: Lint requirements. Use official coveralls action. chore: Remove linters from requirements. 
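
The ci.yml run line below keeps the pytest -W filters accumulated earlier in
this series. As a rough sketch (assuming the documented behavior that the
module field is matched as a regex and that later filters take precedence),
the flags correspond approximately to these programmatic filters:

    import warnings

    # -W error
    warnings.filterwarnings("error")
    # -W ignore::ResourceWarning
    warnings.filterwarnings("ignore", category=ResourceWarning)
    # -W ignore::DeprecationWarning:pytest_twisted
    warnings.filterwarnings("ignore", category=DeprecationWarning, module="pytest_twisted")
    # -W ignore::DeprecationWarning:scrapy.core.downloader.webclient
    warnings.filterwarnings("ignore", category=DeprecationWarning,
                            module="scrapy.core.downloader.webclient")
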
--- .github/workflows/ci.yml | 10 ++----- .github/workflows/lint.yml | 18 ++++++++++-- .github/workflows/nonlinux.yml | 4 +-- .pre-commit-config.yaml | 2 +- requirements.txt | 6 ++-- requirements.txt.sha256 | 2 +- requirements_dev.in | 4 --- requirements_dev.txt | 50 ++++------------------------------ 8 files changed, 29 insertions(+), 67 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6d663d20..cc7135258 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,9 +11,7 @@ jobs: python-version: '3.10' cache: pip cache-dependency-path: '**/requirements*.txt' - # Don't install editable projects in the current working directory. - # https://pip.pypa.io/en/latest/reference/pip_install/#install-src - - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt + - run: pip install -r requirements_dev.txt - env: KINGFISHER_COLLECT_DATABASE_URL: postgresql://postgres:postgres@localhost:${{ job.services.postgres.ports[5432] }}/postgres # Use 127.0.0.1 to avoid log messages about IPv6. @@ -22,11 +20,9 @@ jobs: # For requests.post() in KingfisherProcessAPI2._post_synchronous(). # https://github.com/pytest-dev/pytest-twisted/issues/183 # https://github.com/scrapy/scrapy/issues/6450 - run: pytest -W error -W ignore::ResourceWarning -W ignore::DeprecationWarning:pytest_twisted -W ignore::DeprecationWarning:scrapy.core.downloader.webclient -rs --cov kingfisher_scrapy + run: pytest --cov-report=lcov:coverage/lcov.info -W error -W ignore::ResourceWarning -W ignore::DeprecationWarning:pytest_twisted -W ignore::DeprecationWarning:scrapy.core.downloader.webclient -rs --cov kingfisher_scrapy - run: python test_delayed_request_middleware.py - - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: coveralls --service=github + - uses: coverallsapp/github-action@v2 services: postgres: image: postgres:15 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 452ccc3a9..420171e34 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,20 +6,32 @@ jobs: build: if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest + env: + PAT: ${{ secrets.PAT }} steps: - uses: actions/checkout@v4 + with: + token: ${{ secrets.PAT || github.token }} - uses: actions/setup-python@v5 with: python-version: '3.10' cache: pip cache-dependency-path: '**/requirements*.txt' + - id: changed-files + uses: tj-actions/changed-files@v45 + - uses: pre-commit/action@v3.0.1 + continue-on-error: true + with: + extra_args: pip-compile --files ${{ steps.changed-files.outputs.all_changed_files }} + - if: ${{ env.PAT }} + uses: stefanzweifel/git-auto-commit-action@v5 + with: + commit_message: '[github-actions] pre-commit autoupdate' - shell: bash run: curl -s -S --retry 3 $BASEDIR/tests/install.sh | bash - - shell: bash run: curl -s -S --retry 3 $BASEDIR/tests/script.sh | bash - - # Don't install editable projects in the current working directory. - # https://pip.pypa.io/en/latest/reference/pip_install/#install-src - - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt + - run: pip install -r requirements_dev.txt - env: # scrapyd is run as a command in production. scrapyd-client is run as a command for deployment. 
STANDARD_MAINTENANCE_SCRIPTS_IGNORE: scrapyd,scrapyd-client diff --git a/.github/workflows/nonlinux.yml b/.github/workflows/nonlinux.yml index 754a99012..f7bc37b4e 100644 --- a/.github/workflows/nonlinux.yml +++ b/.github/workflows/nonlinux.yml @@ -17,9 +17,7 @@ jobs: - name: Install postgresql (macOS) if: matrix.os == 'macos-latest' run: brew install postgresql - # Don't install editable projects in the current working directory. - # https://pip.pypa.io/en/latest/reference/pip_install/#install-src - - run: pip install --src $GITHUB_WORKSPACE/../src -r requirements_dev.txt + - run: pip install -r requirements_dev.txt - env: CI_SKIP: true # https://github.com/pytest-dev/pytest-twisted/issues/183 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6208e2082..eeca254ed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,4 +20,4 @@ repos: - id: pip-compile name: pip-compile requirements_dev.in args: [requirements_dev.in, -o, requirements_dev.txt] - files: ^requirements_dev\.(in|txt)$ + files: ^requirements(_dev)?\.(in|txt)$ diff --git a/requirements.txt b/requirements.txt index e9de94013..097495967 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements.in -o requirements.txt --no-strip-extras +# uv pip compile requirements.in -o requirements.txt attrs==22.2.0 # via # automat @@ -87,7 +87,7 @@ lxml==5.3.0 # scrapy ocdsextensionregistry==0.2.2 # via ocdskit -ocdskit[perf]==1.1.13 +ocdskit==1.1.13 # via -r requirements.in ocdsmerge==0.7.0 # via ocdskit @@ -214,7 +214,7 @@ w3lib==2.1.1 # scrapyd-client xmltodict==0.12.0 # via flattentool -yapw[perf]==0.1.4 +yapw==0.1.4 # via -r requirements.in zc-lockfile==3.0.post1 # via zodb diff --git a/requirements.txt.sha256 b/requirements.txt.sha256 index c3b6c087e..eaf35ac18 100644 --- a/requirements.txt.sha256 +++ b/requirements.txt.sha256 @@ -1 +1 @@ -76d192345f84cc7fddb062a2c8907d8266445b853d48c734725a13a34be88c7e requirements.txt +3f0f25b383baca3102034710cdf6abdc96d1efd027f9e83ede7781287d057d3f requirements.txt diff --git a/requirements_dev.in b/requirements_dev.in index dd4197d69..b92f0e2ef 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -1,12 +1,8 @@ -r requirements.txt coverage[toml] -coveralls -flake8 -isort ocdsmerge openpyxl pika -pre-commit psycopg2-binary pytest pytest-asyncio diff --git a/requirements_dev.txt b/requirements_dev.txt index e8cc0e73e..dd36975ca 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements_dev.in -o requirements_dev.txt --no-strip-extras +# uv pip compile requirements_dev.in -o requirements_dev.txt attrs==22.2.0 # via # -r requirements.txt @@ -28,8 +28,6 @@ cffi==1.15.0 # -r requirements.txt # cryptography # persistent -cfgv==3.4.0 - # via pre-commit charset-normalizer==3.1.0 # via # -r requirements.txt @@ -42,13 +40,10 @@ contextlib2==0.6.0.post1 # via # -r requirements.txt # schema -coverage[toml]==5.5 +coverage==7.6.1 # via # -r requirements_dev.in - # coveralls # pytest-cov -coveralls==4.0.1 - # via -r requirements_dev.in cryptography==42.0.4 # via # -r requirements.txt @@ -67,12 +62,6 @@ defusedxml==0.7.1 # -r requirements.txt # odfpy # scrapy -distlib==0.3.8 - # via virtualenv -docopt==0.6.2 - # via coveralls -entrypoints==0.3 - # via flake8 et-xmlfile==1.0.1 # via # -r requirements.txt @@ -81,9 +70,6 @@ filelock==3.4.1 # via # -r 
requirements.txt # tldextract - # virtualenv -flake8==3.7.9 - # via -r requirements_dev.in flattentool==0.26.0 # via -r requirements.txt greenlet==3.0.3 @@ -92,8 +78,6 @@ hyperlink==21.0.0 # via # -r requirements.txt # twisted -identify==2.6.0 - # via pre-commit idna==3.7 # via # -r requirements.txt @@ -111,8 +95,6 @@ incremental==24.7.2 # twisted iniconfig==1.1.1 # via pytest -isort==5.7.0 - # via -r requirements_dev.in itemadapter==0.7.0 # via # -r requirements.txt @@ -153,15 +135,11 @@ lxml==5.3.0 # flattentool # parsel # scrapy -mccabe==0.6.1 - # via flake8 -nodeenv==1.9.1 - # via pre-commit ocdsextensionregistry==0.2.2 # via # -r requirements.txt # ocdskit -ocdskit[perf]==1.1.13 +ocdskit==1.1.13 # via -r requirements.txt ocdsmerge==0.7.0 # via @@ -178,10 +156,7 @@ openpyxl==3.0.5 # -r requirements_dev.in # flattentool orjson==3.9.15 - # via - # -r requirements.txt - # ocdskit - # yapw + # via -r requirements.txt packaging==23.1 # via # -r requirements.txt @@ -203,12 +178,8 @@ pika==1.2.0 # -r requirements.txt # -r requirements_dev.in # yapw -platformdirs==3.11.0 - # via virtualenv pluggy==1.3.0 # via pytest -pre-commit==3.8.0 - # via -r requirements_dev.in protego==0.2.1 # via # -r requirements.txt @@ -226,8 +197,6 @@ pyasn1-modules==0.2.7 # via # -r requirements.txt # service-identity -pycodestyle==2.5.0 - # via flake8 pycparser==2.19 # via # -r requirements.txt @@ -238,8 +207,6 @@ pydispatcher==2.0.6 # via # -r requirements.txt # scrapy -pyflakes==2.1.1 - # via flake8 pyopenssl==24.0.0 # via # -r requirements.txt @@ -263,8 +230,6 @@ pytz==2020.1 # via # -r requirements.txt # flattentool -pyyaml==6.0.2 - # via pre-commit queuelib==1.6.2 # via # -r requirements.txt @@ -274,7 +239,6 @@ rarfile==3.1 requests==2.32.2 # via # -r requirements.txt - # coveralls # ocdsextensionregistry # ocdsmerge # requests-cache @@ -331,8 +295,6 @@ tldextract==3.1.2 # via # -r requirements.txt # scrapy -toml==0.10.2 - # via coverage transaction==3.1.0 # via # -r requirements.txt @@ -362,8 +324,6 @@ urllib3==1.26.19 # requests # scrapyd-client # sentry-sdk -virtualenv==20.21.1 - # via pre-commit w3lib==2.1.1 # via # -r requirements.txt @@ -376,7 +336,7 @@ xmltodict==0.12.0 # via # -r requirements.txt # flattentool -yapw[perf]==0.1.4 +yapw==0.1.4 # via -r requirements.txt zc-lockfile==3.0.post1 # via From 5f2735d7c5a654a16bb636d6a231442faf9a7dff Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Fri, 6 Sep 2024 03:19:48 -0400 Subject: [PATCH 20/20] ci: Run shasum early --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 420171e34..5cee382f3 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -17,6 +17,7 @@ jobs: python-version: '3.10' cache: pip cache-dependency-path: '**/requirements*.txt' + - run: shasum -c requirements.txt.sha256 - id: changed-files uses: tj-actions/changed-files@v45 - uses: pre-commit/action@v3.0.1 @@ -36,4 +37,3 @@ jobs: # scrapyd is run as a command in production. scrapyd-client is run as a command for deployment. STANDARD_MAINTENANCE_SCRIPTS_IGNORE: scrapyd,scrapyd-client run: pytest /tmp/test_requirements.py - - run: shasum -c requirements.txt.sha256
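
Throughout the series, requirements.txt.sha256 stores a SHA-256 digest in the
"<hex>  <filename>" format that `shasum -c` verifies (run early in lint.yml as
of the final patch above). A minimal sketch of recomputing that digest in
Python, assuming requirements.txt is read as bytes from the working directory:

    import hashlib

    # Print a line in the same "<hex>  <filename>" format (two spaces)
    # as requirements.txt.sha256.
    with open("requirements.txt", "rb") as f:
        print(f"{hashlib.sha256(f.read()).hexdigest()}  requirements.txt")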