Skip to content

Commit

Permalink
Merge pull request #471 from scrapy/369-deprecation-warnings-v2
Browse files Browse the repository at this point in the history
Scrapy 2.8 support
  • Loading branch information
jpmckinney authored Feb 7, 2023
2 parents 105e36e + 76337cc commit 0b778f6
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 22 deletions.
2 changes: 1 addition & 1 deletion docs/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ The directory where the Scrapy items will be stored.
This option is disabled by default
because you are expected to use a database or a feed exporter.
Setting it to non-empty results in storing scraped item feeds
to the specified directory by overriding the scrapy setting ``FEED_URI``.
to the specified directory by overriding the scrapy setting ``FEEDS``.

.. _jobs_to_keep:

Expand Down
17 changes: 17 additions & 0 deletions docs/news.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,25 @@ Added
~~~~~

- Add ``item_url`` and ``log_url`` to the response from the listjobs.json webservice. (@mxdev88)
- Scrapy 2.8 support. Scrapyd now passes ``LOG_FILE`` and ``FEEDS`` as command-line settings arguments, instead of setting the ``SCRAPY_LOG_FILE`` and ``SCRAPY_FEED_URI`` environment variables.
- Python 3.11 support.

Changed
~~~~~~~

- Rename environment variables to avoid spurious Scrapy deprecation warnings.

- ``SCRAPY_EGG_VERSION`` to ``SCRAPYD_EGG_VERSION``
- ``SCRAPY_FEED_URI`` to ``SCRAPYD_FEED_URI``
- ``SCRAPY_JOB`` to ``SCRAPYD_JOB``
- ``SCRAPY_LOG_FILE`` to ``SCRAPYD_LOG_FILE``
- ``SCRAPY_SLOT`` to ``SCRAPYD_SLOT``
- ``SCRAPY_SPIDER`` to ``SCRAPYD_SPIDER``

.. attention::

These environment variables are undocumented and are not read by Scrapyd itself; they may be removed in future versions. If you rely on any of them, please `report your use in an issue <https://github.com/scrapy/scrapyd/issues>`__.

Removed
~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion integration_tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def req(method, path, auth=None, **kwargs):
for badauth in (None, ("baduser", "badpass")):
response = getattr(requests, method)(url, auth=badauth, **kwargs)

assert response.status_code == 401
assert response.status_code == 401, response.status_code
assert response.text == "Unauthorized"

response = getattr(requests, method)(url, auth=("hello12345", "67890world"), **kwargs)
Expand Down
20 changes: 14 additions & 6 deletions scrapyd/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,29 @@ def __init__(self, config, initenv=os.environ):
self.settings = {}
self.initenv = initenv

def get_settings(self, message):
    """Return per-job Scrapy setting overrides for a crawl *message*.

    Produces ``LOG_FILE`` and/or ``FEEDS`` (a JSON Lines feed) when the
    corresponding ``logs_dir`` / ``items_dir`` options are configured;
    otherwise the returned dict is empty.
    """
    overrides = {}
    if self.logs_dir:
        overrides['LOG_FILE'] = self._get_file(message, self.logs_dir, 'log')
    if self.items_dir:
        feed_uri = self._get_feed_uri(message, 'jl')
        overrides['FEEDS'] = {feed_uri: {'format': 'jsonlines'}}
    return overrides

def get_environment(self, message, slot):
    """Return the OS environment for a crawl subprocess.

    Starts from a copy of ``initenv`` and adds the Scrapyd job metadata.
    All Scrapyd-specific variables use the ``SCRAPYD_`` prefix (not the
    old ``SCRAPY_`` prefix, which triggers spurious Scrapy deprecation
    warnings); ``SCRAPY_PROJECT`` and ``SCRAPY_SETTINGS_MODULE`` keep
    their names because Scrapy itself reads them.
    """
    project = message['_project']
    env = self.initenv.copy()
    # Read by Scrapy to select the project; must keep the SCRAPY_ name.
    env['SCRAPY_PROJECT'] = project
    env['SCRAPYD_SLOT'] = str(slot)
    env['SCRAPYD_SPIDER'] = message['_spider']
    env['SCRAPYD_JOB'] = message['_job']
    if '_version' in message:
        env['SCRAPYD_EGG_VERSION'] = message['_version']
    if project in self.settings:
        env['SCRAPY_SETTINGS_MODULE'] = self.settings[project]
    if self.logs_dir:
        env['SCRAPYD_LOG_FILE'] = self._get_file(message, self.logs_dir, 'log')
    if self.items_dir:
        env['SCRAPYD_FEED_URI'] = self._get_feed_uri(message, 'jl')
    return env

def _get_feed_uri(self, message, ext):
Expand Down
8 changes: 5 additions & 3 deletions scrapyd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ def _wait_for_project(self, slot):
poller.next().addCallback(self._spawn_process, slot)

def _spawn_process(self, message, slot):
e = self.app.getComponent(IEnvironment)
message.setdefault('settings', {})
message['settings'].update(e.get_settings(message))
msg = native_stringify_dict(message, keys_only=False)
project = msg['_project']
args = [sys.executable, '-m', self.runner, 'crawl']
args += get_crawl_args(msg)
e = self.app.getComponent(IEnvironment)
env = e.get_environment(msg, slot)
env = native_stringify_dict(env, keys_only=False)
pp = ScrapyProcessProtocol(slot, project, msg['_spider'],
Expand Down Expand Up @@ -75,8 +77,8 @@ def __init__(self, slot, project, spider, job, env):
self.start_time = datetime.now()
self.end_time = None
self.env = env
self.logfile = env.get('SCRAPY_LOG_FILE')
self.itemsfile = env.get('SCRAPY_FEED_URI')
self.logfile = env.get('SCRAPYD_LOG_FILE')
self.itemsfile = env.get('SCRAPYD_FEED_URI')
self.deferred = defer.Deferred()

def outReceived(self, data):
Expand Down
2 changes: 1 addition & 1 deletion scrapyd/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

@contextmanager
def project_environment(project):
eggversion = os.environ.get('SCRAPY_EGG_VERSION', None)
eggversion = os.environ.get('SCRAPYD_EGG_VERSION', None)
config = Config()
eggstorage_path = config.get(
'eggstorage', 'scrapyd.eggstorage.FilesystemEggStorage'
Expand Down
18 changes: 9 additions & 9 deletions scrapyd/tests/test_environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ def test_get_environment_with_eggfile(self):
env = self.environ.get_environment(msg, slot)

self.assertEqual(env['SCRAPY_PROJECT'], 'mybot')
self.assertEqual(env['SCRAPY_SLOT'], '3')
self.assertEqual(env['SCRAPY_SPIDER'], 'myspider')
self.assertEqual(env['SCRAPY_JOB'], 'ID')
self.assert_(env['SCRAPY_LOG_FILE'].endswith(os.path.join('mybot', 'myspider', 'ID.log')))
if env.get('SCRAPY_FEED_URI'): # Not compulsory
self.assert_(env['SCRAPY_FEED_URI'].startswith('file://{}'.format(os.getcwd())))
self.assert_(env['SCRAPY_FEED_URI'].endswith(os.path.join('mybot', 'myspider', 'ID.jl')))
self.assertEqual(env['SCRAPYD_SLOT'], '3')
self.assertEqual(env['SCRAPYD_SPIDER'], 'myspider')
self.assertEqual(env['SCRAPYD_JOB'], 'ID')
self.assert_(env['SCRAPYD_LOG_FILE'].endswith(os.path.join('mybot', 'myspider', 'ID.log')))
if env.get('SCRAPYD_FEED_URI'): # Not compulsory
self.assert_(env['SCRAPYD_FEED_URI'].startswith('file://{}'.format(os.getcwd())))
self.assert_(env['SCRAPYD_FEED_URI'].endswith(os.path.join('mybot', 'myspider', 'ID.jl')))
self.assertNotIn('SCRAPY_SETTINGS_MODULE', env)

def test_get_environment_with_no_items_dir(self):
Expand All @@ -45,5 +45,5 @@ def test_get_environment_with_no_items_dir(self):
environ = Environment(config, initenv={})
env = environ.get_environment(msg, slot)

self.assertNotIn('SCRAPY_FEED_URI', env)
self.assertNotIn('SCRAPY_LOG_FILE', env)
self.assertNotIn('SCRAPYD_FEED_URI', env)
self.assertNotIn('SCRAPYD_LOG_FILE', env)
2 changes: 1 addition & 1 deletion scrapyd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def get_spider_list(project, runner=None, pythonpath=None, version=''):
if pythonpath:
env['PYTHONPATH'] = pythonpath
if version:
env['SCRAPY_EGG_VERSION'] = version
env['SCRAPYD_EGG_VERSION'] = version
pargs = [sys.executable, '-m', runner, 'list', '-s', 'LOG_STDOUT=0']
proc = Popen(pargs, stdout=PIPE, stderr=PIPE, env=env)
out, err = proc.communicate()
Expand Down

0 comments on commit 0b778f6

Please sign in to comment.