Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Handle umlaut in resource URLs #58

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion bin/travis-run.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/sh -e

nosetests --ckan \
--nologcapture \
--with-pylons=subdir/test.ini \
--with-coverage \
--cover-package=ckanext.xloader \
Expand Down
11 changes: 7 additions & 4 deletions ckanext/xloader/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,13 @@ def _submit_resource(self, resource, user, indent=0):
r=resource))
return
dataset_ref = resource.get('package_name', resource['package_id'])
print('{indent}Submitting /dataset/{dataset}/resource/{r[id]}\n'
'{indent} url={r[url]}\n'
'{indent} format={r[format]}'
.format(dataset=dataset_ref, r=resource, indent=' ' * indent))
print(u'{indent}Submitting /dataset/{dataset}/resource/{r[id]}\n'
u'{indent} url={url}\n'
u'{indent} format={format}'
.format(dataset=dataset_ref,
format=r['format'].decode('utf-8'),
url=r['url'].decode('utf-8'),
indent=' ' * indent))
data_dict = {
'resource_id': resource['id'],
'ignore_hash': True,
Expand Down
2 changes: 1 addition & 1 deletion ckanext/xloader/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def xloader_data_into_datastore_(input, job_dict):
)

# fetch the resource data
logger.info('Fetching from: {0}'.format(url))
logger.info(u'Fetching from: {0}'.format(url))
try:
headers = {}
if resource.get('url_type') == 'upload':
Expand Down
11 changes: 11 additions & 0 deletions ckanext/xloader/tests/samples/umlaut_name_äöü.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
SET_ID,SET_NAME,INDIKATOR_ID,INDIKATOR_NAME,INDIKATOR_BESCHREIB,KOERPERSCHAFT_ID,KOERPERSCHAFT_NAME,JAHR,VALUE
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",433,"Berufswahlschule Bezirk Horgen",1995,192230
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",440,"Schulpsychologischer Dienst des Bezirks Horgen",1995,97576
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",456,"Gemeindezentrum Brüelmatt",1995,128003
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",474,"Schulzweckverband Bezirk Affoltern",1995,1074294
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",510,"Berufswahlschule Limmattal (BWL)",1995,338128
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",841,"Fürsorgeverband Weiningen",1996,335003
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",6,"Friedhofverband Weiningen",1996,287025
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",7,"Seniorenzentrum Im Morgen Weiningen",1996,899498
6,"Erfolgsrechnung",101,"31 Sachaufwand [Fr.]","Sachaufwand",10,"Regionale Tierkörpersammelstelle Regensdorf",1996,32213

145 changes: 99 additions & 46 deletions ckanext/xloader/tests/test_jobs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: utf-8

import os
import json
import random
Expand All @@ -21,47 +23,49 @@

SOURCE_URL = 'http://www.example.com/static/file'

def mock_actions(func):
'''
Decorator that mocks actions used by these tests
Based on ckan.test.helpers.mock_action
'''
def wrapper(*args, **kwargs):
# Mock CKAN's resource_show API
from ckan.logic import get_action as original_get_action

def side_effect(called_action_name):
if called_action_name == 'resource_show':
def mock_resource_show(context, data_dict):
return {
'id': data_dict['id'],
'name': 'short name',
'url': SOURCE_URL,
'format': 'CSV',
'package_id': 'test-pkg',
}
return mock_resource_show
elif called_action_name == 'package_show':
def mock_package_show(context, data_dict):
return {
'id': data_dict['id'],
'name': 'pkg-name',
}
return mock_package_show
else:
return original_get_action(called_action_name)
try:
with mock.patch('ckanext.xloader.jobs.get_action') as mock_get_action:
mock_get_action.side_effect = side_effect

return_value = func(*args, **kwargs)
finally:
pass
# Make sure to stop the mock, even with an exception
# mock_action.stop()
return return_value

return make_decorator(func)(wrapper)
def mock_actions(resource_url=None):
def decorator(func):
'''
Decorator that mocks actions used by these tests
Based on ckan.test.helpers.mock_action
'''
def wrapper(*args, **kwargs):
# Mock CKAN's resource_show API
from ckan.logic import get_action as original_get_action

def side_effect(called_action_name):
if called_action_name == 'resource_show':
def mock_resource_show(context, data_dict):
return {
'id': data_dict['id'],
'name': 'short name',
'url': resource_url,
'format': 'CSV',
'package_id': 'test-pkg',
}
return mock_resource_show
elif called_action_name == 'package_show':
def mock_package_show(context, data_dict):
return {
'id': data_dict['id'],
'name': 'pkg-name',
}
return mock_package_show
else:
return original_get_action(called_action_name)
try:
with mock.patch('ckanext.xloader.jobs.get_action') as mock_get_action:
mock_get_action.side_effect = side_effect

return_value = func(*args, **kwargs)
finally:
pass
# Make sure to stop the mock, even with an exception
# mock_action.stop()
return return_value

return make_decorator(func)(wrapper)
return decorator


class TestxloaderDataIntoDatastore(util.PluginsMixin):
Expand All @@ -87,7 +91,8 @@ def teardown_class(cls):
connection.close()

def register_urls(self, filename='simple.csv',
content_type='application/csv'):
content_type='application/csv',
resource_url=None):
"""Mock some test URLs with responses.

Mocks some URLs related to a data file and a CKAN resource that
Expand All @@ -98,10 +103,13 @@ def register_urls(self, filename='simple.csv',
resource_show URL for the resource that contains the data file

"""
if not resource_url:
resource_url = SOURCE_URL

responses.add_passthru(config['solr_url'])

# A URL that just returns a static file
responses.add(responses.GET, SOURCE_URL,
responses.add(responses.GET, resource_url,
body=get_sample_file(filename),
content_type=content_type)

Expand Down Expand Up @@ -157,7 +165,7 @@ def get_load_logs(self, task_id):
.where(logs.c.job_id == task_id))
return Logs(result.fetchall())

@mock_actions
@mock_actions(resource_url=SOURCE_URL)
@responses.activate
def test_simple_csv(self):
# Test not only the load and xloader_hook is called at the end
Expand Down Expand Up @@ -216,7 +224,7 @@ def test_simple_csv(self):
eq_(job['status'], u'complete')
eq_(job['error'], None)

@mock_actions
@mock_actions(resource_url=SOURCE_URL)
@responses.activate
def test_umlaut_and_extra_comma(self):
self.register_urls(filename='umlaut_and_extra_comma.csv')
Expand Down Expand Up @@ -260,7 +268,52 @@ def test_umlaut_and_extra_comma(self):
eq_(job['status'], u'complete')
eq_(job['error'], None)

@mock_actions
@mock_actions(resource_url=u'http://example.com/umlaut_name_äöü.csv')
@responses.activate
def test_resource_url_with_umlaut(self):
# test that xloader can handle URLs with umlauts
# e.g. http://www.web.statistik.zh.ch/ogd/data/KANTON_ZUERICH_gpfi_Jahresrechung_Zweckverbände.csv
self.register_urls(
filename=u'umlaut_name_äöü.csv',
resource_url=u'http://example.com/umlaut_name_%C3%A4%C3%B6%C3%BC.csv'
)
data = {
'api_key': self.api_key,
'job_type': 'xloader_to_datastore',
'result_url': self.callback_url,
'metadata': {
'ckan_url': 'http://%s/' % self.host,
'resource_id': self.resource_id
}
}
job_id = 'test{}'.format(random.randint(0, 1e5))

with mock.patch('ckanext.xloader.jobs.set_datastore_active_flag') \
as mocked_set_datastore_active_flag:
# in tests we call jobs directly, rather than use rq, so mock
# get_current_job()
with mock.patch('ckanext.xloader.jobs.get_current_job',
return_value=mock.Mock(id=job_id)):
result = jobs.xloader_data_into_datastore(data)
assert result is None, jobs_db.get_job(job_id)['error']['message'].decode('utf-8')

# Check it said it was successful
eq_(responses.calls[-1].request.url, 'http://www.ckan.org/api/3/action/xloader_hook')
job_dict = json.loads(responses.calls[-1].request.body)
assert job_dict['status'] == u'complete', job_dict
eq_(job_dict,
{u'metadata': {u'ckan_url': u'http://www.ckan.org/',
u'resource_id': u'foo-bar-42'},
u'status': u'complete'})

logs = self.get_load_logs(job_id)
logs.assert_no_errors()

job = jobs_db.get_job(job_id)
eq_(job['status'], u'complete')
eq_(job['error'], None)

@mock_actions(resource_url=SOURCE_URL)
@responses.activate
def test_first_request_is_202_pending_response(self):
# when you first get the CSV it returns this 202 response, which is
Expand Down