From 6772c62d48acf3254e4798ff1462be8c2aad37bf Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 10 Sep 2020 16:20:10 +0200 Subject: [PATCH 001/257] drop 2.7 and add 3.8 to travis test matrix --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1c692d69..bf869142 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,9 @@ language: python sudo: required dist: "xenial" python: - - "2.7" - "3.6" - "3.7" + - "3.8" addons: apt: packages: From 147954e42efdd457338de043659b8189079dbc66 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 17 Sep 2020 14:21:08 +0200 Subject: [PATCH 002/257] hex2uuid formats UUIDs This allows UUIDs to be recognized by databases with a UUID data type, like PostgreSQL and SQL Server. --- README.md | 1 + commcare_export/env.py | 22 ++++++++++++++++++++++ tests/test_env.py | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 tests/test_env.py diff --git a/README.md b/README.md index f3ef96be..63b7cf75 100644 --- a/README.md +++ b/README.md @@ -387,6 +387,7 @@ List of builtin functions: | str2date | Convert string to date | | | bool2int | Convert boolean to integer (0, 1) | | | str2num | Parse string as a number | | +| hex2uuid | Parse a hex UUID, and format it into hyphen-separated groups | | | substr | Returns substring indexed by [first arg, second arg), zero-indexed. | substr(2, 5) of 'abcdef' = 'cde' | | selected-at | Returns the Nth word in a string. N is zero-indexed. | selected-at(3) - return 4th word | | selected | Returns True if the given word is in the value. | selected(fever) | diff --git a/commcare_export/env.py b/commcare_export/env.py index 4f8d675c..eef92101 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -2,6 +2,7 @@ import hashlib import json +import uuid from datetime import datetime import operator import pytz @@ -355,6 +356,26 @@ def json2str(val): return +@unwrap('val') +def hex2uuid(val): + """ + Renders a hex UUID in hyphen-separated groups + + >>> hex2uuid('00a3e0194ce1458794c50971dee2de22') + '00a3e019-4ce1-4587-94c5-0971dee2de22' + >>> hex2uuid(0x00a3e0194ce1458794c50971dee2de22) + '00a3e019-4ce1-4587-94c5-0971dee2de22' + """ + if not val: + return None + if isinstance(val, int): + val = hex(val) + try: + return str(uuid.UUID(val)) + except ValueError: + return None + + def join(*args): args = [unwrap_val(arg)for arg in args] try: @@ -442,6 +463,7 @@ def __init__(self, d=None): 'str2num': str2num, 'str2date': str2date, 'json2str': json2str, + 'hex2uuid': hex2uuid, 'selected': selected, 'selected-at': selected_at, 'count-selected': count_selected, diff --git a/tests/test_env.py b/tests/test_env.py new file mode 100644 index 00000000..8329c840 --- /dev/null +++ b/tests/test_env.py @@ -0,0 +1,22 @@ +import doctest + +from commcare_export.env import hex2uuid + + +class TestHex2UUID: + def test_invalid_hex_int(self): + assert hex2uuid(0xf00) is None + + def test_invalid_hex_str(self): + assert hex2uuid('f00') is None + + def test_uuid(self): + assert hex2uuid('00a3e019-4ce1-4587-94c5-0971dee2de22') \ + == '00a3e019-4ce1-4587-94c5-0971dee2de22' + + +def test_doctests(): + import commcare_export.env + + results = doctest.testmod(commcare_export.env) + assert results.failed == 0 From 5b18307a27454e8d301094d8d18ac650eaece434 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 17 Sep 2020 15:41:49 +0200 Subject: [PATCH 003/257] Move tests --- tests/test_env.py | 17 +---------------- tests/test_minilinq.py | 3 +++ 2 files changed, 4 
insertions(+), 16 deletions(-) diff --git a/tests/test_env.py b/tests/test_env.py index 8329c840..0e1ea718 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,22 +1,7 @@ import doctest - -from commcare_export.env import hex2uuid - - -class TestHex2UUID: - def test_invalid_hex_int(self): - assert hex2uuid(0xf00) is None - - def test_invalid_hex_str(self): - assert hex2uuid('f00') is None - - def test_uuid(self): - assert hex2uuid('00a3e019-4ce1-4587-94c5-0971dee2de22') \ - == '00a3e019-4ce1-4587-94c5-0971dee2de22' +import commcare_export.env def test_doctests(): - import commcare_export.env - results = doctest.testmod(commcare_export.env) assert results.failed == 0 diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 6d257c13..f8309028 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -77,6 +77,9 @@ def test_eval_collapsed_list(self): assert Apply(Reference("str2date"), Literal('2015-01-01T18:32:57.001200Z')).eval(env) == datetime(2015, 1, 1, 18, 32, 57) assert Apply(Reference("str2date"), Literal(u'日'.encode('utf8'))).eval(env) == None assert Apply(Reference("str2date"), Literal(u'日')).eval(env) == None + assert Apply(Reference("hex2uuid"), Literal(0xf00)).eval(env) == None + assert Apply(Reference("hex2uuid"), Literal('f00')).eval(env) == None + assert Apply(Reference("hex2uuid"), Literal('00a3e019-4ce1-4587-94c5-0971dee2de22')).eval(env) == '00a3e019-4ce1-4587-94c5-0971dee2de22' assert Apply(Reference("selected-at"), Literal('a b c'), Literal('1')).eval(env) == 'b' assert Apply(Reference("selected-at"), Literal(u'a b 日'), Literal('-1')).eval(env) == u'日' assert Apply(Reference("selected-at"), Literal('a b c'), Literal('5')).eval(env) is None From 9edbff7eac400f93649f6bd359cf14c35edaf8a8 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 17 Sep 2020 19:36:50 +0200 Subject: [PATCH 004/257] Rename to "format-uuid" --- README.md | 2 +- commcare_export/env.py | 8 ++++---- tests/test_minilinq.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 63b7cf75..7ff892da 100644 --- a/README.md +++ b/README.md @@ -387,7 +387,7 @@ List of builtin functions: | str2date | Convert string to date | | | bool2int | Convert boolean to integer (0, 1) | | | str2num | Parse string as a number | | -| hex2uuid | Parse a hex UUID, and format it into hyphen-separated groups | | +| format-uuid | Parse a hex UUID, and format it into hyphen-separated groups | | | substr | Returns substring indexed by [first arg, second arg), zero-indexed. | substr(2, 5) of 'abcdef' = 'cde' | | selected-at | Returns the Nth word in a string. N is zero-indexed. | selected-at(3) - return 4th word | | selected | Returns True if the given word is in the value. 
| selected(fever) | diff --git a/commcare_export/env.py b/commcare_export/env.py index eef92101..2f0e23e5 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -357,13 +357,13 @@ def json2str(val): @unwrap('val') -def hex2uuid(val): +def format_uuid(val): """ Renders a hex UUID in hyphen-separated groups - >>> hex2uuid('00a3e0194ce1458794c50971dee2de22') + >>> format_uuid('00a3e0194ce1458794c50971dee2de22') '00a3e019-4ce1-4587-94c5-0971dee2de22' - >>> hex2uuid(0x00a3e0194ce1458794c50971dee2de22) + >>> format_uuid(0x00a3e0194ce1458794c50971dee2de22) '00a3e019-4ce1-4587-94c5-0971dee2de22' """ if not val: @@ -463,7 +463,7 @@ def __init__(self, d=None): 'str2num': str2num, 'str2date': str2date, 'json2str': json2str, - 'hex2uuid': hex2uuid, + 'format-uuid': format_uuid, 'selected': selected, 'selected-at': selected_at, 'count-selected': count_selected, diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index f8309028..4456b648 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -77,9 +77,9 @@ def test_eval_collapsed_list(self): assert Apply(Reference("str2date"), Literal('2015-01-01T18:32:57.001200Z')).eval(env) == datetime(2015, 1, 1, 18, 32, 57) assert Apply(Reference("str2date"), Literal(u'日'.encode('utf8'))).eval(env) == None assert Apply(Reference("str2date"), Literal(u'日')).eval(env) == None - assert Apply(Reference("hex2uuid"), Literal(0xf00)).eval(env) == None - assert Apply(Reference("hex2uuid"), Literal('f00')).eval(env) == None - assert Apply(Reference("hex2uuid"), Literal('00a3e019-4ce1-4587-94c5-0971dee2de22')).eval(env) == '00a3e019-4ce1-4587-94c5-0971dee2de22' + assert Apply(Reference("format-uuid"), Literal(0xf00)).eval(env) == None + assert Apply(Reference("format-uuid"), Literal('f00')).eval(env) == None + assert Apply(Reference("format-uuid"), Literal('00a3e019-4ce1-4587-94c5-0971dee2de22')).eval(env) == '00a3e019-4ce1-4587-94c5-0971dee2de22' assert Apply(Reference("selected-at"), Literal('a b c'), Literal('1')).eval(env) == 'b' assert Apply(Reference("selected-at"), Literal(u'a b 日'), Literal('-1')).eval(env) == u'日' assert Apply(Reference("selected-at"), Literal('a b c'), Literal('5')).eval(env) is None From 6b9b3f9936e4ca300774c7793fef78c3d0989b49 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Sun, 27 Sep 2020 11:04:41 -0400 Subject: [PATCH 005/257] Replaced server_date_modified with inserted_at for cases --- commcare_export/commcare_minilinq.py | 4 ++-- tests/test_cli.py | 2 +- tests/test_commcare_hq_client.py | 12 ++++++------ tests/test_commcare_minilinq.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 78e8660e..8942a763 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -76,7 +76,7 @@ def __call__(self, since, until): resource_since_params = { 'form': FormFilterSinceParams(), - 'case': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end'), + 'case': SimpleSinceParams('inserted_at_start', 'inserted_at_end'), 'user': None, 'location': None, 'application': None, @@ -87,7 +87,7 @@ def __call__(self, since, until): def get_paginator(resource, page_size=1000): return { 'form': DatePaginator('form', ['server_modified_on','received_on'], page_size), - 'case': DatePaginator('case', 'server_date_modified', page_size), + 'case': DatePaginator('case', 'inserted_at', page_size), 'user': SimplePaginator('user', page_size), 'location': SimplePaginator('location', 
page_size), 'application': SimplePaginator('application', page_size), diff --git a/tests/test_cli.py b/tests/test_cli.py index 6835cc85..b8fffeef 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -62,7 +62,7 @@ def mock_hq_client(include_parent): ], 'case': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified'}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'}, [ {'id': 'case1'}, {'id': 'case2'}, diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 0b9ba172..b20e78c0 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -43,7 +43,7 @@ def _get_results(self, params): if not params: return { 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 2}, - 'objects': [{'id': 1, 'foo': 1, 'server_date_modified': '2017-01-01T15:36:22Z'}] + 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '2017-01-01T15:36:22Z'}] } else: since_query_param =resource_since_params['case'].start_param @@ -57,22 +57,22 @@ def _get_results(self, params): class FakeRepeatedDateCaseSession(FakeSession): # Model the case where there are as many or more cases with the same - # server_date_modified than the batch size (2), so the client requests + # inserted_at than the batch size (2), so the client requests # the same set of cases in a loop. def _get_results(self, params): if not params: return { 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, - 'objects': [{'id': 1, 'foo': 1, 'server_date_modified': '2017-01-01T15:36:22Z'}, - {'id': 2, 'foo': 2, 'server_date_modified': '2017-01-01T15:36:22Z'}] + 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '2017-01-01T15:36:22Z'}, + {'id': 2, 'foo': 2, 'inserted_at': '2017-01-01T15:36:22Z'}] } else: since_query_param =resource_since_params['case'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' return { 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, - 'objects': [{'id': 1, 'foo': 1, 'server_date_modified': '2017-01-01T15:36:22Z'}, - {'id': 2, 'foo': 2, 'server_date_modified': '2017-01-01T15:36:22Z'}] + 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '2017-01-01T15:36:22Z'}, + {'id': 2, 'foo': 2, 'inserted_at': '2017-01-01T15:36:22Z'}] } diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index 425bb928..baada2fe 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -48,7 +48,7 @@ def die(msg): raise Exception(msg) 'case': [ ( - {'limit': 1000, 'type': 'foo', 'order_by': 'server_date_modified'}, + {'limit': 1000, 'type': 'foo', 'order_by': 'inserted_at'}, [ { 'x': 1 }, { 'x': 2 }, From 9321587eaf148b84eb87470176c55a309d4d6835 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Sun, 27 Sep 2020 11:05:33 -0400 Subject: [PATCH 006/257] Replaced server_modified_on with inserted_at for forms --- commcare_export/commcare_minilinq.py | 6 +++--- tests/009_expected_form_data.csv | 2 +- tests/009b_expected_form_1_data.csv | 2 +- tests/009b_expected_form_2_data.csv | 2 +- tests/test_cli.py | 8 ++++---- tests/test_commcare_hq_client.py | 4 ++-- tests/test_commcare_minilinq.py | 8 ++++---- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 8942a763..b4a32188 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -41,7 +41,7 @@ def __call__(self, since, until): range_expression['lte'] = until.isoformat() 
server_modified_missing = {"missing": { - "field": "server_modified_on", "null_value": True, "existence": True} + "field": "inserted_at", "null_value": True, "existence": True} } query = json.dumps({ 'filter': { @@ -53,7 +53,7 @@ def __call__(self, since, until): }, { "range": { - "server_modified_on": range_expression + "inserted_at": range_expression } } ] @@ -86,7 +86,7 @@ def __call__(self, since, until): def get_paginator(resource, page_size=1000): return { - 'form': DatePaginator('form', ['server_modified_on','received_on'], page_size), + 'form': DatePaginator('form', ['inserted_at','received_on'], page_size), 'case': DatePaginator('case', 'inserted_at', page_size), 'user': SimplePaginator('user', page_size), 'location': SimplePaginator('location', page_size), diff --git a/tests/009_expected_form_data.csv b/tests/009_expected_form_data.csv index 14960175..febbb0f9 100644 --- a/tests/009_expected_form_data.csv +++ b/tests/009_expected_form_data.csv @@ -1,4 +1,4 @@ -id,name,received_on,server_modified_on +id,name,received_on,inserted_at 3a8776b3-b613-465f-8d2c-431972597222,Sheel,2012-04-24T05:13:01.000000Z,2012-04-24T05:13:01.000000Z e56abced-bf46-4739-af88-0ec644645b9b,Michel ,2012-04-25T07:02:09.000000Z,2012-04-25T07:02:09.000000Z 4bbd52c6-cef7-41d7-aec8-4a4050c47897,Dionisia,2012-04-25T14:07:05.000000Z,2012-04-25T14:07:05.000000Z diff --git a/tests/009b_expected_form_1_data.csv b/tests/009b_expected_form_1_data.csv index b460d85b..eb407c69 100644 --- a/tests/009b_expected_form_1_data.csv +++ b/tests/009b_expected_form_1_data.csv @@ -1,4 +1,4 @@ -id,name,received_on,server_modified_on +id,name,received_on,inserted_at 3a8776b3-b613-465f-8d2c-431972597222,Sheel,2012-04-24T05:13:01.000000Z,2012-04-24T05:13:01.000000Z e56abced-bf46-4739-af88-0ec644645b9b,Michel ,2012-04-25T07:02:09.000000Z,2012-04-25T07:02:09.000000Z 4bbd52c6-cef7-41d7-aec8-4a4050c47897,Dionisia,2012-04-25T14:07:05.000000Z,2012-04-25T14:07:05.000000Z diff --git a/tests/009b_expected_form_2_data.csv b/tests/009b_expected_form_2_data.csv index c27897ad..03ac588b 100644 --- a/tests/009b_expected_form_2_data.csv +++ b/tests/009b_expected_form_2_data.csv @@ -1,4 +1,4 @@ -id,name,received_on,server_modified_on +id,name,received_on,inserted_at bbe20343-e00b-42c2-bede-b86342ed46dd,New Form,2012-04-02T18:38:50.000000Z,2012-04-02T18:38:50.000000Z 0492cb9d-b8e7-4628-9aff-c772a83b1c5b,New Form,2012-04-03T14:51:46.000000Z,2012-04-03T14:51:46.000000Z 162b6042-b96b-4008-8673-1d38b5771307,New Form,2012-04-18T20:07:22.000000Z,2012-04-18T20:07:22.000000Z diff --git a/tests/test_cli.py b/tests/test_cli.py index b8fffeef..baa2e061 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -51,7 +51,7 @@ def mock_hq_client(include_parent): return MockCommCareHqClient({ 'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['server_modified_on', 'received_on']}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['inserted_at', 'received_on']}, [ {'id': 1, 'form': {'name': 'f1', 'case': {'@case_id': 'c1'}}, 'metadata': {'userID': 'id1'}}, @@ -367,7 +367,7 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_ def _check_data(self, writer, expected, table_name): actual = [ list(row) for row in - writer.engine.execute("SELECT id, name, received_on, server_modified_on FROM {}".format(table_name)) + writer.engine.execute("SELECT id, name, received_on, inserted_at FROM {}".format(table_name)) ] message = '' @@ -402,7 +402,7 @@ def _check_checkpoints(self, caplog, expected): CONFLICTING_TYPES_CLIENT = MockCommCareHqClient({ 
'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['server_modified_on', 'received_on']}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['inserted_at', 'received_on']}, [ {'id': 1, 'form': {'name': 'n1', 'count': 10}}, {'id': 2, 'form': {'name': 'f2', 'count': 'abc'}} @@ -452,7 +452,7 @@ def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capf DATA_TYPES_CLIENT = MockCommCareHqClient({ 'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['server_modified_on', 'received_on']}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['inserted_at', 'received_on']}, [ {'id': 1, 'form': {}}, {'id': 2, 'form': {}} diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index b20e78c0..ba352a30 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -89,12 +89,12 @@ def _get_results(self, params): search = json.loads(params['_search']) _or = search['filter']['or'] received_on = _or[1]['and'][1]['range']['received_on']['gte'] - modified_on = _or[0]['and'][1]['range']['server_modified_on']['gte'] + modified_on = _or[0]['and'][1]['range']['inserted_at']['gte'] if received_on == modified_on == since1: # include ID=1 again to make sure it gets filtered out return { 'meta': { 'next': '?offset=2', 'offset': 0, 'limit': 1, 'total_count': 3 }, - 'objects': [{'id': 1, 'foo': 1}, {'id': 2, 'foo': 2, 'server_modified_on': '{}Z'.format(since2)}] + 'objects': [{'id': 1, 'foo': 1}, {'id': 2, 'foo': 2, 'inserted_at': '{}Z'.format(since2)}] } elif received_on == modified_on == since2: return { diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index baada2fe..66db37c3 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -25,11 +25,11 @@ def die(msg): raise Exception(msg) client = MockCommCareHqClient({ 'form': [ ( - {'limit': 1000, 'filter': 'test1', 'order_by': ['server_modified_on', 'received_on']}, + {'limit': 1000, 'filter': 'test1', 'order_by': ['inserted_at', 'received_on']}, [1, 2, 3], ), ( - {'limit': 1000, 'filter': 'test2', 'order_by': ['server_modified_on', 'received_on']}, + {'limit': 1000, 'filter': 'test2', 'order_by': ['inserted_at', 'received_on']}, [ { 'x': [{ 'y': 1 }, {'y': 2}] }, { 'x': [{ 'y': 3 }, {'z': 4}] }, @@ -37,11 +37,11 @@ def die(msg): raise Exception(msg) ] ), ( - {'limit': 1000, 'filter': 'laziness-test', 'order_by': ['server_modified_on', 'received_on']}, + {'limit': 1000, 'filter': 'laziness-test', 'order_by': ['inserted_at', 'received_on']}, (i if i < 5 else die('Not lazy enough') for i in range(12)) ), ( - {'limit': 1000, 'cases__full': 'true', 'order_by': ['server_modified_on', 'received_on']}, + {'limit': 1000, 'cases__full': 'true', 'order_by': ['inserted_at', 'received_on']}, [1, 2, 3, 4, 5] ), ], From 39c06dfd90bf20fa879a9a8ebfdbd301ee64b227 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Tue, 29 Sep 2020 10:31:47 -0400 Subject: [PATCH 007/257] Simplified FormFilterSinceParams query --- commcare_export/commcare_minilinq.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index b4a32188..aa2183e0 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -40,16 +40,15 @@ def __call__(self, since, until): if until: range_expression['lte'] = until.isoformat() - server_modified_missing = {"missing": { - "field": "inserted_at", "null_value": True, "existence": True} - } query = json.dumps({ 'filter': 
{ "or": [ { "and": [ { - "not": server_modified_missing + "not": { + "missing": "inserted_at" + } }, { "range": { From dbfb8fa0c5d08c9c15435f31d2455d4a38cffc15 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Tue, 29 Sep 2020 12:21:18 -0400 Subject: [PATCH 008/257] Replaced other usage of server_modified_missing --- commcare_export/commcare_minilinq.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index aa2183e0..5bde73d5 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -59,7 +59,9 @@ def __call__(self, since, until): }, { "and": [ - server_modified_missing, + { + "missing": "inserted_at" + }, { "range": { "received_on": range_expression From ab9a9f4e20db77a21a79b68df29c214da55bc6c8 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Wed, 30 Sep 2020 15:22:00 -0400 Subject: [PATCH 009/257] Removed FormFilterSinceParams --- commcare_export/commcare_minilinq.py | 46 +--------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 5bde73d5..b01c1f3a 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -31,52 +31,8 @@ def __call__(self, since, until): return params -class FormFilterSinceParams(object): - def __call__(self, since, until): - range_expression = {} - if since: - range_expression['gte'] = since.isoformat() - - if until: - range_expression['lte'] = until.isoformat() - - query = json.dumps({ - 'filter': { - "or": [ - { - "and": [ - { - "not": { - "missing": "inserted_at" - } - }, - { - "range": { - "inserted_at": range_expression - } - } - ] - }, - { - "and": [ - { - "missing": "inserted_at" - }, - { - "range": { - "received_on": range_expression - } - } - ] - } - ] - }}) - - return {'_search': query} - - resource_since_params = { - 'form': FormFilterSinceParams(), + 'form': SimpleSinceParams('inserted_at_start', 'inserted_at_end'), 'case': SimpleSinceParams('inserted_at_start', 'inserted_at_end'), 'user': None, 'location': None, From 7f03f7450ca02ea51433843035559fb840a493c3 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Thu, 1 Oct 2020 12:31:37 -0400 Subject: [PATCH 010/257] Removed unneeded secondary sort --- commcare_export/commcare_minilinq.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index b01c1f3a..0967a3c7 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -43,7 +43,7 @@ def __call__(self, since, until): def get_paginator(resource, page_size=1000): return { - 'form': DatePaginator('form', ['inserted_at','received_on'], page_size), + 'form': DatePaginator('form', 'inserted_at', page_size), 'case': DatePaginator('case', 'inserted_at', page_size), 'user': SimplePaginator('user', page_size), 'location': SimplePaginator('location', page_size), @@ -57,7 +57,7 @@ class CommCareHqEnv(DictEnv): An environment providing primitives for pulling from the CommCareHq API. 
""" - + def __init__(self, commcare_hq_client, until=None, page_size=1000): self.commcare_hq_client = commcare_hq_client self.until = until From 6db6e912b18a1c278ddc8859f7c3a350388cf066 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Fri, 2 Oct 2020 12:57:25 -0400 Subject: [PATCH 011/257] Removed received_on from mock clients now that's it's not used to sort --- tests/test_cli.py | 8 ++++---- tests/test_commcare_minilinq.py | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index baa2e061..7fdea637 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -51,7 +51,7 @@ def mock_hq_client(include_parent): return MockCommCareHqClient({ 'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['inserted_at', 'received_on']}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'}, [ {'id': 1, 'form': {'name': 'f1', 'case': {'@case_id': 'c1'}}, 'metadata': {'userID': 'id1'}}, @@ -367,7 +367,7 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_ def _check_data(self, writer, expected, table_name): actual = [ list(row) for row in - writer.engine.execute("SELECT id, name, received_on, inserted_at FROM {}".format(table_name)) + writer.engine.execute("SELECT id, name, inserted_at FROM {}".format(table_name)) ] message = '' @@ -402,7 +402,7 @@ def _check_checkpoints(self, caplog, expected): CONFLICTING_TYPES_CLIENT = MockCommCareHqClient({ 'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['inserted_at', 'received_on']}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'}, [ {'id': 1, 'form': {'name': 'n1', 'count': 10}}, {'id': 2, 'form': {'name': 'f2', 'count': 'abc'}} @@ -452,7 +452,7 @@ def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capf DATA_TYPES_CLIENT = MockCommCareHqClient({ 'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': ['inserted_at', 'received_on']}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'}, [ {'id': 1, 'form': {}}, {'id': 2, 'form': {}} diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index 66db37c3..08c83da2 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -21,15 +21,15 @@ def check_case(self, val, result): def test_eval(self): def die(msg): raise Exception(msg) - + client = MockCommCareHqClient({ 'form': [ ( - {'limit': 1000, 'filter': 'test1', 'order_by': ['inserted_at', 'received_on']}, + {'limit': 1000, 'filter': 'test1', 'order_by': 'inserted_at'}, [1, 2, 3], ), ( - {'limit': 1000, 'filter': 'test2', 'order_by': ['inserted_at', 'received_on']}, + {'limit': 1000, 'filter': 'test2', 'order_by': 'inserted_at'}, [ { 'x': [{ 'y': 1 }, {'y': 2}] }, { 'x': [{ 'y': 3 }, {'z': 4}] }, @@ -37,11 +37,11 @@ def die(msg): raise Exception(msg) ] ), ( - {'limit': 1000, 'filter': 'laziness-test', 'order_by': ['inserted_at', 'received_on']}, + {'limit': 1000, 'filter': 'laziness-test', 'order_by': 'inserted_at'}, (i if i < 5 else die('Not lazy enough') for i in range(12)) ), ( - {'limit': 1000, 'cases__full': 'true', 'order_by': ['inserted_at', 'received_on']}, + {'limit': 1000, 'cases__full': 'true', 'order_by': 'inserted_at'}, [1, 2, 3, 4, 5] ), ], @@ -56,7 +56,7 @@ def die(msg): raise Exception(msg) ] ) ], - + 'user': [ ( {'limit': 1000}, @@ -113,7 +113,7 @@ def die(msg): raise Exception(msg) Literal({'type': 'foo'})), body=Reference('x')).eval(env), [1, 2, 3]) - + self.check_case(FlatMap(source=Apply(Reference('api_data'), Literal('user'), 
Literal(checkpoint_manager), From 9438bf31baedb09449a9c47dec3e7eb231e653b7 Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Fri, 2 Oct 2020 15:53:37 -0400 Subject: [PATCH 012/257] Updated FakeDateFormSession now that FormFilterSinceParams is gone --- tests/test_commcare_hq_client.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index ba352a30..042e5396 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -67,7 +67,7 @@ def _get_results(self, params): {'id': 2, 'foo': 2, 'inserted_at': '2017-01-01T15:36:22Z'}] } else: - since_query_param =resource_since_params['case'].start_param + since_query_param = resource_since_params['case'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' return { 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, @@ -83,26 +83,24 @@ def _get_results(self, params): if not params: return { 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 3}, - 'objects': [{'id': 1, 'foo': 1, 'received_on': '{}Z'.format(since1)}] + 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '{}Z'.format(since1)}] } else: - search = json.loads(params['_search']) - _or = search['filter']['or'] - received_on = _or[1]['and'][1]['range']['received_on']['gte'] - modified_on = _or[0]['and'][1]['range']['inserted_at']['gte'] - if received_on == modified_on == since1: + since_query_param = resource_since_params['form'].start_param + inserted_at = params[since_query_param] + if inserted_at == since1: # include ID=1 again to make sure it gets filtered out return { 'meta': { 'next': '?offset=2', 'offset': 0, 'limit': 1, 'total_count': 3 }, 'objects': [{'id': 1, 'foo': 1}, {'id': 2, 'foo': 2, 'inserted_at': '{}Z'.format(since2)}] } - elif received_on == modified_on == since2: + elif inserted_at == since2: return { 'meta': { 'next': None, 'offset': 0, 'limit': 1, 'total_count': 3 }, 'objects': [{'id': 3, 'foo': 3}] } else: - raise Exception(modified_on) + raise Exception(inserted_at) class TestCommCareHqClient(unittest.TestCase): From 2f94eeb93bd7f5b32fa4c34a00b98e3c46c6647e Mon Sep 17 00:00:00 2001 From: Jennifer Schweers Date: Fri, 2 Oct 2020 21:49:32 -0400 Subject: [PATCH 013/257] Updated xlsx queries to use indexed_on instead of server dates --- tests/009b_integration_multiple.xlsx | Bin 7396 -> 11429 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/009b_integration_multiple.xlsx b/tests/009b_integration_multiple.xlsx index f8dd46671d0aa3d53e85302b405073cdaf7c7bcf..fec98f9e03756ad97a03aa5b36e99d39285915d3 100644 GIT binary patch literal 11429 zcmeHN^;?_iw#A{iL-FDc#l5&Y#a)8CJH_4IwYa;};8MI4hvM#~INZ$4Ikz*Nx%VG9 z=ldZ~@+5oZ%bT_KyY{=cq6{PyCKwDD92gkb8?e%}vK3o!FtB83Ffa@-I0$VKJ6mUf zt+W1n4|{-?ZjhCB8+~$UsRI%U_Im z47=<*q>dK}w(p2B^`j3JOnJGq5L}V15k*ehjr(T$hP#biT|n4UN5*vp5#1-Ln#Roe zW1oASwK|AIduiOWJX3r@4kBHBq>@Z94aMfZ40f7euL&g%186mY4~5z3v1;U=(N07` zI$_-BLN#tToXy>yITm)hlr0L>6lFzanR?M-^zok$wl@17#9TCr0%s;owE`|3 zEy^IULg0iAA>%5`F10lCS92ad_#unuq&^1=EnNXNFt}k}`s4T4z$YiKWa0ra1O&5# zV0!mAX4A$K(P4RO%05Mg#*uI>}6ZqToCj-v(xT5x_ z8*w4${AME0tQxl*kMwKP`a<_YR4-Tb6tPzUS*EicYP~W~t*x+ViH_pY$CADNq}Js} z71ylMv^)6=*8&lhn*~9P6fYleT_n^70{Gi>EVwk19dWh3OQV!i=p^qDhOFZ$jNDi#^9X$veeIPZ4 zB%;!K%H2P+npRd8rwGn~%-+2_=7MhQEE03V@LNe2_ugafWI-v>uw@!4xOrR0Z#F z&%Qbn#x47q5rfWxy91}Y91@-tM|r$xg1efRVVUdDL*=ZE%ieiBWF(z()E1>q6_T@E zz5Q(Z!2Nxbkj!O`hPs{rs(mSQq6uE_MnV0CB&>*(0V-~Dh@dpSh-C+1|{9wAeKSVIX5 
[... several hundred lines of base85-encoded binary patch data omitted: the "literal 11429" (new) and "literal 7396" (previous) blobs for tests/009b_integration_multiple.xlsx ...]
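The workbook change in patch 013 above is a binary diff, so the query edits themselves (switching the sort and filter fields over to indexed_on, per the commit message) are not visible here. A throwaway way to inspect such a change is to dump each revision of the workbook to text and compare the dumps. This sketch assumes the third-party openpyxl package, which is not a dependency of these patches, and uses the fixture path from the diff header:

    from openpyxl import load_workbook

    def dump_workbook(path):
        # Print every row of every sheet so two revisions of the query
        # workbook can be compared with an ordinary text diff.
        for sheet in load_workbook(path):
            for row in sheet.iter_rows(values_only=True):
                print(sheet.title, row)

    dump_workbook('tests/009b_integration_multiple.xlsx')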
From 14baad649d682ab5ffb5e3838637a2aff8116c93 Mon Sep 17 00:00:00 2001
From: Jennifer Schweers
Date: Fri, 2 Oct 2020 22:03:26 -0400
Subject: [PATCH 014/257] Replaced inserted_at, the ES name, with indexed_on,
 the API name

---
 commcare_export/commcare_minilinq.py |  8 ++++----
 tests/test_cli.py                    | 10 +++++-----
 tests/test_commcare_hq_client.py     | 24 ++++++++++++------------
 tests/test_commcare_minilinq.py      | 10 +++++-----
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index 0967a3c7..dc0eaf97 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -32,8 +32,8 @@ def __call__(self, since, until):
 
 resource_since_params = {
-    'form': SimpleSinceParams('inserted_at_start', 'inserted_at_end'),
-    'case': SimpleSinceParams('inserted_at_start', 'inserted_at_end'),
+    'form': SimpleSinceParams('indexed_on_start', 'indexed_on_end'),
+    'case': SimpleSinceParams('indexed_on_start', 'indexed_on_end'),
     'user': None,
     'location': None,
     'application': None,
@@ -43,8 +43,8 @@ def __call__(self, since, until):
 
 def get_paginator(resource, page_size=1000):
     return {
-        'form': DatePaginator('form', 'inserted_at', page_size),
-        'case': DatePaginator('case', 'inserted_at', page_size),
+        'form': DatePaginator('form', 'indexed_on', page_size),
+        'case': DatePaginator('case', 'indexed_on', page_size),
         'user': SimplePaginator('user', page_size),
         'location': SimplePaginator('location', page_size),
         'application': SimplePaginator('application', page_size),
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 7fdea637..d2fe929b 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -51,7 +51,7 @@ def mock_hq_client(include_parent):
     return MockCommCareHqClient({
         'form': [
             (
-                {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'},
+                {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'},
                 [
                     {'id': 1, 'form': {'name': 'f1', 'case': {'@case_id': 'c1'}}, 'metadata': {'userID': 'id1'}},
@@ -62,7 +62,7 @@ def mock_hq_client(include_parent):
         ],
         'case': [
             (
-                {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'},
+                {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'},
                 [
                     {'id': 'case1'},
                     {'id': 'case2'},
@@ -367,7 +367,7 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_
     def _check_data(self, writer, expected, table_name):
         actual = [
             list(row) for row in
-            writer.engine.execute("SELECT id, name, inserted_at FROM {}".format(table_name))
+            writer.engine.execute("SELECT id, name, indexed_on FROM {}".format(table_name))
         ]
 
         message = ''
@@ -402,7 +402,7 @@ def _check_checkpoints(self, caplog, expected):
 CONFLICTING_TYPES_CLIENT = MockCommCareHqClient({
     'form': [
         (
-            {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'},
+            {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'},
             [
                 {'id': 1, 'form': {'name': 'n1', 'count': 10}},
                 {'id': 2, 'form': 
{'name': 'f2', 'count': 'abc'}} @@ -452,7 +452,7 @@ def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capf DATA_TYPES_CLIENT = MockCommCareHqClient({ 'form': [ ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'inserted_at'}, + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, [ {'id': 1, 'form': {}}, {'id': 2, 'form': {}} diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 042e5396..5fc5c0f4 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -43,7 +43,7 @@ def _get_results(self, params): if not params: return { 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 2}, - 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '2017-01-01T15:36:22Z'}] + 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}] } else: since_query_param =resource_since_params['case'].start_param @@ -57,22 +57,22 @@ def _get_results(self, params): class FakeRepeatedDateCaseSession(FakeSession): # Model the case where there are as many or more cases with the same - # inserted_at than the batch size (2), so the client requests + # indexed_on than the batch size (2), so the client requests # the same set of cases in a loop. def _get_results(self, params): if not params: return { 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, - 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '2017-01-01T15:36:22Z'}, - {'id': 2, 'foo': 2, 'inserted_at': '2017-01-01T15:36:22Z'}] + 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}, + {'id': 2, 'foo': 2, 'indexed_on': '2017-01-01T15:36:22Z'}] } else: since_query_param = resource_since_params['case'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' return { 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, - 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '2017-01-01T15:36:22Z'}, - {'id': 2, 'foo': 2, 'inserted_at': '2017-01-01T15:36:22Z'}] + 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}, + {'id': 2, 'foo': 2, 'indexed_on': '2017-01-01T15:36:22Z'}] } @@ -83,24 +83,24 @@ def _get_results(self, params): if not params: return { 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 3}, - 'objects': [{'id': 1, 'foo': 1, 'inserted_at': '{}Z'.format(since1)}] + 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '{}Z'.format(since1)}] } else: since_query_param = resource_since_params['form'].start_param - inserted_at = params[since_query_param] - if inserted_at == since1: + indexed_on = params[since_query_param] + if indexed_on == since1: # include ID=1 again to make sure it gets filtered out return { 'meta': { 'next': '?offset=2', 'offset': 0, 'limit': 1, 'total_count': 3 }, - 'objects': [{'id': 1, 'foo': 1}, {'id': 2, 'foo': 2, 'inserted_at': '{}Z'.format(since2)}] + 'objects': [{'id': 1, 'foo': 1}, {'id': 2, 'foo': 2, 'indexed_on': '{}Z'.format(since2)}] } - elif inserted_at == since2: + elif indexed_on == since2: return { 'meta': { 'next': None, 'offset': 0, 'limit': 1, 'total_count': 3 }, 'objects': [{'id': 3, 'foo': 3}] } else: - raise Exception(inserted_at) + raise Exception(indexed_on) class TestCommCareHqClient(unittest.TestCase): diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index 08c83da2..c0da3c3c 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -25,11 +25,11 @@ def die(msg): raise Exception(msg) client = MockCommCareHqClient({ 'form': [ ( - {'limit': 1000, 'filter': 
'test1', 'order_by': 'inserted_at'},
+                    {'limit': 1000, 'filter': 'test1', 'order_by': 'indexed_on'},
                     [1, 2, 3],
                 ),
                 (
-                    {'limit': 1000, 'filter': 'test2', 'order_by': 'inserted_at'},
+                    {'limit': 1000, 'filter': 'test2', 'order_by': 'indexed_on'},
                     [
                         { 'x': [{ 'y': 1 }, {'y': 2}] },
                         { 'x': [{ 'y': 3 }, {'z': 4}] },
                     ]
                 ),
                 (
-                    {'limit': 1000, 'filter': 'laziness-test', 'order_by': 'inserted_at'},
+                    {'limit': 1000, 'filter': 'laziness-test', 'order_by': 'indexed_on'},
                     (i if i < 5 else die('Not lazy enough') for i in range(12))
                 ),
                 (
-                    {'limit': 1000, 'cases__full': 'true', 'order_by': 'inserted_at'},
+                    {'limit': 1000, 'cases__full': 'true', 'order_by': 'indexed_on'},
                     [1, 2, 3, 4, 5]
                 ),
             ],

             'case': [
                 (
-                    {'limit': 1000, 'type': 'foo', 'order_by': 'inserted_at'},
+                    {'limit': 1000, 'type': 'foo', 'order_by': 'indexed_on'},
                     [
                         { 'x': 1 },
                         { 'x': 2 },

From ec72b6c8cd5313dcdfad2d4d22662efd92bfb61b Mon Sep 17 00:00:00 2001
From: Jennifer Schweers
Date: Sun, 11 Oct 2020 09:37:52 -0400
Subject: [PATCH 015/257] Replaced test data

---
 tests/009_expected_form_data.csv     | 55 +++++++++++++--------------
 tests/009_integration.xlsx           | Bin 5620 -> 10503 bytes
 tests/009b_expected_form_1_data.csv  | 32 +++++++++++++---
 tests/009b_expected_form_2_data.csv  | 10 ++---
 tests/009b_integration_multiple.xlsx | Bin 11429 -> 11955 bytes
 tests/test_cli.py                    |  8 ++--
 6 files changed, 62 insertions(+), 43 deletions(-)

diff --git a/tests/009_expected_form_data.csv b/tests/009_expected_form_data.csv
index febbb0f9..4e8da371 100644
--- a/tests/009_expected_form_data.csv
+++ b/tests/009_expected_form_data.csv
@@ -1,28 +1,27 @@
-id,name,received_on,inserted_at
-3a8776b3-b613-465f-8d2c-431972597222,Sheel,2012-04-24T05:13:01.000000Z,2012-04-24T05:13:01.000000Z
-e56abced-bf46-4739-af88-0ec644645b9b,Michel ,2012-04-25T07:02:09.000000Z,2012-04-25T07:02:09.000000Z
-4bbd52c6-cef7-41d7-aec8-4a4050c47897,Dionisia,2012-04-25T14:07:05.000000Z,2012-04-25T14:07:05.000000Z
-5aa938cc-bade-41d3-baf1-a18b72c0d844,Michel-2.1,2012-04-27T10:05:55.000000Z,2012-04-27T10:05:55.000000Z
-674d4fd0-a3df-4b9c-87cd-b6756886581d,Mauro-1,2012-05-02T08:26:09.000000Z,2012-05-02T08:26:09.000000Z
-8f209da8-2a4b-4470-86bb-1ae5afcb32d1,Santos,2012-05-10T15:41:32.000000Z,2012-05-10T15:41:32.000000Z
-24125f93-67c1-4b91-9988-38588bdead1c,JM,2012-07-13T11:37:22.000000Z,2012-07-13T11:37:22.000000Z
-6bd013f2-e549-46b2-ab55-951e90d7cf0d,EUCLIDES1,2012-07-30T10:49:47.000000Z,2012-07-30T10:49:47.000000Z
-d6363916-9e54-44d6-b04f-dd34fcb2d0cd,EUCLIDES SYNC 2,2012-07-30T11:07:31.000000Z,2012-07-30T11:07:31.000000Z
-0a25740f-c733-4372-a2ea-bb15cac5076c,ECC SYNC 2,2012-07-30T11:07:44.000000Z,2012-07-30T11:07:44.000000Z
-43670a3a-d038-4ece-8840-5521a75d2028,ECC SYNC 3,2012-07-30T11:07:51.000000Z,2012-07-30T11:07:51.000000Z
-ca54d9e1-d2e8-48ef-b670-1e6df7c49cb8,ECC SYNC 4,2012-07-30T11:10:09.000000Z,2012-07-30T11:10:09.000000Z
-1d21a76c-42a0-489b-839f-1c265e6df791,ECC SYNC 2,2012-07-30T11:10:13.000000Z,2012-07-30T11:10:13.000000Z
-e2a86f5e-a074-4996-ab2d-c51322451c0a,ECC OTA3,2012-07-30T13:23:58.000000Z,2012-07-30T13:23:58.000000Z
-53fd70c3-b79f-4876-b678-f73a5f0dbbe1,ECC OTA2,2012-07-30T14:59:52.000000Z,2012-07-30T14:59:52.000000Z
-450d2636-1ebe-4f16-a697-27a759788916,ECC7,2012-07-31T14:49:57.000000Z,2012-07-31T14:49:57.000000Z
-54218afe-f32a-49a6-ab0a-35b4673d4887,Euclides 2,2012-08-02T10:15:39.000000Z,2012-08-02T10:15:39.000000Z
-40568357-5350-4215-9d51-a646b4d64b70,Euclides Carlos,2012-08-02T10:53:35.000000Z,2012-08-02T10:53:35.000000Z
-0c87ad90-2ba0-4806-87a5-daf3473cf829,Euclidez,2012-08-07T05:27:53.000000Z,2012-08-07T05:27:53.000000Z
-bbc758fd-ebc9-47b1-9a4f-cc144c0e5839,Euclidez 2,2012-08-07T05:49:04.000000Z,2012-08-07T05:49:04.000000Z
-0319a0c3-705c-471d-a164-36e63692b4af,Euclidez new case,2012-08-11T09:03:51.000000Z,2012-08-11T09:03:51.000000Z
-b2d58a2e-8c59-4c6e-b221-8e6d23e3dc97,Euclidez new case 2,2012-08-11T09:14:56.000000Z,2012-08-11T09:14:56.000000Z
-a31102dc-793a-4f27-b967-d4dfca86b4a6,Guy Mabota,2012-08-16T16:23:40.000000Z,2012-08-16T16:23:40.000000Z
-7a20c373-fc6b-4437-ab3a-73d3e4a64be1,EUCLIDES CARLOS,2012-08-26T08:52:37.000000Z,2012-08-26T08:52:37.000000Z
-5d39ec4a-7217-48ba-8484-78f8b08124a9,CARLOS,2012-08-26T09:05:57.000000Z,2012-08-26T09:05:57.000000Z
-4c4b5ad7-9642-46f2-9947-4a5a5f07973c,MABOTA,2012-08-26T09:07:18.000000Z,2012-08-26T09:07:18.000000Z
-6cfb2b4a-6994-415e-bd51-4c25d54628d2,GUY,2012-08-27T06:14:41.000000Z,2012-08-27T06:14:41.000000Z
+id,name,received_on,server_modified_on
+722cd0f0-df75-44fe-9f3b-eafdde749556,Register Woman,2016-08-23T18:10:46.354652Z,2016-08-23T18:10:46.354652Z
+00d52675-48de-4453-a0ab-bf5140e91529,Date Picker Field List OQPS,2017-02-22T04:20:41.350824Z,2017-02-22T04:20:41.350824Z
+7a30381b-072d-43d5-9a8c-d917c18c4ed0,Date Picker Field List OQPS,2017-03-08T04:12:52.073633Z,2017-03-08T04:12:52.073633Z
+c8db2245-43a5-42bc-bd9a-5637a4e3e3c5,Registration,2017-06-23T06:25:10.350764Z,2017-06-23T06:25:10.350764Z
+e533dc9b-86ad-4b88-9c68-000485539c84,Registration,2017-07-24T14:14:19.371576Z,2017-07-24T14:14:19.371576Z
+44849a67-273c-46d8-9f6e-2a1909b2dc64,Date Picker Field List OQPS,2017-02-21T14:21:58.426183Z,2017-02-21T14:21:58.426183Z
+6debb614-5937-4e72-8d85-79339b183d44,Registration,2017-03-28T21:06:01.613880Z,2017-03-28T21:06:01.613880Z
+6cf320ce-cb67-4d73-b13f-73d706c25a58,Registration,2017-06-01T20:28:55.166268Z,2017-06-01T20:28:55.166268Z
+b921ff1d-d8f1-4fa8-8a0d-7bcb89c64900,Registration,2017-06-26T17:38:08.849806Z,2017-06-26T17:38:08.849806Z
+1bf4c4a2-c74e-433c-861a-12d908ca3b37,Registration,2017-06-01T20:47:56.618572Z,2017-06-01T20:47:56.618572Z
+a72e6609-c9fc-4929-8316-67cbe7370faa,Registration,2017-06-01T20:53:00.263914Z,2017-06-01T20:53:00.263914Z
+733c1f60-55e8-40d8-b11e-25e0b60a1bd3,Registration,2017-06-16T08:43:17.086223Z,2017-06-16T08:43:17.086223Z
+232d1461-97a2-4bf2-a322-bf00bd7631ab,Registration,2017-06-19T09:57:59.335584Z,2017-06-19T09:57:59.335584Z
+ef936958-063f-4862-9289-ee041154c3c3,Registration,2017-08-11T19:13:33.035079Z,2017-08-11T19:13:33.035079Z
+47bc1c89-6006-48b1-8e5c-6468f0d7509c,Registration,2017-08-11T19:48:12.705214Z,2017-08-11T19:48:12.705214Z
+43cf5491-055a-4a8d-839c-10ea888373dd,Registration,2017-02-17T11:25:51.670858Z,2017-02-17T11:25:51.670858Z
+c5f0eb63-a5d9-4b45-bc30-eb47a25590d0,Registration,2017-02-17T11:25:59.421127Z,2017-02-17T11:25:59.421127Z
+dfa09ad4-3efd-4a54-a628-25e1ccaded17,Date Picker Field List OQPS,2017-02-21T10:55:54.158411Z,2017-02-21T10:55:54.158411Z
+13a82b6a-2e82-4e1a-9f17-e57052b622e3,Date Picker Field List OQPS,2017-02-21T11:34:40.289691Z,2017-02-21T11:34:40.289691Z
+3407042d-4db0-4564-ba4c-f9e64a9fb17f,Date Picker Field List OQPS,2017-04-04T14:53:43.570695Z,2017-04-04T14:53:43.570695Z
+0bc4799d-ef00-4de8-9061-cca31e1e630e,Registration,2017-06-01T20:37:13.258852Z,2017-06-01T20:37:13.258852Z
+5ae9056c-a3b0-4b66-a66f-5830523fb6de,Registration,2017-06-19T11:07:26.044592Z,2017-06-19T11:07:26.044592Z
+d0cf0d73-d453-4245-9baa-d15577618f9f,Registration,2017-08-11T18:55:10.480754Z,2017-08-11T18:55:10.480754Z
+a6074fd8-9671-444c-ad10-4ea5f14a8b8e,Registration,2017-06-01T20:32:20.337554Z,2017-06-01T20:32:20.337554Z
+27b6f2e5-0891-4c55-8fd6-51c639c0cd87,Registration,2017-06-22T07:17:01.907612Z,2017-06-22T07:17:01.907612Z
+3012b9dc-5d1e-410a-aa39-803191e935ac,Registration,2017-06-22T07:17:41.231408Z,2017-06-22T07:17:41.231408Z
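With the fixture regenerated, the expected-data files now carry the 2016-2017 "Registration" forms in place of the 2012 rows. As a quick sanity check, a fixture like this can be loaded with the standard csv module; this is only a sketch against the file contents shown above, not the repo's actual test harness:

    import csv

    # Load the expected-form fixture and spot-check the first data row.
    with open('tests/009_expected_form_data.csv') as f:
        expected = list(csv.DictReader(f))

    assert set(expected[0]) == {'id', 'name', 'received_on', 'server_modified_on'}
    assert expected[0]['name'] == 'Register Woman'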
diff --git a/tests/009_integration.xlsx b/tests/009_integration.xlsx
index dc8be15aa94b70522bf1a13ca5221eb4f9337396..7bb527f2626d27969ad89b81d3773d96a4a3a793 100644
GIT binary patch
[... base85-encoded binary data omitted: the "literal 10503" (new) and "literal 5620" (previous) blobs for tests/009_integration.xlsx ...]
diff --git a/tests/009b_expected_form_1_data.csv b/tests/009b_expected_form_1_data.csv
index eb407c69..ffe5ad01 100644
--- a/tests/009b_expected_form_1_data.csv
+++ b/tests/009b_expected_form_1_data.csv
@@ -1,5 +1,27 @@
-id,name,received_on,inserted_at
-3a8776b3-b613-465f-8d2c-431972597222,Sheel,2012-04-24T05:13:01.000000Z,2012-04-24T05:13:01.000000Z
-e56abced-bf46-4739-af88-0ec644645b9b,Michel ,2012-04-25T07:02:09.000000Z,2012-04-25T07:02:09.000000Z
-4bbd52c6-cef7-41d7-aec8-4a4050c47897,Dionisia,2012-04-25T14:07:05.000000Z,2012-04-25T14:07:05.000000Z
-5aa938cc-bade-41d3-baf1-a18b72c0d844,Michel-2.1,2012-04-27T10:05:55.000000Z,2012-04-27T10:05:55.000000Z
+id,name,received_on,server_modified_on
+722cd0f0-df75-44fe-9f3b-eafdde749556,Register Woman,2016-08-23T18:10:46.354652Z,2016-08-23T18:10:46.354652Z
+00d52675-48de-4453-a0ab-bf5140e91529,Date Picker Field List OQPS,2017-02-22T04:20:41.350824Z,2017-02-22T04:20:41.350824Z
+7a30381b-072d-43d5-9a8c-d917c18c4ed0,Date Picker Field List OQPS,2017-03-08T04:12:52.073633Z,2017-03-08T04:12:52.073633Z
+c8db2245-43a5-42bc-bd9a-5637a4e3e3c5,Registration,2017-06-23T06:25:10.350764Z,2017-06-23T06:25:10.350764Z
+e533dc9b-86ad-4b88-9c68-000485539c84,Registration,2017-07-24T14:14:19.371576Z,2017-07-24T14:14:19.371576Z
+44849a67-273c-46d8-9f6e-2a1909b2dc64,Date Picker Field List OQPS,2017-02-21T14:21:58.426183Z,2017-02-21T14:21:58.426183Z
+6debb614-5937-4e72-8d85-79339b183d44,Registration,2017-03-28T21:06:01.613880Z,2017-03-28T21:06:01.613880Z
+6cf320ce-cb67-4d73-b13f-73d706c25a58,Registration,2017-06-01T20:28:55.166268Z,2017-06-01T20:28:55.166268Z
+b921ff1d-d8f1-4fa8-8a0d-7bcb89c64900,Registration,2017-06-26T17:38:08.849806Z,2017-06-26T17:38:08.849806Z
+1bf4c4a2-c74e-433c-861a-12d908ca3b37,Registration,2017-06-01T20:47:56.618572Z,2017-06-01T20:47:56.618572Z
+a72e6609-c9fc-4929-8316-67cbe7370faa,Registration,2017-06-01T20:53:00.263914Z,2017-06-01T20:53:00.263914Z
+733c1f60-55e8-40d8-b11e-25e0b60a1bd3,Registration,2017-06-16T08:43:17.086223Z,2017-06-16T08:43:17.086223Z
+232d1461-97a2-4bf2-a322-bf00bd7631ab,Registration,2017-06-19T09:57:59.335584Z,2017-06-19T09:57:59.335584Z
+ef936958-063f-4862-9289-ee041154c3c3,Registration,2017-08-11T19:13:33.035079Z,2017-08-11T19:13:33.035079Z
+47bc1c89-6006-48b1-8e5c-6468f0d7509c,Registration,2017-08-11T19:48:12.705214Z,2017-08-11T19:48:12.705214Z
+43cf5491-055a-4a8d-839c-10ea888373dd,Registration,2017-02-17T11:25:51.670858Z,2017-02-17T11:25:51.670858Z
+c5f0eb63-a5d9-4b45-bc30-eb47a25590d0,Registration,2017-02-17T11:25:59.421127Z,2017-02-17T11:25:59.421127Z
+dfa09ad4-3efd-4a54-a628-25e1ccaded17,Date Picker Field List OQPS,2017-02-21T10:55:54.158411Z,2017-02-21T10:55:54.158411Z
+13a82b6a-2e82-4e1a-9f17-e57052b622e3,Date Picker Field List OQPS,2017-02-21T11:34:40.289691Z,2017-02-21T11:34:40.289691Z
+3407042d-4db0-4564-ba4c-f9e64a9fb17f,Date Picker Field List OQPS,2017-04-04T14:53:43.570695Z,2017-04-04T14:53:43.570695Z
+0bc4799d-ef00-4de8-9061-cca31e1e630e,Registration,2017-06-01T20:37:13.258852Z,2017-06-01T20:37:13.258852Z
+5ae9056c-a3b0-4b66-a66f-5830523fb6de,Registration,2017-06-19T11:07:26.044592Z,2017-06-19T11:07:26.044592Z
+d0cf0d73-d453-4245-9baa-d15577618f9f,Registration,2017-08-11T18:55:10.480754Z,2017-08-11T18:55:10.480754Z
+a6074fd8-9671-444c-ad10-4ea5f14a8b8e,Registration,2017-06-01T20:32:20.337554Z,2017-06-01T20:32:20.337554Z
+27b6f2e5-0891-4c55-8fd6-51c639c0cd87,Registration,2017-06-22T07:17:01.907612Z,2017-06-22T07:17:01.907612Z
+3012b9dc-5d1e-410a-aa39-803191e935ac,Registration,2017-06-22T07:17:41.231408Z,2017-06-22T07:17:41.231408Z
diff --git a/tests/009b_expected_form_2_data.csv b/tests/009b_expected_form_2_data.csv
index 03ac588b..4d8abfa1 100644
--- a/tests/009b_expected_form_2_data.csv
+++ b/tests/009b_expected_form_2_data.csv
@@ -1,6 +1,4 @@
-id,name,received_on,inserted_at
-bbe20343-e00b-42c2-bede-b86342ed46dd,New Form,2012-04-02T18:38:50.000000Z,2012-04-02T18:38:50.000000Z
-0492cb9d-b8e7-4628-9aff-c772a83b1c5b,New Form,2012-04-03T14:51:46.000000Z,2012-04-03T14:51:46.000000Z
-162b6042-b96b-4008-8673-1d38b5771307,New Form,2012-04-18T20:07:22.000000Z,2012-04-18T20:07:22.000000Z
-9e4f67e0-6d30-4f4b-9c0d-0dfe8bf691c1,New Form,2012-04-23T08:51:13.000000Z,2012-04-23T08:51:13.000000Z
-68dd2433-8f52-4dca-a851-bc58f1d71f4a,New Form,2012-04-27T14:23:50.000000Z,2012-04-27T14:23:50.000000Z
+id,name,received_on,server_modified_on
+d0cf1846-204b-4d04-819c-f688228c2c9e,Registration Form,2020-05-16T20:04:15.702415Z,2020-05-16T20:04:15.815824Z
+db38a72d-dd04-4893-9f2f-5548b8e1fa9f,Registration Form,2020-05-16T20:18:47.035695Z,2020-05-16T20:18:47.140120Z
+f34bec9a-0af3-495d-b53f-3d953e3b3d4b,Registration Form,2020-06-01T17:43:04.870657Z,2020-06-01T17:43:05.008484Z
diff --git a/tests/009b_integration_multiple.xlsx b/tests/009b_integration_multiple.xlsx
index fec98f9e03756ad97a03aa5b36e99d39285915d3..4f91f0e92799a31f378a9420193f8bf40ecf4b19 100644
GIT binary patch
delta 5087
[base85-encoded binary patch data omitted]

diff --git a/tests/test_cli.py b/tests/test_cli.py
index d2fe929b..abe33529 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -326,12 +326,12 @@ def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog)
         reader = csv.reader(f)
         expected_form_data = list(reader)[1:]
 
-        _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', '2012-01-01', '2012-08-01')
+        _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', '2012-01-01', '2017-08-29')
         self._check_checkpoints(caplog, ['forms', 'batch', 'final'])
-        self._check_data(writer, expected_form_data[:16], 'forms')
+        self._check_data(writer, expected_form_data[:12], 'forms')
 
         caplog.clear()
-        _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', None, '2012-09-01', batch_size=8)
+        _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', None, '2020-10-11', batch_size=8)
         self._check_data(writer, expected_form_data, 'forms')
         self._check_checkpoints(caplog, ['forms', 'batch', 'final'])
 
@@ -349,7 +349,7 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_
         reader = csv.reader(f)
         expected_form_2_data = list(reader)[1:]
 
-        _pull_data(writer, checkpoint_manager, 'tests/009b_integration_multiple.xlsx', None, '2012-05-01')
+        _pull_data(writer, checkpoint_manager, 'tests/009b_integration_multiple.xlsx', None, '2020-10-11')
         self._check_checkpoints(caplog, ['forms_1', 'final', 'forms_2', 'final'])
         self._check_checkpoints(caplog, ['forms_1', 'forms_1', 'forms_2', 'forms_2'])
         self._check_data(writer, expected_form_1_data, 'forms_1')

From 838e0c9fec2ea0195124992981b8dfd8374b1f8a Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Tue, 20 Oct 2020 15:20:39 -0400
Subject: [PATCH 016/257] Add clearer error message

---
 commcare_export/exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/exceptions.py b/commcare_export/exceptions.py
index 86fde5da..0e924fe4 100644
--- a/commcare_export/exceptions.py
+++ b/commcare_export/exceptions.py
@@ -29,7 +29,7 @@ def __init__(self, errors_by_sheet):
     @property
     def message(self):
         lines = [
-            'Table "{}" is missing required columns: "{}"'.format(
+            'Sheet "{}" is missing definitions for required fields: "{}"'.format(
                 sheet, '", "'.join(missing_cols)
            ) for sheet, missing_cols in self.errors_by_sheet.items()
         ]
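A note on the message format introduced above: given a made-up errors_by_sheet mapping, it renders as follows. This is an illustrative sketch, not part of the patch:

    # Illustrative only -- reproduces the reworded message format from
    # MissingColumnException.message with a made-up sheet/column mapping.
    errors_by_sheet = {'forms': ['data source', 'field']}
    lines = [
        'Sheet "{}" is missing definitions for required fields: "{}"'.format(
            sheet, '", "'.join(missing_cols)
        ) for sheet, missing_cols in errors_by_sheet.items()
    ]
    print('\n'.join(lines))
    # Sheet "forms" is missing definitions for required fields: "data source", "field"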
From 88ca2fa7b99287e5b7afbba59236ead139e43e64 Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Tue, 20 Oct 2020 15:21:00 -0400
Subject: [PATCH 017/257] Strip whitespace in column names

---
 commcare_export/excel_query.py | 2 +-
 tests/003_DataSourceAndEmitColumns.xlsx | Bin 8213 -> 8557 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py
index e2279033..4a4611e9 100644
--- a/commcare_export/excel_query.py
+++ b/commcare_export/excel_query.py
@@ -43,7 +43,7 @@ def get_column_by_name(worksheet, column_name):
     # columns and rows are indexed from 1
     for col in xrange(1, worksheet.max_column + 1):
         value = worksheet.cell(row=1, column=col).value
-        value = value.lower() if value else value
+        value = value.lower().strip() if value else value
         if column_name == value:
             return without_empty_tail([
                 worksheet.cell(row=i, column=col) for i in xrange(2, worksheet.max_row + 1)

diff --git a/tests/003_DataSourceAndEmitColumns.xlsx b/tests/003_DataSourceAndEmitColumns.xlsx
index d6962b5676da376458c01c12335fa30b4e1a9664..7df68cce84b405d963fc0eb6324011563be6ef81 100644
GIT binary patch
literal 8557
[base85-encoded binary patch data omitted]

literal 8213
[base85-encoded binary patch data omitted]
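Worth noting about the .strip() added above: a header cell carrying stray whitespace now still matches its expected column name. A standalone sketch of the normalization, mirroring the one-line change rather than the library's actual code path:

    # Illustrative only -- the same normalization get_column_by_name()
    # now applies to each header cell before comparing: lowercase, strip.
    def normalize_header(value):
        return value.lower().strip() if value else value

    assert normalize_header('Data Source ') == 'data source'  # trailing space tolerated
    assert normalize_header('  FIELD') == 'field'             # leading spaces and case folded
    assert normalize_header(None) is None                     # empty cells pass through unchanged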
Date: Wed, 21 Oct 2020 08:22:49 -0400
Subject: [PATCH 018/257] Updated postgres test instructions

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 7ff892da..b1fc924a 100644
--- a/README.md
+++ b/README.md
@@ -554,8 +554,11 @@ Postgresql
 ```
 $ docker pull postgres:9.6
 $ docker run --name ccexport-postgres -p 5432:5432 -d postgres:9.6
+$ export POSTGRES_URL=postgresql://postgres:postgres@postgres/
 ```
+[Docker postgres image docs](https://hub.docker.com/_/postgres/)
+
 MySQL
 =====
 ```

From ab48ea9c81b45e93a319738b8a3d4e47a7983a1d Mon Sep 17 00:00:00 2001
From: Jenny Schweers
Date: Wed, 21 Oct 2020 11:48:12 -0400
Subject: [PATCH 019/257] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b1fc924a..cdcb1ec7 100644
--- a/README.md
+++ b/README.md
@@ -553,8 +553,8 @@ Postgresql
 ==========
 ```
 $ docker pull postgres:9.6
-$ docker run --name ccexport-postgres -p 5432:5432 -d postgres:9.6
-$ export POSTGRES_URL=postgresql://postgres:postgres@postgres/
+$ docker run --name ccexport-postgres -p 5432:5432 -e POSTGRES_PASSWORD=postgres -d postgres:9.6
+$ export POSTGRES_URL=postgresql://postgres:postgres@localhost/
 ```
 [Docker postgres image docs](https://hub.docker.com/_/postgres/)

From 0fd7964786f4b2fbde7bbc9bffcc8eb6b3dc00c0 Mon Sep 17 00:00:00 2001
From: Daniel Miller
Date: Fri, 23 Oct 2020 09:33:19 -0400
Subject: [PATCH 020/257] travis-ci.{org -> com}

https://docs.travis-ci.com/user/migrate/open-source-repository-migration#do-i-need-to-make-any-other-changes
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index cdcb1ec7..2d527097 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ CommCare Export
 https://github.com/dimagi/commcare-export
 
-[![Build Status](https://travis-ci.org/dimagi/commcare-export.png)](https://travis-ci.org/dimagi/commcare-export)
+[![Build Status](https://travis-ci.com/dimagi/commcare-export.png)](https://travis-ci.com/dimagi/commcare-export)
 [![Test coverage](https://coveralls.io/repos/dimagi/commcare-export/badge.png?branch=master)](https://coveralls.io/r/dimagi/commcare-export)
 [![PyPI version](https://badge.fury.io/py/commcare-export.svg)](https://badge.fury.io/py/commcare-export)
 
@@ -15,7 +15,7 @@ A command-line tool (and Python library) to generate customized exports from the
 Installation & Quick Start
 --------------------------
-0a\. Install Python and `pip`. This tool is [tested with Python 2.7, 3.6 and 3.7](https://travis-ci.org/dimagi/commcare-export).
+0a\. Install Python and `pip`. This tool is [tested with Python 2.7, 3.6 and 3.7](https://travis-ci.com/dimagi/commcare-export).
 
 0b\. Sign up for [CommCareHQ](https://www.commcarehq.org/) if you have not already.
From 6f7c15198c490d9d5013cd036378570310d7fbb4 Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Fri, 23 Oct 2020 14:25:28 -0400
Subject: [PATCH 021/257] Use dateutil.parser to parse dates

---
 commcare_export/commcare_minilinq.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index dc0eaf97..b6eb48e0 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -6,15 +6,14 @@
 """
 import json
 
-from commcare_export.env import DictEnv, CannotBind, CannotReplace
-from datetime import datetime
-
+from commcare_export.env import CannotBind, CannotReplace, DictEnv
 from commcare_export.misc import unwrap
+from dateutil.parser import parse
 
 try:
-    from urllib.parse import urlparse, parse_qs
+    from urllib.parse import parse_qs, urlparse
 except ImportError:
-    from urlparse import urlparse, parse_qs
+    from urlparse import parse_qs, urlparse
@@ -162,8 +161,4 @@ def get_since_date(self, batch):
         since = last_obj.get(self.since_field)
 
         if since:
-            for fmt in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ'):
-                try:
-                    return datetime.strptime(since, fmt)
-                except ValueError:
-                    pass
+            return parse(since)
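For context: dateutil's parser accepts both timestamp shapes the removed strptime loop enumerated, plus other ISO 8601 variants, so no format list is needed. A quick standalone check (illustrative only; requires the python-dateutil package, and note that parse() returns a timezone-aware datetime for the trailing "Z" where strptime returned a naive one):

    # Illustrative only -- not part of the patch. Both formats that the
    # removed strptime loop handled parse cleanly without format strings.
    from dateutil.parser import parse

    print(parse('2015-01-01T18:32:57Z'))         # whole seconds
    print(parse('2015-01-01T18:32:57.001200Z'))  # fractional seconds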
zO}@GMIiVZUO2~8m&4DT^en9Ap%#i&kPS@8$xFt8$*KwEB@#Q!zUS+@6#Z+G+-{yX{ z8yn?1LRni^DiQ0k@fnolq-M?>Z*It0H)N`E$3!L$sInmgaox_q=)X%Gygw7i$=Sok z?2pN6S63}2M}WY0ze2)SvVgaMQ)s zH6-oULyPI2zT7zWKK2lGH5Tw}DJeV-4$WV+3AwWmTkZ2iwC!!>Q=v5xH7hl# zNBf=gGZeALiYVt;<26$fIlwRq5?~s_z2S@+$8%D#C6=2NB+Rx~%wZ~OlJram?#Q)vVJ)oK2x@}~aDrxepw0uOYqw=P$HQ%GrJWEjhPDr-Cfe`W}s2ZZ- zWSkb5^u*d-$T9yy(OACa+w(HFjM&zF$(1~1aje>i^ugAdq&-M|c8|U6LG(dZPR&X- zqPXxmS_GK7OMr2RM9CA<|JszBex@^@|WbH+ptYjpO~+0eL!g(I!mE;eNs zuC6T6R)$&0)%@Xv?RNz&LyvjasE_$QtlkP|6ogTvzDf~J=olj8YYndo^*EYq4sZJF z@d!^&@dGU-)C|ZJrRXQ5ZZQ=BxIn|3a^DdtYfpUhXeJT2#3x|5ja}%G4@YoScxwvo zn7V%JGR5ON7G&Pf%dnEQOv^~B-(dfCE2B=1EO%t?vOA3_1PB$tZ4V^Yzej#?yhJB<8#D+3+)w;@BP+w zUQ+;+)$AbG)T6^zgo{l;LoHn&PRP#~ce@^6*wBQ^PesC|S3+N0UPxEe??fMZomw@j z;TDp)7Jp7MnP`2e=V8@6#3Cx6#sG<+o)bh08@abGVOJ+ zi|E^iN_zj4S(}GACHNnq@}D~T$oh7Oi-$zVxlb-=E6w;pkw7B7#enu>8yf-JzN91k z&l0)Ej<%V$UcsUUSoIHlN*~!cSyf2G4H2WNTI+dA-@N1K#TN2L%Hvev}x zca_f7&T3au1EeU?>RyzO(N>x=`^R|r+LUy|yGA4ZV{~8c!w8;4xpwEH@P8y&vV)(* zU%BN2a=Z(KAVhd(AwZ8sGoPlR zrTv7PwEoV6AjUH7YRdK$;rgcP3w;K#wRzR5TYP%3uiB71|NB)y-ZSS zK0}3wr*9EM>%_=98l1|Xx_1r2$Zx3>@%1lE?Mzf1?d+YnOkA9t?QD_!scu5ju9Nom zjpcJ+%oT&Isp9*#QT3_Ru1|QsjT$(FN~-tAYU+Bq_3KA!1U8a;LPY_QCQ zMszv3qaV#B=wgVgi-R<1&Z8TEM14JbAv*%&2=rRNZ$k4`L@F_I8uZw!|%Iw$ccel&O0~N z-mH83JCeA;>jKjpd_RcI>1b#T;0v>2FV&Pv83q@Q?7$Dq4;o)(K6PRjq-P3QAkR42 z#9<;;w3*E4*uvxpwGtoRztenqc!qW4RuU{++N9{v#@Oj%LW1_^1zE zr!WEJnDq62*ymi$9tCxPDwistNQM^NJDVTUU1bCR3Kz080cy^>TJ+X%Sxoq7VCjv@ z*7C|RhO42(HRQ!=MZZm|u@68qN#W34Q0N~+#Z4vCA=gQ0pm^(CBB6(R*Iss9wcN3` zURx~^&rqsMX_aLBb&0m%aI>b)6Q0WfwssIzj01Y&n_=F0@{mE1?0B<**1WmRWlSVuY~Yvy+^VN3JEG z{KI!9vDYY)gL&<5oIC*=8H~4_ui=t{l*YMLq~bURlB2K^q)pjyf@XRK*<`x&Wki?HWMl7u9d&KD!{b?dNO>yp>HZTC zRh*F0I8#%84nSGf!UXSzj~Slc*v0#R*tX&iE%ng#3-m^@kyYYbde5zeI;GAwG{_8* z(P}b|Gqbyn*79%f5W&yUdTwuz3|2-g6dA0beNh?Huc;riSft;44kvRc#&|{I5D{5p ziFX;lt2qEOk*F1Og3apsw1_y1S-#0=$Ffuo91M7e8(G=P|LVprtNH>gdV*&_pm0@kzS>p>$%t?N7>zjoK#x&CtOvUqnt={;h@ z)xemL%532yxFFIgI5T`<*8}%ismsv+#4{J*8ZKuDYMV&^iYj6$wKTLf%g zzhq8%konFi|EvOZ$fR@n#~?TFQvdYo8k+RqY7@u;q$gzub~Xb$8)|ykn>pzt@oP;O z0(Wu~$iTg2R&C$hZZ0fl14Jn3*URT^57uEB6N<0$kX*KXM#;k`4VNgTXXb;?i`>bh z;#5w%NBhpT-Z(J(18?Oxt8D?P7rFRqgUu88;Nc*;xu$iMv2x^Pv{M4!?U%vFMq5MO zm<;sPT?t-v7REqC^V0j3@mXwvbso#gW^ejuqrmFd*oSJj3vV0NBkoH?AQpmW)0}Do zNFq)`%;4LXrdh_F=h}*d1Du9WzM{XiYcHxP>5%n;WTW+7VVE$W6>i*S;k&!%ATOBm z0x&qxl$D6)gOXK=8k(@2c8W^F>N)c&P1pHV2PX4jPe-&*uch~wtFAs*z=$XL2}k7U z>8NbC7LJ8su!=K{htGCWqjj|qaD*YrDRLw|-T7X;*LV=Y*UMy)MjU%lPf7t78b!vf z_?pf*Vba2-9fN_EE7J>9WE%T8>>h)zqo9q6EIO!YBq%@h+@JMKq@Mdv`*(HsS3f^1 zcS!Z~hf!R=`P$EaX`#P*_*u(BDttf8{rdV{d-(tKzF!^wOhJ(n*bghb9v}Yc@P{Dw ztCyeo2$HV;ut(T`d-)G}{dM!tvkH>3{4g)zpQ!vzV1C{DbN@yTGe0c-=08T8UpM~T zVvx7%hjm?_|9>?5Umg5h^^l$Dhv{A~e>nK7Fa5gx=X;MVjXwr^%p1s8;9Ngr*NHMhid_94?)beT literal 10503 zcmeHtg;!kJ@^wQaO>h!i8h3XO!GgOx!Gbo>I0Q|Q;FjR-4#8c61&81gBxr!(&X3H@ zdzs11_x%HJPOo)W-@do1`<~vrPF3xrBnyOl0zd$O0000PARlGd{Ur))JO+SZ zb;TfdPG)vahHCEiW>9@*H(MLhEI3%&3;^uI{r`^t;Tb528?@_W#gI6Uze10$Q-86Q zRYVUPD*TQ~WgD^n?6uYf)riQKgI&CWB6@hTnQj?D=*UnvPj>%Y(hGRZ8dokt4WNc^ ztbrv@`E2iAZnFh^jJ*urL54Y@2sgf=VYqoRKr6>Ch|4;p6SzsMTLH@eZqH5q_?&@u z@gpC)D04T_#Wy4S-dtC>2$>zu8MhO-a4iD*wCU~dpQYgfO57H3TuPEcFv8yX$EdbC z?x5!`U=~d#T31ZLKw?lodLk_j-}~w>>k`OMK6+aid_r$i)iE9Db6J0J;;l@Kv+b~* zEGesGz-*f&@`lV`X$CfmOWlBhp?dyWxP7Fj@0h*0B6|O}>JSqg*UEF~CA2L1jt^FR z4zFD{QYe&!qK@3(BR?U=!2#G-gMKNsX-@E}JuU;3uV@U8E^9)O29aA{AW1J9Uz6pX zqzbug9hHit4!o14gp7QIn0c%S-hcYI{D7_ddmupRF9dyT&}s(zK+xp}{vkgQ)X>q) z2Fk+x|;pu|)P6Ek4#eAYkqf_>_z8(=X)=@HiPpZ?8+@@%^ z_>?1@ej{_{RQPSlYSwFJs(W8N=jR$d{z46Umb_Z=j(9pJGU!EAdhr{?fy)F+L!3Rs 
zQXl~+FiZHN`LdatN50&0&;!z+OTiG8Cq5jdeGxS)x^v8xrIDFS*-_y=upK;bO>P1t zq0zZb-#V}!Q&EwmdXof9{d&2_3t!!wBLRKvvydq1waMDdj-Ib&$I_p5&Q>X8GORhm zgEhQVb@h*w@e_`FfBbNz@qsb~01%9u4a@JWafLWqzkoojf2?Z%VGzv2()K{F|Lswt zA}`y`iqV367sTqC?1GIs>%>C7r@o60>aV7orzYd@K3OKDZ`9ZNEXx9f1iBv#^}3vM zp{)WRpLQ@5yv2g^cx-bZ01p{D9s+`1d@CjKe-8f`Yj0<__!}}>67+F{a7-U#+7sTd z7qnFPLD_la;wKH`ft*&utj_FK&{5(6dF`da>UH0%*Q$G^{?&DLq)Bw<8}|)j^JfU@ z{_fzhV|*lOK~#6Z*w!Q&!$pV08R)3_#$4Wg*t#S83T1<+SeB%SOUNtGi8QG1IoO9T z2D*`z*ra~&#YyyffUa%+h;i`bzyhy0GU67B}aYU zE}IqOm7(Q*$B_URHIlrNbbN8`_s{?_+QSKqxI*Aa7NyJFl z2ntQUGxHe5v}6JK$CJL^VB8QH2?y}nZgDP8cb}FVJl7EWJ)ge+QQcuxK?mXN(gKnDEbZ*s8cS{ zxk+E>XfT5#4((?gNK4|+!f_8Uy}E|6hSc1OZsAbYdOoq@L@?c`!eJEVcNP__-2 zBJd`=DMXhaO@+$~$aai=X*M}W0>f!34Zcu(CAw_@+UdgcLMc?_^m($6W~jA&7+cbb zTXKjUc4a7;O5Z1`<`mwMT8+6vFT!6kUPUPJVj$*+we{z<9ZwjYeNr z^P&pUfz((-Y@-07!6$vh`iE`tqeN9G-3Sq4gE7ISpZGrD7gdL`HE?4f9U&e;x)uut zF~vmu-8(tY-Sp{gL*5wS9Z9zCv1;ClwKF>Skz$0rvlZ|LW0RIAUS8Xg_7fO*B3JM_v0LM2BX^`s0+=tsOB zkI*pEP>r&n8$1L9)=Z(u z@t*eNvnwE^L5n%ai8Faba?oPbJAeS@sD#SABoHzMZ)j+TyvyvOOrxiuLt{kPSD7u@ z2>E;gejv%;lNBX4QUvUWXbSrfd7u1FBj`(GM>A7ZCr3*=3+Ru)>l7^y31CGDJcvC5 zXDyJ4=<3Le^b|NQn1QG1$VtNC6mWw=Zm*x|T4LoszpgZ#FaK~mTC;~pgdfTm*%i#u zK4%pmKfJtbTCo<^ELy4)8g*<)+R8!#W+xx{XpoZvZh0z%^z0OqUm{ZoCSrZCQQ?x! zDLf+>sXD{ESA>mu6;8&_C9)-%i;6yOO;g?Xc=%ML}4(HaakJQcTUU z){#t!$wfXH9o!;m@0NM7)7Nn(d3^>&BxFKyc_O-epY7wSwjUgOAa46shw+y zap-Lmi^>Y7q_bqq^hNfV(TGIfASA$FjRv3YX{+@ia{rX0~Q5za4*@sXgtXP(q$3ZMauY zQJ}6jTx*e3ODiKbv5S9+4X{#O2hk42AAD>q{!@nDE7iEm9bTyUPqO*PyR=Ve@CO}w_KrCvbdNn#!8TE^DVZXaPinq0_>^brJMj={+Drasly8Gp7 zZx}MBct}~&ggoyKw~vF`gnUl6F1rk?8fxfe?GUL{RrjYN4exJnahD9+?hbdp^K4Jj zeDCaTySW^E-**3<|FmLt0h^`u)zQ&*Ebj8F(@n)l;<o{@y?tkytKS3BOFc?pp(We$t^(^RAum%3 zkD>K$CT@p14av+|w_pEw&AacijQI=iD!$8+j37D{oCw{*u>CXBCTzw!s25Ff5UMkAAmb`6lm^@^Mo5El|7QmSHG zs6(plyfr7UhhMi!ilYg>G8-nYYz|9o0pmLPSA?3)3D(Zk425tA@E;e(&o$wxI)#12 z^Q;qwYKq8Jwm}<^bf|q(-TiE*0I@-OP)#QMh8BT)M$Ae zyB^*%5>iET<2pd%3Ij!ekJSBs9mk(cYqlxw_W|h~%xLZLGHs#VKBffv*O)5-RShf#oo*0ZRrRdER{~~dT*(@YQIOIot?llmNO?*cE(dS zL=$~tVDZ{uHc@NLNo!1L=Y6J*q{GG|R$FXrw~ezEiI$C^XLHX2&57bRO|bK;6Pv@z z9t|f-dc<-RCc-GaVBsK*bW)vdba1zFwk&Fq$YG)gbNUd@I?n2k`-M?p=-svU$Hv5< zNp{7C7Y&4t96RCYnAQ^HH1&Se)T*Q10(O@*R-ih13Qp_G$6I(^({2b26qCBRS*rCe z_|j>17p}(d@Nuhx#N+t&Vig7u*rF3W@sZeM6@9MVtpbx{nKg3*8rGD%O!Jq?SE0ZT zR;Kh%?DE>p3SDnMd{CaR=3oS&C{A);*g8w{3Vko5dD@ryNyk})aaShw;%Z=_dCDBo z)lbmF-iJ3|WlKpE@}7eBYMihyj@u}7AyzvawA{=`!*uUWXM?yXGs6@|VBChG7NC-e zSzG+Z1y5t}wnA^mnI3(`C|X$$ZDRKdM$dRu!o{+@cba*BZG&m(4Q6PloZ%lJqB8T&)-FJdmk}taBfQPHC1o%%+Ms)7+6p+gYO1~e$D9S*qJ8w)Lj~3M z&{szNeffntx!aiimh)9>EJNmbQM{iT-3Mg2YSt2N#Fa2zlHRZ8%Lnp9P=G{GlxC(TU>sg_h7}K(2$F~j>W6Hs)>(uc;JNVag&pC9m zgp-o-PR*TGYtC<^3&)WBH>z@XZDJH;8#$Mr@Tk`_6&!w7ORHTTL&eTOV(V9xpdwr^|D>j-oL^G7 zzRFr)l23a+S*`TFW7IMj%EvoA!9?o~v~fkRofIWB%^b@ZzL$j+j>N(GBD%niCr~94 zoj+>z4qx5ZH)6L~3^HaHLi-2MY{5 zINP^&2Da(iGC5hLX{78|XLU}!sODuRO^rcv*tXj-JWVaRA}CbA8E-9TM{!+JchD0h z@7?g_0r}oXDWjaVaf?=4c7IW&sr&TA8^aA^uv;%EZJmBE?M-o^ApP|Uuk#ooJ-oK0 z+FR)|jcQ^qP>;rP*tc^tKjiAF-zz)ln7A}3crg{KxsSO|bdGL+Zd3Anht=rWN?Jcs zL)q9?AJ>)nOK8s(+spCkQS}J373Xm|SDo!zP~T7#N?U-ZF`VW!tGD3$73X;e;|9l1 z%r`{%^G7d_`%eS$P<>-VIuvsQd5i8QKY33#`W?*QaI?1;Xvqs0m)r4hcoF2CF*V}JO--jzA2DatP%ok@=WB5U{%<^EOW9kii=cGU>847X!1g{p&H8{%(z z=F&3zWRMkY`HsWg<*qAx%~1N~=F3_uctQEE343f8gR#tAkBvdgii99ff@}7)AV>@? 
z%G+bsN9NE*5YF*2C2b7t(kF}E&>Si1N*Wfg6j3aRE{;uqMU8i|2&HVFtx87KkR@Qh zM^pA`AvsXj`nO zA%P0`bFx!|G_r>c(&Sl*1)k6>+wScLgCzwDKYR2vkQo)6CX|I)2BzhQ zVwWfE4+c(aX-lcI1w5}gamwaRZjx8ue$$%nf&G}v7C&$Y2dzOX*nhKXMy;8fDP)z3 zX>}Cv`x-SH4f6f>q~`GM$`{Dr!Ni~H zFBxRxY2zjxjjxQ(bkfVaz;3`uY~l&$8=f(^%KQtB8Ca6S&SfZ_6o1*F6wWQR_FVL= zQ{flMPrWBiZTF}4=zBd+4bZHgZkcl87F2}4rESHVQ0zE_%C+?OosK;9KRD@6z~lD( zO2vW_6z&_tLsz`%AAeJ9+l~bl)|u!2CO1&Ccn8Wr7nrmC;G$*R{Z7#x$t^A8;2!#p z?X`CK2Ty+Ci^G+<8|i5L2`~CVpPq%15@ngMULD-Id@ECwLno@`}KORhU6b97NtP`Sd< zz+`R*YU?8f&Rly-Vr2GsR~gm87mm|zyR1ewGwrDDVV+M1uMn}dNWR_-z9GO&r4Tlx zSVj9nR>90axyH$_doEBtP@dhL$K1M3-a1ipLe6e&BR8h8xW7U!t#FK6%%%5ei-`W{TGC03_#oBG6TXg8Rc>Z>L!O` z4b|?PK#haRH6q)wLc@f0Z7hTdgC|0iEu4|D24}3{p67|_8`3zrs!uPzoP3RIF_yi1 zQ=cz&=q7pj;G!5<bT_4*|35$)7>NYXK^(mV4BaXFtuj_h7bQ%{Gi*hry+d8ww>Cd|C{X`i_HP@o370-BuBs2z#(3ykn?g*K z93l2l7Grz+KZMwSyVVcjE=Bd3-8?HsGj1OkZplPOItmA&C&O7~eKZ4sk;WoWNBcEh z3@MNQ)b#r}EL};D9a~oy)`V25*~QEWZFFH;YZ(MbrGY??^OH7w>IAGGhX*}FsHkQU(7-GFqS9nJ zbwcJ*SmGfKRSRlu$DXv5*=QiQC$H3K*svv|T@B3TS8Oyv3#X;3@FxTsV;GRvXzg1_ zMvV=vbvA{#TFJTFGaNK~bM+rCFM>5Z3rZxVcP9^+iSuOO}Ih0Kn{M zRzDNI&0%ZjuFzE|qR(sGy3iZ`NZd5zurzPrJ@P+6-0-GHGvNW^XAccR%s)fi1mbA+FUB9h z{m1Z%>4=zMebDrzn#I;VcX($@wHQuie9vI9rRE=X56#4ukySEzHZHabMNF9M`b9Zb zSlnH*A4kq#zt3bzBNJcSds`tP@u*_J(!`aaYEdxmKnhb?hso2zSRw+Wzs2o3Pd$wR zcg5PC1ePFJf_%0w-Y}X*e1tB12MGZmSK3&$3ThmcP-gtRMXhZE?WMlDEI&YoNqHPo z>nc%7L8{a|XE{h1Se-T61;C*&LzXnG0T~D1GxjASZx|hJO~~cDu*~Yn_iNu`4KWH5 zp@oXY&P&~LT)n!zWQ)`x5b((L4c1_YK=8z1dL@*nx^RC3|0w^^OBxAVBnx@a(AAh!Pk`af_60GeaS;C0WgaRwC950kH-u~zw-36{@ri|$T!l90(- zb~>s+5iqSpGNrwIaD`==MRp#=nNcRZRVu8(6Nng)$q+x4OG=dMprV?4?YMiGRlqLg zUYp-uy=E-vRMj$`GPf#=R^zc2a93(N{K3T8>Vws5Iv%u<)o^ts{S8bH#al}GS9|kI zf>JiPO&kJSi0IrT#(P7}rkHY@;6AM0L6R!G%#T-U!SdHil1CZ6HE!xUAIqk@%-;-e z&^qz2f@g0dQtjzM)~$ZiAzZ+Q0D>eRKXm^3%L*z}Pwj0e%oM5JZPL(G^A+*_*iBLi zL#?W`^bF3?=TqcOXT2z$j{a|S%>-UE9qADZHCo@}Ue?20WCB?@RK#A)ogpZ3OxVh@ z$dX4MUvT6>6E83Rn^M5QGChbd|GdHL=l}8hyMNg3r6l`T2Y=mw@@EG>N{~NoNBPz8 zuQL3f4JRK|*8jIC|Er&0wb`GZ5FhHFUzFNkjek|7ei|#F{AT<&ed<>azlt$GJ;Xos zIsW+i|DQDTtHWP~ik}V#(0+6Ht8DSBgI`-MKOIQp{QgY;ZM^(y`fKI*)3gTv*N6F8 zNB-*NuZikU9{@lYF#zyu+WM>YUvq#zTSG|yWc{Cv;8*j%#@s)f3sd~r{J#-cNf!PA SZvX(w!wc&HvddII&i)TNldm}d From 8f4e0228f88f16d35169d9e33a63a6277b66679e Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Fri, 23 Oct 2020 16:14:38 -0400 Subject: [PATCH 023/257] Update 009_expected_form_data.csv with inserted_at info --- tests/009_expected_form_data.csv | 54 ++++++++++++++++---------------- tests/test_cli.py | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/009_expected_form_data.csv b/tests/009_expected_form_data.csv index 4e8da371..d985e115 100644 --- a/tests/009_expected_form_data.csv +++ b/tests/009_expected_form_data.csv @@ -1,27 +1,27 @@ -id,name,received_on,server_modified_on -722cd0f0-df75-44fe-9f3b-eafdde749556,Register Woman,2016-08-23T18:10:46.354652Z,2016-08-23T18:10:46.354652Z -00d52675-48de-4453-a0ab-bf5140e91529,Date Picker Field List OQPS,2017-02-22T04:20:41.350824Z,2017-02-22T04:20:41.350824Z -7a30381b-072d-43d5-9a8c-d917c18c4ed0,Date Picker Field List OQPS,2017-03-08T04:12:52.073633Z,2017-03-08T04:12:52.073633Z -c8db2245-43a5-42bc-bd9a-5637a4e3e3c5,Registration,2017-06-23T06:25:10.350764Z,2017-06-23T06:25:10.350764Z -e533dc9b-86ad-4b88-9c68-000485539c84,Registration,2017-07-24T14:14:19.371576Z,2017-07-24T14:14:19.371576Z -44849a67-273c-46d8-9f6e-2a1909b2dc64,Date Picker Field List OQPS,2017-02-21T14:21:58.426183Z,2017-02-21T14:21:58.426183Z 
-6debb614-5937-4e72-8d85-79339b183d44,Registration,2017-03-28T21:06:01.613880Z,2017-03-28T21:06:01.613880Z -6cf320ce-cb67-4d73-b13f-73d706c25a58,Registration,2017-06-01T20:28:55.166268Z,2017-06-01T20:28:55.166268Z -b921ff1d-d8f1-4fa8-8a0d-7bcb89c64900,Registration,2017-06-26T17:38:08.849806Z,2017-06-26T17:38:08.849806Z -1bf4c4a2-c74e-433c-861a-12d908ca3b37,Registration,2017-06-01T20:47:56.618572Z,2017-06-01T20:47:56.618572Z -a72e6609-c9fc-4929-8316-67cbe7370faa,Registration,2017-06-01T20:53:00.263914Z,2017-06-01T20:53:00.263914Z -733c1f60-55e8-40d8-b11e-25e0b60a1bd3,Registration,2017-06-16T08:43:17.086223Z,2017-06-16T08:43:17.086223Z -232d1461-97a2-4bf2-a322-bf00bd7631ab,Registration,2017-06-19T09:57:59.335584Z,2017-06-19T09:57:59.335584Z -ef936958-063f-4862-9289-ee041154c3c3,Registration,2017-08-11T19:13:33.035079Z,2017-08-11T19:13:33.035079Z -47bc1c89-6006-48b1-8e5c-6468f0d7509c,Registration,2017-08-11T19:48:12.705214Z,2017-08-11T19:48:12.705214Z -43cf5491-055a-4a8d-839c-10ea888373dd,Registration,2017-02-17T11:25:51.670858Z,2017-02-17T11:25:51.670858Z -c5f0eb63-a5d9-4b45-bc30-eb47a25590d0,Registration,2017-02-17T11:25:59.421127Z,2017-02-17T11:25:59.421127Z -dfa09ad4-3efd-4a54-a628-25e1ccaded17,Date Picker Field List OQPS,2017-02-21T10:55:54.158411Z,2017-02-21T10:55:54.158411Z -13a82b6a-2e82-4e1a-9f17-e57052b622e3,Date Picker Field List OQPS,2017-02-21T11:34:40.289691Z,2017-02-21T11:34:40.289691Z -3407042d-4db0-4564-ba4c-f9e64a9fb17f,Date Picker Field List OQPS,2017-04-04T14:53:43.570695Z,2017-04-04T14:53:43.570695Z -0bc4799d-ef00-4de8-9061-cca31e1e630e,Registration,2017-06-01T20:37:13.258852Z,2017-06-01T20:37:13.258852Z -5ae9056c-a3b0-4b66-a66f-5830523fb6de,Registration,2017-06-19T11:07:26.044592Z,2017-06-19T11:07:26.044592Z -d0cf0d73-d453-4245-9baa-d15577618f9f,Registration,2017-08-11T18:55:10.480754Z,2017-08-11T18:55:10.480754Z -a6074fd8-9671-444c-ad10-4ea5f14a8b8e,Registration,2017-06-01T20:32:20.337554Z,2017-06-01T20:32:20.337554Z -27b6f2e5-0891-4c55-8fd6-51c639c0cd87,Registration,2017-06-22T07:17:01.907612Z,2017-06-22T07:17:01.907612Z -3012b9dc-5d1e-410a-aa39-803191e935ac,Registration,2017-06-22T07:17:41.231408Z,2017-06-22T07:17:41.231408Z +id,name,inserted_at +722cd0f0-df75-44fe-9f3b-eafdde749556,Register Woman,2017-08-21T15:11:03.897195 +00d52675-48de-4453-a0ab-bf5140e91529,Date Picker Field List OQPS,2017-08-21T22:08:08.903432 +7a30381b-072d-43d5-9a8c-d917c18c4ed0,Date Picker Field List OQPS,2017-08-21T23:12:09.212486 +c8db2245-43a5-42bc-bd9a-5637a4e3e3c5,Registration,2017-08-23T00:26:39.476117 +e533dc9b-86ad-4b88-9c68-000485539c84,Registration,2017-08-23T12:40:47.438764 +44849a67-273c-46d8-9f6e-2a1909b2dc64,Date Picker Field List OQPS,2017-08-24T09:25:43.960309 +6debb614-5937-4e72-8d85-79339b183d44,Registration,2017-08-24T13:52:47.079309 +6cf320ce-cb67-4d73-b13f-73d706c25a58,Registration,2017-08-25T04:40:11.724417 +b921ff1d-d8f1-4fa8-8a0d-7bcb89c64900,Registration,2017-08-25T14:21:18.744358 +1bf4c4a2-c74e-433c-861a-12d908ca3b37,Registration,2017-08-27T20:52:06.531792 +a72e6609-c9fc-4929-8316-67cbe7370faa,Registration,2017-08-27T20:52:06.545798 +733c1f60-55e8-40d8-b11e-25e0b60a1bd3,Registration,2017-08-28T01:51:34.108067 +232d1461-97a2-4bf2-a322-bf00bd7631ab,Registration,2017-08-28T02:47:58.986850 +ef936958-063f-4862-9289-ee041154c3c3,Registration,2017-08-29T03:38:54.521605 +47bc1c89-6006-48b1-8e5c-6468f0d7509c,Registration,2017-08-29T03:38:54.656597 +43cf5491-055a-4a8d-839c-10ea888373dd,Registration,2017-08-29T16:40:41.928598 
+c5f0eb63-a5d9-4b45-bc30-eb47a25590d0,Registration,2017-08-29T16:40:41.930963
+dfa09ad4-3efd-4a54-a628-25e1ccaded17,Date Picker Field List OQPS,2017-08-29T17:01:45.618568
+13a82b6a-2e82-4e1a-9f17-e57052b622e3,Date Picker Field List OQPS,2017-08-29T17:01:46.310261
+3407042d-4db0-4564-ba4c-f9e64a9fb17f,Date Picker Field List OQPS,2017-08-29T22:34:04.594038
+0bc4799d-ef00-4de8-9061-cca31e1e630e,Registration,2017-08-30T12:31:38.126856
+5ae9056c-a3b0-4b66-a66f-5830523fb6de,Registration,2017-08-30T20:13:18.057229
+d0cf0d73-d453-4245-9baa-d15577618f9f,Registration,2017-08-31T22:56:41.027155
+a6074fd8-9671-444c-ad10-4ea5f14a8b8e,Registration,2017-09-02T09:39:30.272531
+27b6f2e5-0891-4c55-8fd6-51c639c0cd87,Registration,2017-09-02T20:05:35.451835
+3012b9dc-5d1e-410a-aa39-803191e935ac,Registration,2017-09-02T20:05:35.459547
diff --git a/tests/test_cli.py b/tests/test_cli.py
index abe33529..02f818ab 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -328,7 +328,7 @@ def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog)
 
         _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', '2012-01-01', '2017-08-29')
         self._check_checkpoints(caplog, ['forms', 'batch', 'final'])
-        self._check_data(writer, expected_form_data[:12], 'forms')
+        self._check_data(writer, expected_form_data[:13], 'forms')
 
         caplog.clear()
         _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', None, '2020-10-11', batch_size=8)

From 8be09ed20c86208c1f7362d1a3890e81f6f209ad Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Fri, 23 Oct 2020 16:39:47 -0400
Subject: [PATCH 024/257] Only update start_param if it exists

---
 commcare_export/commcare_minilinq.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index b6eb48e0..89a7efd1 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -22,9 +22,9 @@ def __init__(self, start, end):
         self.end_param = end
 
     def __call__(self, since, until):
-        params = {
-            self.start_param: since.isoformat()
-        }
+        params = {}
+        if since:
+            params[self.start_param] = since.isoformat()
         if until:
             params[self.end_param] = until.isoformat()
         return params
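One note on the guard above: SimpleSinceParams.__call__ previously assumed since was always set and would raise AttributeError on since.isoformat() when no checkpoint date existed; now the start parameter is simply omitted. A standalone sketch of the behavior (illustrative only; the parameter names below are placeholders, not necessarily the ones the library uses):

    # Illustrative only -- mirrors the patched __call__: 'since' may be
    # None, in which case no start parameter is emitted at all.
    from datetime import datetime

    def build_params(start_param, end_param, since, until):
        params = {}
        if since:
            params[start_param] = since.isoformat()
        if until:
            params[end_param] = until.isoformat()
        return params

    print(build_params('received_on_start', 'received_on_end', None, datetime(2020, 10, 11)))
    # {'received_on_end': '2020-10-11T00:00:00'}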
zmc2$=BV@hAWbCT7KO4%c`Jt3GEu(k>K0#cZu7W$wGRvnya&L;gP_+cxO|J6P+6-O zhKv;Fs0ybr4}|4}B%7|>)1iG&+CRtZ9|>dp8(~l+s83hiGvBo0&q`LqzijEWqTU!6 z7bW6%m`T_#DWck4n8j4X+sW?NHTr~myLihby+f#1y;PS(M;+clRZ+%F=k*%Cy7&4} z_W~hq*pNJejaq_~k;!VtPnq zbMJ8x?z_S$h`XvAfC>z31wC^kxF4Pin@VaX9g#ZM7u>znxDyBVvnqOT5aBO^i_vO5 zlNQ{}7p>Ta7GTYewF~nxb+dXEY`L1*Mi-pr>b|=DORB^OT zBYJhxwa0ez;7K6QhNK)MoEnz!ip954|Db7pu(bf$sv{y(f^0q&1i^DjFkE;c{baQq<)AonRY&|c%Vwz@`#ZlM-M)LRHd+=K zLIQam7q7>(hfns3KAgg*yBZo48#`9)bMWm%m+$QfGPR7Kd=R-%9O-gI6^fdxR(-o@ z+%gq&mjdP6bl&Q)df3RBQnyRM@l6eW{03vD1d@^R2GWdutnSlSgsXt4q&M|X_WSsS zhn8C)k|g*#vzkrhM0>-C$Q51fp*cn6hY!2(h9PgDBaadRr^+QYypVJS zbQnE1I(b+w+5-Z}xJ&*7=lx%<1)rG@Ts||{xmdptCYTP+eH|=&)^5Rc)ZO@wa9QAH zoamY4>jgHZwmKn%kyn111iES+Zx$lWzoaOABGt8D-m!%8s^OWddpeou!;(~~vRu@k z%y%4_qLqH^l8UBbL~F&|H59&S`OYEvUUYG4abHkQe#ZDjGB)6C?fOP=2s9pAvC%Yw zqcy0{+$GhwBlaQ{&5@~3`J^?$mPGpOb&E0X;`k4nw_rB1BM>tYSZT{YRUfe&==R+x zH2=_1YZsBTb-X9mG}vf`p$9uKx~fvo?q?t6Lz0)(3cmKy|Mea*jlKi z+hMxgptA1XYIZo`y(JIs6ZbGj)MvRS9GQG6>XUmpiGF}Ost8#-yUG_UM_Of{Z>I}1 zggfZ#lnI}21TqKiKz@CWogRipn@2i$5MazRYfOF)?NiLi?B$I)dt2b@18R4jbk^vE zPtmmk$4CYH>+JU*MC}=*eY}>}+B+^mFNN9n7T2o0dGBN)@h(6CSn5$6nqIu2#;2lv zAaFAWgqi(f$le7Iy9(jiPO{XQ9oP2jD2RYbZ+`h)j2QBrk+7^?Qq7sLl!7^9xw$S zAa&3bzG@YnoId{Um?G(tqLzp-GKJecu2j3~^c99#dtBHC3=1DlZSw%IUgZ^R%NqadeAYh-9GDt4k<9r>rjYN z=ILoac6=OR&vkRgTsVq`D9VxABrxVoIINd}TA$gptUZgOKO2Kk1uZ*zG|Z#F>y>iH z#$#(6jobIx7gA#Mt3>hIms5kZRf5463;_Rx_%H&hq|i1BL|<9G7oUq~FK)6E$+|Su zS7pJ8c-)er;mX|f_QUaXq=2^wJ44}^6Y0{(UF@rQ0o1ggj-X8ngT;`{VSwv&O3R2f zKrs`t@)7jsoDl(L7?ys>qnV)S$*JC|WZ zwcxr@6?01cyiX=$C&#j$!YylE;v+6WL(dy`9KjTpP!*3`g;7OfNtNrc<>&ZR^K8GC zN=1N|k`+zZf^gB8Iwq6JXp-0y%YMbc(uXcza;FW67a7u2w& zJ+j^)TegkJbvm&lSN6?l-qvfJM00kw(4ap1J6*I@X=vacaqx>O!y2Ro`|BdIJ4Omf z^AX?Ot^-?7HuFg?o0$Z3Vz6r3o@gTHr5fGS~}P zlcUbKBM$lejch<9R>$f2jLy~$g-t+vKytfW`gb%FM$2U4Z!lzK9sO^fQ4iem4Cq=D zcyVu3`h_3zpWkSm9S7$k*gC*6BHblQKK0@&k?D7qp=etjudT5pn^dRC8~32T97HRC zp{+e=d|GC;iI2{9k-o&Xl77%x&YJWIIT1DQ0tz6)8@$W8-H>vVrXKw1281TV-? 
zV;AH0^SNpup;{dN+G2P|`)$~!vO?sGEW2_qGnoN2>J-LP{(tT~~sV*~7T?J;6`?vPc0Zu}93`v_p6qddDE z+E)@EgUcR~KAs;sZ8$RL@~9`42v;^@&;i|7?LGZd?V61yry4nRrl(bMYv1Wwn)mQ; zlwo|Ii-7X5Z_z_s1uC031dwRffIBdB9|~+ZByfEY3%OcFW4G5g9aE%TV^uQmbqVjM zE*X7&%A7e4<8tc{{qAKA=~i&U!23i;8{Wkb|6J{WD$$~r5uiUM!k%=Ywl|rjDh~Ty z(D2>Q3^PQaCQKme*INLk1$NTU%GRrf8_m-4C94+n?( zo8%a4YjZI?wBsdrOCeC|C1oYN@ZZq-kT!J;ZNLo zZSbeGc8WH|SUvmD9~NnshEag~Ol8h5Hfiu{5UP@|K8?^6H683d#dH<3G>dd#RbOU% z>Y_^nQJPN#(>YhsBvUGwlzi7YZb)UPvg@Jk-}S7=XG3J)@+IC7_NknR%RaPgt10hE ziV20r*06T#_^m~2_;!nDoSeh37(HF|xEK+y;WDwXTu;7%Rwr7QDIgp-g$Epx6$4p} zfUDk}>*IfOU|sR-ND2Q~lV-y%4xerl_w!XSMJU)hLvo=IHe){Q_>x>98-+0!KD~St z*4YCcMv0~UScj=~&Pu?7CVZYe?cK0o#$tnK=saastsG8Rd9dJW-Ue=wmOGiiZ2~EK z_0X(*dFFx4{%qx@SYL~uSPPH92@fI-lW6>iM0EF8ELp@WE}mwwsM{D`n{axTB2CT+ zX=0pE=`*dEfXZouEN~X0p9*?%&7EQW_B8^z9r`?qAF+k)YA}ed8RUwQUvT=-GMg~?)xz*+ zNqwLy8IJIc_u?*wg@?zN3Ya8REcv8RxTNJMHw_DmBw%h~khZYlb>j*kiLPr% zJg6TjAyM<)KJ_?t<#y6zb8GonbQ&03uwfDMs)A<&^}|W2K`**zv*t}BzZ4nc5MVQ$ z_DcrKjwM)dYJle@5qp4$Fk_`r2%FrDwMK&|dtTRwxfVu2bXpjK_%vqV!j#`i>660H zX8AG1uLy}qy=V(cAH~?DJ~v2Ezh}GVd=8$gqytdrCJAzbN5ofMpSP8kfw|?@+dpEW;F*~ zQltwRW<}V|Mm~fo;nq43V?g1(j&6>fzpJ(KLkahq`~$5y6iw-TaO~pZiQ;@WwWK3M zcQ$t$*}T9;!EpRuR$95N* zG%49zbUD6e@NIFjW_-%-%VKy2(cvB8AK>Io6|a1vDe!Z?$pxgwZm4kHQ6wp*n!*528*G*{2l>x8lMMuBH*V`E$s?TOXQv)gwi<6o@`@mo80 zo!jtPD!GolEJC~j8?qdt8^71*RU2s1EV7vqgX%0%_u^lzDh8s^#$5&X@x}FBbzj#M zBD_=R#8pryffK*rnH&i=b>f{AnbqrYI=!(V3Kp3TzYtmtHo3hKs;J+OJa#`fYm`MQ z!geZIPt>1m<5hGn>LDDAEvE`~WOAzlYOQ#N-ltP@9^5Fm!*DwF9BeRbM3t#AfsWi6 z4V9!m$@3Pxh#yhrMnu*+`?34?F-nHGCje(R#Fd6DqTvYW>Pvndrxq4$mi>t*DC_)r zr*@W^mhOSvTCnw;o~3Ur?9D2K7DNu3p7wC_*#u_0)?3=L7)9!TB#cN#EuyN40(F-H zYv+|J3E>6tA?r-azYtd%Q20c-dRctzx#%7b_lZ*V1udYeL^$;nK(NfAE?Zxm#oa9M z?C%{O-+er0e@K8w5jv%}?!_UG@J|Ua;zRNNvora#8(9|HDRtlDc@J_ArM_TY0~dR`zrWt7Sxe3Zat<)Av!zx(GT{{syCYZz9byqr#V|OFmJpC>{Yew_3_8 zJ@ND~QCY7$PyteJl}l?Y$*X7A z7HQi1VL&Erq%_EGsjwdj>k@1mTrN)Xt=Ajay(A?>2XF1wu>eh*%vBl6>f9;JW7Trq zwIiFb<>6z8>3mV=pK9wh#sgHAR~$s)qJmqlDHYTXEE?UvI5IkF49Zlqr(q|3`F;^M z{cH#EC6>6wRC?zw3~jI(-!SM!^X>5^;)7QPqi;JE?|X2R_nFc^_unE>uHV9mQX z6cR=z`r#6Q84D=ZX9#iir%<=2RB z_Nz+05SzPIF#%LrPZ7rO$UU2UT|+>JiT3frBq29|bJ1GzNdvn$iM0mIJ2w#0b))PR z6x=E%*~$PUi@XM&dw(mLHw0Tn5oLqZE+?G#_phSw2GVPCfVBIovjm|F{@*$Lku|4< z_m``3#woIidaX)_MA0?V!&z}060r6gue~`#T1xZ}finviC^`k1rVL>H z#Bs{2cg_MT1D_>dT_{C#02g_-zj|BTTUIq=L zA=cbDTYyZcR?qp2l>>!@D){ZIxw$NPF|QC?ObP(E5@$W4PKn`py=iy0n4f()bf-@}5?L9N8f9yCv=#RRfwUvXRm4mi|tBs+( z<^z4b2}4$00Mxe^9&a}+6L3@%6cYm8=F_X^O*n1>p{jz3PH+&smqmSYKJlZ$DaEWO zyB|9CiSeo>bE&@JKd|V0s+##yCtn^oig3>5Dgl}++TV<;gOwy+d7RkSn%~!Ghbo<+x-xqvCUI7CJR2oTL<}EIOLHKLbDIc2{l1#y z-iSC(*oL^^;QTW7Y*1DAZ92k~wG+ek0Z}d(`oSYZg5Y%7dvEAGebAmKkeCp^=6a9k zSr2o)zuP}g`F_WFJeYZye*H4i`(M7t`L9{n?+A}46%X@>UzUBp3-1yBzj4m*fR9`G ze+Qhn&m#U2@YlHGcfiLTxQ9O5FN1{n9q`vY;CGbA_440Q(%}Cm%Fo{2?8bIfIz;#9)^`O_z!pg2es+nh5!Hn literal 11955 zcmeHtbyQpH(td#APLW{6rFijTE!yJl?(SNkXtCnKwP^7o#XY#YLvbw>cm1V3_kKNx zbG~o={=au;t*q=M^X#3;Jo7&9%*aW?Ji!LQ0T2KH02v@Zv0%Xl3IK?M1puA`5TG@M zZEc*4ZJczK-q{&DYBRZ6Tajixfu_v>KtsO&-{U{n10AvbHf<~zq8D-3=+QOG272#E@L^&hcacTXAxrQD-}6*;!!{0@&<1R`hY4 zVvAN^ny_fnOE^N}!$20`fs>i*J?G3ulY)7ciky^)6muIEoG#)0&f04Gji|F)j{n%O ziH6^?gLwfoP9TDiK1@_`!Lf#V=C|}4A8$0V^o08W!MPLTm8UP^9y+487omoSPb8y_ zBZ!E`y8t@Z7p9|zL*c<0iwZetP28=rKZJ>#ylF<^b7Ugy+;PrJ=1dZNKaf(RE)C8( z@3at`Bc*D)m~DkYI~Uey862Nm-IPcLy&M*HK7|nR;QHim}_R=g;pU#iJ?gG!s^U2QEqW{iVa_}<$# zUvjI$-J&zLaJr4mnKOZp#j9B#n5Z6nfX;7JI{o zIltkM>M$49&|<|k#0!6?j6}g_{hyz_fdK#r00>ZSR?L5}#?{ur(!kc%^5@$2Zw5g@ z7B~pK{bq zMzSf{PhJd|+`K$pC8ls*d`??I^rU_+WvB{C=R#KXf;6~_t^NWHC+uxm2@^Bt@QkI5 
ztWm>j@&axJy`*;kSS3v&c4L7I$%L+MK}P~ZdjBuz^`h#VS7~7_92v>xAJgP9TsH|i zyij_tY^|5rwZkKuy$DB+@R55%kYh3;7%TkDZ*_3hp<&;pXKKZ)w0Fa(iwM{gsZ!=i z);&?YWg9SHt+8y2n#3GL?p6}u$z{`IrgHpBhZ6bz`*k@z`{1i{2bG8K$o~{MMJVs9 z=^;@=4-WvKLqY-)Ie(6s9A$0W6c+6FpDG{puF?o%K42)ZZYUO2N-Ix)iHw7hIkaH; zDlN=$)^b1~RL{w*9fakr?dZ9GR+n%hD@`^EGcA_S3O*g^4o)cO6RDFt96Xx~Wnqnv z!;dLkNAy~U^G^1GK=TOx*@m~3Ww^ghbmcxK#rySYaJb}zp zQH11iU?ZyeTXt`ZeYD78r%M@?6-;_hxa@oHZ22gaqBx-6g-qpWw5CtqmL}O%q)dIV z@+6l^Z3+k|Zn8)BG~N{PS-aAYt58^N)f0`kWeVOxL5Wh44$Tv^hVkPeub$A0;V)Ha z%vs7gd*c=dtxy8%re9OBnb$F`qy^A11|Ki4KOmKL>Z~KJM|`;~)i6U-hckfb&NMb3 z;M@ew5>0AQCoTYkpIjkc)oj^JYz32%g2|1OJsJyQt2=~PP_87K_t;hM1zQ=MeMvDw zBCNT*h;c|u)^u_bR49PD3@xpq?@iI|tVtvGq9*b7b>Li$3k}^b=0Az9>9Ygh`P)^T z;`*-)ga)pSq>R`vkLSv6oh&1!j_RkPa`hJx7n+ zx>l98^>9FIn7a@@Y^KgSQx{+R$XEIa&(GNA9og_m!RD&rdIn}-6pKyF+NMcx%gvjfLR-nCmL{XnttdFrCdTdmQcN5-p z&g{HA4JwXC>Z0is{20Eejbq(H31nv`#a&}Tz>^2Mfy2=FBvdJE1%MHaXBJHn+eYPD zQd6=~%#0Q&J)seawU7ajy8A_!fz=PeJ;u|i`nqi^5Ji|bw}WDuHYd{GX}{)$2ISwh>ss{b&d zh+Iu?LZgt5uF?QspbWQVNcS$f`kZuTBmG4vK&T3TQFFc+!MvY8rjmagRk2LblA-{1 zbnopvP^@SQUtDmNCH1LzT^mfBMEnDsh=Suc4#5IrVa6o~@DH$|K4t&pP1;zX4eW=x zH*6YN0?8@BGZUxPs*BsVIt1Uo&!s~#jXDoXy1ZJTQpeIrj{;m7zm*qiyOh~)dPg=E z26SoHzOqZ7(wN1Kt5@V~-@?ex)^jd79x)Gge91jD&PeM8W95ooJt0JBlsTF)^dJc>5Q&SmBDBB? zf2>qC z2MrT=aK3Np4AY`%&FEy2u9CW6nbkIFP|3|ing&9&-?90wdzMywO;8|%JJwvvisHJc zY_BCm-o4?&h2ZlLC6996!YNc~-m#)cQ!{|=h2dtavs=q2Zu#j!+>7E;Mtt)c*meT< zDZIM4(o60tooak9P>aTV$fs>9FXZ~V*E2ilgt#Oq_*OST^(FMBQg;nvJ?<{(P z%jvy{bzeredpNF5%p5zfUzm+ek0?hNFFTJ(xoYfGBlHYLp|tpW=s!`NX7S=nSazPb z*ROLJV7ev3pFcJ`={*YsqWZ*yfMs(7xeMqCmdtj^SRrrNxiTd;MES;{GfMGXW(^&pJiITzcdV%;D$9l z$QDG4^6`WP$;7c90r%vDk`_d}IAFRPn)8;roQBymRR~M8oo&lcRwZ5%uH?m(Me&Fd zvMBUXG-Zz(qQf;Z=;0r^wm2(%ehT@-4x(x({@~irHO$e>*x1SOUqa~D3G4sJpbsNp z3;)*`aLXQ<2W11_ia%Qz$_p1`I&A0WpqK4M)QgxJyJ~`J4nB{Jz0T~`Pds=?a_~r} zIrp717sjD10s=wo6612<`SMWC)>OQl4kHBm+6;f|_ui__tCXJH4%s?H$VhcaVh0H{ zLuo|zxHiVPIb}QHX#wcj#Fymj;wU}S_Nk6H^FG&IM)1nM{P|C2jw(K^KUj}nXc$n>>Nw&4EBKP4yz6K47UFQ>iui__X!P~ett^bYi# zWYcmiHN;XuxyYv@{oBu4J0uLg_khpEZq6Oy2^mpbu!WW$vLQ}Gxe?i_qJo+whYg%i z&o#$5^s(xUuGE7J6i)o~^5R=?%qcQ2MW?jw zH%eimZ>^@YY3n|Gvg3XYnlOdDF+bt}HLCb!YsL}15A)Rr&OyBM@1@?ZL?Q@?_2NtC z1xs-0T^g4vr=1-)tXGu@mBI^ke=ROaF;J$lUfcuad48ilGfAVCPTA;mP@g#K{~pAv zhLh5q4#NY2b5lNUc%`iSkC7H)ReNCz$?C+2008iBk@h>rQdQ<{R#`CK>zY4+k3AfH zUqfLQ;?~azSSedy99t0usd3v+xzsvUJv1A_3;9$Fh0k@An+_%6N222|SvmNsNSQN* z0)upXFzNH9+{%NGhDmh4zyZyLKYl<^F9X^pb#cWEf9kR;ECYUFe0;EIEzApUJv_5>>y={gGwTS$O}P~ zj#4(A@BA7v%)b308Y&w&aqkc@9GPlodE~+zc-+;n6{72W8oBgEXy<4zcU!$(W!E-` z6CGsYCRjP1k={5DZ0(@mWy+fQJEcE}vgohVSFgIjpv$F224J5P6c0fa-w2j@!I<>t zis8r_+;Yums~)2KV5~(F^03r?I5bMNo6%3fQD6u^vgWRjEPI9DaDUY~t5MUnE!uaR zgav+P-HvFs0nnJ`+|laGmne@F~{L{Q`>3HYa8Mfu-x#0sB(R&M)Z)T*nfP&&K5qBy| z2X4?`PWHIc(^@?~?YVD34->1`c;MrkNvG-Roy@*OZ@Qnyzjh8-&>0LSizx?LKA}OB z$dxp3eoh26wTlxtrZKN+xu=zJN}yrHf!KAAbe^`)us_7uW*E9tC%QFb;9@h%UtRmE zCYS)31k19*|IW(P^k$ntK zo6H}SV6!kl&|N#o%e}&1gMjYgA%bGM=3u}gptP2Q^ZK)ncJZu*c7X@Se7qE*4#P?m z*P6!u=crf$b+8gT;%R5mF@DqYPb#f*+f?dhwjzvfnbCR$F^QJkc!ju`tzVhN;`gx! 
za-qoV+flsA@QEYl523u0R2}3Qp#De^w#OiXXSlcLogHt+;7uv zzD3wSkCXpE7{;3D9bHChLKXDoD;?S=nn3?*F19MxMeuZo0tFpCeUc1e=tl%D{%^7} zU5-^E)e-RaA#nUDHZrL>a|*s6C(KxE1gxkZAPN8P(hH1-(JTYm&UBIfEc5=_(m0tJ zTN^X~dj6Hp_S6SM3AwOa@UCB>IJ(|)tVL2SE)QG9e4{iurNo zR+;f6;xFgva61tKqib7DxP6zX!@A$LQGCMfCL;DcfG&0X4CFlCvyxHl zp~gvgdlWRvuDpmALJgx`QY|c}$}C@NuMi3j*jCslCYw9fPk+JtY}I4zf))$Q%j}Bk zZVRu58Fy5LT4V!GUDzh$fwfLb&r@yG@puPpF-D5uui=ra3zM(aeYSh2(NaB@2W7_Y z7x%e(hG#dso-+~t@fZa zlRVSi=?rXL&Gu=@A3Ab1r>PZv@A1)D(6hvlPCi}n;0 zrPRo|HZ;QaSp@(-&!h-+tp=BTZs<@TySq2=L-=d5gX0f)Ky{qgZm4BpcFvn|<3 z;<1I;kn+PXD~1Quq58gPH%Cyf(mGg#IzX>6w_Wo0`+J~uG)cd=IhXxtnYEEgvCA0FDJOMQzusS9Zc8Hu*sXObueTx_vr zsIDr^hL;&{qJG2*`OFO-Q*0?04kIpV5Dwm$=4V}%DhoN?$0+p=9;Xs^(4$gdsuVT| z?r$?D{V;btxM*G4o^LtH?`cG#54jH(ui%1>u6{En`8|b~A}=_In?sG(SslE=Z>Ya8 zQ;N8Me139m$nZgOlPBnmK(GY~Wex4-nRW*!NYY_@tS%JD7R~Q66E4T+I41?_>F}aP zBNL5w^%uwUjGW~a;TI;7t9Y?cgIL{mXF^^JyKa#jOA~x;JVadH7?#vTg6HH{7HT}l zS3OfT7{bQOb5amD*8o&>3hM%T)Cf4Ljp(!WZ`F;elIz70?@1KX_Y5hYrRqT$g9LSI#afq{+S>cM;nId+F6CSih zXc6BHM^HCSaL3*E)V8Ddvi;hvr@P4iqxlKGrv*OgdCmDChP*1ZlAr&;Yu6@s3p&a30BQ=k1VtG<%jkiR6zan0c zFj%^qr7yJFfr=fc-YGlkOkO_R6q=0d2$UKRStgxmbe)}_30fpIdiwTJ9~^I|%Qlv8 zu#v^?_rrEJ2)Vw0(l{Ch&yqhcUr$qK0X0N^naq*MunPM+FB{w$e48xotXL_7ZBPBO7CvK79<1(go#r4Sr$grR*x93{n!U+9_=7E2~} zy|$HH)f3=oXUBp1QYM7*&OmuxG$Cvq(+@hcNou1`YNK-B6EZc#>^G2DtZ{JMHqMtt zn>K<3=L7>yh+?-4aq=pY8pFOI4JC=W$FLP7LCF~~vynzRDbCj0zq4>QFKiOcVWbFi z`WntM#^Q&!!oWKif1}=2pAT zCK#SAF3cP?HD-GFh!CGpe~0gbFMLzVaH2P^Tvzq_%%&SZj=hYHpx_CWbID3HN^j!` z`7$snOHTrnolOR3?lSQu7Q&o2$$sM^ne20fGv74*+)4L+!YO7ah<6>e=va87f-cwQ zPKMFpizQPX8rI}HiN=+Q_n|Og7RFBltkUX@GVLF~ewClEWMe=;k)7bYw00Kb=0EyE z^QtFpK*RYp!>&Zy<#pde-RLX3Ji!cyZ) z8pa2&=T`9FBxV?63G`bql>8MkF{_ILT!1S5cV$}No$1h*^`hmq(8hPKp|tczL|x2F zyQi7<*ESdj1298FrE~}Is#dO7o3&;=?GG?X({8HzGSsbIECNib_-UUs$z+f0P4NWi zlxQBHx_>JhCE`15qCiEPo3xSJ*b)DBas`mJKXlM-&bCqC0q#>rhNgjAz%=st@S0kY~CcR6jRT|Fk0&6#uie;H5ZnY zYIeR|0a>iP5sbRxSV7&#H2MAvMLj;XqAEvwkYL=OmO9w?Zueu&^#_%4k7bK27j53G z3L7Ycc37@bH0A9b*AMGr|nnJwI+Dh?KaG~&Bpj9QjXN^T2Y zogorO*_`s89n$M0mL z@|>7?6E=v(N|~&)kH{O%6dmy#&vE*dx1-Alo}p3CW)h+pDog}Mwyna{a4f=kCufFF z9YABA^M_bpnzXlHE5NN%@+;p5f@V{CYSLTG(_L?3HWc3}5;x}VJbu1<&WFO0K(lJl zDv3^FAILzd4CK_7N<*_q8R5qrHl}}s{_YZ@%#BGqr}{YOi`}%@q%-^VKj|dSNy|O= z!TgfHd0s1&0K#*9MTDrxJOCp7I#W3=HkFq~G=3w3v4!=Nw)e%0hM7tDB%fW@uVidU zjgh@w-PpAxy{nf3tcU43x_W0sF?l-Yh5=r>WEaI#9Z{WHy)b?;u(@N5u$g$)^-uh?`TvT zYcR(zB$3{%dFdbs;H?!tZLM0=-cvrW9;>l9n`5PN`|8go5aw~A=}XP=^}Zk-$t)Bs z9m4?2rcRaJ%^mv2JgOsmhy6W}F`@s+2w$q*KGw_i-Z^9u#CVgI&uGhzRqTYu7HF91 zj*i54)7djc><76%g^e2cfXn|W^cw*b8bSAKO2tEe1&Nbv(L<)65_&odL~q^PHqETv zWRDLN?*#8G47){W7KD*6=qZ73xe$>JzB!ojSgsyc8*r=Lu#6L~Vd*fr5XjG}cRNkZ zIEmEmOqx5qSjs1^aOU%nWEC%2NalW1rGid9mJN1u8&Y`Fd56xonuR~<6;IH0`)(ONDI6_P3LIMA)#T;-vtOYkl?v=bP<-vs1*0w~JLf0ffSi8Rtf?E6)*@uOkARKaJ?T z4Uo@<-*6y`VDbl%z$iJA2irxfs~dgE4|uvQj{eyA&-_eG(_a1B zXv8lL9ySmT%2XMPt*yO~TE2GU!;fl#0yhlm6zMiK{0B|;@NsSld>Z45cck)-gu_%w z5(=O7D~uOP`|Gc9a@TsXmmseEyR(b86}xJFIvbKSF(A!5BU?i`2U|NwW_>%mpS2fA z)BC?IJBa5}W55yPERd$}+eYE_n<;Za9hG8JLNIo z*m2+~ZP*@e3B1$GJHHiFsiQ-MyTWQRImG*pH?!``mA$_@+ob4^iYg|3n(^jJn|WQQ}d?~2p{&S@MP@~I)7 z>^yF#9LA9ngm0du?DdQL<9OOUJyPKv83vE2Z~6@@mXG(fQA77?s6PQS`?&$)0-Kbn zGqCwfJVw@q12Rg|t zTsUw54eYF?P{pFuYQ-2()msnFRA8_1j#^ocBe-`smWMZp?1YFdDMpCA65nVnoPi0K zT>8>Bykg!5+C4!w1?!3)4c1cZH;gUim9&K2X<#>q!RD`i{#+&H=N9}Fos(mhy^lD< z{p?oz?eK>At)RoZhMuJj`v*4Z!+zap?|%$-s_O=|L`bloLoy5IpM%}d*1`C{(GKz1 zzYb53yzM+GM)wlU4_>uKUSLCs%DS9+%5{Zk%7jSDb=Pug#jMZr82geJkj5rUIp!AN z`fzi#?_KH2EUe2X9NaZar62kcTQ6?B%O=6-Ycun!DDtdIjq{;45D_Ve_vl``h)$*Yp1z3{G4Mm`Fm!>EPDG6LUEl7Ap;R*zyc?E^1^^O7Qdq<^hS27SGzN>;C3 
zI>5W3u8F=jJ>B}gEBvgA-Q`pQ3bRCo2=$y)34YxWi~u;$5!KK0nF6I(_$ERz$n}b8#|g($b~9)aFY0gj>d5{#89IY@lJ$ z1J#sr?^yR#@#ge&V>3TRI$(BNXYL0RKMhp%0-w>zrd;~=aqhN|VUv35o z=a-NgJ7lErckVWHc673}{%=74JDdT4#u#l|KNjr3gM__TQHvC?ogq+RekOPgYOCGv z;o-@~A}c4u8Ey`W$k`e+IZaF5-IFg~+Iq-vzss(21{BZ@k*}Y#RQMS4mCkSq?M}8m zBa<|5b5L-E!?bvvDemcwCm_l6`a4prF=gUMxq>PnZ$zI&hRB&za*`C7ifZnI!|q{L zKI_|e)p@OzYx;an6-{HQbE}eQRqkv4_a#O{Uk#ltzFK^E&V@F-8m=s-y@Ba2dq*k# zes6w}@2wSH0~_x)JUZty{k_3PBTT6+k{+z?{$~}y%&u#tVCfq=_m+|km{A1I{ zeZ}9VGZ5AMuj~F_UB<_Nk9%&v0Z}pk55V6YxW|Bx`%%9Eg|Hq2{_ajaMtNK?|BbSR z^9$v1`TQ}!<1PPh02AVWyp3PK=rPLUO~-E(I%U5qKU;sG`jhp)h00^|zb*xTHdmzmv-$rl404jN5E=jgXplz%gh{7#KhORjoTgst From b3016a8b5e20e2fe1cbfc3a392fe72141682440c Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Mon, 26 Oct 2020 14:35:02 -0400 Subject: [PATCH 026/257] Multiple batches for forms_1 --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 02f818ab..912cf281 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -350,8 +350,8 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_ expected_form_2_data = list(reader)[1:] _pull_data(writer, checkpoint_manager, 'tests/009b_integration_multiple.xlsx', None, '2020-10-11') - self._check_checkpoints(caplog, ['forms_1', 'final', 'forms_2', 'final']) - self._check_checkpoints(caplog, ['forms_1', 'forms_1', 'forms_2', 'forms_2']) + self._check_checkpoints(caplog, ['forms_1', 'batch', 'batch', 'final', 'forms_2', 'final']) + self._check_checkpoints(caplog, ['forms_1', 'forms_1', 'forms_1', 'forms_1', 'forms_2', 'forms_2']) self._check_data(writer, expected_form_1_data, 'forms_1') self._check_data(writer, expected_form_2_data, 'forms_2') From d4f45ea93294bc1abb766169107d10d80b7516c1 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Mon, 26 Oct 2020 14:41:36 -0400 Subject: [PATCH 027/257] Update test data --- tests/009b_expected_form_1_data.csv | 54 ++++++++++++++--------------- tests/009b_expected_form_2_data.csv | 8 ++--- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tests/009b_expected_form_1_data.csv b/tests/009b_expected_form_1_data.csv index ffe5ad01..d985e115 100644 --- a/tests/009b_expected_form_1_data.csv +++ b/tests/009b_expected_form_1_data.csv @@ -1,27 +1,27 @@ -id,name,received_on,server_modified_on -722cd0f0-df75-44fe-9f3b-eafdde749556,Register Woman,2016-08-23T18:10:46.354652Z,2016-08-23T18:10:46.354652Z -00d52675-48de-4453-a0ab-bf5140e91529,Date Picker Field List OQPS,2017-02-22T04:20:41.350824Z,2017-02-22T04:20:41.350824Z -7a30381b-072d-43d5-9a8c-d917c18c4ed0,Date Picker Field List OQPS,2017-03-08T04:12:52.073633Z,2017-03-08T04:12:52.073633Z -c8db2245-43a5-42bc-bd9a-5637a4e3e3c5,Registration,2017-06-23T06:25:10.350764Z,2017-06-23T06:25:10.350764Z -e533dc9b-86ad-4b88-9c68-000485539c84,Registration,2017-07-24T14:14:19.371576Z,2017-07-24T14:14:19.371576Z -44849a67-273c-46d8-9f6e-2a1909b2dc64,Date Picker Field List OQPS,2017-02-21T14:21:58.426183Z,2017-02-21T14:21:58.426183Z -6debb614-5937-4e72-8d85-79339b183d44,Registration,2017-03-28T21:06:01.613880Z,2017-03-28T21:06:01.613880Z -6cf320ce-cb67-4d73-b13f-73d706c25a58,Registration,2017-06-01T20:28:55.166268Z,2017-06-01T20:28:55.166268Z -b921ff1d-d8f1-4fa8-8a0d-7bcb89c64900,Registration,2017-06-26T17:38:08.849806Z,2017-06-26T17:38:08.849806Z -1bf4c4a2-c74e-433c-861a-12d908ca3b37,Registration,2017-06-01T20:47:56.618572Z,2017-06-01T20:47:56.618572Z 
-a72e6609-c9fc-4929-8316-67cbe7370faa,Registration,2017-06-01T20:53:00.263914Z,2017-06-01T20:53:00.263914Z -733c1f60-55e8-40d8-b11e-25e0b60a1bd3,Registration,2017-06-16T08:43:17.086223Z,2017-06-16T08:43:17.086223Z -232d1461-97a2-4bf2-a322-bf00bd7631ab,Registration,2017-06-19T09:57:59.335584Z,2017-06-19T09:57:59.335584Z -ef936958-063f-4862-9289-ee041154c3c3,Registration,2017-08-11T19:13:33.035079Z,2017-08-11T19:13:33.035079Z -47bc1c89-6006-48b1-8e5c-6468f0d7509c,Registration,2017-08-11T19:48:12.705214Z,2017-08-11T19:48:12.705214Z -43cf5491-055a-4a8d-839c-10ea888373dd,Registration,2017-02-17T11:25:51.670858Z,2017-02-17T11:25:51.670858Z -c5f0eb63-a5d9-4b45-bc30-eb47a25590d0,Registration,2017-02-17T11:25:59.421127Z,2017-02-17T11:25:59.421127Z -dfa09ad4-3efd-4a54-a628-25e1ccaded17,Date Picker Field List OQPS,2017-02-21T10:55:54.158411Z,2017-02-21T10:55:54.158411Z -13a82b6a-2e82-4e1a-9f17-e57052b622e3,Date Picker Field List OQPS,2017-02-21T11:34:40.289691Z,2017-02-21T11:34:40.289691Z -3407042d-4db0-4564-ba4c-f9e64a9fb17f,Date Picker Field List OQPS,2017-04-04T14:53:43.570695Z,2017-04-04T14:53:43.570695Z -0bc4799d-ef00-4de8-9061-cca31e1e630e,Registration,2017-06-01T20:37:13.258852Z,2017-06-01T20:37:13.258852Z -5ae9056c-a3b0-4b66-a66f-5830523fb6de,Registration,2017-06-19T11:07:26.044592Z,2017-06-19T11:07:26.044592Z -d0cf0d73-d453-4245-9baa-d15577618f9f,Registration,2017-08-11T18:55:10.480754Z,2017-08-11T18:55:10.480754Z -a6074fd8-9671-444c-ad10-4ea5f14a8b8e,Registration,2017-06-01T20:32:20.337554Z,2017-06-01T20:32:20.337554Z -27b6f2e5-0891-4c55-8fd6-51c639c0cd87,Registration,2017-06-22T07:17:01.907612Z,2017-06-22T07:17:01.907612Z -3012b9dc-5d1e-410a-aa39-803191e935ac,Registration,2017-06-22T07:17:41.231408Z,2017-06-22T07:17:41.231408Z +id,name,inserted_at +722cd0f0-df75-44fe-9f3b-eafdde749556,Register Woman,2017-08-21T15:11:03.897195 +00d52675-48de-4453-a0ab-bf5140e91529,Date Picker Field List OQPS,2017-08-21T22:08:08.903432 +7a30381b-072d-43d5-9a8c-d917c18c4ed0,Date Picker Field List OQPS,2017-08-21T23:12:09.212486 +c8db2245-43a5-42bc-bd9a-5637a4e3e3c5,Registration,2017-08-23T00:26:39.476117 +e533dc9b-86ad-4b88-9c68-000485539c84,Registration,2017-08-23T12:40:47.438764 +44849a67-273c-46d8-9f6e-2a1909b2dc64,Date Picker Field List OQPS,2017-08-24T09:25:43.960309 +6debb614-5937-4e72-8d85-79339b183d44,Registration,2017-08-24T13:52:47.079309 +6cf320ce-cb67-4d73-b13f-73d706c25a58,Registration,2017-08-25T04:40:11.724417 +b921ff1d-d8f1-4fa8-8a0d-7bcb89c64900,Registration,2017-08-25T14:21:18.744358 +1bf4c4a2-c74e-433c-861a-12d908ca3b37,Registration,2017-08-27T20:52:06.531792 +a72e6609-c9fc-4929-8316-67cbe7370faa,Registration,2017-08-27T20:52:06.545798 +733c1f60-55e8-40d8-b11e-25e0b60a1bd3,Registration,2017-08-28T01:51:34.108067 +232d1461-97a2-4bf2-a322-bf00bd7631ab,Registration,2017-08-28T02:47:58.986850 +ef936958-063f-4862-9289-ee041154c3c3,Registration,2017-08-29T03:38:54.521605 +47bc1c89-6006-48b1-8e5c-6468f0d7509c,Registration,2017-08-29T03:38:54.656597 +43cf5491-055a-4a8d-839c-10ea888373dd,Registration,2017-08-29T16:40:41.928598 +c5f0eb63-a5d9-4b45-bc30-eb47a25590d0,Registration,2017-08-29T16:40:41.930963 +dfa09ad4-3efd-4a54-a628-25e1ccaded17,Date Picker Field List OQPS,2017-08-29T17:01:45.618568 +13a82b6a-2e82-4e1a-9f17-e57052b622e3,Date Picker Field List OQPS,2017-08-29T17:01:46.310261 +3407042d-4db0-4564-ba4c-f9e64a9fb17f,Date Picker Field List OQPS,2017-08-29T22:34:04.594038 +0bc4799d-ef00-4de8-9061-cca31e1e630e,Registration,2017-08-30T12:31:38.126856 
+5ae9056c-a3b0-4b66-a66f-5830523fb6de,Registration,2017-08-30T20:13:18.057229 +d0cf0d73-d453-4245-9baa-d15577618f9f,Registration,2017-08-31T22:56:41.027155 +a6074fd8-9671-444c-ad10-4ea5f14a8b8e,Registration,2017-09-02T09:39:30.272531 +27b6f2e5-0891-4c55-8fd6-51c639c0cd87,Registration,2017-09-02T20:05:35.451835 +3012b9dc-5d1e-410a-aa39-803191e935ac,Registration,2017-09-02T20:05:35.459547 diff --git a/tests/009b_expected_form_2_data.csv b/tests/009b_expected_form_2_data.csv index 4d8abfa1..bb2350b4 100644 --- a/tests/009b_expected_form_2_data.csv +++ b/tests/009b_expected_form_2_data.csv @@ -1,4 +1,4 @@ -id,name,received_on,server_modified_on -d0cf1846-204b-4d04-819c-f688228c2c9e,Registration Form,2020-05-16T20:04:15.702415Z,2020-05-16T20:04:15.815824Z -db38a72d-dd04-4893-9f2f-5548b8e1fa9f,Registration Form,2020-05-16T20:18:47.035695Z,2020-05-16T20:18:47.140120Z -f34bec9a-0af3-495d-b53f-3d953e3b3d4b,Registration Form,2020-06-01T17:43:04.870657Z,2020-06-01T17:43:05.008484Z +id,name,inserted_at +d0cf1846-204b-4d04-819c-f688228c2c9e,Registration Form,2020-05-16T20:04:16.230195 +db38a72d-dd04-4893-9f2f-5548b8e1fa9f,Registration Form,2020-05-16T20:18:47.823616 +f34bec9a-0af3-495d-b53f-3d953e3b3d4b,Registration Form,2020-06-01T17:43:26.107701 From 81e8a691dd9ff562663a47cd03407dd12c1c1f6b Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Mon, 26 Oct 2020 14:43:32 -0400 Subject: [PATCH 028/257] Update last run checkpoint dates --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 912cf281..9d11b789 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -360,8 +360,8 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_ 'tests/009b_integration_multiple.xlsx' )) assert {r[0]: r[1] for r in runs} == { - 'forms_1': '2012-04-27T10:05:55', - 'forms_2': '2012-04-27T14:23:50' + 'forms_1': '2017-09-02T20:05:35.459547', + 'forms_2': '2020-06-01T17:43:26.107701', } def _check_data(self, writer, expected, table_name): From e64b5674311f0e56b49acf9a7bf62cade46d66be Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Mon, 26 Oct 2020 17:02:59 -0400 Subject: [PATCH 029/257] Ignore since timezone since we assume UTC --- commcare_export/commcare_minilinq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 89a7efd1..0db1bc2b 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -161,4 +161,4 @@ def get_since_date(self, batch): since = last_obj.get(self.since_field) if since: - return parse(since) + return parse(since, ignoretz=True) From c39968393adf023473a1506d05482e656b145c45 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Mon, 26 Oct 2020 17:04:54 -0400 Subject: [PATCH 030/257] Catch not-a-date errors --- commcare_export/commcare_minilinq.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 0db1bc2b..ca8e8dda 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -8,7 +8,7 @@ from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap -from dateutil.parser import parse +from dateutil.parser import ParserError, parse try: from urllib.parse import parse_qs, urlparse @@ -161,4 +161,7 @@ def get_since_date(self, batch): since = last_obj.get(self.since_field) if since: - return 
parse(since, ignoretz=True) + try: + return parse(since, ignoretz=True) # ignoretz since we assume utc, and use naive datetimes everywhere + except ParserError: + return None From 0d43d87074a0af4f6b6f36e5c77ae83059f65d3a Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 30 Oct 2020 13:41:42 +0200 Subject: [PATCH 031/257] add form_url & case_url functions --- README.md | 2 ++ commcare_export/env.py | 23 +++++++++++++++++++++++ tests/test_map_format.py | 9 +++++++++ tests/test_minilinq.py | 12 ++++++++++++ 4 files changed, 46 insertions(+) diff --git a/README.md b/README.md index f3ef96be..118421e1 100644 --- a/README.md +++ b/README.md @@ -394,6 +394,8 @@ List of builtin functions: | json2str | Convert a JSON object to a string | | template | Render a string template (not robust) | template({} on {}, state, date) | | attachment_url | Convert an attachment name into it's download URL | | +| form_url | Output the URL to the form view on CommCare HQ | | +| case_url | Output the URL to the case view on CommCare HQ | | Output Formats -------------- diff --git a/commcare_export/env.py b/commcare_export/env.py index 4f8d675c..0370eedb 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -385,6 +385,27 @@ def attachment_url(val): ) +@unwrap('val') +def form_url(val): + return _doc_url('form_data') + + +@unwrap('val') +def case_url(val): + return _doc_url('case_data') + + +def _doc_url(url_path): + from commcare_export.minilinq import Apply, Reference, Literal + return Apply( + Reference('template'), + Literal('{}/a/{}/reports/'+ url_path + '/{}/'), + Reference('commcarehq_base_url'), + Reference('$.domain'), + Reference('$.id'), + ) + + def template(format_template, *args): args = [unwrap_val(arg) for arg in args] return format_template.format(*args) @@ -448,6 +469,8 @@ def __init__(self, d=None): 'join': join, 'default': default, 'template': template, + 'form_url': form_url, + 'case_url': case_url, 'attachment_url': attachment_url, 'filter_empty': _not_val, 'or': _or, diff --git a/tests/test_map_format.py b/tests/test_map_format.py index c91a541f..b8ba3e96 100644 --- a/tests/test_map_format.py +++ b/tests/test_map_format.py @@ -19,6 +19,15 @@ def test_parse_template_args(self): expected = Apply(Reference('template'), Literal('my name is {}'), Reference('form.question2')) assert parse_template('form.question1', 'template(my name is {}, form.question2)') == expected + def test_parse_template_args_long(self): + expected = Apply( + Reference('template'), + Literal('https://www.commcarehq.org/a/{}/reports/form_data/{}/'), + Reference('$.domain'), + Reference('$.id'), + ) + assert parse_template('form.id', 'template(https://www.commcarehq.org/a/{}/reports/form_data/{}/, $.domain, $.id)') == expected + def test_parse_template_no_template(self): expected = Literal('Error: template function requires the format template: template()') assert parse_template('form.question1', 'template()') == expected diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 6d257c13..8f833c9c 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -128,6 +128,18 @@ def test_attachment_url_repeat(self): ).eval(env)) assert result == expected + def test_form_url(self): + env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv( + {'id': '123', 'domain': 'd1'}) + expected = 'https://www.commcarehq.org/a/d1/reports/form_data/123/' + assert Apply(Reference('form_url'), Reference('id')).eval(env) == expected + + def test_case_url(self): + env = 
BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv(
+            {'id': '123', 'domain': 'd1'})
+        expected = 'https://www.commcarehq.org/a/d1/reports/case_data/123/'
+        assert Apply(Reference('case_url'), Reference('id')).eval(env) == expected
+
     def test_template(self):
         env = BuiltInEnv() | JsonPathEnv({'a': '1', 'b': '2'})
         assert Apply(Reference('template'), Literal('{}.{}'), Reference('a'), Reference('b')).eval(env) == '1.2'

From c9a7c94d8ff9f20d751591de68814f887e44ab6e Mon Sep 17 00:00:00 2001
From: Karen Tracey
Date: Wed, 25 Nov 2020 12:26:09 -0500
Subject: [PATCH 032/257] When writing to MS SQL, use special DATETIME2...

...type for datetime objects. This avoids problems with the restricted
year range of the default datetime type for MS SQL (1753 is the oldest
year allowed for that type).
---
 commcare_export/writers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/commcare_export/writers.py b/commcare_export/writers.py
index fd104755..d46016e1 100644
--- a/commcare_export/writers.py
+++ b/commcare_export/writers.py
@@ -357,7 +357,10 @@ def best_type_for(self, val):
         if isinstance(val, bool):
             return sqlalchemy.Boolean()
         elif isinstance(val, datetime.datetime):
-            return sqlalchemy.DateTime()
+            if self.is_mssql:
+                return sqlalchemy.dialects.mssql.DATETIME2()
+            else:
+                return sqlalchemy.DateTime()
         elif isinstance(val, datetime.date):
             return sqlalchemy.Date()

From 6361d301875843bab2c48e5e151476209d09bb91 Mon Sep 17 00:00:00 2001
From: Karen Tracey
Date: Wed, 25 Nov 2020 15:38:19 -0500
Subject: [PATCH 033/257] Adjust a test to account for different way...

...datetime2 is formatted to text than datetime by MS SQL Server.
---
 tests/test_writers.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_writers.py b/tests/test_writers.py
index b770af3d..befbd84a 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -281,9 +281,8 @@ def test_change_type(self, writer):
             expected['bazzle']['c'] = '0'
         if 'pyodbc' in writer.connection.engine.driver:
             expected['bazzle']['c'] = '0'
-            # couldn't figure out how to make SQL Server convert date to ISO8601
-            # see https://docs.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-2017#date-and-time-styles
-            expected['bazzle']['e'] = 'May 1 2014 11:16AM'
+            # MSSQL includes fractional seconds in returned value.
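# --- editor's note (illustrative, not part of patches 032/033) ---------------
# A minimal sketch of the type choice patch 032 makes: MS SQL's legacy
# DATETIME type cannot hold years before 1753, while DATETIME2 covers
# 0001-9999 and keeps fractional seconds (which is exactly what the adjusted
# assertion below has to account for). The free function name
# best_datetime_type is hypothetical; the real code is a method on the writer.
import sqlalchemy
from sqlalchemy.dialects.mssql import DATETIME2

def best_datetime_type(is_mssql):
    # DATETIME2 for MS SQL, the generic DateTime type everywhere else
    return DATETIME2() if is_mssql else sqlalchemy.DateTime()

assert isinstance(best_datetime_type(True), DATETIME2)
# ------------------------------------------------------------------------------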
+ expected['bazzle']['e'] = '2014-05-01 11:16:45.0000000' for id, row in result.items(): assert id in expected From 91474b36faeae391d30b3139b232008256e823b9 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Thu, 26 Nov 2020 15:32:39 -0500 Subject: [PATCH 034/257] Set mssql nvarchar length to 900 bytes if possible --- commcare_export/writers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index fd104755..e9b56f11 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -375,7 +375,13 @@ def best_type_for(self, val): else: return sqlalchemy.UnicodeText(collation=self.collation) elif self.is_mssql: - return sqlalchemy.NVARCHAR(collation=self.collation) + # MSSQL (pre 2016) doesn't allow indices on columns longer than 900 bytes + # - https://docs.microsoft.com/en-us/sql/t-sql/statements/create-index-transact-sql + # If any of our data is bigger than this, then set the column to NVARCHAR(max) + # `length` here is the size in bytes - https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String.params.length + length_in_bytes = len(val.encode('utf-8')) + column_length_in_bytes = None if length_in_bytes > 900 else 900 + return sqlalchemy.NVARCHAR(length=column_length_in_bytes, collation=self.collation) if self.is_oracle: return sqlalchemy.Unicode(4000, collation=self.collation) else: From 42c844c4cc8565fc5d52572435f39cd4636cb502 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Thu, 26 Nov 2020 16:14:15 -0500 Subject: [PATCH 035/257] Test mssql resizes nvarchar columns correctly --- tests/test_writers.py | 44 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/test_writers.py b/tests/test_writers.py index b770af3d..377abff9 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -317,3 +317,47 @@ def test_explicit_types(self, strict_writer): # a casts strings to ints, b casts ints to text, c default falls back to ints, d default falls back to text assert dict(result['bizzle']) == {'id': 'bizzle', 'a': 1, 'b': '2', 'c': 3, 'd': '7'} assert dict(result['bazzle']) == {'id': 'bazzle', 'a': 4, 'b': '5', 'c': 6, 'd': '8'} + + def test_mssql_nvarchar_length(self, writer): + with writer: + if 'mssql' not in writer.connection.engine.driver: + return + + # Initialize a table with columns where we expect the "some_data" + # column to be of length 900 bytes, and the "big_data" column to be + # of nvarchar(max) + writer.write_table(TableSpec(**{ + 'name': 'mssql_nvarchar_length', + 'headings': ['id', 'some_data', 'big_data'], + 'rows': [ + ['bizzle', (b'\0' * 800).decode('utf-8'), (b'\0' * 901).decode('utf-8')], + ['bazzle', (b'\0' * 500).decode('utf-8'), (b'\0' * 800).decode('utf-8')], + ] + })) + + connection = writer.connection + + result = self._get_column_lengths(connection, 'mssql_nvarchar_length') + assert result['some_data'] == ('some_data', 'nvarchar', 900) + assert result['big_data'] == ('big_data', 'nvarchar', -1) # nvarchar(max) is listed as -1 + + # put bigger data into "some_column" to ensure it is resized properly + writer.write_table(TableSpec(**{ + 'name': 'mssql_nvarchar_length', + 'headings': ['id', 'some_data', 'big_data'], + 'rows': [ + ['sizzle', (b'\0' * 901).decode('utf-8'), (b'\0' * 901).decode('utf-8')], + ] + })) + + result = self._get_column_lengths(connection, 'mssql_nvarchar_length') + assert result['some_data'] == ('some_data', 'nvarchar', -1) + assert result['big_data'] == ('big_data', 'nvarchar', -1) + + 
def _get_column_lengths(connection, table_name): + return { + row['COLUMN_NAME']: row for row in connection.execute( + "SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH " + "FROM INFORMATION_SCHEMA.COLUMNS " + "WHERE TABLE_NAME = {};".format(table_name)) + } From 1e1a8da9f3e108900fe828d8c7a91f16736cd244 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Fri, 27 Nov 2020 09:42:03 -0500 Subject: [PATCH 036/257] Actually run the test... --- tests/test_writers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_writers.py b/tests/test_writers.py index 377abff9..7c855bff 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -320,7 +320,7 @@ def test_explicit_types(self, strict_writer): def test_mssql_nvarchar_length(self, writer): with writer: - if 'mssql' not in writer.connection.engine.driver: + if 'odbc' not in writer.connection.engine.driver: return # Initialize a table with columns where we expect the "some_data" @@ -354,10 +354,10 @@ def test_mssql_nvarchar_length(self, writer): assert result['some_data'] == ('some_data', 'nvarchar', -1) assert result['big_data'] == ('big_data', 'nvarchar', -1) - def _get_column_lengths(connection, table_name): + def _get_column_lengths(self, connection, table_name): return { row['COLUMN_NAME']: row for row in connection.execute( "SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH " "FROM INFORMATION_SCHEMA.COLUMNS " - "WHERE TABLE_NAME = {};".format(table_name)) + "WHERE TABLE_NAME = '{}';".format(table_name)) } From 853fe0f0dea6b21af5c37b8a2cc729b895d48067 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Fri, 27 Nov 2020 10:20:39 -0500 Subject: [PATCH 037/257] Ensure that columns are not downsized --- tests/test_writers.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tests/test_writers.py b/tests/test_writers.py index 7c855bff..1a844a60 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -318,7 +318,7 @@ def test_explicit_types(self, strict_writer): assert dict(result['bizzle']) == {'id': 'bizzle', 'a': 1, 'b': '2', 'c': 3, 'd': '7'} assert dict(result['bazzle']) == {'id': 'bazzle', 'a': 4, 'b': '5', 'c': 6, 'd': '8'} - def test_mssql_nvarchar_length(self, writer): + def test_mssql_nvarchar_length_upsize(self, writer): with writer: if 'odbc' not in writer.connection.engine.driver: return @@ -354,6 +354,34 @@ def test_mssql_nvarchar_length(self, writer): assert result['some_data'] == ('some_data', 'nvarchar', -1) assert result['big_data'] == ('big_data', 'nvarchar', -1) + def test_mssql_nvarchar_length_downsize(self, writer): + with writer: + if 'odbc' not in writer.connection.engine.driver: + return + + # Initialize a table with NVARCHAR(max), and make sure smaller data + # doesn't reduce the size of the column + metadata = sqlalchemy.MetaData() + create_sql = sqlalchemy.schema.CreateTable(sqlalchemy.Table( + 'mssql_nvarchar_length_downsize', + metadata, + sqlalchemy.Column('id', sqlalchemy.NVARCHAR(length=100), primary_key=True), + sqlalchemy.Column('some_data', sqlalchemy.NVARCHAR(length=None)), + )).compile(writer.connection.engine) + metadata.create_all(writer.connection.engine) + + writer.write_table(TableSpec(**{ + 'name': 'mssql_nvarchar_length', + 'headings': ['id', 'some_data'], + 'rows': [ + ['bizzle', (b'\0' * 800).decode('utf-8'), (b'\0' * 800).decode('utf-8')], + ['bazzle', (b'\0' * 500).decode('utf-8'), (b'\0' * 800).decode('utf-8')], + ] + })) + result = self._get_column_lengths(writer.connection, 
'mssql_nvarchar_length_downsize') + assert result['some_data'] == ('some_data', 'nvarchar', -1) + + def _get_column_lengths(self, connection, table_name): return { row['COLUMN_NAME']: row for row in connection.execute( From 180a7bd120ef6f0f8f09410dae0dd92e7ef0224c Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 11 Dec 2020 16:34:42 +0200 Subject: [PATCH 038/257] don't evaluate the generator --- commcare_export/minilinq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 66aa79f2..9db8ae52 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -434,7 +434,7 @@ def eval(self, env): env.emit_table(TableSpec( name=self.table, headings=[heading.eval(env) for heading in self.headings], - rows=list(map(self.coerce_row, rows)), + rows=map(self.coerce_row, rows), data_types=[lit.v for lit in self.data_types] )) From e09fb75c2ad7d7be4f8e09bb510b9b191e37f581 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 11 Dec 2020 16:38:40 +0200 Subject: [PATCH 039/257] remove rows from TableSpec equality and json --- commcare_export/specs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/commcare_export/specs.py b/commcare_export/specs.py index d5b1e0a6..a121e36e 100644 --- a/commcare_export/specs.py +++ b/commcare_export/specs.py @@ -13,7 +13,6 @@ def __eq__(self, other): isinstance(other, TableSpec) and other.name == self.name and other.headings == self.headings - and other.rows == self.rows and other.data_types == self.data_types ) @@ -21,6 +20,5 @@ def toJSON(self): return { 'name': self.name, 'headings': self.headings, - 'rows': self.rows, 'data_types': self.data_types, } From 27751f95b741b77affd5f757a58c17633b20d5a1 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 11 Dec 2020 16:59:19 +0200 Subject: [PATCH 040/257] update memory writer --- commcare_export/writers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index e9b56f11..a13ba6eb 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -210,9 +210,9 @@ def write_table(self, table): else: assert self.tables[table.name].headings == list(table.headings) - self.tables[table.name].rows.extend( + self.tables[table.name].rows = list(self.tables[table.name].rows) + [ [to_jvalue(v) for v in row] for row in table.rows - ) + ] class StreamingMarkdownTableWriter(TableWriter): From 067c013f2453f9b9ec3b1488509e631dd18c10a3 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 14 Dec 2020 09:18:03 +0200 Subject: [PATCH 041/257] lint --- tests/test_cli.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 6835cc85..51303763 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -30,6 +30,7 @@ DEFAULT_BATCH_SIZE = 200 + def make_args(project='test', username='test', password='test', **kwargs): kwargs['project'] = project kwargs['username'] = username @@ -125,6 +126,7 @@ def mock_hq_client(include_parent): ], }) + EXPECTED_MULTIPLE_TABLES_RESULTS = [ { "name": "Forms", @@ -179,6 +181,7 @@ def mock_hq_client(include_parent): } ] + def get_expected_locations_results(include_parent): return [ { @@ -295,6 +298,7 @@ def checkpoint_manager(pg_db_params): cm.create_checkpoint_table() return cm + def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10): args = make_args( query=query, @@ -411,16 +415,19 @@ def _check_checkpoints(self, caplog, expected): ], }) + 
@pytest.fixture(scope='class') def strict_writer(db_params): return SqlTableWriter(db_params['url'], poolclass=sqlalchemy.pool.NullPool, strict_types=True) + @pytest.fixture(scope='class') def all_db_checkpoint_manager(db_params): cm = CheckpointManager(db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool) cm.create_checkpoint_table() return cm + def _pull_mock_data(writer, checkpoint_manager, api_client, query): args = make_args( query=query, @@ -438,6 +445,7 @@ def _pull_mock_data(writer, checkpoint_manager, api_client, query): with api_client_patch, writer_patch, checkpoint_patch: return main_with_args(args) + @pytest.mark.dbtest class TestCLIWithDatabaseErrors(object): def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capfd): @@ -461,6 +469,7 @@ def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capf ], }) + @pytest.mark.dbtest class TestCLIWithDataTypes(object): def test_cli_data_types_add_columns(self, strict_writer, all_db_checkpoint_manager, capfd): From 6822e17bb210414f1ecee50d79d14680c9f2155c Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 14 Dec 2020 15:19:57 +0200 Subject: [PATCH 042/257] reformat mock client --- commcare_export/commcare_hq_client.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 0761532b..cca2c05b 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -183,7 +183,7 @@ class MockCommCareHqClient(object): Since dictionaries are not hashable, the mapping is written as a pair of tuples, handled appropriately - internallly. + internally. MockCommCareHqClient({ 'forms': [ @@ -197,16 +197,21 @@ class MockCommCareHqClient(object): }) """ def __init__(self, mock_data): - self.mock_data = dict([(resource, dict([(urlencode(OrderedDict(sorted(params.items()))), result) for params, result in resource_results])) - for resource, resource_results in mock_data.items()]) + self.mock_data = { + resource: { + _params_to_url(params): result + for params, result in resource_results + } + for resource, resource_results in mock_data.items() + } def iterate(self, resource, paginator, params=None, checkpoint_manager=None): logger.debug('Mock client call to resource "%s" with params "%s"', resource, params) - return self.mock_data[resource][urlencode(OrderedDict(sorted(params.items())))] + return self.mock_data[resource][_params_to_url(params)] def get(self, resource): logger.debug('Mock client call to get resource "%s"', resource) - objects = self.mock_data[resource][urlencode(OrderedDict([('get', True)]))] + objects = self.mock_data[resource][_params_to_url({'get': True})] if objects: return {'meta': {'limit': len(objects), 'next': None, 'offset': 0, 'previous': None, @@ -216,6 +221,10 @@ def get(self, resource): return None +def _params_to_url(params): + return urlencode(OrderedDict(sorted(params.items()))) + + class ApiKeyAuth(AuthBase): def __init__(self, username, apikey): self.username = username From 715b1d09fb4d4bbe06f2df9f2fcdf55fc67f4451 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 14 Dec 2020 15:20:11 +0200 Subject: [PATCH 043/257] test emit generator --- tests/test_minilinq.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index ab8c5c34..a1bb335a 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ 
-1,4 +1,6 @@ # -*- coding: utf-8 -*- +import inspect +import types import unittest from itertools import * @@ -227,18 +229,39 @@ def test_flatmap(self): except LazinessException: pass - def test_emit(self): - writer = JValueTableWriter() - env = BuiltInEnv() | JsonPathEnv({'foo': {'baz': 3, 'bar': True, 'boo': None}}) | EmitterEnv(writer) + def _setup_emit_test(self, emitter_env): + env = BuiltInEnv() | JsonPathEnv({'foo': {'baz': 3, 'bar': True, 'boo': None}}) | emitter_env Emit(table='Foo', headings=[Literal('foo')], source=List([ - List([ Reference('foo.baz'), Reference('foo.bar'), Reference('foo.foo'), Reference('foo.boo') ]) + List([Reference('foo.baz'), Reference('foo.bar'), Reference('foo.foo'), Reference('foo.boo')]) ]), missing_value='---').eval(env) + def test_emit(self): + writer = JValueTableWriter() + self._setup_emit_test(EmitterEnv(writer)) assert list(writer.tables['Foo'].rows) == [[3, True, '---', None]] + def test_emit_generator(self): + class TestWriter(JValueTableWriter): + def write_table(self, table): + self.tables[table.name] = table + + writer = TestWriter() + self._setup_emit_test(EmitterEnv(writer)) + assert isinstance(writer.tables['Foo'].rows, (map, filter, types.GeneratorType)) + + def test_emit_env_generator(self): + class TestEmitterEnv(EmitterEnv): + def emit_table(self, table_spec): + self.table = table_spec + + env = TestEmitterEnv(JValueTableWriter()) + self._setup_emit_test(env) + assert isinstance(env.table.rows, (map, filter, types.GeneratorType)) + + def test_emit_multi_same_query(self): """Test that we can emit multiple tables from the same set of source data. This is useful if you need to generate multiple tables from the same datasource. @@ -323,7 +346,6 @@ def test_emit_mutli_different_query(self): # evaluate result list(result) - print(writer.tables) assert writer.tables['t1'].rows == [['1'], ['2']] assert writer.tables['t2'].rows == [['1', 3], ['1', 4], ['2', 5], ['2', 6]] From 3af126d8af6003920872367ba0a1eb2e5bd048d2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 15 Dec 2020 11:56:28 +0200 Subject: [PATCH 044/257] switch to using case as datasource The case query filtering is much simpler to match --- tests/013_ConflictingTypes.xlsx | Bin 7867 -> 8256 bytes tests/014_ExportWithDataTypes.xlsx | Bin 7952 -> 8366 bytes tests/test_cli.py | 10 +++++----- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/013_ConflictingTypes.xlsx b/tests/013_ConflictingTypes.xlsx index a54a4b48e5f5499476f4ce5d90b51077d52e367b..5fcd1937039dd07adb0c6952caa75f4ecb9944dd 100644 GIT binary patch literal 8256 zcmeHMc{o&U|3`~`sVGZGl7q61ERjh>2xA?)N+a1u_HAU#$XXfMmm#~>%w}Hub~1G-tXO%NfhUB zNBIWRnC8Bqj#2kt>6B4U!XLF+3r|rCS4HNTibO{5u={4FGi@wz)!D;4`MZoX&M;fI z=Y4$buzKo=6J;rv_$PDSW@F2XUv9iOS%tohzwI01Xkm4LbbEF^EO&j>c9a-pgx&~= zFV0<1MSD(KQmM*)_yT|a===ttDh8=NqSLLSZLXXXa6z;joy`oW#*{gk`HP>O=-}ph$7D@rrq^7aS3#uF^cUzu{f_(ko zfMF)z;YEH1(#8H35@|2$<>(Nl-{yo9XQ|r|IXLQg-qMci=>d4fBMLlAPFvy=)#LWz ztncnQSmpioe>dIeGU^Ke~(hHd|jRkCE=bqUDnO|m_2!sHo=S7S)+nPF9aohtH_pLW+Dat}uOirP+G$QET!vr0AhX zGoov!Uu3c9ZHJe?th5lK^^eJ3stcxbeeqxAC$Dke6H0_ThdA>@5;4$IQhI0O`0gxt zu{xJFOcczqAj^-4nexTgjMxfUhfaGHsH*5rlES|ZD_#2be!XiY7WP2#O{9^Vx2s~+ zL@ZGJ{E)U9_8wSssH8~T2J6ft!R+lZ#w2VL8&DY6Z0vURBwdbnTHt*7@e7j;bMF@a zAo~Xp=StataP@h(B+{E~?dU)c4jDRYU)9Arq7*J)ps&Z_Q_z+%MhjGZj{ zAmd}h7xt=?^6MxcrT|RAr)bQ~Whc}k2)=AjKDzB`Ue~*Q_~MqVfCOl(^4`BTUA6DF zbkyMw7f&)aJIizUu}298yCr8!Ro;$$-$T8$V{uBz#==}v7P_`IGH zY%Xu3DBy`y>jB+Mwh!$O982-Z&pVV96mAFpZs0I}A2_aV-VO-Y{b93cY=KM`XGM(_ 
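# --- editor's note (illustrative, not part of patch 044) ---------------------
# The mock clients in patches 042 and 045 key their canned responses by
# URL-encoded, sorted query parameters. A self-contained sketch of that
# matching helper, mirroring _params_to_url from patch 042:
from collections import OrderedDict
from urllib.parse import urlencode

def _params_to_url(params):
    # sort first so semantically equal param dicts map to the same key
    return urlencode(OrderedDict(sorted(params.items())))

assert _params_to_url({'order_by': 'server_date_modified', 'limit': 200}) == \
    'limit=200&order_by=server_date_modified'
# ------------------------------------------------------------------------------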
[binary patch data elided]

diff --git a/tests/014_ExportWithDataTypes.xlsx b/tests/014_ExportWithDataTypes.xlsx
index 67deff409c3fd355432dfda7553c235d0bf66f16..33fdd8c93957b603db978a78461403c82cfc1a89 100644
GIT binary patch
[binary patch data elided]
Date: Tue, 15 Dec 2020 11:57:10 +0200
Subject: [PATCH 045/257] test cli with checkpointing and sql error

---
 tests/test_cli.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index c759d92e..606c055d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -12,7 +12,7 @@
 from commcare_export.checkpoint import CheckpointManager
 from commcare_export.cli import CLI_ARGS, EXIT_STATUS_ERROR, main_with_args
-from commcare_export.commcare_hq_client import MockCommCareHqClient
+from commcare_export.commcare_hq_client import MockCommCareHqClient, CommCareHqClient, _params_to_url
 from commcare_export.specs import TableSpec
 from commcare_export.writers import JValueTableWriter, SqlTableWriter
@@ -416,6 +416,55 @@ def _check_checkpoints(self, caplog, expected):
         })
 
 
+class MockCheckpointingClient(CommCareHqClient):
+    """Mock client that uses the main client for iteration but overrides the data request
+    to return mocked data"""
+    def __init__(self, mock_data):
+        self.mock_data = {
+            resource: {
+                _params_to_url(params): result
+                for params, result in resource_results
+            }
+            for resource, resource_results in mock_data.items()
+        }
+        self.totals = {
+            resource: sum(len(results) for _, results in resource_results)
+            for resource, resource_results in mock_data.items()
+        }
+
+    def get(self, resource, params=None):
+        mock_requests = self.mock_data[resource]
+        key = _params_to_url(params)
+        objects = mock_requests.pop(key)
+        if objects:
+            return {'meta': {'limit': len(objects), 'next': bool(mock_requests),
+                             'offset': 0, 'previous': None,
+                             'total_count': self.totals[resource]},
+                    'objects': objects}
+        else:
+            return None
+
+
+CONFLICTING_TYPES_CHECKPOINT_CLIENT = MockCheckpointingClient({
+    'case': [
+        (
+            {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified'},
+            [
+                {'id': 1, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-23T05:13:01.000000Z'},
+                {'id': 2, 'name': 'f2', 'count': 123, 'server_date_modified': '2012-04-24T05:13:01.000000Z'}
+            ]
+        ),
+        (
+            {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified', 'server_date_modified_start': '2012-04-24T05:13:01'},
+            [
+                {'id': 3, 'name': 'n1',
'count': 10, 'server_date_modified': '2012-04-25T05:13:01.000000Z'}, + {'id': 4, 'name': 'f2', 'count': 'abc', 'server_date_modified': '2012-04-26T05:13:01.000000Z'} + ] + ), + ], +}) + + @pytest.fixture(scope='class') def strict_writer(db_params): return SqlTableWriter(db_params['url'], poolclass=sqlalchemy.pool.NullPool, strict_types=True) @@ -455,6 +504,25 @@ def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capf expected_re = re.compile('Stopping because of database error') assert re.search(expected_re, out) + def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_manager, capfd): + _pull_mock_data( + strict_writer, all_db_checkpoint_manager, + CONFLICTING_TYPES_CHECKPOINT_CLIENT, 'tests/013_ConflictingTypes.xlsx' + ) + out, err = capfd.readouterr() + + expected_re = re.compile('Stopping because of database error') + assert re.search(expected_re, out) + + # expect checkpoint to have the date from the first batch and not the 2nd + runs = list(strict_writer.engine.execute( + 'SELECT table_name, since_param from commcare_export_runs where query_file_name = %s', + 'tests/013_ConflictingTypes.xlsx' + )) + assert {r[0]: r[1] for r in runs} == { + 'Case': '2012-04-24T05:13:01', + } + # An input where missing fields should be added due to declared data types. DATA_TYPES_CLIENT = MockCommCareHqClient({ From 7069c6ade4e958351f922f44778c1fe13531df3c Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 17 Dec 2020 15:09:46 +0200 Subject: [PATCH 046/257] re-initialize mock client --- tests/test_cli.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 606c055d..5d693101 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -418,7 +418,9 @@ def _check_checkpoints(self, caplog, expected): class MockCheckpointingClient(CommCareHqClient): """Mock client that uses the main client for iteration but overrides the data request - to return mocked data""" + to return mocked data. 
+ + Note this client needs to be re-initialized after use.""" def __init__(self, mock_data): self.mock_data = { resource: { @@ -445,24 +447,25 @@ def get(self, resource, params=None): return None -CONFLICTING_TYPES_CHECKPOINT_CLIENT = MockCheckpointingClient({ - 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified'}, - [ - {'id': 1, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-23T05:13:01.000000Z'}, - {'id': 2, 'name': 'f2', 'count': 123, 'server_date_modified': '2012-04-24T05:13:01.000000Z'} - ] - ), - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified', 'server_date_modified_start': '2012-04-24T05:13:01'}, - [ - {'id': 3, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-25T05:13:01.000000Z'}, - {'id': 4, 'name': 'f2', 'count': 'abc', 'server_date_modified': '2012-04-26T05:13:01.000000Z'} - ] - ), - ], -}) +def get_conflicting_types_checkpoint_client(): + return MockCheckpointingClient({ + 'case': [ + ( + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified'}, + [ + {'id': 1, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-23T05:13:01.000000Z'}, + {'id': 2, 'name': 'f2', 'count': 123, 'server_date_modified': '2012-04-24T05:13:01.000000Z'} + ] + ), + ( + {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified', 'server_date_modified_start': '2012-04-24T05:13:01'}, + [ + {'id': 3, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-25T05:13:01.000000Z'}, + {'id': 4, 'name': 'f2', 'count': 'abc', 'server_date_modified': '2012-04-26T05:13:01.000000Z'} + ] + ), + ], + }) @pytest.fixture(scope='class') @@ -507,7 +510,7 @@ def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capf def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_manager, capfd): _pull_mock_data( strict_writer, all_db_checkpoint_manager, - CONFLICTING_TYPES_CHECKPOINT_CLIENT, 'tests/013_ConflictingTypes.xlsx' + get_conflicting_types_checkpoint_client(), 'tests/013_ConflictingTypes.xlsx' ) out, err = capfd.readouterr() From 1271685106dd9b951470c6dfa17a15d457f01f7c Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 11 Jan 2021 13:31:26 +0200 Subject: [PATCH 047/257] dbapi agnostic query --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 5d693101..8718f8cb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -519,8 +519,8 @@ def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_ma # expect checkpoint to have the date from the first batch and not the 2nd runs = list(strict_writer.engine.execute( - 'SELECT table_name, since_param from commcare_export_runs where query_file_name = %s', - 'tests/013_ConflictingTypes.xlsx' + sqlalchemy.text('SELECT table_name, since_param from commcare_export_runs where query_file_name = :fn'), + fn='tests/013_ConflictingTypes.xlsx' )) assert {r[0]: r[1] for r in runs} == { 'Case': '2012-04-24T05:13:01', From 7e405e565c412ea0e5e31e2408bf23b26c17e7b8 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 11 Jan 2021 13:31:36 +0200 Subject: [PATCH 048/257] remove deprecated param --- tests/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 8718f8cb..23b82c7f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -546,8 +546,8 @@ class TestCLIWithDataTypes(object): def test_cli_data_types_add_columns(self, strict_writer, all_db_checkpoint_manager, capfd): 
_pull_mock_data(strict_writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, 'tests/014_ExportWithDataTypes.xlsx') - metadata = sqlalchemy.schema.MetaData(bind=strict_writer.engine, - reflect=True) + metadata = sqlalchemy.schema.MetaData(bind=strict_writer.engine) + metadata.reflect() cols = metadata.tables['forms'].c assert sorted([c.name for c in cols]) == sorted([u'id', u'a_bool', u'an_int', u'a_date', u'a_datetime', u'a_text']) From 972fd1049eb7cad418f3ee730ae4461e236e131f Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 22 Jan 2021 16:38:24 +0200 Subject: [PATCH 049/257] add data_source column to checkpoint --- commcare_export/checkpoint.py | 25 +++++--- commcare_export/excel_query.py | 16 +++-- examples/demo-deliveries.json | 3 + examples/demo-pregnancy-cases-with-forms.json | 6 ++ examples/demo-pregnancy-cases.json | 3 + examples/demo-registrations.json | 3 + .../a56c82a8d02e_add_detail_to_checkpoint.py | 27 ++++++++ tests/test_checkpointmanager.py | 20 +++--- tests/test_excel_query.py | 63 +++++++++++++------ 9 files changed, 122 insertions(+), 44 deletions(-) create mode 100644 migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 6d56afe8..ecebacce 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -36,6 +36,7 @@ class Checkpoint(Base): since_param = Column(String) time_of_run = Column(String) final = Column(Boolean) + data_source = Column(String) def __repr__(self): return ( @@ -49,8 +50,9 @@ def __repr__(self): "commcare={r.commcare}, " "since_param={r.since_param}, " "time_of_run={r.time_of_run}, " - "final={r.final})>".format(r=self) - ) + "final={r.final}), " + "data_source={r.data_source}>" + ).format(r=self) @contextmanager @@ -71,7 +73,8 @@ class CheckpointManager(SqlMixin): table_name = 'commcare_export_runs' migrations_repository = os.path.join(repo_root, 'migrations') - def __init__(self, db_url, query, query_md5, project, commcare, key=None, table_names=None, poolclass=None, engine=None): + def __init__(self, db_url, query, query_md5, project, commcare, + key=None, table_names=None, poolclass=None, engine=None, data_source=None): super(CheckpointManager, self).__init__(db_url, poolclass=poolclass, engine=engine) self.query = query self.query_md5 = query_md5 @@ -80,11 +83,12 @@ def __init__(self, db_url, query, query_md5, project, commcare, key=None, table_ self.key = key self.Session = sessionmaker(self.engine, expire_on_commit=False) self.table_names = table_names + self.data_source = data_source - def for_tables(self, table_names): + def for_dataset(self, data_source, table_names): return CheckpointManager( self.db_url, self.query, self.query_md5, self.project, self.commcare, self.key, - engine=self.engine, table_names=table_names + engine=self.engine, table_names=table_names, data_source=data_source ) def set_checkpoint(self, checkpoint_time, is_final=False): @@ -94,8 +98,9 @@ def set_checkpoint(self, checkpoint_time, is_final=False): def _set_checkpoint(self, checkpoint_time, final, time_of_run=None): logger.info( - 'Setting %s checkpoint for tables %s: %s', + 'Setting %s checkpoint: data_source: %s, tables %s: checkpoint: %s', 'final' if final else 'batch', + self.data_source, ', '.join(self.table_names), checkpoint_time ) @@ -121,7 +126,8 @@ def _set_checkpoint(self, checkpoint_time, final, time_of_run=None): commcare=self.commcare, since_param=since_param, time_of_run=time_of_run or datetime.datetime.utcnow().isoformat(), - 
final=final,
+            data_source=self.data_source
         )
         session.add(checkpoint)
         created.append(checkpoint)
@@ -317,16 +323,17 @@ def get_since(self, checkpoint_manager):
         since = checkpoint_manager.get_time_of_last_checkpoint()
         return dateutil.parser.parse(since) if since else None
 
-    def get_checkpoint_manager(self, table_names):
+    def get_checkpoint_manager(self, data_source, table_names):
         """This gets called before each table is exported and set in the `env`.
         It is then passed to the API client and used to set the checkpoints.
 
+        :param data_source: Data source for this checkpoint, e.g. 'form'
         :param table_names: List of table names being exported to. This is a list
         since multiple tables can be processed by a since API query.
         """
         manager = None
         if self.base_checkpoint_manager:
-            manager = self.base_checkpoint_manager.for_tables(table_names)
+            manager = self.base_checkpoint_manager.for_dataset(data_source, table_names)
 
         since = self.get_since(manager)
diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py
index 4a4611e9..c8c213f5 100644
--- a/commcare_export/excel_query.py
+++ b/commcare_export/excel_query.py
@@ -339,13 +339,14 @@ def parse_sheet(worksheet, mappings=None, column_enforcer=None):
         body,
         root_doc_expr,
         data_types,
+        data_source,
     )
 
 
-class SheetParts(namedtuple('SheetParts', 'name headings source body root_expr data_types')):
-    def __new__(cls, name, headings, source, body, root_expr=None, data_types=None):
+class SheetParts(namedtuple('SheetParts', 'name headings source body root_expr data_types data_source')):
+    def __new__(cls, name, headings, source, body, root_expr=None, data_types=None, data_source=None):
         data_types = data_types or []
-        return super(SheetParts, cls).__new__(cls, name, headings, source, body, root_expr, data_types)
+        return super(SheetParts, cls).__new__(cls, name, headings, source, body, root_expr, data_types, data_source)
 
     @property
     def columns(self):
@@ -448,7 +449,10 @@ def get_multi_emit_query(source, sheets, missing_value):
     )
 
     table_names = [sheet.name for sheet in sheets]
-    return Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal(table_names)), multi_query)
+    data_source = sheets[0].data_source  # sheets will all have the same data source
+    return Bind('checkpoint_manager', Apply(
+        Reference('get_checkpoint_manager'), Literal(data_source), Literal(table_names)
+    ), multi_query)
 
 
 def get_single_emit_query(sheet, missing_value):
@@ -475,7 +479,9 @@ def _get_source(source, root_expr):
         missing_value=missing_value,
         data_types=sheet.data_types,
     )
-    return Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal([sheet.name])), emit)
+    return Bind('checkpoint_manager', Apply(
+        Reference('get_checkpoint_manager'), Literal(sheet.data_source), Literal([sheet.name])
+    ), emit)
 
 
 def check_field_length(parsed_sheets, max_column_length):
diff --git a/examples/demo-deliveries.json b/examples/demo-deliveries.json
index 77e15078..5ac64320 100644
--- a/examples/demo-deliveries.json
+++ b/examples/demo-deliveries.json
@@ -7,6 +7,9 @@
         "Ref": "get_checkpoint_manager"
       },
       "args": [
+        {
+          "Lit": "form"
+        },
        {
          "Lit": [
            "Deliveries",
diff --git a/examples/demo-pregnancy-cases-with-forms.json b/examples/demo-pregnancy-cases-with-forms.json
index 089e0760..62069f51 100644
--- a/examples/demo-pregnancy-cases-with-forms.json
+++ b/examples/demo-pregnancy-cases-with-forms.json
@@ -9,6 +9,9 @@
             "Ref": "get_checkpoint_manager"
           },
           "args": [
+            {
+              "Lit": "case"
+            },
            {
              "Lit": [
                "Pregnant Mother Cases"
@@ -112,6 +115,9 @@
"Ref": "get_checkpoint_manager" }, "args": [ + { + "Lit": "case" + }, { "Lit": [ "CaseToForm" diff --git a/examples/demo-pregnancy-cases.json b/examples/demo-pregnancy-cases.json index e65144eb..68bce7b5 100644 --- a/examples/demo-pregnancy-cases.json +++ b/examples/demo-pregnancy-cases.json @@ -7,6 +7,9 @@ "Ref": "get_checkpoint_manager" }, "args": [ + { + "Lit": "case" + }, { "Lit": [ "Pregnant Mother Cases", diff --git a/examples/demo-registrations.json b/examples/demo-registrations.json index 1c9742fb..01511912 100644 --- a/examples/demo-registrations.json +++ b/examples/demo-registrations.json @@ -7,6 +7,9 @@ "Ref": "get_checkpoint_manager" }, "args": [ + { + "Lit": "form" + }, { "Lit": [ "Registrations" diff --git a/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py b/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py new file mode 100644 index 00000000..99ddf2bd --- /dev/null +++ b/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py @@ -0,0 +1,27 @@ +"""Add detail to checkpoint + +Revision ID: a56c82a8d02e +Revises: f4fd4c80f40a +Create Date: 2021-01-22 16:35:07.063082 + +""" +from alembic import op +import sqlalchemy as sa + + +revision = 'a56c82a8d02e' +down_revision = 'f4fd4c80f40a' +branch_labels = None +depends_on = None + + +def upgrade(): + url = op.get_bind().engine.url + collation = 'utf8_bin' if 'mysql' in url.drivername else None + op.add_column( + 'commcare_export_runs', + sa.Column('data_source', sa.Unicode(255, collation=collation)) + ) + +def downgrade(): + op.drop_column('commcare_export_runs', 'data_source') diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index 91dbb843..f5253376 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -39,7 +39,7 @@ def test_checkpoint_table_exists(self, manager): def test_get_time_of_last_checkpoint(self, manager): manager.create_checkpoint_table() - manager = manager.for_tables(['t1']) + manager = manager.for_dataset('form', ['t1']) manager.set_checkpoint(datetime.datetime.utcnow()) second_run = datetime.datetime.utcnow() manager.set_checkpoint(second_run) @@ -61,7 +61,7 @@ def test_get_last_checkpoint_no_args(self, manager): time_of_run=datetime.datetime.utcnow().isoformat(), final=True )) - manager = manager.for_tables(['t1', 't2']) + manager = manager.for_dataset('form', ['t1', 't2']) checkpoint = manager.get_last_checkpoint() assert checkpoint.since_param == since_param assert checkpoint.project == manager.project @@ -95,7 +95,7 @@ def test_get_last_checkpoint_no_table(self, manager): time_of_run=datetime.datetime.utcnow().isoformat(), final=True )) - manager = manager.for_tables(['t1', 't2']) + manager = manager.for_dataset('form', ['t1', 't2']) checkpoint = manager.get_last_checkpoint() assert checkpoint.since_param == since_param assert checkpoint.table_name in manager.table_names @@ -105,7 +105,7 @@ def test_get_last_checkpoint_no_table(self, manager): def test_clean_on_final_run(self, manager): manager.create_checkpoint_table() - manager = manager.for_tables(['t1']) + manager = manager.for_dataset('form', ['t1']) manager.set_checkpoint(datetime.datetime.utcnow()) manager.set_checkpoint(datetime.datetime.utcnow()) @@ -119,7 +119,7 @@ def _get_non_final_rows_count(): def test_get_time_of_last_checkpoint_with_key(self, manager): manager.create_checkpoint_table() - manager = manager.for_tables(['t1']) + manager = manager.for_dataset('form', ['t1']) manager.key = 'my key' last_run_time = datetime.datetime.utcnow() 
manager.set_checkpoint(last_run_time) @@ -132,13 +132,13 @@ def test_multiple_tables(self, manager): manager.create_checkpoint_table() t1 = uuid.uuid4().hex t2 = uuid.uuid4().hex - manager = manager.for_tables([t1, t2]) + manager = manager.for_dataset('form', [t1, t2]) last_run_time = datetime.datetime.utcnow() manager.set_checkpoint(last_run_time) - assert manager.for_tables([t1]).get_time_of_last_checkpoint() == last_run_time.isoformat() - assert manager.for_tables([t2]).get_time_of_last_checkpoint() == last_run_time.isoformat() - assert manager.for_tables(['t3']).get_last_checkpoint() is None + assert manager.for_dataset('form', [t1]).get_time_of_last_checkpoint() == last_run_time.isoformat() + assert manager.for_dataset('form', [t2]).get_time_of_last_checkpoint() == last_run_time.isoformat() + assert manager.for_dataset('form', ['t3']).get_last_checkpoint() is None checkpoints = manager.list_checkpoints() assert len(checkpoints) == 2 @@ -146,7 +146,7 @@ def test_multiple_tables(self, manager): def test_get_latest_checkpoints(self, manager): manager.create_checkpoint_table() - manager = manager.for_tables(['t1', 't2']) + manager = manager.for_dataset('form', ['t1', 't2']) manager.set_checkpoint(datetime.datetime.utcnow()) manager.query_md5 = '456' diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index fcab6592..d9e05d84 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -87,7 +87,11 @@ def test_parse_sheet(self): test_cases = [ ('001_JustDataSource.xlsx', SheetParts( - name='Forms', headings=[], source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), body=None), + name='Forms', + headings=[], + source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + body=None, + data_source="form"), ), #('001a_JustDataSource_LibreOffice.xlsx', Emit(table='Forms', headings=[], source=Apply(Reference("api_data"), Literal("form")))), @@ -104,7 +108,8 @@ def test_parse_sheet(self): 'type': 'intake', }) ), - body=None + body=None, + data_source="form" )), ('003_DataSourceAndEmitColumns.xlsx', @@ -134,7 +139,8 @@ def test_parse_sheet(self): Literal('Error: both substr arguments must be non-negative integers: substr(a, b)'), Literal('Error: both substr arguments must be non-negative integers: substr(-1, 10)'), Literal('Error: both substr arguments must be non-negative integers: substr(3, -4)') - ]) + ]), + data_source="form" )), ('005_DataSourcePath.xlsx', @@ -143,7 +149,8 @@ def test_parse_sheet(self): headings = [], source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), body=None, - root_expr=Reference('form.delivery_information.child_questions.[*]') + root_expr=Reference('form.delivery_information.child_questions.[*]'), + data_source="form" )), ('006_IncludeReferencedItems.xlsx', @@ -157,12 +164,17 @@ def test_parse_sheet(self): Literal(None), Literal(['foo', 'bar', 'bizzle']) ), - body=None + body=None, + data_source="form" )), ('010_JustDataSourceTableName.xlsx', SheetParts( - name='my_table', headings=[], source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), body=None), - ), + name='my_table', + headings=[], + source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + body=None, + data_source="form" + )), ] for filename, minilinq in test_cases: @@ -182,8 +194,18 @@ def test_parse_workbook(self): test_cases = [ ('004_TwoDataSources.xlsx', [ - SheetParts(name='Forms', headings=[], 
source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), body=None), - SheetParts(name='Cases', headings=[], source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), body=None) + SheetParts( + name='Forms', + headings=[], + source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + body=None, + data_source="form"), + SheetParts( + name='Cases', + headings=[], + source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), + body=None, + data_source="case") ]), ('007_Mappings.xlsx', [ @@ -192,6 +214,7 @@ def test_parse_workbook(self): headings=[Literal('Form Type')], source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), body=List([compile_mapped_field(field_mappings, Reference("type"))]), + data_source="form" ) ]), @@ -221,7 +244,7 @@ def test_compile_mapped_field(self): assert list(expression.eval(env)) == [] def test_get_queries_from_excel(self): - minilinq = Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal(["Forms"])), + minilinq = Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), Emit( table='Forms', missing_value='---', @@ -259,7 +282,7 @@ def test_get_queries_from_excel(self): def test_alternate_source_fields(self): minilinq = List([ # First sheet uses a CSV column and also tests combining "Map Via" - Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal(["Forms"])), + Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), Emit( table='Forms', missing_value='---', headings =[ @@ -280,7 +303,7 @@ def test_alternate_source_fields(self): ), # Second sheet uses multiple alternate source field columns (listed out of order) - Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal(["Forms1"])), + Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms1"])), Emit( table='Forms1', missing_value='---', headings=[ @@ -302,7 +325,7 @@ def test_alternate_source_fields(self): self._compare_minilinq_to_compiled(minilinq, '011_AlternateSourceFields.xlsx') def test_columns_with_data_types(self): - minilinq = Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal(["Forms"])), + minilinq = Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), Emit( table='Forms', missing_value='---', @@ -336,7 +359,7 @@ def test_columns_with_data_types(self): def test_multi_emit(self): minilinq = List([ - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal(["Forms", "Cases"])), + Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms", "Cases"])), Filter( predicate=Apply( Reference("filter_empty"), @@ -374,7 +397,7 @@ def test_multi_emit(self): ), Bind( 'checkpoint_manager', - Apply(Reference('get_checkpoint_manager'), Literal(["Other cases"])), + Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), Emit( table="Other cases", headings=[Literal("id")], @@ -393,7 +416,7 @@ def test_multi_emit(self): def test_multi_emit_no_combine(self): minilinq = List([ - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal(["Forms"])), + Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), Emit( 
table="Forms", headings=[Literal("id"), Literal("name")], @@ -407,7 +430,7 @@ def test_multi_emit_no_combine(self): ) ) ), - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal(["Cases"])), + Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Cases"])), Emit( table="Cases", headings=[Literal("case_id")], @@ -423,7 +446,7 @@ def test_multi_emit_no_combine(self): ) ) ), - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal(["Other cases"])), + Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), Emit( table="Other cases", headings=[Literal("id")], @@ -442,7 +465,7 @@ def test_multi_emit_no_combine(self): def test_multi_emit_with_organization(self): minilinq = List([ - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal(["Forms", "Cases"])), + Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms", "Cases"])), Filter( predicate=Apply( Reference("filter_empty"), @@ -482,7 +505,7 @@ def test_multi_emit_with_organization(self): ), Bind( 'checkpoint_manager', - Apply(Reference('get_checkpoint_manager'), Literal(["Other cases"])), + Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), Emit( table="Other cases", headings=[Literal("id"), Literal("commcare_userid")], From 114040a84333151d67b250cd1eab4df1173c85a2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 22 Jan 2021 16:59:19 +0200 Subject: [PATCH 050/257] add last_doc_id to checkpoint --- commcare_export/checkpoint.py | 21 +++++++++++-------- commcare_export/commcare_hq_client.py | 6 +++++- .../a56c82a8d02e_add_detail_to_checkpoint.py | 5 +++++ tests/test_checkpointmanager.py | 10 +++++---- tests/test_cli.py | 16 +++++++------- 5 files changed, 36 insertions(+), 22 deletions(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index ecebacce..cac536a3 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -37,6 +37,7 @@ class Checkpoint(Base): time_of_run = Column(String) final = Column(Boolean) data_source = Column(String) + last_doc_id = Column(String) def __repr__(self): return ( @@ -51,7 +52,8 @@ def __repr__(self): "since_param={r.since_param}, " "time_of_run={r.time_of_run}, " "final={r.final}), " - "data_source={r.data_source}>" + "data_source={r.data_source}, " + "last_doc_id={r.last_doc_id}>" ).format(r=self) @@ -91,18 +93,18 @@ def for_dataset(self, data_source, table_names): engine=self.engine, table_names=table_names, data_source=data_source ) - def set_checkpoint(self, checkpoint_time, is_final=False): - self._set_checkpoint(checkpoint_time, is_final) + def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None): + self._set_checkpoint(checkpoint_time, is_final, doc_id=doc_id) if is_final: self._cleanup() - def _set_checkpoint(self, checkpoint_time, final, time_of_run=None): + def _set_checkpoint(self, checkpoint_time, final, time_of_run=None, doc_id=None): logger.info( - 'Setting %s checkpoint: data_source: %s, tables %s: checkpoint: %s', + 'Setting %s checkpoint: data_source: %s, tables %s: checkpoint: %s:%s', 'final' if final else 'batch', self.data_source, ', '.join(self.table_names), - checkpoint_time + checkpoint_time, doc_id ) if not checkpoint_time: raise DataExportException('Tried to set an empty checkpoint. 
This is not allowed.') @@ -127,7 +129,8 @@ def _set_checkpoint(self, checkpoint_time, final, time_of_run=None): since_param=since_param, time_of_run=time_of_run or datetime.datetime.utcnow().isoformat(), final=final, - data_source=self.data_source + data_source=self.data_source, + last_doc_id=doc_id ) session.add(checkpoint) created.append(checkpoint) @@ -301,9 +304,9 @@ def __init__(self, manager, since): self.manager = manager self.since_param = since - def set_checkpoint(self, checkpoint_time, is_final=False): + def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None): if self.manager: - self.manager.set_checkpoint(checkpoint_time, is_final) + self.manager.set_checkpoint(checkpoint_time, is_final, doc_id=doc_id) class CheckpointManagerProvider(object): diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index cca2c05b..033b5384 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -171,7 +171,11 @@ def checkpoint(self, checkpoint_manager, paginator, batch, is_final): if isinstance(paginator, DatePaginator): since_date = paginator.get_since_date(batch) if since_date: - checkpoint_manager.set_checkpoint(since_date, is_final) + try: + last_obj = batch['objects'][-1] + except IndexError: + last_obj = {} + checkpoint_manager.set_checkpoint(since_date, is_final, doc_id=last_obj.get("id", None)) else: logger.warning('Failed to get a checkpoint date from a batch of data.') diff --git a/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py b/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py index 99ddf2bd..2ba1b149 100644 --- a/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py +++ b/migrations/versions/a56c82a8d02e_add_detail_to_checkpoint.py @@ -22,6 +22,11 @@ def upgrade(): 'commcare_export_runs', sa.Column('data_source', sa.Unicode(255, collation=collation)) ) + op.add_column( + 'commcare_export_runs', + sa.Column('last_doc_id', sa.Unicode(255, collation=collation)) + ) def downgrade(): op.drop_column('commcare_export_runs', 'data_source') + op.drop_column('commcare_export_runs', 'last_doc_id') diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index f5253376..256ec438 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -106,15 +106,15 @@ def test_get_last_checkpoint_no_table(self, manager): def test_clean_on_final_run(self, manager): manager.create_checkpoint_table() manager = manager.for_dataset('form', ['t1']) - manager.set_checkpoint(datetime.datetime.utcnow()) - manager.set_checkpoint(datetime.datetime.utcnow()) + manager.set_checkpoint(datetime.datetime.utcnow(), doc_id="1") + manager.set_checkpoint(datetime.datetime.utcnow(), doc_id="2") def _get_non_final_rows_count(): with session_scope(manager.Session) as session: return session.query(Checkpoint).filter_by(final=False).count() assert _get_non_final_rows_count() == 2 - manager.set_checkpoint(datetime.datetime.utcnow(), True) + manager.set_checkpoint(datetime.datetime.utcnow(), True, doc_id="3") assert _get_non_final_rows_count() == 0 def test_get_time_of_last_checkpoint_with_key(self, manager): @@ -134,7 +134,8 @@ def test_multiple_tables(self, manager): t2 = uuid.uuid4().hex manager = manager.for_dataset('form', [t1, t2]) last_run_time = datetime.datetime.utcnow() - manager.set_checkpoint(last_run_time) + doc_id = uuid.uuid4().hex + manager.set_checkpoint(last_run_time, doc_id=doc_id) assert manager.for_dataset('form', 
[t1]).get_time_of_last_checkpoint() == last_run_time.isoformat() assert manager.for_dataset('form', [t2]).get_time_of_last_checkpoint() == last_run_time.isoformat() @@ -143,6 +144,7 @@ def test_multiple_tables(self, manager): checkpoints = manager.list_checkpoints() assert len(checkpoints) == 2 assert {checkpoints[0].table_name, checkpoints[1].table_name} == {t1, t2} + assert {checkpoints[0].last_doc_id, checkpoints[1].last_doc_id} == {doc_id} def test_get_latest_checkpoints(self, manager): manager.create_checkpoint_table() diff --git a/tests/test_cli.py b/tests/test_cli.py index 23b82c7f..39e02a49 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -453,15 +453,15 @@ def get_conflicting_types_checkpoint_client(): ( {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified'}, [ - {'id': 1, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-23T05:13:01.000000Z'}, - {'id': 2, 'name': 'f2', 'count': 123, 'server_date_modified': '2012-04-24T05:13:01.000000Z'} + {'id': "doc 1", 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-23T05:13:01.000000Z'}, + {'id': "doc 2", 'name': 'f2', 'count': 123, 'server_date_modified': '2012-04-24T05:13:01.000000Z'} ] ), ( {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified', 'server_date_modified_start': '2012-04-24T05:13:01'}, [ - {'id': 3, 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-25T05:13:01.000000Z'}, - {'id': 4, 'name': 'f2', 'count': 'abc', 'server_date_modified': '2012-04-26T05:13:01.000000Z'} + {'id': "doc 3", 'name': 'n1', 'count': 10, 'server_date_modified': '2012-04-25T05:13:01.000000Z'}, + {'id': "doc 4", 'name': 'f2', 'count': 'abc', 'server_date_modified': '2012-04-26T05:13:01.000000Z'} ] ), ], @@ -519,12 +519,12 @@ def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_ma # expect checkpoint to have the date from the first batch and not the 2nd runs = list(strict_writer.engine.execute( - sqlalchemy.text('SELECT table_name, since_param from commcare_export_runs where query_file_name = :fn'), + sqlalchemy.text('SELECT table_name, since_param, last_doc_id from commcare_export_runs where query_file_name = :fn'), fn='tests/013_ConflictingTypes.xlsx' )) - assert {r[0]: r[1] for r in runs} == { - 'Case': '2012-04-24T05:13:01', - } + assert runs == [ + ('Case', '2012-04-24T05:13:01', 'doc 2'), + ] # An input where missing fields should be added due to declared data types. 
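Taken together, patches 049 and 050 change what a checkpoint row records: each row is now tagged with the data source it came from, and with the id of the last document in the batch that produced it. A minimal sketch of the batch-checkpointing behaviour this enables (illustrative only: RecordingManager and checkpoint_batch are hypothetical stand-ins, though the batch handling mirrors the CommCareHqClient.checkpoint() change in patch 050):

    # Hypothetical stand-in for the real checkpoint manager: just records calls.
    class RecordingManager:
        def __init__(self):
            self.checkpoints = []

        def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None):
            self.checkpoints.append((checkpoint_time, is_final, doc_id))


    def checkpoint_batch(manager, since_date, batch, is_final=False):
        # As in patch 050: besides the since date, record the id of the last
        # object in the batch so a resumed run knows exactly where it stopped.
        try:
            last_obj = batch['objects'][-1]
        except IndexError:
            last_obj = {}
        manager.set_checkpoint(since_date, is_final, doc_id=last_obj.get('id'))


    manager = RecordingManager()
    checkpoint_batch(manager, '2012-04-24T05:13:01', {'objects': [
        {'id': 'doc 1', 'server_date_modified': '2012-04-23T05:13:01.000000Z'},
        {'id': 'doc 2', 'server_date_modified': '2012-04-24T05:13:01.000000Z'},
    ]})
    assert manager.checkpoints == [('2012-04-24T05:13:01', False, 'doc 2')]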
From 0f76ab90124f0e5b8048b9083b2b3667eb474245 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 25 Jan 2021 14:14:43 +0200 Subject: [PATCH 051/257] refactor paginators & params --- commcare_export/commcare_minilinq.py | 47 ++++++++++++++-------------- tests/test_commcare_hq_client.py | 20 ++++++------ 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index ca8e8dda..0dfeeefe 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -30,25 +30,21 @@ def __call__(self, since, until): return params -resource_since_params = { - 'form': SimpleSinceParams('indexed_on_start', 'indexed_on_end'), - 'case': SimpleSinceParams('indexed_on_start', 'indexed_on_end'), - 'user': None, - 'location': None, - 'application': None, - 'web-user': None, +SUPPORTED_RESOURCES = { + 'form', 'case', 'user', 'location', 'application', 'web-user' +} + + +DATE_PARAMS = { + 'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end') } def get_paginator(resource, page_size=1000): return { - 'form': DatePaginator('form', 'indexed_on', page_size), - 'case': DatePaginator('case', 'indexed_on', page_size), - 'user': SimplePaginator('user', page_size), - 'location': SimplePaginator('location', page_size), - 'application': SimplePaginator('application', page_size), - 'web-user': SimplePaginator('web-user', page_size), - }[resource] + 'form': DatePaginator('indexed_on', page_size), + 'case': DatePaginator('indexed_on', page_size), + }.get(resource, SimplePaginator(page_size)) class CommCareHqEnv(DictEnv): @@ -67,8 +63,8 @@ def __init__(self, commcare_hq_client, until=None, page_size=1000): @unwrap('checkpoint_manager') def api_data(self, resource, checkpoint_manager, payload=None, include_referenced_items=None): - if resource not in resource_since_params: - raise ValueError('I do not know how to access the API resource "%s"' % resource) + if resource not in SUPPORTED_RESOURCES: + raise ValueError('Unknown API resource "%s' % resource) paginator = get_paginator(resource, self.page_size) paginator.init(payload, include_referenced_items, self.until) @@ -89,9 +85,9 @@ class SimplePaginator(object): """ Paginate based on the 'next' URL provided in the API response. """ - def __init__(self, resource, page_size=1000): - self.resource = resource + def __init__(self, page_size=1000, params=None): self.page_size = page_size + self.params = params def init(self, payload=None, include_referenced_items=None, until=None): self.payload = dict(payload or {}) # Do not mutate passed-in dicts @@ -102,10 +98,9 @@ def next_page_params_since(self, since=None): params = self.payload params['limit'] = self.page_size - resource_date_params = resource_since_params[self.resource] - if (since or self.until) and resource_date_params: + if (since or self.until) and self.params: params.update( - resource_date_params(since, self.until) + self.params(since, self.until) ) if self.include_referenced_items: @@ -128,11 +123,15 @@ class DatePaginator(SimplePaginator): This also adds an ordering parameter to ensure that the records are ordered by the date field in ascending order. - :param resource: The name of the resource being fetched: ``form`` or ``case``. :param since_field: The name of the date field to use for pagination. 
+ :param page_size: Number of results to request in each page """ - def __init__(self, resource, since_field, page_size=1000): - super(DatePaginator, self).__init__(resource, page_size) + + DEFAULT_PARAMS = object() + + def __init__(self, since_field, page_size=1000, params=DEFAULT_PARAMS): + params = DATE_PARAMS[since_field] if params is DatePaginator.DEFAULT_PARAMS else params + super(DatePaginator, self).__init__(page_size, params) self.since_field = since_field def next_page_params_since(self, since=None): diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 5fc5c0f4..7bcf9498 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -12,7 +12,8 @@ from commcare_export.checkpoint import CheckpointManagerWithSince from commcare_export.commcare_hq_client import CommCareHqClient, ResourceRepeatException -from commcare_export.commcare_minilinq import SimplePaginator, DatePaginator, resource_since_params, get_paginator +from commcare_export.commcare_minilinq import SimplePaginator, DatePaginator, get_paginator, \ + DATE_PARAMS class FakeSession(object): @@ -46,7 +47,7 @@ def _get_results(self, params): 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}] } else: - since_query_param =resource_since_params['case'].start_param + since_query_param = DATE_PARAMS['indexed_on'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' # include ID=1 again to make sure it gets filtered out return { @@ -67,7 +68,7 @@ def _get_results(self, params): {'id': 2, 'foo': 2, 'indexed_on': '2017-01-01T15:36:22Z'}] } else: - since_query_param = resource_since_params['case'].start_param + since_query_param = DATE_PARAMS['indexed_on'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' return { 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, @@ -86,7 +87,7 @@ def _get_results(self, params): 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '{}Z'.format(since1)}] } else: - since_query_param = resource_since_params['form'].start_param + since_query_param = DATE_PARAMS['indexed_on'].start_param indexed_on = params[since_query_param] if indexed_on == since1: # include ID=1 again to make sure it gets filtered out @@ -136,26 +137,27 @@ def setup_class(cls): pass def test_empty_batch(self): - self.assertIsNone(DatePaginator('fake', 'since').next_page_params_from_batch({'objects': []})) + self.assertIsNone(DatePaginator('since', params=SimplePaginator()).next_page_params_from_batch({'objects': []})) def test_bad_date(self): - self.assertIsNone(DatePaginator('fake', 'since').next_page_params_from_batch({'objects': [{ + self.assertIsNone(DatePaginator('since', params=SimplePaginator()).next_page_params_from_batch({'objects': [{ 'since': 'not a date' }]})) def test_multi_field_sort(self): d1 = '2017-01-01T15:36:22Z' d2 = '2017-01-01T18:36:22Z' - self.assertEqual(DatePaginator('fake', ['s1', 's2']).get_since_date({'objects': [{ + paginator = DatePaginator(['s1', 's2'], params=SimplePaginator()) + self.assertEqual(paginator.get_since_date({'objects': [{ 's1': d1, 's2': d2 }]}), datetime.strptime(d1, '%Y-%m-%dT%H:%M:%SZ')) - self.assertEqual(DatePaginator('fake', ['s1', 's2']).get_since_date({'objects': [{ + self.assertEqual(paginator.get_since_date({'objects': [{ 's2': d2 }]}), datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ')) - self.assertEqual(DatePaginator('fake', ['s1', 's2']).get_since_date({'objects': [{ + self.assertEqual(paginator.get_since_date({'objects': [{ 's1': None, 's2': d2 }]}), 
datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ')) From 58383632c3c2799aaf5265fb1268d8e603392e34 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 25 Jan 2021 14:31:53 +0200 Subject: [PATCH 052/257] configurable pagination mode --- commcare_export/commcare_minilinq.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 0dfeeefe..8b531808 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -40,11 +40,16 @@ def __call__(self, since, until): } -def get_paginator(resource, page_size=1000): +def get_paginator(resource, page_size=1000, pagination_mode='by_date_indexed'): return { - 'form': DatePaginator('indexed_on', page_size), - 'case': DatePaginator('indexed_on', page_size), - }.get(resource, SimplePaginator(page_size)) + 'by_date_indexed': { + 'form': DatePaginator('indexed_on', page_size), + 'case': DatePaginator('indexed_on', page_size), + }, + 'by_date_modified': { + + } + }[pagination_mode].get(resource, SimplePaginator(page_size)) class CommCareHqEnv(DictEnv): @@ -53,10 +58,11 @@ class CommCareHqEnv(DictEnv): CommCareHq API. """ - def __init__(self, commcare_hq_client, until=None, page_size=1000): + def __init__(self, commcare_hq_client, until=None, page_size=1000, pagination_mode='by_date_indexed'): self.commcare_hq_client = commcare_hq_client self.until = until self.page_size = page_size + self.pagination_mode = pagination_mode super(CommCareHqEnv, self).__init__({ 'api_data' : self.api_data }) @@ -66,7 +72,7 @@ def api_data(self, resource, checkpoint_manager, payload=None, include_reference if resource not in SUPPORTED_RESOURCES: raise ValueError('Unknown API resource "%s' % resource) - paginator = get_paginator(resource, self.page_size) + paginator = get_paginator(resource, self.page_size, self.pagination_mode) paginator.init(payload, include_referenced_items, self.until) initial_params = paginator.next_page_params_since(checkpoint_manager.since_param) return self.commcare_hq_client.iterate( From 7a00b74c86dd61d90c85ef94fa5bde372070eb52 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 25 Jan 2021 14:52:42 +0200 Subject: [PATCH 053/257] support old and new pagination modes --- commcare_export/commcare_minilinq.py | 69 ++++++++++++++++++++++++---- tests/test_commcare_minilinq.py | 23 +++++++--- 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 8b531808..28cefb34 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -5,6 +5,7 @@ API directly. 
""" import json +from enum import Enum from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap @@ -16,6 +17,16 @@ from urlparse import parse_qs, urlparse +SUPPORTED_RESOURCES = { + 'form', 'case', 'user', 'location', 'application', 'web-user' +} + + +class PaginationMode(Enum): + date_indexed = "date_indexed" + date_modified = "date_modified" + + class SimpleSinceParams(object): def __init__(self, start, end): self.start_param = start @@ -30,24 +41,64 @@ def __call__(self, since, until): return params -SUPPORTED_RESOURCES = { - 'form', 'case', 'user', 'location', 'application', 'web-user' -} +class FormFilterSinceParams(object): + def __call__(self, since, until): + range_expression = {} + if since: + range_expression['gte'] = since.isoformat() + + if until: + range_expression['lte'] = until.isoformat() + + server_modified_missing = {"missing": { + "field": "server_modified_on", "null_value": True, "existence": True} + } + query = json.dumps({ + 'filter': { + "or": [ + { + "and": [ + { + "not": server_modified_missing + }, + { + "range": { + "server_modified_on": range_expression + } + } + ] + }, + { + "and": [ + server_modified_missing, + { + "range": { + "received_on": range_expression + } + } + ] + } + ] + }}) + + return {'_search': query} DATE_PARAMS = { - 'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end') + 'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end'), + 'server_date_modified': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end') } -def get_paginator(resource, page_size=1000, pagination_mode='by_date_indexed'): +def get_paginator(resource, page_size=1000, pagination_mode=PaginationMode.date_indexed): return { - 'by_date_indexed': { + PaginationMode.date_indexed: { 'form': DatePaginator('indexed_on', page_size), 'case': DatePaginator('indexed_on', page_size), }, - 'by_date_modified': { - + PaginationMode.date_modified: { + 'form': DatePaginator(['server_modified_on', 'received_on'], page_size, params=FormFilterSinceParams()), + 'case': DatePaginator('server_date_modified', page_size), } }[pagination_mode].get(resource, SimplePaginator(page_size)) @@ -58,7 +109,7 @@ class CommCareHqEnv(DictEnv): CommCareHq API. 
""" - def __init__(self, commcare_hq_client, until=None, page_size=1000, pagination_mode='by_date_indexed'): + def __init__(self, commcare_hq_client, until=None, page_size=1000, pagination_mode=PaginationMode.date_indexed): self.commcare_hq_client = commcare_hq_client self.until = until self.page_size = page_size diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index c0da3c3c..74626fa5 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -19,17 +19,26 @@ def check_case(self, val, result): if isinstance(result, list): assert [datum.value if isinstance(datum, jsonpath.DatumInContext) else datum for datum in val] == result - def test_eval(self): + def test_eval_indexed_on(self): + self._test_eval(PaginationMode.date_indexed) + + def test_eval_modified_on(self): + self._test_eval(PaginationMode.date_modified) + + def _test_eval(self, pagination_mode): + form_order_by = get_paginator('form', pagination_mode=pagination_mode).since_field + case_order_by = get_paginator('case', pagination_mode=pagination_mode).since_field + def die(msg): raise Exception(msg) client = MockCommCareHqClient({ 'form': [ ( - {'limit': 1000, 'filter': 'test1', 'order_by': 'indexed_on'}, + {'limit': 1000, 'filter': 'test1', 'order_by': form_order_by}, [1, 2, 3], ), ( - {'limit': 1000, 'filter': 'test2', 'order_by': 'indexed_on'}, + {'limit': 1000, 'filter': 'test2', 'order_by': form_order_by}, [ { 'x': [{ 'y': 1 }, {'y': 2}] }, { 'x': [{ 'y': 3 }, {'z': 4}] }, @@ -37,18 +46,18 @@ def die(msg): raise Exception(msg) ] ), ( - {'limit': 1000, 'filter': 'laziness-test', 'order_by': 'indexed_on'}, + {'limit': 1000, 'filter': 'laziness-test', 'order_by': form_order_by}, (i if i < 5 else die('Not lazy enough') for i in range(12)) ), ( - {'limit': 1000, 'cases__full': 'true', 'order_by': 'indexed_on'}, + {'limit': 1000, 'cases__full': 'true', 'order_by': form_order_by}, [1, 2, 3, 4, 5] ), ], 'case': [ ( - {'limit': 1000, 'type': 'foo', 'order_by': 'indexed_on'}, + {'limit': 1000, 'type': 'foo', 'order_by': case_order_by}, [ { 'x': 1 }, { 'x': 2 }, @@ -69,7 +78,7 @@ def die(msg): raise Exception(msg) ] }) - env = BuiltInEnv() | CommCareHqEnv(client) | JsonPathEnv({}) # {'form': api_client.iterate('form')}) + env = BuiltInEnv() | CommCareHqEnv(client, pagination_mode=pagination_mode) | JsonPathEnv({}) # {'form': api_client.iterate('form')}) checkpoint_manager = CheckpointManagerWithSince(None, None) assert list(Apply(Reference('api_data'), From a9c2b8cbf8f30b783f340596b16e10ee1c15c226 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 26 Jan 2021 15:17:34 +0200 Subject: [PATCH 054/257] store and retrieve pagination mode from checkpoint --- commcare_export/checkpoint.py | 66 +++++++--- commcare_export/commcare_minilinq.py | 5 +- ...61ab6_add_pagination_mode_to_checkpoint.py | 28 +++++ tests/test_checkpointmanager.py | 117 +++++++++++------- tests/test_commcare_hq_client.py | 6 +- tests/test_commcare_minilinq.py | 6 +- 6 files changed, 161 insertions(+), 67 deletions(-) create mode 100644 migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index cac536a3..706dafd5 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -14,6 +14,7 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker +from commcare_export.commcare_minilinq import PaginationMode from commcare_export.exceptions import DataExportException from 
commcare_export.writers import SqlMixin @@ -38,6 +39,16 @@ class Checkpoint(Base): final = Column(Boolean) data_source = Column(String) last_doc_id = Column(String) + pagination_mode = Column(String) + + def get_pagination_mode(self): + """Get Enum from value stored in the checkpoint. Null or empty value defaults to + 'date_modified' mode to support legacy checkpoints. + """ + if not self.pagination_mode: + return PaginationMode.date_modified + + return PaginationMode[self.pagination_mode] def __repr__(self): return ( @@ -53,7 +64,8 @@ def __repr__(self): "time_of_run={r.time_of_run}, " "final={r.final}), " "data_source={r.data_source}, " - "last_doc_id={r.last_doc_id}>" + "last_doc_id={r.last_doc_id}, " + "pagination_mode={r.pagination_mode}>" ).format(r=self) @@ -93,18 +105,20 @@ def for_dataset(self, data_source, table_names): engine=self.engine, table_names=table_names, data_source=data_source ) - def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None): - self._set_checkpoint(checkpoint_time, is_final, doc_id=doc_id) + def set_checkpoint(self, checkpoint_time, pagination_mode, is_final=False, doc_id=None): + self._set_checkpoint(checkpoint_time, pagination_mode, is_final, doc_id=doc_id) if is_final: self._cleanup() - def _set_checkpoint(self, checkpoint_time, final, time_of_run=None, doc_id=None): + def _set_checkpoint(self, checkpoint_time, pagination_mode, final, time_of_run=None, doc_id=None): logger.info( - 'Setting %s checkpoint: data_source: %s, tables %s: checkpoint: %s:%s', + 'Setting %s checkpoint: data_source: %s, tables: %s, pagination_mode: %s, checkpoint: %s:%s', 'final' if final else 'batch', self.data_source, ', '.join(self.table_names), - checkpoint_time, doc_id + pagination_mode.name, + checkpoint_time, + doc_id ) if not checkpoint_time: raise DataExportException('Tried to set an empty checkpoint. 
This is not allowed.')
@@ -130,7 +144,8 @@ def _set_checkpoint(self, checkpoint_time, pagination_mode, final, time_of_run=None, doc_id=None)
             time_of_run=time_of_run or datetime.datetime.utcnow().isoformat(),
             final=final,
             data_source=self.data_source,
-            last_doc_id=doc_id
+            last_doc_id=doc_id,
+            pagination_mode=pagination_mode.name
         )
         session.add(checkpoint)
         created.append(checkpoint)
@@ -195,7 +210,9 @@ def get_legacy_checkpoints(self):
             project=self.project, commcare=self.commcare, key=self.key
         )
         if table_run:
-            return self._set_checkpoint(table_run.since_param, table_run.final, table_run.time_of_run)
+            return self._set_checkpoint(
+                table_run.since_param, PaginationMode.date_modified, table_run.final, table_run.time_of_run
+            )
 
         # Check for run without the args
         table_run = self._get_last_checkpoint(
             project=None, commcare=None, table_name=None
         )
         if table_run:
-            return self._set_checkpoint(table_run.since_param, table_run.final, table_run.time_of_run)
+            return self._set_checkpoint(
+                table_run.since_param, PaginationMode.date_modified, table_run.final, table_run.time_of_run
+            )
 
     def _get_last_checkpoint(self, session, **kwarg_filters):
         query = session.query(Checkpoint)
@@ -299,14 +318,15 @@ def _validate_tables(self):
             raise Exception("No tables set in checkpoint manager")
 
-class CheckpointManagerWithSince(object):
-    def __init__(self, manager, since):
+class CheckpointManagerWithDetails(object):
+    def __init__(self, manager, since_param, pagination_mode):
         self.manager = manager
-        self.since_param = since
+        self.since_param = since_param
+        self.pagination_mode = pagination_mode
 
     def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None):
         if self.manager:
-            self.manager.set_checkpoint(checkpoint_time, is_final, doc_id=doc_id)
+            self.manager.set_checkpoint(checkpoint_time, self.pagination_mode, is_final, doc_id=doc_id)
 
 
 class CheckpointManagerProvider(object):
@@ -326,6 +346,19 @@ def get_since(self, checkpoint_manager):
         since = checkpoint_manager.get_time_of_last_checkpoint()
         return dateutil.parser.parse(since) if since else None
 
+    def get_pagination_mode(self, checkpoint_manager):
+        """Always use the default pagination mode unless we are continuing from
+        a previous checkpoint, in which case use the same pagination mode as before.
+        """
+        if self.start_over or self.since or not checkpoint_manager:
+            return PaginationMode.date_indexed
+
+        last_checkpoint = checkpoint_manager.get_last_checkpoint()
+        if not last_checkpoint:
+            return PaginationMode.date_indexed
+
+        return last_checkpoint.get_pagination_mode()
+
     def get_checkpoint_manager(self, data_source, table_names):
         """This gets called before each table is exported and set in the `env`. It is then passed to the API client and used to set the checkpoints.
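The get_pagination_mode() fallback above is the compatibility core of this patch: new exports paginate with the new default mode, while resumed exports keep the mode their last checkpoint ran under. A condensed, standalone restatement for illustration (choose_pagination_mode and NO_CHECKPOINT are names invented here; the logic follows get_pagination_mode() together with Checkpoint.get_pagination_mode()):

    from enum import Enum

    class PaginationMode(Enum):
        date_indexed = "date_indexed"
        date_modified = "date_modified"

    NO_CHECKPOINT = object()  # sentinel: no previous run was recorded at all

    def choose_pagination_mode(start_over, since, stored_mode=NO_CHECKPOINT):
        # Fresh exports (starting over, an explicit since date, or no previous
        # checkpoint) use the new date_indexed mode. A resumed export keeps the
        # mode its last checkpoint recorded, and a legacy checkpoint row that
        # stored no mode falls back to date_modified so old exports keep
        # paginating the way they always did.
        if start_over or since or stored_mode is NO_CHECKPOINT:
            return PaginationMode.date_indexed
        return PaginationMode[stored_mode] if stored_mode else PaginationMode.date_modified

    assert choose_pagination_mode(False, None) is PaginationMode.date_indexed
    assert choose_pagination_mode(False, None, stored_mode=None) is PaginationMode.date_modified
    assert choose_pagination_mode(False, None, stored_mode="date_indexed") is PaginationMode.date_indexed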
@@ -339,9 +372,10 @@ def get_checkpoint_manager(self, data_source, table_names): manager = self.base_checkpoint_manager.for_dataset(data_source, table_names) since = self.get_since(manager) + pagination_mode = self.get_pagination_mode(manager) logger.info( - "Creating checkpoint manager for tables: %s with 'since' parameter: %s", - ', '.join(table_names), since + "Creating checkpoint manager for tables: %s, since: %s, pagination_mode: %s", + ', '.join(table_names), since, pagination_mode.name ) - return CheckpointManagerWithSince(manager, since) + return CheckpointManagerWithDetails(manager, since, pagination_mode) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 28cefb34..79d052fc 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -109,11 +109,10 @@ class CommCareHqEnv(DictEnv): CommCareHq API. """ - def __init__(self, commcare_hq_client, until=None, page_size=1000, pagination_mode=PaginationMode.date_indexed): + def __init__(self, commcare_hq_client, until=None, page_size=1000): self.commcare_hq_client = commcare_hq_client self.until = until self.page_size = page_size - self.pagination_mode = pagination_mode super(CommCareHqEnv, self).__init__({ 'api_data' : self.api_data }) @@ -123,7 +122,7 @@ def api_data(self, resource, checkpoint_manager, payload=None, include_reference if resource not in SUPPORTED_RESOURCES: raise ValueError('Unknown API resource "%s' % resource) - paginator = get_paginator(resource, self.page_size, self.pagination_mode) + paginator = get_paginator(resource, self.page_size, checkpoint_manager.pagination_mode) paginator.init(payload, include_referenced_items, self.until) initial_params = paginator.next_page_params_since(checkpoint_manager.since_param) return self.commcare_hq_client.iterate( diff --git a/migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py b/migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py new file mode 100644 index 00000000..13445ae7 --- /dev/null +++ b/migrations/versions/6f158d161ab6_add_pagination_mode_to_checkpoint.py @@ -0,0 +1,28 @@ +"""Add pagination_mode to checkpoint + +Revision ID: 6f158d161ab6 +Revises: a56c82a8d02e +Create Date: 2021-01-25 15:13:45.996453 + +""" +from alembic import op +import sqlalchemy as sa + + +revision = '6f158d161ab6' +down_revision = 'a56c82a8d02e' +branch_labels = None +depends_on = None + + + +def upgrade(): + url = op.get_bind().engine.url + collation = 'utf8_bin' if 'mysql' in url.drivername else None + op.add_column( + 'commcare_export_runs', + sa.Column('pagination_mode', sa.Unicode(255, collation=collation)) + ) + +def downgrade(): + op.drop_column('commcare_export_runs', 'pagination_mode') diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index 256ec438..bfb750ba 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -7,7 +7,8 @@ import pytest import sqlalchemy -from commcare_export.checkpoint import CheckpointManager, Checkpoint, session_scope +from commcare_export.checkpoint import CheckpointManager, Checkpoint, session_scope, CheckpointManagerProvider +from commcare_export.commcare_minilinq import PaginationMode @pytest.fixture() @@ -21,6 +22,12 @@ def manager(db_params): manager.connection.execute(sqlalchemy.sql.text('DROP TABLE IF EXISTS alembic_version')) +@pytest.fixture() +def configured_manager(manager): + manager.create_checkpoint_table() + return manager + + @pytest.mark.dbtest class 
TestCheckpointManager(object): def test_create_checkpoint_table(self, manager, revision='head'): @@ -37,47 +44,44 @@ def test_checkpoint_table_exists(self, manager): manager.connection.execute(sqlalchemy.sql.text('DROP TABLE alembic_version')) manager.create_checkpoint_table() - def test_get_time_of_last_checkpoint(self, manager): - manager.create_checkpoint_table() - manager = manager.for_dataset('form', ['t1']) - manager.set_checkpoint(datetime.datetime.utcnow()) + def test_get_time_of_last_checkpoint(self, configured_manager): + manager = configured_manager.for_dataset('form', ['t1']) + manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed) second_run = datetime.datetime.utcnow() - manager.set_checkpoint(second_run) + manager.set_checkpoint(second_run, PaginationMode.date_indexed) assert manager.get_time_of_last_checkpoint() == second_run.isoformat() - def test_get_last_checkpoint_no_args(self, manager): + def test_get_last_checkpoint_no_args(self, configured_manager): # test that we can still get the time of last run no project and commcare args - manager.create_checkpoint_table() - with session_scope(manager.Session) as session: + with session_scope(configured_manager.Session) as session: since_param = datetime.datetime.utcnow().isoformat() session.add(Checkpoint( id=uuid.uuid4().hex, - query_file_name=manager.query, - query_file_md5=manager.query_md5, + query_file_name=configured_manager.query, + query_file_md5=configured_manager.query_md5, project=None, commcare=None, since_param=since_param, time_of_run=datetime.datetime.utcnow().isoformat(), final=True )) - manager = manager.for_dataset('form', ['t1', 't2']) + manager = configured_manager.for_dataset('form', ['t1', 't2']) checkpoint = manager.get_last_checkpoint() assert checkpoint.since_param == since_param assert checkpoint.project == manager.project assert checkpoint.commcare == manager.commcare assert len(manager.get_latest_checkpoints()) == 2 - def test_get_last_checkpoint_no_table(self, manager): + def test_get_last_checkpoint_no_table(self, configured_manager): # test that we can still get the time of last run no table # also tests that new checkoints are created with the tables - manager.create_checkpoint_table() - with session_scope(manager.Session) as session: + with session_scope(configured_manager.Session) as session: since_param = datetime.datetime.utcnow().isoformat() session.add(Checkpoint( id=uuid.uuid4().hex, - query_file_name=manager.query, - query_file_md5=manager.query_md5, + query_file_name=configured_manager.query, + query_file_md5=configured_manager.query_md5, project=None, commcare=None, since_param=since_param, @@ -87,15 +91,15 @@ def test_get_last_checkpoint_no_table(self, manager): session.add(Checkpoint( id=uuid.uuid4().hex, - query_file_name=manager.query, - query_file_md5=manager.query_md5, - project=manager.project, - commcare=manager.commcare, + query_file_name=configured_manager.query, + query_file_md5=configured_manager.query_md5, + project=configured_manager.project, + commcare=configured_manager.commcare, since_param=since_param, time_of_run=datetime.datetime.utcnow().isoformat(), final=True )) - manager = manager.for_dataset('form', ['t1', 't2']) + manager = configured_manager.for_dataset('form', ['t1', 't2']) checkpoint = manager.get_last_checkpoint() assert checkpoint.since_param == since_param assert checkpoint.table_name in manager.table_names @@ -103,39 +107,36 @@ def test_get_last_checkpoint_no_table(self, manager): assert len(checkpoints) == 2 assert 
{c.table_name for c in checkpoints} == set(manager.table_names) - def test_clean_on_final_run(self, manager): - manager.create_checkpoint_table() - manager = manager.for_dataset('form', ['t1']) - manager.set_checkpoint(datetime.datetime.utcnow(), doc_id="1") - manager.set_checkpoint(datetime.datetime.utcnow(), doc_id="2") + def test_clean_on_final_run(self, configured_manager): + manager = configured_manager.for_dataset('form', ['t1']) + manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed, doc_id="1") + manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed, doc_id="2") def _get_non_final_rows_count(): with session_scope(manager.Session) as session: return session.query(Checkpoint).filter_by(final=False).count() assert _get_non_final_rows_count() == 2 - manager.set_checkpoint(datetime.datetime.utcnow(), True, doc_id="3") + manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed, True, doc_id="3") assert _get_non_final_rows_count() == 0 - def test_get_time_of_last_checkpoint_with_key(self, manager): - manager.create_checkpoint_table() - manager = manager.for_dataset('form', ['t1']) + def test_get_time_of_last_checkpoint_with_key(self, configured_manager): + manager = configured_manager.for_dataset('form', ['t1']) manager.key = 'my key' last_run_time = datetime.datetime.utcnow() - manager.set_checkpoint(last_run_time) + manager.set_checkpoint(last_run_time, PaginationMode.date_indexed) assert manager.get_time_of_last_checkpoint() == last_run_time.isoformat() manager.key = None assert manager.get_time_of_last_checkpoint() is None - def test_multiple_tables(self, manager): - manager.create_checkpoint_table() + def test_multiple_tables(self, configured_manager): t1 = uuid.uuid4().hex t2 = uuid.uuid4().hex - manager = manager.for_dataset('form', [t1, t2]) + manager = configured_manager.for_dataset('form', [t1, t2]) last_run_time = datetime.datetime.utcnow() doc_id = uuid.uuid4().hex - manager.set_checkpoint(last_run_time, doc_id=doc_id) + manager.set_checkpoint(last_run_time, PaginationMode.date_indexed, doc_id=doc_id) assert manager.for_dataset('form', [t1]).get_time_of_last_checkpoint() == last_run_time.isoformat() assert manager.for_dataset('form', [t2]).get_time_of_last_checkpoint() == last_run_time.isoformat() @@ -146,18 +147,50 @@ def test_multiple_tables(self, manager): assert {checkpoints[0].table_name, checkpoints[1].table_name} == {t1, t2} assert {checkpoints[0].last_doc_id, checkpoints[1].last_doc_id} == {doc_id} - def test_get_latest_checkpoints(self, manager): - manager.create_checkpoint_table() - manager = manager.for_dataset('form', ['t1', 't2']) - manager.set_checkpoint(datetime.datetime.utcnow()) + def test_get_latest_checkpoints(self, configured_manager): + manager = configured_manager.for_dataset('form', ['t1', 't2']) + manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed) manager.query_md5 = '456' - manager.set_checkpoint(datetime.datetime.utcnow()) + manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed) latest_time = datetime.datetime.utcnow() - manager.set_checkpoint(latest_time) + manager.set_checkpoint(latest_time, PaginationMode.date_indexed) checkpoints = manager.get_latest_checkpoints() assert len(checkpoints) == 2 assert [c.table_name for c in checkpoints] == ['t1', 't2'] assert {c.query_file_md5 for c in checkpoints} == {'456'} assert {c.since_param for c in checkpoints} == {latest_time.isoformat()} + + +@pytest.mark.parametrize('since, 
start_over, expected_since, expected_paginator', [ + (None, True, None, PaginationMode.date_indexed), + ('since', False, 'since', PaginationMode.date_indexed), + (None, False, None, PaginationMode.date_indexed), +]) +def test_checkpoint_details_static(since, start_over, expected_since, expected_paginator): + cmp = CheckpointManagerProvider(None, since, start_over) + assert expected_since == cmp.get_since(None) + assert expected_paginator == cmp.get_pagination_mode(None) + + +@pytest.mark.dbtest +class TestCheckpointManagerProvider(object): + def test_checkpoint_details_no_checkpoint(self, configured_manager): + manager = configured_manager.for_dataset('form', ['t1']) + assert None is CheckpointManagerProvider().get_since(manager) + assert PaginationMode.date_indexed == CheckpointManagerProvider().get_pagination_mode(manager) + + def test_checkpoint_details_latest_from_db(self, configured_manager): + manager = configured_manager.for_dataset('form', ['t1']) + + self._test_checkpoint_details(manager, datetime.datetime.utcnow(), PaginationMode.date_modified) + self._test_checkpoint_details(manager, datetime.datetime.utcnow(), PaginationMode.date_indexed) + self._test_checkpoint_details(manager, datetime.datetime.utcnow(), PaginationMode.date_modified) + + def _test_checkpoint_details(self, manager, checkpoint_date, pagination_mode): + manager.set_checkpoint(checkpoint_date, pagination_mode) + + cmp = CheckpointManagerProvider() + assert pagination_mode == cmp.get_pagination_mode(manager) + assert checkpoint_date == cmp.get_since(manager) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 7bcf9498..bbb2da51 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -10,10 +10,10 @@ import pytest -from commcare_export.checkpoint import CheckpointManagerWithSince +from commcare_export.checkpoint import CheckpointManagerWithDetails from commcare_export.commcare_hq_client import CommCareHqClient, ResourceRepeatException from commcare_export.commcare_minilinq import SimplePaginator, DatePaginator, get_paginator, \ - DATE_PARAMS + DATE_PARAMS, PaginationMode class FakeSession(object): @@ -112,7 +112,7 @@ def _test_iterate(self, session, paginator, expected_count, expected_vals): # Iteration should do two "gets" because the first will have something in the "next" metadata field paginator.init() - checkpoint_manager = CheckpointManagerWithSince(None, None) + checkpoint_manager = CheckpointManagerWithDetails(None, None, PaginationMode.date_indexed) results = list(client.iterate('/fake/uri', paginator, checkpoint_manager=checkpoint_manager)) self.assertEqual(len(results), expected_count) self.assertEqual([result['foo'] for result in results], expected_vals) diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index 74626fa5..454f3c7f 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -3,7 +3,7 @@ from jsonpath_rw import jsonpath -from commcare_export.checkpoint import CheckpointManagerWithSince +from commcare_export.checkpoint import CheckpointManagerWithDetails from commcare_export.minilinq import * from commcare_export.env import * from commcare_export.commcare_hq_client import MockCommCareHqClient @@ -78,9 +78,9 @@ def die(msg): raise Exception(msg) ] }) - env = BuiltInEnv() | CommCareHqEnv(client, pagination_mode=pagination_mode) | JsonPathEnv({}) # {'form': api_client.iterate('form')}) + env = BuiltInEnv() | CommCareHqEnv(client) | JsonPathEnv({}) # {'form': 
api_client.iterate('form')}) - checkpoint_manager = CheckpointManagerWithSince(None, None) + checkpoint_manager = CheckpointManagerWithDetails(None, None, pagination_mode) assert list(Apply(Reference('api_data'), Literal('form'), Literal(checkpoint_manager), From 5c4b2b043cf31e805e37f8d14a6b1f684656c098 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 26 Jan 2021 15:19:53 +0200 Subject: [PATCH 055/257] test pagination at CLI level --- tests/test_cli.py | 170 ++++++++++++++++++++++++++++++++++++++-------- tests/utils.py | 17 +++++ 2 files changed, 157 insertions(+), 30 deletions(-) create mode 100644 tests/utils.py diff --git a/tests/test_cli.py b/tests/test_cli.py index 42cb151e..7999e954 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,11 +10,13 @@ import sqlalchemy from mock import mock -from commcare_export.checkpoint import CheckpointManager +from commcare_export.checkpoint import CheckpointManager, session_scope, Checkpoint from commcare_export.cli import CLI_ARGS, EXIT_STATUS_ERROR, main_with_args from commcare_export.commcare_hq_client import MockCommCareHqClient, CommCareHqClient, _params_to_url +from commcare_export.commcare_minilinq import PaginationMode from commcare_export.specs import TableSpec from commcare_export.writers import JValueTableWriter, SqlTableWriter +from tests.utils import SqlWriterWithTearDown CLI_ARGS_BY_NAME = { arg.name: arg @@ -287,12 +289,14 @@ def test_cli_table_plus_locations(self, mock_client): get_expected_locations_results(True)) -@pytest.fixture(scope='class') +@pytest.fixture(scope='function') def writer(pg_db_params): - return SqlTableWriter(pg_db_params['url'], poolclass=sqlalchemy.pool.NullPool) + writer = SqlWriterWithTearDown(pg_db_params['url'], poolclass=sqlalchemy.pool.NullPool) + yield writer + writer.tear_down() -@pytest.fixture(scope='class') +@pytest.fixture(scope='function') def checkpoint_manager(pg_db_params): cm = CheckpointManager(pg_db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool) cm.create_checkpoint_table() @@ -323,6 +327,24 @@ def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10): main_with_args(args) +def _check_data(writer, expected, table_name, columns): + actual = [ + list(row) for row in + writer.engine.execute(f'SELECT {", ".join(columns)} FROM "{table_name}"') + ] + + message = '' + if actual != expected: + message += 'Data not equal to expected:\n' + if len(actual) != len(expected): + message += ' {} rows compared to {} expected\n'.format(len(actual), len(expected)) + message += 'Diff:\n' + for i, rows in enumerate(zip_longest(actual, expected)): + if rows[0] != rows[1]: + message += '{}: {} != {}\n'.format(i, rows[0], rows[1]) + assert actual == expected, message + + @pytest.mark.dbtest class TestCLIIntegrationTests(object): def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog): @@ -369,21 +391,7 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_ } def _check_data(self, writer, expected, table_name): - actual = [ - list(row) for row in - writer.engine.execute("SELECT id, name, indexed_on FROM {}".format(table_name)) - ] - - message = '' - if actual != expected: - message += 'Data not equal to expected:\n' - if len(actual) != len(expected): - message += ' {} rows compared to {} expected\n'.format(len(actual), len(expected)) - message += 'Diff:\n' - for i, rows in enumerate(zip_longest(actual, expected)): - if rows[0] != rows[1]: - message += '{}: {} != {}\n'.format(i, rows[0], rows[1]) 
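Patch 054's core decision — when to keep a stored pagination mode and when to fall back to the new default — is easy to restate as a pure function. The sketch below is an illustration only, not code from the repo; `last_checkpoint_mode` stands in for the database lookup that `CheckpointManagerProvider.get_pagination_mode` performs:

```python
from commcare_export.commcare_minilinq import PaginationMode

def choose_pagination_mode(start_over, since, last_checkpoint_mode):
    # Anything that discards the previous checkpoint (a fresh start, an
    # explicit --since, or no checkpoint at all) gets the new default mode;
    # only an untouched, resumable checkpoint keeps its stored mode.
    if start_over or since or last_checkpoint_mode is None:
        return PaginationMode.date_indexed
    return last_checkpoint_mode

# Resuming a legacy checkpoint keeps the legacy mode; starting over does not.
assert choose_pagination_mode(False, None, PaginationMode.date_modified) == PaginationMode.date_modified
assert choose_pagination_mode(True, None, PaginationMode.date_modified) == PaginationMode.date_indexed
```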
From 5c4b2b043cf31e805e37f8d14a6b1f684656c098 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Tue, 26 Jan 2021 15:19:53 +0200
Subject: [PATCH 055/257] test pagination at CLI level

---
 tests/test_cli.py | 170 ++++++++++++++++++++++++++++++++++++++--------
 tests/utils.py    |  17 +++++
 2 files changed, 157 insertions(+), 30 deletions(-)
 create mode 100644 tests/utils.py

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 42cb151e..7999e954 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -10,11 +10,13 @@
 import sqlalchemy
 from mock import mock
 
-from commcare_export.checkpoint import CheckpointManager
+from commcare_export.checkpoint import CheckpointManager, session_scope, Checkpoint
 from commcare_export.cli import CLI_ARGS, EXIT_STATUS_ERROR, main_with_args
 from commcare_export.commcare_hq_client import MockCommCareHqClient, CommCareHqClient, _params_to_url
+from commcare_export.commcare_minilinq import PaginationMode
 from commcare_export.specs import TableSpec
 from commcare_export.writers import JValueTableWriter, SqlTableWriter
+from tests.utils import SqlWriterWithTearDown
 
 CLI_ARGS_BY_NAME = {
     arg.name: arg
@@ -287,12 +289,14 @@ def test_cli_table_plus_locations(self, mock_client):
                          get_expected_locations_results(True))
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope='function')
 def writer(pg_db_params):
-    return SqlTableWriter(pg_db_params['url'], poolclass=sqlalchemy.pool.NullPool)
+    writer = SqlWriterWithTearDown(pg_db_params['url'], poolclass=sqlalchemy.pool.NullPool)
+    yield writer
+    writer.tear_down()
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope='function')
 def checkpoint_manager(pg_db_params):
     cm = CheckpointManager(pg_db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool)
     cm.create_checkpoint_table()
@@ -323,6 +327,24 @@ def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10):
         main_with_args(args)
 
 
+def _check_data(writer, expected, table_name, columns):
+    actual = [
+        list(row) for row in
+        writer.engine.execute(f'SELECT {", ".join(columns)} FROM "{table_name}"')
+    ]
+
+    message = ''
+    if actual != expected:
+        message += 'Data not equal to expected:\n'
+        if len(actual) != len(expected):
+            message += '  {} rows compared to {} expected\n'.format(len(actual), len(expected))
+        message += 'Diff:\n'
+        for i, rows in enumerate(zip_longest(actual, expected)):
+            if rows[0] != rows[1]:
+                message += '{}: {} != {}\n'.format(i, rows[0], rows[1])
+    assert actual == expected, message
+
+
 @pytest.mark.dbtest
 class TestCLIIntegrationTests(object):
     def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog):
@@ -369,21 +391,7 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_
         }
 
     def _check_data(self, writer, expected, table_name):
-        actual = [
-            list(row) for row in
-            writer.engine.execute("SELECT id, name, indexed_on FROM {}".format(table_name))
-        ]
-
-        message = ''
-        if actual != expected:
-            message += 'Data not equal to expected:\n'
-            if len(actual) != len(expected):
-                message += '  {} rows compared to {} expected\n'.format(len(actual), len(expected))
-            message += 'Diff:\n'
-            for i, rows in enumerate(zip_longest(actual, expected)):
-                if rows[0] != rows[1]:
-                    message += '{}: {} != {}\n'.format(i, rows[0], rows[1])
-        assert actual == expected, message
+        _check_data(writer, expected, table_name, ['id', 'name', 'indexed_on'])
 
     def _check_checkpoints(self, caplog, expected):
         # Depends on the logging in the CheckpointManager._set_checkpoint method
@@ -421,7 +429,7 @@ class MockCheckpointingClient(CommCareHqClient):
     to return mocked data.
 
     Note this client needs to be re-initialized after use."""
-    def __init__(self, mock_data):
+    def __init__(self, mock_data): 
        self.mock_data = {
            resource: {
                _params_to_url(params): result
@@ -437,7 +445,11 @@ def __init__(self, mock_data):
     def get(self, resource, params=None):
         mock_requests = self.mock_data[resource]
         key = _params_to_url(params)
-        objects = mock_requests.pop(key)
+        try:
+            objects = mock_requests.pop(key)
+        except KeyError:
+            print(mock_requests.keys())
+            raise
         if objects:
             return {'meta': {'limit': len(objects), 'next': bool(mock_requests),
                              'offset': 0, 'previous': None,
@@ -468,22 +480,28 @@ def get_conflicting_types_checkpoint_client():
     })
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope='function')
 def strict_writer(db_params):
-    return SqlTableWriter(db_params['url'], poolclass=sqlalchemy.pool.NullPool, strict_types=True)
+    writer = SqlWriterWithTearDown(db_params['url'], poolclass=sqlalchemy.pool.NullPool, strict_types=True)
+    yield writer
+    writer.tear_down()
 
 
-@pytest.fixture(scope='class')
+@pytest.fixture(scope='function')
 def all_db_checkpoint_manager(db_params):
     cm = CheckpointManager(db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool)
     cm.create_checkpoint_table()
-    return cm
+    yield cm
+    with session_scope(cm.Session) as session:
+        session.query(Checkpoint).delete(synchronize_session='fetch')
 
 
-def _pull_mock_data(writer, checkpoint_manager, api_client, query):
+def _pull_mock_data(writer, checkpoint_manager, api_client, query, start_over=None, since=None):
     args = make_args(
         query=query,
         output_format='sql',
+        start_over=start_over,
+        since=since
     )
 
     # set this so that it get's written to the checkpoints
     checkpoint_manager.query = query
 
     # have to mock these to override the pool class otherwise they hold the db connection open
     api_client_patch = mock.patch('commcare_export.cli._get_api_client',
@@ -543,10 +561,10 @@ def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_ma
 
 @pytest.mark.dbtest
 class TestCLIWithDataTypes(object):
-    def test_cli_data_types_add_columns(self, strict_writer, all_db_checkpoint_manager, capfd):
-        _pull_mock_data(strict_writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, 'tests/014_ExportWithDataTypes.xlsx')
+    def test_cli_data_types_add_columns(self, writer, all_db_checkpoint_manager, capfd):
+        _pull_mock_data(writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, 'tests/014_ExportWithDataTypes.xlsx')
 
-        metadata = sqlalchemy.schema.MetaData(bind=strict_writer.engine)
+        metadata = sqlalchemy.schema.MetaData(bind=writer.engine)
         metadata.reflect()
 
         cols = metadata.tables['forms'].c
@@ -558,8 +576,100 @@ def test_cli_data_types_add_columns(self, strict_writer, all_db_checkpoint_manag
 
         values = [
             list(row) for row in
-            strict_writer.engine.execute('SELECT * FROM forms')
+            writer.engine.execute('SELECT * FROM forms')
         ]
 
         assert values == [['1', None, None, None, None, None],
                           ['2', None, None, None, None, None]]
+
+
+def get_indexed_on_client(page):
+    p1 = MockCheckpointingClient({
+        'case': [
+            (
+                {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'},
+                [
+                    {'id': "doc 1", 'name': 'n1', 'indexed_on': '2012-04-23T05:13:01.000000Z'},
+                    {'id': "doc 2", 'name': 'n2', 'indexed_on': '2012-04-24T05:13:01.000000Z'}
+                ]
+            )
+        ]
+    })
+    p2 = MockCheckpointingClient({
+        'case': [
+            (
+                {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on', 'indexed_on_start': '2012-04-24T05:13:01'},
+                [
+                    {'id': "doc 3", 'name': 'n3', 'indexed_on': '2012-04-25T05:13:01.000000Z'},
+                    {'id': "doc 4", 'name': 'n4', 'indexed_on': '2012-04-26T05:13:01.000000Z'}
+                ]
+            )
+        ]
+    })
+    return [p1, p2][page]
+
+
+@pytest.mark.dbtest
+class TestCLIPaginationMode(object):
+    def test_cli_pagination_fresh(self, writer, all_db_checkpoint_manager):
+        checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"])
+
+        _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(0), 'tests/013_ConflictingTypes.xlsx')
+        self._check_data(writer, [["doc 1"], ["doc 2"]], "Case")
+        self._check_checkpoint(checkpoint_manager, '2012-04-24T05:13:01', 'doc 2')
+
+        _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(1), 'tests/013_ConflictingTypes.xlsx')
+        self._check_data(writer, [["doc 1"], ["doc 2"], ["doc 3"], ["doc 4"]], "Case")
+        self._check_checkpoint(checkpoint_manager, '2012-04-26T05:13:01', 'doc 4')
+
+    def test_cli_pagination_legacy(self, writer, all_db_checkpoint_manager):
+        """Test that we continue with the same pagination mode as was already in use"""
+
+        checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"])
+        # simulate previous run with legacy pagination mode
+        checkpoint_manager.set_checkpoint('2012-04-24T05:13:01', PaginationMode.date_modified, is_final=True)
+
+        client = MockCheckpointingClient({
+            'case': [
+                (
+                    {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified', 'server_date_modified_start': '2012-04-24T05:13:01'},
+                    [
+                        {'id': "doc 1", 'name': 'n1', 'server_date_modified': '2012-04-25T05:13:01.000000Z'},
+                        {'id': "doc 2", 'name': 'n2', 'server_date_modified': '2012-04-26T05:13:01.000000Z'}
+                    ]
+                )
+            ]
+        })
+
+        _pull_mock_data(writer, all_db_checkpoint_manager, client, 'tests/013_ConflictingTypes.xlsx')
+        self._check_data(writer, [["doc 1"], ["doc 2"]], "Case")
+        self._check_checkpoint(checkpoint_manager, '2012-04-26T05:13:01', 'doc 2', PaginationMode.date_modified.name)
+
+    def test_cli_pagination_start_over(self, writer, all_db_checkpoint_manager):
+        """Test that we switch to the new pagination mode when using 'start_over'"""
+        checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"])
+        # simulate previous run with legacy pagination mode
+        checkpoint_manager.set_checkpoint('2012-04-24T05:13:01', PaginationMode.date_modified, is_final=True)
+
+        _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(0), 'tests/013_ConflictingTypes.xlsx', start_over=True)
+        self._check_data(writer, [["doc 1"], ["doc 2"]], "Case")
+        self._check_checkpoint(checkpoint_manager, '2012-04-24T05:13:01', 'doc 2')
+
+    def test_cli_pagination_since(self, writer, all_db_checkpoint_manager):
+        """Test that we switch to the new pagination mode when using 'since'"""
+        checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"])
+        # simulate previous run with legacy pagination mode
+        checkpoint_manager.set_checkpoint('2012-04-28T05:13:01', PaginationMode.date_modified, is_final=True)
+
+        _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(1), 'tests/013_ConflictingTypes.xlsx', since='2012-04-24T05:13:01')
+        self._check_data(writer, [["doc 3"], ["doc 4"]], "Case")
+        self._check_checkpoint(checkpoint_manager, '2012-04-26T05:13:01', 'doc 4')
+
+    def _check_data(self, writer, expected, table_name):
+        _check_data(writer, expected, table_name, ['id'])
+
+    def _check_checkpoint(self, checkpoint_manager, since_param, doc_id, pagination_mode=PaginationMode.date_indexed.name):
+        checkpoint = checkpoint_manager.get_last_checkpoint()
+        assert checkpoint.pagination_mode == pagination_mode
+        assert checkpoint.since_param == since_param
+        assert checkpoint.last_doc_id == doc_id
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 00000000..4a5035a9
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,17 @@
+from commcare_export.writers import SqlTableWriter
+
+
+class SqlWriterWithTearDown(SqlTableWriter):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.tables = set()
+
+    def write_table(self, table):
+        super().write_table(table)
+        if table.rows:
+            self.tables.add(table.name)
+
+    def tear_down(self):
+        for table in self.tables:
+            self.engine.execute(f'DROP TABLE "{table}"')
+        self.tables = set()
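Patch 055 also moves the SQL fixtures from class scope to function scope so each test gets a clean database, with cleanup running even when the test fails. A minimal, self-contained sketch of that yield-fixture pattern — using SQLite and the legacy SQLAlchemy 1.x `engine.execute` API these tests rely on; all names here are invented:

```python
import pytest
import sqlalchemy


@pytest.fixture()
def engine(tmp_path):
    # Function-scoped: every test gets a fresh database file, and the code
    # after `yield` runs as teardown whether the test passed or failed.
    engine = sqlalchemy.create_engine(f"sqlite:///{tmp_path}/test.db")
    yield engine
    engine.dispose()


def test_round_trip(engine):
    engine.execute("CREATE TABLE t (id INTEGER)")
    engine.execute("INSERT INTO t VALUES (1)")
    assert [r[0] for r in engine.execute("SELECT id FROM t")] == [1]
```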
From 8d9d98d367611382bd275c6285d1045e5ee2c507 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Wed, 27 Jan 2021 14:25:12 +0200
Subject: [PATCH 056/257] add log warning

---
 commcare_export/checkpoint.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py
index 706dafd5..7bb668a5 100644
--- a/commcare_export/checkpoint.py
+++ b/commcare_export/checkpoint.py
@@ -378,4 +378,13 @@ def get_checkpoint_manager(self, data_source, table_names):
             "Creating checkpoint manager for tables: %s, since: %s, pagination_mode: %s",
             ', '.join(table_names), since, pagination_mode.name
         )
+        if pagination_mode != PaginationMode.date_indexed:
+            logger.warning(
+                "\n====================================\n"
+                "This export is using a deprecated pagination mode which will be removed in future versions.\n"
+                "To switch to the new mode you must re-sync your data using `--start-over`.\n"
+                "For more details see: %s"
+                "\n====================================\n",
+                "https://wiki.commcarehq.org/display/commcarepublic/CommCare+Export+Tool+Release+Notes"
+            )
         return CheckpointManagerWithDetails(manager, since, pagination_mode)

From eb23b7fe2c4458aef4303652d5214a8e2f4ddf21 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Wed, 27 Jan 2021 17:06:15 +0200
Subject: [PATCH 057/257] remove whitespace

---
 tests/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 7999e954..4c2eae51 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -429,7 +429,7 @@ class MockCheckpointingClient(CommCareHqClient):
     to return mocked data.
 
     Note this client needs to be re-initialized after use."""
-    def __init__(self, mock_data): 
+    def __init__(self, mock_data):
         self.mock_data = {
             resource: {
                 _params_to_url(params): result

From 8f57ed9b59c22295e96225e37af0846b25dc5758 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Wed, 27 Jan 2021 17:24:09 +0200
Subject: [PATCH 058/257] remove checkpoint manager when using 'since'

---
 tests/test_cli.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 4c2eae51..c2e83bd6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -317,6 +317,7 @@ def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10):
         until=until,
     )
 
+    assert not (checkpoint_manager and since), "'checkpoint_manager' must be None when using 'since'"
     # set this so that it get's written to the checkpoints
     checkpoint_manager.query = query
 
@@ -504,8 +505,11 @@ def _pull_mock_data(writer, checkpoint_manager, api_client, query, start_over=No
         since=since
     )
 
-    # set this so that it get's written to the checkpoints
-    checkpoint_manager.query = query
+    assert not (checkpoint_manager and since), "'checkpoint_manager' must be None when using 'since'"
+
+    if checkpoint_manager:
+        # set this so that it get's written to the checkpoints
+        checkpoint_manager.query = query
 
     # have to mock these to override the pool class otherwise they hold the db connection open
     api_client_patch = mock.patch('commcare_export.cli._get_api_client',
@@ -661,9 +665,9 @@ def test_cli_pagination_since(self, writer, all_db_checkpoint_manager):
         # simulate previous run with legacy pagination mode
         checkpoint_manager.set_checkpoint('2012-04-28T05:13:01', PaginationMode.date_modified, is_final=True)
 
-        _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(1), 'tests/013_ConflictingTypes.xlsx', since='2012-04-24T05:13:01')
+        # this will fail if it doesn't use the 'date_indexed' pagination mode due to how the mock client is set up
+        _pull_mock_data(writer, None, get_indexed_on_client(1), 'tests/013_ConflictingTypes.xlsx', since='2012-04-24T05:13:01')
         self._check_data(writer, [["doc 3"], ["doc 4"]], "Case")
-        self._check_checkpoint(checkpoint_manager, '2012-04-26T05:13:01', 'doc 4')
 
     def _check_data(self, writer, expected, table_name):
         _check_data(writer, expected, table_name, ['id'])

From 67f062468d157120ac708b7b70fd293c1a47b527 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Wed, 27 Jan 2021 17:26:42 +0200
Subject: [PATCH 059/257] remove print

---
 tests/test_cli.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index c2e83bd6..2bf6a526 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -446,11 +446,7 @@ def __init__(self, mock_data):
     def get(self, resource, params=None):
         mock_requests = self.mock_data[resource]
         key = _params_to_url(params)
-        try:
-            objects = mock_requests.pop(key)
-        except KeyError:
-            print(mock_requests.keys())
-            raise
+        objects = mock_requests.pop(key)
         if objects:
             return {'meta': {'limit': len(objects), 'next': bool(mock_requests),
                              'offset': 0, 'previous': None,

From f7db1c9b9b4143d42e06af524196dcaa298412c8 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Wed, 27 Jan 2021 20:44:15 +0200
Subject: [PATCH 060/257] allow checkpointing with since / until in integration tests

---
 tests/test_cli.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 2bf6a526..1ad64142 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -317,8 +317,7 @@ def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10):
         until=until,
     )
 
-    assert not (checkpoint_manager and since), "'checkpoint_manager' must be None when using 'since'"
-    # set this so that it get's written to the checkpoints
+    # set this so that it gets written to the checkpoints
     checkpoint_manager.query = query
 
     # have to mock these to override the pool class otherwise they hold the db connection open
@@ -504,7 +503,7 @@ def _pull_mock_data(writer, checkpoint_manager, api_client, query, start_over=No
     assert not (checkpoint_manager and since), "'checkpoint_manager' must be None when using 'since'"
 
     if checkpoint_manager:
-        # set this so that it get's written to the checkpoints
+        # set this so that it gets written to the checkpoints
         checkpoint_manager.query = query
 
     # have to mock these to override the pool class otherwise they hold the db connection open

From 6e2a403bcccea9beb9802b6717ce9f5748178d3c Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Tue, 2 Feb 2021 10:13:35 +0200
Subject: [PATCH 061/257] make username explicit in release docs

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c1d041b3..5c60aa12 100644
--- a/README.md
+++ b/README.md
@@ -503,7 +503,7 @@ Ensure that the archive (`dist/commcare-export-X.YY.0.tar.gz`) has the correct v
 
 ```
 $ pip install twine
-$ twine upload dist/commcare-export-X.YY.0.tar.gz
+$ twine upload -u dimagi dist/commcare-export-X.YY.0.tar.gz
 ```
 
 4\. Verify upload

From e653952f241f3dd1d5d453af61f072cbc85ee699 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Wed, 17 Feb 2021 14:50:25 +0200
Subject: [PATCH 062/257] failing test for json writer

---
 tests/test_writers.py | 56 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/tests/test_writers.py b/tests/test_writers.py
index 1a844a60..70ac46e3 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -290,6 +290,62 @@ def test_change_type(self, writer):
 
             assert dict(row) == expected[id]
 
+    def test_json_type(self, strict_writer):
+        complex_object = {
+            'poke1': {
+                'name': 'snorlax',
+                'color': 'blue',
+                'attributes': {
+                    'strength': 10,
+                    'endurance': 10,
+                    'speed': 4,
+                },
+                'friends': [
+                    'pikachu',
+                    'charmander',
+                ],
+            },
+            'poke2': {
+                'name': 'pikachu',
+                'color': 'yellow',
+                'attributes': {
+                    'strength': 2,
+                    'endurance': 2,
+                    'speed': 8,
+                    'cuteness': 10,
+                },
+                'friends': [
+                    'snorlax',
+                    'charmander',
+                ],
+            },
+        }
+        with strict_writer:
+            strict_writer.write_table(TableSpec(**{
+                'name': 'foo_with_json',
+                'headings': ['id', 'json_col'],
+                'rows': [
+                    ['simple', {'k1': 'v1', 'k2': 'v2'}],
+                    ['with_lists', {'l1': ['i1', 'i2']}],
+                    ['complex', complex_object],
+                ],
+                'data_types': [
+                    'text',
+                    'json',
+                ]
+            }))
+
+        # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above
+        with strict_writer:
+            result = dict([(row['id'], row) for row in strict_writer.connection.execute(
+                'SELECT id, json_col FROM foo_with_json'
+            )])
+
+        assert len(result) == 3
+        assert dict(result['simple']) == {'id': 'simple', 'json_col': {'k1': 'v1', 'k2': 'v2'}}
+        assert dict(result['with_lists']) == {'id': 'with_lists', 'json_col': {'l1': ['i1', 'i2']}}
+        assert dict(result['complex']) == {'id': 'complex', 'json_col': complex_object}
+
     def test_explicit_types(self, strict_writer):
         with strict_writer:
             strict_writer.write_table(TableSpec(**{

From 1fe587f9d899352b2e01390fe4fc206854cd7948 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Wed, 17 Feb 2021 14:55:45 +0200
Subject: [PATCH 063/257] holy crap it was literally this easy

---
 commcare_export/data_types.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/commcare_export/data_types.py b/commcare_export/data_types.py
index 3a15340c..006a5e7b 100644
--- a/commcare_export/data_types.py
+++ b/commcare_export/data_types.py
@@ -5,12 +5,14 @@
 DATA_TYPE_DATE = 'date'
 DATA_TYPE_DATETIME = 'datetime'
 DATA_TYPE_INTEGER = 'integer'
+DATA_TYPE_JSON = 'json'
 
 DATA_TYPES_TO_SQLALCHEMY_TYPES = {
     DATA_TYPE_BOOLEAN: sqlalchemy.Boolean(),
     DATA_TYPE_DATETIME: sqlalchemy.DateTime(),
     DATA_TYPE_DATE: sqlalchemy.Date(),
     DATA_TYPE_INTEGER: sqlalchemy.Integer(),
+    DATA_TYPE_JSON: sqlalchemy.JSON(),
 }
 
 class UnknownDataType(Exception):

From b886cc479f8d057bf722bd543f0e47c3e9ccbe0f Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Wed, 17 Feb 2021 15:13:32 +0200
Subject: [PATCH 064/257] fix compatibility check

---
 commcare_export/writers.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/commcare_export/writers.py b/commcare_export/writers.py
index a13ba6eb..8e742fbf 100644
--- a/commcare_export/writers.py
+++ b/commcare_export/writers.py
@@ -416,6 +416,10 @@ def compatible(self, source_type, dest_type):
         }
 
         # add dialect specific types
+        try:
+            compatibility[sqlalchemy.JSON] = (sqlalchemy.dialects.postgresql.json.JSON,)
+        except AttributeError:
+            pass
         try:
             compatibility[sqlalchemy.Boolean] += (sqlalchemy.dialects.mssql.base.BIT,)
         except AttributeError:
             pass
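Patches 062–064 wire the new 'json' data type through to SQLAlchemy's generic `sqlalchemy.JSON` column type. A minimal sketch of what that type buys, independent of this repo — written in SQLAlchemy 1.x idioms (matching the versions these patches target) against SQLite, which supports the generic JSON type; the table and column names are invented:

```python
import sqlalchemy

engine = sqlalchemy.create_engine('sqlite://')
metadata = sqlalchemy.MetaData()
docs = sqlalchemy.Table(
    'docs', metadata,
    sqlalchemy.Column('id', sqlalchemy.Unicode(32), primary_key=True),
    sqlalchemy.Column('json_col', sqlalchemy.JSON()),  # the type 'json' now maps to
)
metadata.create_all(engine)

with engine.connect() as conn:
    conn.execute(docs.insert(), {'id': 'simple', 'json_col': {'k1': 'v1'}})
    stored = conn.execute(sqlalchemy.select([docs.c.json_col])).scalar()

assert stored == {'k1': 'v1'}  # round-trips as a Python dict, not a string
```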
From 0a307dd8cd2fb0497465fcad3a7655900e0fc5bf Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Wed, 17 Feb 2021 15:28:18 +0200
Subject: [PATCH 065/257] don't need a strict writer for this test

---
 tests/test_writers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_writers.py b/tests/test_writers.py
index 70ac46e3..d08a305e 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -290,7 +290,7 @@ def test_change_type(self, writer):
 
             assert dict(row) == expected[id]
 
-    def test_json_type(self, strict_writer):
+    def test_json_type(self, writer):
         complex_object = {
             'poke1': {
                 'name': 'snorlax',
@@ -320,8 +320,8 @@ def test_json_type(self, strict_writer):
                 ],
             },
         }
-        with strict_writer:
-            strict_writer.write_table(TableSpec(**{
+        with writer:
+            writer.write_table(TableSpec(**{
                 'name': 'foo_with_json',
                 'headings': ['id', 'json_col'],
                 'rows': [
@@ -336,8 +336,8 @@ def test_json_type(self, strict_writer):
             }))
 
         # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above
-        with strict_writer:
-            result = dict([(row['id'], row) for row in strict_writer.connection.execute(
+        with writer:
+            result = dict([(row['id'], row) for row in writer.connection.execute(
                 'SELECT id, json_col FROM foo_with_json'
             )])
 

From 51c69d8b4bbd6c975eb4cf6b3f62be0da1c7a632 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Wed, 17 Feb 2021 15:28:31 +0200
Subject: [PATCH 066/257] only run on postgres

---
 tests/test_writers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_writers.py b/tests/test_writers.py
index d08a305e..a574c86c 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -321,6 +321,8 @@ def test_json_type(self, writer):
             },
         }
         with writer:
+            if not writer.is_postgres:
+                return
             writer.write_table(TableSpec(**{
                 'name': 'foo_with_json',
                 'headings': ['id', 'json_col'],

From debb2be373d3dde70e5c2af2449a6cbee958dd42 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Fri, 26 Feb 2021 09:10:23 +0200
Subject: [PATCH 067/257] fix link to pagination release notes

---
 commcare_export/checkpoint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py
index 7bb668a5..dc68d348 100644
--- a/commcare_export/checkpoint.py
+++ b/commcare_export/checkpoint.py
@@ -385,6 +385,6 @@ def get_checkpoint_manager(self, data_source, table_names):
                 "To switch to the new mode you must re-sync your data using `--start-over`.\n"
                 "For more details see: %s"
                 "\n====================================\n",
-                "https://wiki.commcarehq.org/display/commcarepublic/CommCare+Export+Tool+Release+Notes"
+                "https://github.com/dimagi/commcare-export/releases/tag/1.5.0"
             )
         return CheckpointManagerWithDetails(manager, since, pagination_mode)

From f98ce7c95832b6f810d6c7213ed691a95e9254fc Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Fri, 5 Mar 2021 10:26:03 +0200
Subject: [PATCH 068/257] handle None response

This happens if the connection times out
---
 commcare_export/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/cli.py b/commcare_export/cli.py
index c65c8cf0..b6637f64 100644
--- a/commcare_export/cli.py
+++ b/commcare_export/cli.py
@@ -259,7 +259,7 @@ def evaluate_query(env, query):
         force_lazy_result(lazy_result)
         return 0
     except requests.exceptions.RequestException as e:
-        if e.response.status_code == 401:
+        if e.response and e.response.status_code == 401:
             print("\nAuthentication failed. Please check your credentials.", file=sys.stderr)
             return EXIT_STATUS_ERROR
         else:

From ed92aa30dec13814478dd1abf92a87e40adb7572 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Mon, 8 Mar 2021 14:20:29 +0200
Subject: [PATCH 069/257] add messaging-events to API

---
 commcare_export/commcare_minilinq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index 79d052fc..de514649 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -18,7 +18,7 @@
 
 
 SUPPORTED_RESOURCES = {
-    'form', 'case', 'user', 'location', 'application', 'web-user'
+    'form', 'case', 'user', 'location', 'application', 'web-user', 'messaging-event'
 }
 

From 1c26998caa81c16193d760f0c598c37a9dc88fd0 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Mon, 8 Mar 2021 14:47:07 +0200
Subject: [PATCH 070/257] add checkpointing

---
 commcare_export/commcare_minilinq.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index de514649..34573d90 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -86,7 +86,8 @@ def __call__(self, since, until):
 
 DATE_PARAMS = {
     'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end'),
-    'server_date_modified': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end')
+    'server_date_modified': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end'),
+    'date': SimpleSinceParams('date__gte', 'date__lt'),  # used by messaging-events
 }
 
 
@@ -95,10 +96,12 @@ def get_paginator(resource, page_size=1000, pagination_mode=PaginationMode.date_
         PaginationMode.date_indexed: {
             'form': DatePaginator('indexed_on', page_size),
             'case': DatePaginator('indexed_on', page_size),
+            'messaging-event': DatePaginator('date', page_size),
        },
        PaginationMode.date_modified: {
            'form': DatePaginator(['server_modified_on', 'received_on'], page_size, params=FormFilterSinceParams()),
            'case': DatePaginator('server_date_modified', page_size),
+            'messaging-event': DatePaginator('date', page_size),
        }
     }[pagination_mode].get(resource, SimplePaginator(page_size))
 

From 32de2b13563511958ce6f34484dae16f04ec8edf Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Thu, 11 Mar 2021 13:48:29 +0200
Subject: [PATCH 071/257] add failing test documenting desired behavior

---
 tests/test_commcare_hq_client.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py
index bbb2da51..0491fe70 100644
--- a/tests/test_commcare_hq_client.py
+++ b/tests/test_commcare_hq_client.py
@@ -77,6 +77,31 @@ def _get_results(self, params):
             }
 
 
+class FakMessageLogSession(FakeSession):
+    # for message logs, the last batch returns the same results in a loop, because
+    # we use a non-counting paginator in tastypie that can't know if it's "finished"
+    # We will gracefully treat this as success under the conditions where:
+    # - total_count is absent
+    # - the number of returned rows is fewer than the limit
+    # - the contents of the batch are the same
+    def _get_results(self, params):
+        obj_1 = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'}
+        obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'}
+        if not params:
+            return {
+                'meta': {'next': '?offset=2', 'offset': 0, 'limit': 2, 'total_count': None},
+                'objects': [obj_1, obj_2]
+            }
+        else:
+            since_query_param = DATE_PARAMS['date'].start_param
+            print(params)
+            assert params[since_query_param] == '2017-01-01T15:37:22'
+            return {
+                'meta': {'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': None},
+                'objects': [obj_2]
+            }
+
+
 class FakeDateFormSession(FakeSession):
     def _get_results(self, params):
         since1 = '2017-01-01T15:36:22'
@@ -129,6 +154,9 @@ def test_repeat_limit(self):
                                match="Requested resource '/fake/uri' 10 times with same parameters"):
             self._test_iterate(FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, [1, 2])
 
+    def test_message_log(self):
+        self._test_iterate(FakMessageLogSession(), get_paginator('messaging-event', 2), 2, [1, 2])
+
 
 class TestDatePaginator(unittest.TestCase):
 

From 433daef03400ddcf10b2bae9a98cb04ef4ffb892 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Thu, 11 Mar 2021 14:03:50 +0200
Subject: [PATCH 072/257] fix last batch of messaging events

---
 commcare_export/commcare_hq_client.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py
index 033b5384..ee516f53 100644
--- a/commcare_export/commcare_hq_client.py
+++ b/commcare_export/commcare_hq_client.py
@@ -146,13 +146,14 @@ def iterate_resource(resource=resource, params=params):
                 fetched += len(batch['objects'])
                 logger.debug('Received %s of %s', fetched, total_count)
 
-
                 if not batch['objects']:
                     more_to_fetch = False
                 else:
+                    got_new_data = False
                     for obj in batch['objects']:
                         if obj['id'] not in last_batch_ids:
                             yield obj
+                            got_new_data = True
 
                     if batch['meta']['next']:
                         last_batch_ids = {obj['id'] for obj in batch['objects']}
@@ -162,6 +163,15 @@ def iterate_resource(resource=resource, params=params):
                     else:
                         more_to_fetch = False
 
+                    limit = batch['meta'].get('limit')
+                    repeated_last_page_of_non_counting_resource = (
+                        not got_new_data
+                        and total_count == 'unknown'
+                        and (limit and len(batch['objects']) < limit)
+                    )
+                    if more_to_fetch and repeated_last_page_of_non_counting_resource:
+                        more_to_fetch = False
+
                 self.checkpoint(checkpoint_manager, paginator, batch, not more_to_fetch)
 
     return RepeatableIterator(iterate_resource)
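The guard added in patch 072 reads most clearly as a pure predicate. The following restatement is for illustration only — the string 'unknown' is the sentinel the surrounding code uses for an uncountable resource (it is extracted to a constant two patches later):

```python
def repeated_last_page(got_new_data, total_count, batch_objects, limit):
    # True only when the API cannot count results ('unknown'), the page came
    # back short of the limit, and every object was already seen -- i.e. the
    # tastypie endpoint is replaying its final page rather than advancing.
    return (
        not got_new_data
        and total_count == 'unknown'
        and bool(limit) and len(batch_objects) < limit
    )

# Stop: a short, fully-duplicate page with no usable total_count.
assert repeated_last_page(False, 'unknown', [{'id': 2}], limit=2)
# Continue: new data arrived, even though the count is unknown.
assert not repeated_last_page(True, 'unknown', [{'id': 3}], limit=2)
```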
From 1a2ba092f91837061a0a8ce681935f8ab73f9406 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Thu, 11 Mar 2021 14:05:40 +0200
Subject: [PATCH 073/257] remove print statement

---
 tests/test_commcare_hq_client.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py
index 0491fe70..9c29e000 100644
--- a/tests/test_commcare_hq_client.py
+++ b/tests/test_commcare_hq_client.py
@@ -94,7 +94,6 @@ def _get_results(self, params):
             }
         else:
             since_query_param = DATE_PARAMS['date'].start_param
-            print(params)
             assert params[since_query_param] == '2017-01-01T15:37:22'
             return {
                 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': None},

From 9908562e81dee72321c902c13edac319ccf6f17c Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Mon, 15 Mar 2021 16:22:42 +0200
Subject: [PATCH 074/257] extract variable to constant

---
 commcare_export/commcare_hq_client.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py
index ee516f53..317e1234 100644
--- a/commcare_export/commcare_hq_client.py
+++ b/commcare_export/commcare_hq_client.py
@@ -121,6 +121,7 @@ def iterate(self, resource, paginator, params=None, checkpoint_manager=None):
         Assumes the endpoint is a list endpoint, and iterates over it
         making a lot of assumptions that it is like a tastypie endpoint.
         """
+        UNKNOWN_COUNT = 'unknown'
         params = dict(params or {})
         def iterate_resource(resource=resource, params=params):
             more_to_fetch = True
@@ -140,8 +141,8 @@ def iterate_resource(resource=resource, params=params):
                 batch = self.get(resource, params)
                 last_params = copy.copy(params)
-                if not total_count or total_count == 'unknown' or fetched >= total_count:
-                    total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else 'unknown'
+                if not total_count or total_count == UNKNOWN_COUNT or fetched >= total_count:
+                    total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else UNKNOWN_COUNT
                     fetched = 0
 
                 fetched += len(batch['objects'])
@@ -166,7 +167,7 @@ def iterate_resource(resource=resource, params=params):
                     limit = batch['meta'].get('limit')
                     repeated_last_page_of_non_counting_resource = (
                         not got_new_data
-                        and total_count == 'unknown'
+                        and total_count == UNKNOWN_COUNT
                         and (limit and len(batch['objects']) < limit)
                     )
                     if more_to_fetch and repeated_last_page_of_non_counting_resource:
                         more_to_fetch = False

From c6ee572a14c4e917acffea3c148dd6b4b7726bd1 Mon Sep 17 00:00:00 2001
From: Cory Zue
Date: Mon, 15 Mar 2021 16:23:58 +0200
Subject: [PATCH 075/257] move condition inside if statement

---
 commcare_export/commcare_hq_client.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py
index 317e1234..0ea72a4d 100644
--- a/commcare_export/commcare_hq_client.py
+++ b/commcare_export/commcare_hq_client.py
@@ -165,13 +165,13 @@ def iterate_resource(resource=resource, params=params):
                         more_to_fetch = False
 
                     limit = batch['meta'].get('limit')
-                    repeated_last_page_of_non_counting_resource = (
-                        not got_new_data
-                        and total_count == UNKNOWN_COUNT
-                        and (limit and len(batch['objects']) < limit)
-                    )
-                    if more_to_fetch and repeated_last_page_of_non_counting_resource:
-                        more_to_fetch = False
+                    if more_to_fetch:
+                        repeated_last_page_of_non_counting_resource = (
+                            not got_new_data
+                            and total_count == UNKNOWN_COUNT
+                            and (limit and len(batch['objects']) < limit)
+                        )
+                        more_to_fetch = not repeated_last_page_of_non_counting_resource
 
                 self.checkpoint(checkpoint_manager, paginator, batch, not more_to_fetch)

From 5076ba488bfbfe89150f19f66839983301faca30 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Mon, 31 May 2021 13:10:17 +0200
Subject: [PATCH 076/257] Change mysql collation and charset from utf8 -> utf8mb4

---
 commcare_export/writers.py |  2 +-
 tests/conftest.py          |  2 +-
 tests/test_writers.py      | 10 ++++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/commcare_export/writers.py b/commcare_export/writers.py
index 8e742fbf..d6c7ad20 100644
--- a/commcare_export/writers.py
+++ b/commcare_export/writers.py
@@ -262,7 +262,7 @@ class SqlMixin(object):
 
     def __init__(self, db_url, poolclass=None, engine=None):
         self.db_url = db_url
-        self.collation = 'utf8_bin' if 'mysql' in db_url else None
+        self.collation = 'utf8mb4_unicode_ci' if 'mysql' in db_url else None
         self.engine = engine or sqlalchemy.create_engine(db_url, poolclass=poolclass)
 
     def __enter__(self):
diff --git a/tests/conftest.py b/tests/conftest.py
index 5f7f4f9e..d62a2981 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -66,7 +66,7 @@ def tear_down():
         'admin_db': 'postgres'
     }, marks=pytest.mark.postgres),
     pytest.param({
-        'url': '{}%s?charset=utf8'.format(mysql_base),
+        'url': '{}%s?charset=utf8mb4'.format(mysql_base),
     }, marks=pytest.mark.mysql),
     pytest.param({
         'url': '{}%s?driver=ODBC+Driver+17+for+SQL+Server'.format(mssql_base),
diff --git a/tests/test_writers.py b/tests/test_writers.py
index a574c86c..679c3683 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -439,6 +439,16 @@ def test_mssql_nvarchar_length_downsize(self, writer):
             result = self._get_column_lengths(writer.connection, 'mssql_nvarchar_length_downsize')
             assert result['some_data'] == ('some_data', 'nvarchar', -1)
 
+    def test_big_lump_of_poo(self, writer):
+        with writer:
+            writer.write_table(TableSpec(**{
+                'name': 'foo_with_emoji',
+                'headings': ['id', 'fun_to_be_had'],
+                'rows': [
+                    ['A steaming poo', '💩'],
+                    ['2020', '😷'],
+                ],
+            }))
 
     def _get_column_lengths(self, connection, table_name):
         return {
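For context on patch 076: MySQL's legacy `utf8` charset stores at most 3 bytes per character, so 4-byte code points — emoji like the ones in the new test — cannot be round-tripped, while `utf8mb4` can hold them all. A connection URL in the shape these tests use; host, credentials and database name are placeholders:

```python
# Hypothetical example URL, not taken from the repo's configuration:
DB_URL = 'mysql+pymysql://user:password@localhost/commcare?charset=utf8mb4'
```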
From 6bbaa7ca3c286e260b43aa5cf9b94827f4e065f7 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Tue, 1 Jun 2021 10:36:56 +0200
Subject: [PATCH 077/257] Change to _bin

---
 commcare_export/writers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/writers.py b/commcare_export/writers.py
index d6c7ad20..8e742fbf 100644
--- a/commcare_export/writers.py
+++ b/commcare_export/writers.py
@@ -262,7 +262,7 @@ class SqlMixin(object):
 
     def __init__(self, db_url, poolclass=None, engine=None):
         self.db_url = db_url
-        self.collation = 'utf8mb4_unicode_ci' if 'mysql' in db_url else None
+        self.collation = 'utf8_bin' if 'mysql' in db_url else None
         self.engine = engine or sqlalchemy.create_engine(db_url, poolclass=poolclass)
 
     def __enter__(self):

From a6434433fe568a301d2bb7364b386c3ba1bdb2a4 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Mon, 7 Jun 2021 11:05:42 +0200
Subject: [PATCH 078/257] Change collation back to 'utf8mb4_unicode_ci'

---
 commcare_export/writers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/writers.py b/commcare_export/writers.py
index 8e742fbf..d6c7ad20 100644
--- a/commcare_export/writers.py
+++ b/commcare_export/writers.py
@@ -262,7 +262,7 @@ class SqlMixin(object):
 
     def __init__(self, db_url, poolclass=None, engine=None):
         self.db_url = db_url
-        self.collation = 'utf8_bin' if 'mysql' in db_url else None
+        self.collation = 'utf8mb4_unicode_ci' if 'mysql' in db_url else None
         self.engine = engine or sqlalchemy.create_engine(db_url, poolclass=poolclass)
 
     def __enter__(self):

From 6c7f65abe26cc20dd079f9faca3c9ee843438c71 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Mon, 7 Jun 2021 13:04:13 +0200
Subject: [PATCH 079/257] update date filtering params for messaging-event

---
 commcare_export/commcare_minilinq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index 34573d90..4d55682d 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -87,7 +87,7 @@ def __call__(self, since, until):
 DATE_PARAMS = {
     'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end'),
     'server_date_modified': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end'),
-    'date': SimpleSinceParams('date__gte', 'date__lt'),  # used by messaging-events
+    'date': SimpleSinceParams('date.gte', 'date.lt'),  # used by messaging-events
 }
 

From 64a226ed605ff1341f6a7761e8ddb6e544fcd840 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Tue, 8 Jun 2021 13:59:45 +0200
Subject: [PATCH 080/257] remove '@' from password

---
 .travis.yml       | 2 +-
 tests/conftest.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index bf869142..c25bc4ee 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,7 +11,7 @@ addons:
       - unixodbc-dev
 env:
   global:
-    - MSSQL_SA_PASSWORD=Password@123
+    - MSSQL_SA_PASSWORD=Password-123
     # HQ_USERNAME and HQ_API_KEY
     - secure: etv02uWtyy5P4DfyuHjFm5RDFc6WBHLsnIMC75VjDk8kxDgwV/lDbPYMh/hzfPHyskgA1EQbc8IfHlbZWFVV8jOTy+wvrVir/mw95AEyNyAL/TTSWvYfTvdCsxOSbY6vcGlJNfy6rc+y0h6QyuIknY0OhU8sTaRcQnvbFPnOz28=
     - secure: aLj1bKtUF2CnAwG+yjiAjo39cKi9WHaonIwqsuhOx4McsD/xSz4QHv/6/XhXZ5KxKyxw1+PBl/mWo6gyrT5iHDRBPk5iJXqZAgQFS2ukZSv/tUBGL7bWzoO9YfoLuWllA33DCr3PiXAhkH53dTcor16UN9wXeCprBBSGjhpAxRQ=
diff --git a/tests/conftest.py b/tests/conftest.py
index 5f7f4f9e..8e4364b4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -57,7 +57,7 @@ def tear_down():
 
 postgres_base = os.environ.get('POSTGRES_URL', 'postgresql://postgres@localhost/')
 mysql_base = os.environ.get('MYSQL_URL', 'mysql+pymysql://travis@/')
-mssql_base = os.environ.get('MSSQL_URL', 'mssql+pyodbc://SA:Password@123@localhost/')
+mssql_base = os.environ.get('MSSQL_URL', 'mssql+pyodbc://SA:Password-123@localhost/')
 

From da7ee4da0df41d51b96a81cb304e70de5efe94fd Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Tue, 8 Jun 2021 14:03:32 +0200
Subject: [PATCH 081/257] less verbose output

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c56246ad..d577c98f 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ class PyTest(TestCommand):
 
     def finalize_options(self):
         TestCommand.finalize_options(self)
-        self.test_args = ['-vv']
+        self.test_args = ['-vv', '--tb=short']
         self.test_suite = True
 
     def run_tests(self):

From c6c2d58aebdbaa2aba12a63257e3776806144818 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Tue, 8 Jun 2021 15:15:59 +0200
Subject: [PATCH 082/257] change param name

Since sqlalchemy v1.4 this fails with the error message:
TypeError: warned() got multiple values for argument 'fn'
---
 tests/test_cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 1ad64142..d98ed122 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -536,8 +536,8 @@ def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_ma
 
         # expect checkpoint to have the date from the first batch and not the 2nd
         runs = list(strict_writer.engine.execute(
-            sqlalchemy.text('SELECT table_name, since_param, last_doc_id from commcare_export_runs where query_file_name = :fn'),
-            fn='tests/013_ConflictingTypes.xlsx'
+            sqlalchemy.text('SELECT table_name, since_param, last_doc_id from commcare_export_runs where query_file_name = :file'),
+            file='tests/013_ConflictingTypes.xlsx'
         ))
         assert runs == [
             ('Case', '2012-04-24T05:13:01', 'doc 2'),

From 215c1db118cc10e95a0cec7a573d897d7efa282c Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Wed, 9 Jun 2021 17:20:45 +0200
Subject: [PATCH 083/257] more verbose error logging if debug enabled

---
 commcare_export/excel_query.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py
index c8c213f5..2ec6efd8 100644
--- a/commcare_export/excel_query.py
+++ b/commcare_export/excel_query.py
@@ -380,7 +380,11 @@ def parse_workbook(workbook, column_enforcer=None):
         try:
             sheet_parts = parse_sheet(workbook[sheet], mappings, column_enforcer)
         except Exception as e:
-            logger.warning('Ignoring sheet "{}": {}'.format(sheet, str(e)))
+            msg = 'Ignoring sheet "{}": {}'.format(sheet, str(e))
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(msg)
+            else:
+                logger.warning(msg)
             continue
 
         parsed_sheets.append(sheet_parts)

From ddef74f027154aa4d35891a4c9501d80dbd005d2 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Mon, 14 Jun 2021 17:56:25 +0200
Subject: [PATCH 084/257] Refer user to use utf8mb4 charset

---
 commcare_export/cli.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/commcare_export/cli.py b/commcare_export/cli.py
index b6637f64..f1377810 100644
--- a/commcare_export/cli.py
+++ b/commcare_export/cli.py
@@ -209,6 +209,12 @@ def _get_writer(output_format, output, strict_types):
     elif output_format == 'sql':
         # Output should be a connection URL
         # Writer had bizarre issues so we use a full connection instead of passing in a URL or engine
+        if output.startswith('mysql'):
+            charset_split = output.split('charset=')
+            if len(charset_split) > 1 and charset_split[1] != 'utf8mb4':
+                raise Exception(f"The charset '{charset_split[1]}' might cause problems with the export. "
+                                f"It is recommended that you use 'utf8mb4' instead.")
+
         return writers.SqlTableWriter(output, strict_types)
     else:
         raise Exception("Unknown output format: {}".format(output_format))

From f78bfa45821c4a0af74168de567b4d7d6766cc90 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Tue, 22 Jun 2021 14:23:07 +0200
Subject: [PATCH 085/257] total_count may not be present

---
 commcare_export/commcare_hq_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py
index 0ea72a4d..14f4285a 100644
--- a/commcare_export/commcare_hq_client.py
+++ b/commcare_export/commcare_hq_client.py
@@ -142,7 +142,7 @@ def iterate_resource(resource=resource, params=params):
                 batch = self.get(resource, params)
                 last_params = copy.copy(params)
                 if not total_count or total_count == UNKNOWN_COUNT or fetched >= total_count:
-                    total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else UNKNOWN_COUNT
+                    total_count = int(batch['meta']['total_count']) if batch['meta'].get('total_count') else UNKNOWN_COUNT
                     fetched = 0
 
                 fetched += len(batch['objects'])
+++ b/commcare_export/commcare_hq_client.py @@ -143,7 +143,10 @@ def iterate_resource(resource=resource, params=params): last_params = copy.copy(params) batch_meta = batch['meta'] if not total_count or total_count == UNKNOWN_COUNT or fetched >= total_count: - total_count = int(batch_meta['total_count']) if batch_meta.get('total_count') else UNKNOWN_COUNT + if batch_meta.get('total_count'): + total_count = int(batch_meta['total_count']) + else: + total_count = UNKNOWN_COUNT fetched = 0 batch_objects = batch['objects'] From 7d6a2230d3180be9d7abf67f7a5aa9de33d43a38 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 22 Jun 2021 14:27:12 +0200 Subject: [PATCH 088/257] default total_count to 'unknown' --- commcare_export/commcare_hq_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index e91cebff..1947a10f 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -126,7 +126,7 @@ def iterate(self, resource, paginator, params=None, checkpoint_manager=None): def iterate_resource(resource=resource, params=params): more_to_fetch = True last_batch_ids = set() - total_count = None + total_count = UNKNOWN_COUNT fetched = 0 repeat_counter = 0 last_params = None @@ -142,7 +142,7 @@ def iterate_resource(resource=resource, params=params): batch = self.get(resource, params) last_params = copy.copy(params) batch_meta = batch['meta'] - if not total_count or total_count == UNKNOWN_COUNT or fetched >= total_count: + if total_count == UNKNOWN_COUNT or fetched >= total_count: if batch_meta.get('total_count'): total_count = int(batch_meta['total_count']) else: From d476afc48faffff2cdc2ebb61bcbb97e900f5ff6 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 22 Jun 2021 15:36:13 +0200 Subject: [PATCH 089/257] update tests --- tests/test_commcare_hq_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 9c29e000..643c22da 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -89,14 +89,14 @@ def _get_results(self, params): obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'} if not params: return { - 'meta': {'next': '?offset=2', 'offset': 0, 'limit': 2, 'total_count': None}, + 'meta': {'next': '?offset=2', 'offset': 0, 'limit': 2}, 'objects': [obj_1, obj_2] } else: since_query_param = DATE_PARAMS['date'].start_param assert params[since_query_param] == '2017-01-01T15:37:22' return { - 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': None}, + 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2}, 'objects': [obj_2] } From 9cbfbf46e4f30054fd1f2bd51ecd3fb8e7d70278 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 22 Jun 2021 15:43:20 +0200 Subject: [PATCH 090/257] update tests --- tests/test_commcare_hq_client.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 643c22da..616a0101 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -87,18 +87,24 @@ class FakMessageLogSession(FakeSession): def _get_results(self, params): obj_1 = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'} obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'} + obj_3 = {'id': 3, 'foo': 3, 'date': '2017-01-01T15:38:22Z'} if not params: return { - 'meta': {'next': '?offset=2', 
'offset': 0, 'limit': 2}, + 'meta': {'next': '?cursor=xyz', 'limit': 2}, 'objects': [obj_1, obj_2] } else: since_query_param = DATE_PARAMS['date'].start_param - assert params[since_query_param] == '2017-01-01T15:37:22' - return { - 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2}, - 'objects': [obj_2] - } + since = params[since_query_param] + if since == '2017-01-01T15:37:22': + return { + 'meta': {'next': '?cursor=xyz', 'limit': 2}, + 'objects': [obj_3] + } + if since == '2017-01-01T15:38:22': + return {'meta': {'next': None, 'limit': 2}, 'objects': []} + + raise Exception(since) class FakeDateFormSession(FakeSession): @@ -154,7 +160,7 @@ def test_repeat_limit(self): self._test_iterate(FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, [1, 2]) def test_message_log(self): - self._test_iterate(FakMessageLogSession(), get_paginator('messaging-event', 2), 2, [1, 2]) + self._test_iterate(FakMessageLogSession(), get_paginator('messaging-event', 2), 3, [1, 2, 3]) class TestDatePaginator(unittest.TestCase): From 399772114913b254e8e2541b2bd6634e91fd78d4 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 22 Jun 2021 15:52:47 +0200 Subject: [PATCH 091/257] move comment --- commcare_export/commcare_hq_client.py | 1 + tests/test_commcare_hq_client.py | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 1947a10f..03a90e8c 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -171,6 +171,7 @@ def iterate_resource(resource=resource, params=params): limit = batch_meta.get('limit') if more_to_fetch: + # Handle the case where API is 'non-counting' and repeats the last batch repeated_last_page_of_non_counting_resource = ( not got_new_data and total_count == UNKNOWN_COUNT diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 616a0101..3721a9e0 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -78,12 +78,6 @@ def _get_results(self, params): class FakMessageLogSession(FakeSession): - # for message logs, the last batch returns the same results in a loop, because - # we use a non-counting paginator in tastypie that can't know if it's "finished" - # We will gracefully treat this as success under the conditions where: - # - total_count is absent - # - the number of returned rows is fewer than the limit - # - the contents of the batch are the same def _get_results(self, params): obj_1 = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'} obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'} From 189865f67787075e751d30576d77143a585282fa Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 22 Jun 2021 15:55:25 +0200 Subject: [PATCH 092/257] fix typo --- tests/test_commcare_hq_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 3721a9e0..6a84ec2f 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -77,7 +77,7 @@ def _get_results(self, params): } -class FakMessageLogSession(FakeSession): +class FakeMessageLogSession(FakeSession): def _get_results(self, params): obj_1 = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'} obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'} @@ -154,7 +154,7 @@ def test_repeat_limit(self): self._test_iterate(FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, [1, 2]) def test_message_log(self): - 
self._test_iterate(FakMessageLogSession(), get_paginator('messaging-event', 2), 3, [1, 2, 3]) + self._test_iterate(FakeMessageLogSession(), get_paginator('messaging-event', 2), 3, [1, 2, 3]) class TestDatePaginator(unittest.TestCase): From 89cb6eedf11508bd68d4a4663f8f1ad9146876ac Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 24 Jun 2021 12:41:28 +0200 Subject: [PATCH 093/257] auto-id documentation test --- tests/test_minilinq.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index a1bb335a..b2836d65 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -26,7 +26,9 @@ def setup_class(cls): def check_case(self, val, expected): if isinstance(expected, list): - assert [datum.value if isinstance(datum, jsonpath.DatumInContext) else datum for datum in val] == expected + assert [unwrap_val(datum) for datum in val] == expected + else: + assert val == expected def test_eval_literal(self): env = BuiltInEnv() @@ -53,6 +55,27 @@ def test_eval_auto_id_reference(self): # When auto id is on, this always becomes a string. Sorry! self.check_case(Reference("foo.id").eval(JsonPathEnv({'foo': {'id': 2}})), ['2']) + def test_eval_auto_id_reference_nested(self): + # this test is documentation of existing (weird) functionality + # that results from a combination of jsonpath_rw auto_id feature and + # JsonPathEnv.lookup (which adds an additional auto ID for some reason). + env = JsonPathEnv({}) + + flatmap = FlatMap(source=Literal([{ + "id": 1, + "foo": {'id': 'bid', 'name': 'bob'}, + "bar": [ + {'baz': 'a1'}, {'baz': 'a2', 'id': 'bazzer'} + ] + }]), body=Reference('bar.[*]')) + mmap = Map(source=flatmap, body=List([ + Reference("id"), Reference('baz'), Reference('$.id'), Reference('$.foo.id'), Reference('$.foo.name') + ])) + self.check_case(mmap.eval(env), [ + ['1.bar.1.bar.[0]', 'a1', '1', '1.bid', 'bob'], + ['1.bar.bazzer', 'a2', '1', '1.bid', 'bob'] + ]) + def test_eval_collapsed_list(self): """ Special case to handle XML -> JSON conversion where there just happened to be a single value at save time From a2f369500d20df412d5eb268a93d11a277a6cd02 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 24 Jun 2021 12:47:22 +0200 Subject: [PATCH 094/257] comments for how it changes with different code changes --- tests/test_minilinq.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index b2836d65..c62aa030 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -76,6 +76,18 @@ def test_eval_auto_id_reference_nested(self): ['1.bar.bazzer', 'a2', '1', '1.bid', 'bob'] ]) + # Without the additional auto id field added in JsonPathEnv the result for Reference("id") changes + # as follows: + # '1.bar.1.bar.[0]' -> '1.bar.[0]' + + # With the change above AND a change to jsonpath_rw to prevent converting IDs that exist into + # auto IDs we get the following: + # Reference("id"): + # '1.bar.bazzer' -> 'bazzer' + # + # Reference('$.foo.id'): + # '1.bid' -> 'bid' + def test_eval_collapsed_list(self): """ Special case to handle XML -> JSON conversion where there just happened to be a single value at save time From a8b49f7b3081409f8fcab8dffd10c8bb62c52cc5 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 24 Jun 2021 13:09:38 +0200 Subject: [PATCH 095/257] add ref to PR --- tests/test_minilinq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index c62aa030..3e49bd68 
100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -81,7 +81,7 @@ def test_eval_auto_id_reference_nested(self): # '1.bar.1.bar.[0]' -> '1.bar.[0]' # With the change above AND a change to jsonpath_rw to prevent converting IDs that exist into - # auto IDs we get the following: + # auto IDs (see https://github.com/kennknowles/python-jsonpath-rw/pull/96) we get the following: # Reference("id"): # '1.bar.bazzer' -> 'bazzer' # From e48e3c8cb01a27cd3b69c12ad418a139cd58b74e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 24 Jun 2021 17:01:31 +0200 Subject: [PATCH 096/257] test value or root expression --- commcare_export/env.py | 16 ++++++++++++++- tests/test_minilinq.py | 44 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/commcare_export/env.py b/commcare_export/env.py index eef7442f..a61b91f6 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -433,7 +433,20 @@ def template(format_template, *args): def _or(*args): - unwrapped_args = (unwrap_val(arg) for arg in args) + return _or_impl(unwrap_val, *args) + + +def _or_raw(*args): + def unwrap_iter(arg): + if isinstance(arg, RepeatableIterator): + return list(arg) + return arg + + return _or_impl(unwrap_iter, *args) + + +def _or_impl(_unwrap, *args): + unwrapped_args = (_unwrap(arg) for arg in args) vals = (val for val in unwrapped_args if val is not None and val != []) try: return next(vals) @@ -498,6 +511,7 @@ def __init__(self, d=None): 'or': _or, 'sha1': sha1, 'substr': substr, + '_or_raw': _or_raw, # for internal use }) return super(BuiltInEnv, self).__init__(d) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 3e49bd68..03c7b3a5 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -1,23 +1,20 @@ # -*- coding: utf-8 -*- -import inspect import types import unittest from itertools import * import pytest -from six.moves import map, xrange +from six.moves import xrange -from jsonpath_rw import jsonpath - -from commcare_export.minilinq import * -from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.env import * +from commcare_export.minilinq import * from commcare_export.writers import JValueTableWriter class LazinessException(Exception): pass def die(msg): raise LazinessException(msg) # Hack: since "raise" is a statement not an expression, need a funcall wrapping it + class TestMiniLinq(unittest.TestCase): @classmethod @@ -88,6 +85,33 @@ def test_eval_auto_id_reference_nested(self): # Reference('$.foo.id'): # '1.bid' -> 'bid' + def test_flatmap_value_or_root(self): + """Low level test case for 'value-or-root' use case""" + env = BuiltInEnv() | JsonPathEnv({}) + + data = [ + {"id": 1, "foo": {'id': 'bid', 'name': 'zip'}, "bar": [{'baz': 'a1'}, {'baz': 'a2', 'id': 'bazzer'}]}, + {"id": 2, "foo": {'id': 'bid', 'name': 'zap'}, "bar": []}, + {"id": 3, "foo": {'id': 'bid', 'name': 'mip'}, "bar": {}}, + # {"id": 4, "foo": {'id': 'bid', 'name': 'map'}, "bar": None}, # fails with TypeError from jsonpath + {"id": 5, "foo": {'id': 'bid', 'name': 'mop'}}, + ] + value_or_root = Apply( + Reference('_or_raw'), Reference(str('bar.[*]')), Reference("$") + ) + flatmap = FlatMap(source=Literal(data), body=value_or_root) + mmap = Map(source=flatmap, body=List([ + Reference("id"), Reference('baz'), Reference('$.id'), Reference('$.foo.id'), Reference('$.foo.name') + ])) + self.check_case(mmap.eval(env), [ + ['1.bar.1.bar.[0]', 'a1', '1', '1.bid', 'zip'], + ['1.bar.bazzer', 'a2', '1', '1.bid', 'zip'], + ['2', 
[], '2', '2.bid', 'zap'], + ['3.bar.3.bar.[0]', [], '3', '3.bid', 'mip'], + # ['4.bar.[0]', [], '4', '4.bid', 'map'], + ['5', [], '5', '5.bid', 'mop'], + ]) + def test_eval_collapsed_list(self): """ Special case to handle XML -> JSON conversion where there just happened to be a single value at save time @@ -147,6 +171,14 @@ def test_or(self): assert Apply(Reference("or"), Reference('a.b'), Reference('a.c')).eval(env) == 'c val' assert Apply(Reference("or"), Reference('a.b'), Reference('a.d')).eval(env) is None + env = env.replace({'a': [], 'b': [1, 2], 'c': 2}) + self.check_case(Apply(Reference("or"), Reference('a.[*]'), Reference('b')).eval(env), [1, 2]) + self.check_case(Apply(Reference("or"), Reference('b.[*]'), Reference('c')).eval(env), [1, 2]) + self.check_case( + Apply(Reference("or"), Reference('a.[*]'), Reference('$')).eval(env), + {'a': [], 'b': [1, 2], 'c': 2, 'id': '$'} + ) + def test_attachment_url(self): env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv({'id': '123', 'domain': 'd1', 'photo': 'a.jpg'}) expected = 'https://www.commcarehq.org/a/d1/api/form/attachment/123/a.jpg' From eb4c43aa33efefef5962cf759f110b0f584668b5 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 24 Jun 2021 17:12:08 +0200 Subject: [PATCH 097/257] update excel query --- commcare_export/excel_query.py | 34 ++++++++++++++----- tests/test_excel_query.py | 62 +++++++++++++++++++++++++++++++--- tests/test_minilinq.py | 5 ++- 3 files changed, 85 insertions(+), 16 deletions(-) diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index 2ec6efd8..3e29315d 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -204,7 +204,7 @@ def split_leftmost(jsonpath_expr): return (jsonpath_expr, jsonpath.This()) -def compile_source(worksheet): +def compile_source(worksheet, value_or_root=False): """ Compiles just the part of the Excel Spreadsheet that indicates the API endpoint to hit along with optional filters @@ -260,7 +260,21 @@ def compile_source(worksheet): if data_source_jsonpath is None or isinstance(data_source_jsonpath, jsonpath.This) or isinstance(data_source_jsonpath, jsonpath.Root): return data_source, api_query, None else: - return data_source, api_query, Reference(str(data_source_jsonpath)) + if value_or_root: + # if the jsonpath doesn't not yield a value yield the root document + expr = get_value_or_root_expression(data_source_jsonpath) + else: + expr = Reference(str(data_source_jsonpath)) + return data_source, api_query, expr + + +def get_value_or_root_expression(value_expression): + """Return expression used when iterating over a nested document but also wanting + a record if the value expression returns an empty result.""" + return Apply( + Reference('_or_raw'), Reference(str(value_expression)), Reference("$") + ) + # If the source is expected to provide a column, then require that it is # already present or can be added without conflicting with an existing @@ -296,9 +310,9 @@ def require_column_in_sheet(sheet_name, data_source, table_name, output_headings return (headings, body) -def parse_sheet(worksheet, mappings=None, column_enforcer=None): +def parse_sheet(worksheet, mappings=None, column_enforcer=None, value_or_root=False): mappings = mappings or {} - data_source, source_expr, root_doc_expr = compile_source(worksheet) + data_source, source_expr, root_doc_expr = compile_source(worksheet, value_or_root) table_name_column = get_column_by_name(worksheet, 'table name') if table_name_column: @@ -355,7 +369,7 @@ def 
columns(self): ] -def parse_workbook(workbook, column_enforcer=None): +def parse_workbook(workbook, column_enforcer=None, value_or_root=False): """ Returns a MiniLinq corresponding to the Excel configuration, which consists of the following sheets: @@ -378,7 +392,7 @@ def parse_workbook(workbook, column_enforcer=None): parsed_sheets = [] for sheet in emit_sheets: try: - sheet_parts = parse_sheet(workbook[sheet], mappings, column_enforcer) + sheet_parts = parse_sheet(workbook[sheet], mappings, column_enforcer, value_or_root) except Exception as e: msg = 'Ignoring sheet "{}": {}'.format(sheet, str(e)) if logger.isEnabledFor(logging.DEBUG): @@ -509,14 +523,18 @@ def check_columns(parsed_sheets, columns): if errors_by_sheet: raise MissingColumnException(errors_by_sheet) + blacklisted_tables = [] + + def blacklist(table_name): blacklisted_tables.append(table_name) + def get_queries_from_excel(workbook, missing_value=None, combine_emits=False, max_column_length=None, required_columns=None, - column_enforcer=None): - parsed_sheets = parse_workbook(workbook, column_enforcer) + column_enforcer=None, value_or_root=False): + parsed_sheets = parse_workbook(workbook, column_enforcer, value_or_root) for sheet in parsed_sheets: if sheet.name in blacklisted_tables: raise ReservedTableNameException(sheet.name) diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index d9e05d84..9f774edb 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -412,7 +412,7 @@ def test_multi_emit(self): ) ]) - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine=True) + self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=True) def test_multi_emit_no_combine(self): minilinq = List([ @@ -461,7 +461,7 @@ def test_multi_emit_no_combine(self): ) ]) - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine=False) + self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=False) def test_multi_emit_with_organization(self): minilinq = List([ @@ -522,11 +522,63 @@ def test_multi_emit_with_organization(self): ]) column_enforcer = ColumnEnforcer() - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine=True, + self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=True, column_enforcer=column_enforcer) - def _compare_minilinq_to_compiled(self, minilinq, filename, combine=False, column_enforcer=None): + def test_value_or_root(self): + minilinq = List([ + Bind("checkpoint_manager", + Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), + Emit( + table="Forms", + headings=[Literal("id"), Literal("name")], + missing_value='---', + source=Map( + source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + body=List([ + Reference("id"), + Reference("form.name"), + ]), + ) + ) + ), + Bind("checkpoint_manager", + Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Cases"])), + Emit( + table="Cases", + headings=[Literal("case_id")], + missing_value='---', + source=Map( + source=FlatMap( + body=Apply(Reference("_or_raw"), Reference("form..case"), Reference("$")), + source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')) + ), + body=List([ + Reference("@case_id"), + ]), + ) + ) + ), + Bind("checkpoint_manager", + Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), + Emit( + table="Other cases", + 
headings=[Literal("id")], + missing_value='---', + source=Map( + source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), + body=List([ + Reference("id") + ]) + ) + ) + ) + ]) + + self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=False, value_or_root=True) + + def _compare_minilinq_to_compiled(self, minilinq, filename, **kwargs): print("Parsing {}".format(filename)) abs_path = os.path.join(os.path.dirname(__file__), filename) - compiled = get_queries_from_excel(openpyxl.load_workbook(abs_path), missing_value='---', combine_emits=combine, column_enforcer=column_enforcer) + compiled = get_queries_from_excel(openpyxl.load_workbook(abs_path), missing_value='---', **kwargs) assert compiled.to_jvalue() == minilinq.to_jvalue(), filename diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 03c7b3a5..cb420f50 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -7,6 +7,7 @@ from six.moves import xrange from commcare_export.env import * +from commcare_export.excel_query import get_value_or_root_expression from commcare_export.minilinq import * from commcare_export.writers import JValueTableWriter @@ -96,9 +97,7 @@ def test_flatmap_value_or_root(self): # {"id": 4, "foo": {'id': 'bid', 'name': 'map'}, "bar": None}, # fails with TypeError from jsonpath {"id": 5, "foo": {'id': 'bid', 'name': 'mop'}}, ] - value_or_root = Apply( - Reference('_or_raw'), Reference(str('bar.[*]')), Reference("$") - ) + value_or_root = get_value_or_root_expression('bar.[*]') flatmap = FlatMap(source=Literal(data), body=value_or_root) mmap = Map(source=flatmap, body=List([ Reference("id"), Reference('baz'), Reference('$.id'), Reference('$.foo.id'), Reference('$.foo.name') From ba850bc3b9e454fd86c0a05b6aadcd353852f65e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 24 Jun 2021 17:13:53 +0200 Subject: [PATCH 098/257] update cli --- commcare_export/cli.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index f1377810..c7e028b2 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -91,6 +91,9 @@ def add_to_parser(self, parser, **additional_kwargs): help="Export tables containing mobile worker data and " "location data and add a commcare_userid field to any " "exported form or case"), + Argument('export-root-if-no-subdocument', default=False, action='store_true', help=( + "Use this when you are exporting a nested document e.g. 
form.form..case, messaging-event.messages.[*]" + "And you want to have a record exported even if the nested document does not exist or is empty.")) ] @@ -154,18 +157,22 @@ def _get_query(args, writer, column_enforcer=None): writer.supports_multi_table_write, writer.max_column_length, writer.required_columns, - column_enforcer + column_enforcer, + args.export_root_if_no_subdocument ) + def _get_query_from_file(query_arg, missing_value, combine_emits, - max_column_length, required_columns, column_enforcer): + max_column_length, required_columns, column_enforcer, + value_or_root): if os.path.exists(query_arg): if os.path.splitext(query_arg)[1] in ['.xls', '.xlsx']: import openpyxl workbook = openpyxl.load_workbook(query_arg) return excel_query.get_queries_from_excel( workbook, missing_value, combine_emits, - max_column_length, required_columns, column_enforcer + max_column_length, required_columns, column_enforcer, + value_or_root ) else: with io.open(query_arg, encoding='utf-8') as fh: From 8ac159fd0e12c6b7cf4abac582c8b2926c6fd739 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 25 Jun 2021 10:42:42 +0200 Subject: [PATCH 099/257] prevent access to non-root referenced fields This works around the special case where we have replaced the doc with the root doc. In this case any field reference that is not 'root based' should be ignored. --- commcare_export/cli.py | 2 +- commcare_export/env.py | 14 +++++++++++--- commcare_export/excel_query.py | 18 +++++++----------- commcare_export/jsonpath_utils.py | 12 ++++++++++++ tests/test_minilinq.py | 2 ++ 5 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 commcare_export/jsonpath_utils.py diff --git a/commcare_export/cli.py b/commcare_export/cli.py index c7e028b2..8da4ce38 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -93,7 +93,7 @@ def add_to_parser(self, parser, **additional_kwargs): "exported form or case"), Argument('export-root-if-no-subdocument', default=False, action='store_true', help=( "Use this when you are exporting a nested document e.g. 
form.form..case, messaging-event.messages.[*]" - "And you want to have a record exported even if the nested document does not exist or is empty.")) + " And you want to have a record exported even if the nested document does not exist or is empty.")) ] diff --git a/commcare_export/env.py b/commcare_export/env.py index a61b91f6..798eadf4 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -11,6 +11,8 @@ from jsonpath_rw import jsonpath from jsonpath_rw.parser import parse as parse_jsonpath + +from commcare_export.jsonpath_utils import split_leftmost from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator @@ -179,7 +181,8 @@ class JsonPathEnv(Env): """ def __init__(self, bindings=None): self.__bindings = bindings or {} - + self.__restrict_to_root = bool(jsonpath.Fields("__root_only").find(self.__bindings)) + # Currently hardcoded because it is a global is jsonpath-rw # Probably not widely used, but will require refactor if so jsonpath.auto_id_field = "id" @@ -198,14 +201,19 @@ def lookup(self, name): else: raise NotFound(unwrap_val(name)) - def iter(jsonpath_expr=jsonpath_expr): # Capture closure + if self.__restrict_to_root and str(jsonpath_expr) != 'id': # special case for 'id' + expr, _ = split_leftmost(jsonpath_expr) + if not isinstance(expr, jsonpath.Root): + return RepeatableIterator(lambda : iter(())) + + def iterator(jsonpath_expr=jsonpath_expr): # Capture closure for datum in jsonpath_expr.find(self.__bindings): # HACK: The auto id from jsonpath_rw is good, but we lose it when we do .value here, # so just slap it on if not present if isinstance(datum.value, dict) and 'id' not in datum.value: datum.value['id'] = jsonpath.AutoIdForDatum(datum).value yield datum - return RepeatableIterator(iter) + return RepeatableIterator(iterator) def bind(self, *args): "(str, ??) -> Env | ({str: ??}) -> Env" diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index 3e29315d..0932d3da 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -9,6 +9,7 @@ from jsonpath_rw.parser import parse as parse_jsonpath from commcare_export.exceptions import LongFieldsException, MissingColumnException, ReservedTableNameException +from commcare_export.jsonpath_utils import split_leftmost from commcare_export.map_format import compile_map_format_via from commcare_export.minilinq import * @@ -193,16 +194,6 @@ def compile_fields(worksheet, mappings=None): for field, source_field, alt_source_fields, map_via, format_via in args ] -def split_leftmost(jsonpath_expr): - if isinstance(jsonpath_expr, jsonpath.Child): - further_leftmost, rest = split_leftmost(jsonpath_expr.left) - return further_leftmost, rest.child(jsonpath_expr.right) - elif isinstance(jsonpath_expr, jsonpath.Descendants): - further_leftmost, rest = split_leftmost(jsonpath_expr.left) - return further_leftmost, jsonpath.Descendants(rest, jsonpath_expr.right) - else: - return (jsonpath_expr, jsonpath.This()) - def compile_source(worksheet, value_or_root=False): """ @@ -271,8 +262,13 @@ def compile_source(worksheet, value_or_root=False): def get_value_or_root_expression(value_expression): """Return expression used when iterating over a nested document but also wanting a record if the value expression returns an empty result.""" + + # We add a bind here so that in JsonPathEnv we can restrict expressions to only those that reference + # the root. 
That prevents us from mistakenly getting values from the root that happen to have the + # same name as those in the child. + root_expr = Bind("__root_only", Literal(True), Reference("$")) return Apply( - Reference('_or_raw'), Reference(str(value_expression)), Reference("$") + Reference('_or_raw'), Reference(str(value_expression)), root_expr ) diff --git a/commcare_export/jsonpath_utils.py b/commcare_export/jsonpath_utils.py new file mode 100644 index 00000000..7ef2de34 --- /dev/null +++ b/commcare_export/jsonpath_utils.py @@ -0,0 +1,12 @@ +from jsonpath_rw import jsonpath + + +def split_leftmost(jsonpath_expr): + if isinstance(jsonpath_expr, jsonpath.Child): + further_leftmost, rest = split_leftmost(jsonpath_expr.left) + return further_leftmost, rest.child(jsonpath_expr.right) + elif isinstance(jsonpath_expr, jsonpath.Descendants): + further_leftmost, rest = split_leftmost(jsonpath_expr.left) + return further_leftmost, jsonpath.Descendants(rest, jsonpath_expr.right) + else: + return jsonpath_expr, jsonpath.This() diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index cb420f50..2e4568ab 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -96,6 +96,7 @@ def test_flatmap_value_or_root(self): {"id": 3, "foo": {'id': 'bid', 'name': 'mip'}, "bar": {}}, # {"id": 4, "foo": {'id': 'bid', 'name': 'map'}, "bar": None}, # fails with TypeError from jsonpath {"id": 5, "foo": {'id': 'bid', 'name': 'mop'}}, + {"id": 6, "foo": {'id': 'bid', 'name': 'mop'}, "baz": "root_bazz"}, ] value_or_root = get_value_or_root_expression('bar.[*]') flatmap = FlatMap(source=Literal(data), body=value_or_root) @@ -109,6 +110,7 @@ def test_flatmap_value_or_root(self): ['3.bar.3.bar.[0]', [], '3', '3.bid', 'mip'], # ['4.bar.[0]', [], '4', '4.bid', 'map'], ['5', [], '5', '5.bid', 'mop'], + ['6', [], '6', '6.bid', 'mop'], ]) def test_eval_collapsed_list(self): From dfb8f4cae6ac7181f59bb8aab0f30e607e44daf8 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 25 Jun 2021 10:55:11 +0200 Subject: [PATCH 100/257] typo --- commcare_export/excel_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index 0932d3da..8b3220ee 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -252,7 +252,7 @@ def compile_source(worksheet, value_or_root=False): return data_source, api_query, None else: if value_or_root: - # if the jsonpath doesn't not yield a value yield the root document + # if the jsonpath doesn't yield a value yield the root document expr = get_value_or_root_expression(data_source_jsonpath) else: expr = Reference(str(data_source_jsonpath)) From c3994cb65f914a600e814e64f37eeed2368c0094 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 25 Jun 2021 11:16:47 +0200 Subject: [PATCH 101/257] update excel query test --- tests/test_excel_query.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index 9f774edb..a81e14ca 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -550,7 +550,11 @@ def test_value_or_root(self): missing_value='---', source=Map( source=FlatMap( - body=Apply(Reference("_or_raw"), Reference("form..case"), Reference("$")), + body=Apply( + Reference("_or_raw"), + Reference("form..case"), + Bind("__root_only", Literal(True), Reference("$")) + ), source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')) ), body=List([ From 
4d194d284f400fa664c943b7bf7adc04ba812f36 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 25 Jun 2021 11:38:17 +0200 Subject: [PATCH 102/257] refactor tests --- tests/test_minilinq.py | 97 +++++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 2e4568ab..9b067184 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -86,33 +86,82 @@ def test_eval_auto_id_reference_nested(self): # Reference('$.foo.id'): # '1.bid' -> 'bid' - def test_flatmap_value_or_root(self): - """Low level test case for 'value-or-root' use case""" - env = BuiltInEnv() | JsonPathEnv({}) + def test_value_or_root(self): + """Test that when accessing a child object the child data is used if it exists (normal case).""" + data = { + "id": 1, + "bar": [ + {'baz': 'a1'}, {'baz': 'a2'} + ] + } + self._test_value_or_root([Reference('id'), Reference('baz')], data, [ + ['1.bar.1.bar.[0]', 'a1'], + ['1.bar.1.bar.[1]', 'a2'], + ]) - data = [ - {"id": 1, "foo": {'id': 'bid', 'name': 'zip'}, "bar": [{'baz': 'a1'}, {'baz': 'a2', 'id': 'bazzer'}]}, - {"id": 2, "foo": {'id': 'bid', 'name': 'zap'}, "bar": []}, - {"id": 3, "foo": {'id': 'bid', 'name': 'mip'}, "bar": {}}, - # {"id": 4, "foo": {'id': 'bid', 'name': 'map'}, "bar": None}, # fails with TypeError from jsonpath - {"id": 5, "foo": {'id': 'bid', 'name': 'mop'}}, - {"id": 6, "foo": {'id': 'bid', 'name': 'mop'}, "baz": "root_bazz"}, - ] - value_or_root = get_value_or_root_expression('bar.[*]') - flatmap = FlatMap(source=Literal(data), body=value_or_root) - mmap = Map(source=flatmap, body=List([ - Reference("id"), Reference('baz'), Reference('$.id'), Reference('$.foo.id'), Reference('$.foo.name') - ])) - self.check_case(mmap.eval(env), [ - ['1.bar.1.bar.[0]', 'a1', '1', '1.bid', 'zip'], - ['1.bar.bazzer', 'a2', '1', '1.bid', 'zip'], - ['2', [], '2', '2.bid', 'zap'], - ['3.bar.3.bar.[0]', [], '3', '3.bid', 'mip'], - # ['4.bar.[0]', [], '4', '4.bid', 'map'], - ['5', [], '5', '5.bid', 'mop'], - ['6', [], '6', '6.bid', 'mop'], + def test_value_or_root_empty_list(self): + """Should use the root object if the child is an empty list""" + data = { + "id": 1, + "foo": "I am foo", + "bar": [], + } + self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ + ['1', [], "I am foo"], ]) + def test_value_or_root_empty_dict(self): + """Should use the root object if the child is an empty dict""" + data = { + "id": 1, + "foo": "I am foo", + "bar": {}, + } + self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ + ['1.bar.1.bar.[0]', [], "I am foo"], # weird ID here due to bug in jsonpath + ]) + + @pytest.mark.skip(reason="fails with TypeError from jsonpath") + def test_value_or_root_None(self): + """Should use the root object if the child is None""" + data = { + "id": 1, + "bar": None, + } + self._test_value_or_root([Reference('id'), Reference('baz')], data, [ + ['1.bar.[0]', []], # weird ID here due to bug in jsonpath + ]) + + def test_value_or_root_missing(self): + """Should use the root object if the child does not exist""" + data = { + "id": 1, + "foo": "I am foo", + # 'bar' is missing + } + self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ + ['1', [], 'I am foo'], + ]) + + def test_value_or_root_ignore_field_in_root(self): + """Test that a child reference is ignored if we are using the root doc even if there is a field + wit that name. 
(this doesn't apply to 'id')""" + data = { + "id": 1, + "foo": "I am foo", + } + self._test_value_or_root([Reference('id'), Reference('foo')], data, [ + ['1', []], + ]) + + def _test_value_or_root(self, columns, data, expected): + """Low level test case for 'value-or-root'""" + env = BuiltInEnv() | JsonPathEnv({}) + value_or_root = get_value_or_root_expression('bar.[*]') + flatmap = FlatMap(source=Literal([data]), body=value_or_root) + mmap = Map(source=flatmap, body=List(columns)) + self.check_case(mmap.eval(env), expected) + def test_eval_collapsed_list(self): """ Special case to handle XML -> JSON conversion where there just happened to be a single value at save time From 567ac70986b0f2e9ee1f114cdaf5a6ba57ef4563 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 25 Jun 2021 10:58:59 +0200 Subject: [PATCH 103/257] replace jsonpath-rw with jsonpath-ng --- commcare_export/env.py | 8 ++++---- commcare_export/excel_query.py | 6 +++--- commcare_export/jsonpath_utils.py | 2 +- commcare_export/misc.py | 2 +- setup.py | 2 +- tests/test_commcare_minilinq.py | 2 +- tests/test_minilinq.py | 6 +++--- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/commcare_export/env.py b/commcare_export/env.py index 798eadf4..9a2ddfa2 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -9,8 +9,8 @@ import six from itertools import chain -from jsonpath_rw import jsonpath -from jsonpath_rw.parser import parse as parse_jsonpath +from jsonpath_ng import jsonpath +from jsonpath_ng.parser import parse as parse_jsonpath from commcare_export.jsonpath_utils import split_leftmost from commcare_export.misc import unwrap, unwrap_val @@ -183,7 +183,7 @@ def __init__(self, bindings=None): self.__bindings = bindings or {} self.__restrict_to_root = bool(jsonpath.Fields("__root_only").find(self.__bindings)) - # Currently hardcoded because it is a global is jsonpath-rw + # Currently hardcoded because it is a global is jsonpath-ng # Probably not widely used, but will require refactor if so jsonpath.auto_id_field = "id" @@ -208,7 +208,7 @@ def lookup(self, name): def iterator(jsonpath_expr=jsonpath_expr): # Capture closure for datum in jsonpath_expr.find(self.__bindings): - # HACK: The auto id from jsonpath_rw is good, but we lose it when we do .value here, + # HACK: The auto id from jsonpath_ng is good, but we lose it when we do .value here, # so just slap it on if not present if isinstance(datum.value, dict) and 'id' not in datum.value: datum.value['id'] = jsonpath.AutoIdForDatum(datum).value diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index 8b3220ee..b2f1de5a 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -2,11 +2,11 @@ import re from collections import defaultdict, namedtuple -from jsonpath_rw.lexer import JsonPathLexerError +from jsonpath_ng.lexer import JsonPathLexerError from six.moves import xrange -from jsonpath_rw import jsonpath -from jsonpath_rw.parser import parse as parse_jsonpath +from jsonpath_ng import jsonpath +from jsonpath_ng.parser import parse as parse_jsonpath from commcare_export.exceptions import LongFieldsException, MissingColumnException, ReservedTableNameException from commcare_export.jsonpath_utils import split_leftmost diff --git a/commcare_export/jsonpath_utils.py b/commcare_export/jsonpath_utils.py index 7ef2de34..defc9a58 100644 --- a/commcare_export/jsonpath_utils.py +++ b/commcare_export/jsonpath_utils.py @@ -1,4 +1,4 @@ -from jsonpath_rw import jsonpath +from jsonpath_ng import jsonpath def 
split_leftmost(jsonpath_expr): diff --git a/commcare_export/misc.py b/commcare_export/misc.py index 6858e20b..1940959f 100644 --- a/commcare_export/misc.py +++ b/commcare_export/misc.py @@ -3,7 +3,7 @@ import hashlib import inspect import io -from jsonpath_rw import jsonpath +from jsonpath_ng import jsonpath from commcare_export.repeatable_iterator import RepeatableIterator diff --git a/setup.py b/setup.py index d577c98f..3483781f 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ def run_tests(self): install_requires = [ 'alembic', 'argparse', - 'jsonpath-rw>=1.2.1', + 'jsonpath-ng~=1.5', 'openpyxl==2.5.12', 'python-dateutil', 'requests', diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index 454f3c7f..5ae7c043 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -1,7 +1,7 @@ import unittest from itertools import * -from jsonpath_rw import jsonpath +from jsonpath_ng import jsonpath from commcare_export.checkpoint import CheckpointManagerWithDetails from commcare_export.minilinq import * diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 9b067184..8b1606cb 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -45,7 +45,7 @@ def test_eval_reference(self): self.check_case(Reference("foo.$.baz").eval(JsonPathEnv({'foo': [2], 'baz': 3})), [3]) def test_eval_auto_id_reference(self): - "Test that we have turned on the jsonpath_rw.jsonpath.auto_id field properly" + "Test that we have turned on the jsonpath_ng.jsonpath.auto_id field properly" env = BuiltInEnv() self.check_case(Reference("foo.id").eval(JsonPathEnv({'foo': [2]})), ['foo']) @@ -55,7 +55,7 @@ def test_eval_auto_id_reference(self): def test_eval_auto_id_reference_nested(self): # this test is documentation of existing (weird) functionality - # that results from a combination of jsonpath_rw auto_id feature and + # that results from a combination of jsonpath_ng auto_id feature and # JsonPathEnv.lookup (which adds an additional auto ID for some reason). 
env = JsonPathEnv({}) @@ -78,7 +78,7 @@ def test_eval_auto_id_reference_nested(self): # as follows: # '1.bar.1.bar.[0]' -> '1.bar.[0]' - # With the change above AND a change to jsonpath_rw to prevent converting IDs that exist into + # With the change above AND a change to jsonpath_ng to prevent converting IDs that exist into # auto IDs (see https://github.com/kennknowles/python-jsonpath-rw/pull/96) we get the following: # Reference("id"): # '1.bar.bazzer' -> 'bazzer' From 3aa97946c57ce4e1304e4e5342efcb78b683a5d2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 25 Jun 2021 10:59:05 +0200 Subject: [PATCH 104/257] fix tests --- tests/test_minilinq.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 8b1606cb..0e24a849 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -118,10 +118,9 @@ def test_value_or_root_empty_dict(self): "bar": {}, } self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ - ['1.bar.1.bar.[0]', [], "I am foo"], # weird ID here due to bug in jsonpath + ['1', [], "I am foo"], ]) - @pytest.mark.skip(reason="fails with TypeError from jsonpath") def test_value_or_root_None(self): """Should use the root object if the child is None""" data = { @@ -129,7 +128,7 @@ def test_value_or_root_None(self): "bar": None, } self._test_value_or_root([Reference('id'), Reference('baz')], data, [ - ['1.bar.[0]', []], # weird ID here due to bug in jsonpath + ['1', []], ]) def test_value_or_root_missing(self): From 4831e992cda40b720514a01110384614b4f802be Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Fri, 2 Jul 2021 12:13:51 +0200 Subject: [PATCH 105/257] use 'date_last_activity' for filtering / ordering of messaging data --- commcare_export/commcare_minilinq.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 4d55682d..cc23ee55 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -87,7 +87,8 @@ def __call__(self, since, until): DATE_PARAMS = { 'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end'), 'server_date_modified': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end'), - 'date': SimpleSinceParams('date.gte', 'date.lt'), # used by messaging-events + # used by messaging-events + 'date_last_activity': SimpleSinceParams('date_last_activity.gte', 'date_last_activity.lt'), } @@ -96,12 +97,12 @@ def get_paginator(resource, page_size=1000, pagination_mode=PaginationMode.date_ PaginationMode.date_indexed: { 'form': DatePaginator('indexed_on', page_size), 'case': DatePaginator('indexed_on', page_size), - 'messaging-event': DatePaginator('date', page_size), + 'messaging-event': DatePaginator('date_last_activity', page_size), }, PaginationMode.date_modified: { 'form': DatePaginator(['server_modified_on', 'received_on'], page_size, params=FormFilterSinceParams()), 'case': DatePaginator('server_date_modified', page_size), - 'messaging-event': DatePaginator('date', page_size), + 'messaging-event': DatePaginator('date_last_activity', page_size), } }[pagination_mode].get(resource, SimplePaginator(page_size)) From 35157a41d4d85df54a25569b1ef4147f3ba5a60c Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 7 Jul 2021 16:26:04 +0200 Subject: [PATCH 106/257] fix tests --- tests/test_commcare_hq_client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py
index 6a84ec2f..fae0ef45 100644
--- a/tests/test_commcare_hq_client.py
+++ b/tests/test_commcare_hq_client.py
@@ -79,16 +79,16 @@ def _get_results(self, params):
 
 class FakeMessageLogSession(FakeSession):
     def _get_results(self, params):
-        obj_1 = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'}
-        obj_2 = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'}
-        obj_3 = {'id': 3, 'foo': 3, 'date': '2017-01-01T15:38:22Z'}
+        obj_1 = {'id': 1, 'foo': 1, 'date_last_activity': '2017-01-01T15:36:22Z'}
+        obj_2 = {'id': 2, 'foo': 2, 'date_last_activity': '2017-01-01T15:37:22Z'}
+        obj_3 = {'id': 3, 'foo': 3, 'date_last_activity': '2017-01-01T15:38:22Z'}
         if not params:
             return {
                 'meta': {'next': '?cursor=xyz', 'limit': 2},
                 'objects': [obj_1, obj_2]
             }
         else:
-            since_query_param = DATE_PARAMS['date'].start_param
+            since_query_param = DATE_PARAMS['date_last_activity'].start_param
             since = params[since_query_param]
             if since == '2017-01-01T15:37:22':
                 return {

From 3db9c725ce4c5d206409dd1d400b3f0332948eab Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Thu, 15 Jul 2021 10:56:15 +0200
Subject: [PATCH 107/257] add unique function for removing duplicate values when exporting lists

---
 commcare_export/env.py | 10 +++++++++-
 tests/test_minilinq.py |  5 +++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/commcare_export/env.py b/commcare_export/env.py
index 9a2ddfa2..9b03687c 100644
--- a/commcare_export/env.py
+++ b/commcare_export/env.py
@@ -475,6 +475,13 @@ def substr(val, start, end):
     return val[start:end]
 
 
+@unwrap('val')
+def unique(val):
+    if isinstance(val, list):
+        return list(set(val))
+    return val
+
+
 class BuiltInEnv(DictEnv):
     """
     A built-in environment of operators and functions
@@ -519,7 +526,8 @@ def __init__(self, d=None):
             'or': _or,
             'sha1': sha1,
             'substr': substr,
-            '_or_raw': _or_raw,  # for internal use
+            '_or_raw': _or_raw,  # for internal use,
+            'unique': unique
         })
         return super(BuiltInEnv, self).__init__(d)

diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py
index 0e24a849..2f549164 100644
--- a/tests/test_minilinq.py
+++ b/tests/test_minilinq.py
@@ -261,6 +261,11 @@ def test_case_url(self):
         expected = 'https://www.commcarehq.org/a/d1/reports/case_data/123/'
         assert Apply(Reference('case_url'), Reference('id')).eval(env) == expected
 
+    def test_unique(self):
+        env = BuiltInEnv() | JsonPathEnv(
+            {"list": [{"a": 1}, {"a": 2}, {"a": 3}, {"a": 2}]})
+        assert Apply(Reference('unique'), Reference('list[*].a')).eval(env) == [1, 2, 3]
+
     def test_template(self):
         env = BuiltInEnv() | JsonPathEnv({'a': '1', 'b': '2'})
         assert Apply(Reference('template'), Literal('{}.{}'), Reference('a'), Reference('b')).eval(env) == '1.2'

From 509324dd3c19633b24a58a8d8ba4168761cfcccf Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Thu, 15 Jul 2021 16:31:30 +0200
Subject: [PATCH 108/257] update docs with 'unique'

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 5c60aa12..872202c4 100644
--- a/README.md
+++ b/README.md
@@ -397,6 +397,7 @@ List of builtin functions:
 | attachment_url | Convert an attachment name into it's download URL | |
 | form_url | Output the URL to the form view on CommCare HQ | |
 | case_url | Output the URL to the case view on CommCare HQ | |
+| unique | Output only unique values in a list | |
 
 Output Formats
 --------------

From f66aea79bf2459366bc7ec15ed39d7a72ebd0d00 Mon Sep 17 00:00:00 2001
From: Simon Kelly
Date: Fri, 6 Aug 2021 10:04:14 +0200
Subject: [PATCH 109/257]
prompt for username & password before they are needed --- commcare_export/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 8da4ce38..5ceb1bd7 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -300,6 +300,13 @@ def main_with_args(args): '--query, --users, --locations', file=sys.stderr) return EXIT_STATUS_ERROR + if not args.username: + args.username = input('Please provide a username: ') + + if not args.password: + # Windows getpass does not accept unicode + args.password = getpass.getpass() + column_enforcer = None if args.with_organization: column_enforcer = builtin_queries.ColumnEnforcer() @@ -321,13 +328,6 @@ def main_with_args(args): if writer.support_checkpoints: checkpoint_manager = _get_checkpoint_manager(args) - if not args.username: - args.username = input('Please provide a username: ') - - if not args.password: - # Windows getpass does not accept unicode - args.password = getpass.getpass() - since, until = get_date_params(args) if args.start_over: if checkpoint_manager: From fd1b18df78fd70c44da2632284952de238df98c7 Mon Sep 17 00:00:00 2001 From: Brian DeRenzi Date: Wed, 22 Sep 2021 10:33:08 +0100 Subject: [PATCH 110/257] FIX: Alembic 1.7.x is breaking migrations --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3483781f..fdc5e64b 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def run_tests(self): include_package_data=True, license = 'MIT', install_requires = [ - 'alembic', + 'alembic==1.6.5', 'argparse', 'jsonpath-ng~=1.5', 'openpyxl==2.5.12', From 48a5fa47fd0976ff22678cfd214925f1660a57a7 Mon Sep 17 00:00:00 2001 From: bderenzi Date: Wed, 22 Sep 2021 12:40:03 +0100 Subject: [PATCH 111/257] Allow older versions of alembic as well --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fdc5e64b..195752e4 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def run_tests(self): include_package_data=True, license = 'MIT', install_requires = [ - 'alembic==1.6.5', + 'alembic<=1.6.5', 'argparse', 'jsonpath-ng~=1.5', 'openpyxl==2.5.12', From 0192cdf140f29b14b5d70d5ce11aaf8e4f8061b9 Mon Sep 17 00:00:00 2001 From: bderenzi Date: Thu, 23 Sep 2021 08:54:29 +0100 Subject: [PATCH 112/257] Make room for patches on 1.6.x Co-authored-by: Daniel Miller --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 195752e4..0c6d2195 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def run_tests(self): include_package_data=True, license = 'MIT', install_requires = [ - 'alembic<=1.6.5', + 'alembic<1.7', 'argparse', 'jsonpath-ng~=1.5', 'openpyxl==2.5.12', From df78dcf9c027e75efde3afdf3949d6881638be26 Mon Sep 17 00:00:00 2001 From: Marco Perosa Date: Wed, 3 Nov 2021 11:25:58 -0400 Subject: [PATCH 113/257] Update HQ credentials --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c25bc4ee..1728cb96 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,8 +13,8 @@ env: global: - MSSQL_SA_PASSWORD=Password-123 # HQ_USERNAME and HQ_API_KEY - - secure: etv02uWtyy5P4DfyuHjFm5RDFc6WBHLsnIMC75VjDk8kxDgwV/lDbPYMh/hzfPHyskgA1EQbc8IfHlbZWFVV8jOTy+wvrVir/mw95AEyNyAL/TTSWvYfTvdCsxOSbY6vcGlJNfy6rc+y0h6QyuIknY0OhU8sTaRcQnvbFPnOz28= - - secure: 
aLj1bKtUF2CnAwG+yjiAjo39cKi9WHaonIwqsuhOx4McsD/xSz4QHv/6/XhXZ5KxKyxw1+PBl/mWo6gyrT5iHDRBPk5iJXqZAgQFS2ukZSv/tUBGL7bWzoO9YfoLuWllA33DCr3PiXAhkH53dTcor16UN9wXeCprBBSGjhpAxRQ= + - secure: "AhNARIXHCKJhDpDHNT97h4WThW/eyofBpp4rI80i+DA8whlfKucHrfDaJ190tRblY8viBiC8FhucFxYVX6oSS2MaPN3X1bF8WzdMYiBzMJ05ODIRQ3pPjRsGD3e14MhpRriaHfa1w90/WdnU2QWXY6J8zitXlwXV5PRBdsk7raQ=" + - secure: "hJqnvpMEgiiU8AT21T4dPRLqA+n4BSsWk9yVYV7DrfsZIIFN0Ioao7bg8MMh/XZ6mGMEjxtgFZ307ApuoQdfGpct9Yg2uxj/vx6n7VHdEzvoWfzhDEPn9mTG7QBxPkZJlz444m7hVW/jW8Pqr2LEdR+ORjGfe8FlwCU2MauOmIc=" before_install: - docker pull microsoft/mssql-server-linux:2017-latest - docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PASSWORD" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest From 1059b5bc3ed95b92832424d7d14bf409d4bb97d5 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Thu, 4 Nov 2021 11:17:54 -0400 Subject: [PATCH 114/257] Change location of mssql docker image --- .travis.yml | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1728cb96..82166a58 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,8 @@ env: - secure: "AhNARIXHCKJhDpDHNT97h4WThW/eyofBpp4rI80i+DA8whlfKucHrfDaJ190tRblY8viBiC8FhucFxYVX6oSS2MaPN3X1bF8WzdMYiBzMJ05ODIRQ3pPjRsGD3e14MhpRriaHfa1w90/WdnU2QWXY6J8zitXlwXV5PRBdsk7raQ=" - secure: "hJqnvpMEgiiU8AT21T4dPRLqA+n4BSsWk9yVYV7DrfsZIIFN0Ioao7bg8MMh/XZ6mGMEjxtgFZ307ApuoQdfGpct9Yg2uxj/vx6n7VHdEzvoWfzhDEPn9mTG7QBxPkZJlz444m7hVW/jW8Pqr2LEdR+ORjGfe8FlwCU2MauOmIc=" before_install: - - docker pull microsoft/mssql-server-linux:2017-latest - - docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PASSWORD" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest + - docker pull mcr.microsoft.com/mssql/server:2017-latest + - docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PASSWORD" -p 1433:1433 --name mssql1 -d mcr.microsoft.com/mssql/server:2017-latest - curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - - echo "deb [arch=amd64] https://packages.microsoft.com/ubuntu/14.04/prod trusty main" | sudo tee /etc/apt/sources.list.d/mssql-release.list - sudo apt-get update -qq diff --git a/README.md b/README.md index 872202c4..d62e40b1 100644 --- a/README.md +++ b/README.md @@ -577,7 +577,7 @@ mysql> GRANT ALL PRIVILEGES ON *.* TO 'travis'@'%'; MSSQL ===== ``` -$ docker pull microsoft/mssql-server-linux:2017-latest +$ docker pull mcr.microsoft.com/mssql/server:2017-latest $ docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=Password@123" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest # install driver @@ -592,7 +592,7 @@ $ odbcinst -q -d MSSQL for Mac OS ========== ``` -$ docker pull microsoft/mssql-server-linux:2017-latest +$ docker pull mcr.microsoft.com/mssql/server:2017-latest $ docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=Password@123" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest # Install driver From a7f2837c3447ed6882fe61176f06d5c0bab086a8 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 4 Nov 2021 16:22:34 +0000 Subject: [PATCH 115/257] Fix Travis badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 872202c4..ce806079 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ CommCare Export https://github.com/dimagi/commcare-export -[![Build Status](https://travis-ci.com/dimagi/commcare-export.png)](https://travis-ci.com/dimagi/commcare-export) +[![Build 
Status](https://app.travis-ci.com/dimagi/commcare-export.svg?branch=master)](https://app.travis-ci.com/dimagi/commcare-export) [![Test coverage](https://coveralls.io/repos/dimagi/commcare-export/badge.png?branch=master)](https://coveralls.io/r/dimagi/commcare-export) [![PyPI version](https://badge.fury.io/py/commcare-export.svg)](https://badge.fury.io/py/commcare-export) From a9c94149a41b6e15a460acc0752604913cfd0b55 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 4 Nov 2021 16:29:20 +0000 Subject: [PATCH 116/257] Ensure Python 3 --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ce806079..a2520791 100644 --- a/README.md +++ b/README.md @@ -15,14 +15,15 @@ A command-line tool (and Python library) to generate customized exports from the Installation & Quick Start -------------------------- -0a\. Install Python and `pip`. This tool is [tested with Python 2.7, 3.6 and 3.7](https://travis-ci.com/dimagi/commcare-export). +0a\. Install [Python 3](https://www.python.org/downloads/) and `pip`. This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). 0b\. Sign up for [CommCareHQ](https://www.commcarehq.org/) if you have not already. 1\. Install CommCare Export via `pip` ``` -$ pip install commcare-export +$ python3 -m pip install wheel +$ python3 -m pip install commcare-export ``` 2\. Create a project space and application. From 5d8f097d88ee2a1e5c497595f23ec7c660ddd724 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Thu, 4 Nov 2021 16:53:55 +0000 Subject: [PATCH 117/257] Python comes with pip since 3.4 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2520791..18a24d0c 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ A command-line tool (and Python library) to generate customized exports from the Installation & Quick Start -------------------------- -0a\. Install [Python 3](https://www.python.org/downloads/) and `pip`. This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). +0a\. Install [Python 3](https://www.python.org/downloads/). This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). 0b\. Sign up for [CommCareHQ](https://www.commcarehq.org/) if you have not already. 
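The two README patches above (116 and 117) reduce the quick-start install to a plain `pip` flow. A quick way to confirm such an install landed on the `PATH` is shown below — a hypothetical shell session: the `commcare-export` console script is the entry point the package's setup.py declares, but the `--version` flag is an assumption about the CLI, and the printed value will vary by release.

```
$ python3 -m pip install wheel
$ python3 -m pip install commcare-export
$ commcare-export --version   # assumed flag; prints the installed release
```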
From 3c5d4c2b8891c2315a92e1ff11b8e30b2a7929ae Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Tue, 9 Nov 2021 10:09:50 -0500 Subject: [PATCH 118/257] Update readme with pinned version of alembic --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fa1715bf..766cddda 100644 --- a/README.md +++ b/README.md @@ -435,7 +435,7 @@ $ pip install openpyxl $ pip install xlwt # To sync with a SQL database -$ pip install SQLAlchemy alembic psycopg2 pymysql pyodbc +$ pip install SQLAlchemy "alembic<1.7" psycopg2 pymysql pyodbc ``` Contributing From a77856a5132f3964ac39c138f3ee02c88c635f3e Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Wed, 10 Nov 2021 09:11:30 -0500 Subject: [PATCH 119/257] yapfify setup.py --- setup.py | 67 +++++++++++++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/setup.py b/setup.py index 0c6d2195..1067f68d 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ import re import io import subprocess -import setuptools +import setuptools from setuptools.command.test import test as TestCommand -VERSION_PATH='commcare_export/VERSION' +VERSION_PATH = 'commcare_export/VERSION' # Overwrite VERSION if we are actually building for a distribution to pypi # This code path requires dependencies, etc, to be available @@ -19,17 +19,20 @@ # This import requires either commcare_export/VERSION or to be in a git clone (as does the package in general) import commcare_export + version = commcare_export.version.version() # Crash if the VERSION is not a simple version and it is going to register or upload if 'register' in sys.argv or 'upload' in sys.argv: version = commcare_export.version.stored_version() if not version or not re.match('\d+\.\d+\.\d+', version): - print('Version %s is not an appropriate version for publicizing!' % version) + print('Version %s is not an appropriate version for publicizing!' 
% + version) sys.exit(1) readme = 'README.md' + class PyTest(TestCommand): def finalize_options(self): TestCommand.finalize_options(self) @@ -45,46 +48,37 @@ def run_tests(self): test_deps = ['pytest', 'psycopg2', 'mock'] -setuptools.setup( - name = "commcare-export", - version = version, - description = 'A command-line tool (and Python library) to extract data from CommCareHQ into a SQL database or Excel workbook', - long_description = io.open(readme, encoding='utf-8').read(), - long_description_content_type = 'text/markdown', - author = 'Dimagi', - author_email = 'information@dimagi.com', - url = "https://github.com/dimagi/commcare-export", - entry_points = { +setuptools.setup( + name="commcare-export", + version=version, + description='A command-line tool (and Python library) to extract data from ' + 'CommCareHQ into a SQL database or Excel workbook', + long_description=io.open(readme, encoding='utf-8').read(), + long_description_content_type='text/markdown', + author='Dimagi', + author_email='information@dimagi.com', + url="https://github.com/dimagi/commcare-export", + entry_points={ 'console_scripts': [ 'commcare-export = commcare_export.cli:entry_point', 'commcare-export-utils = commcare_export.utils_cli:entry_point' ] }, - packages = setuptools.find_packages(exclude=['tests*']), - data_files = [ - (os.path.join('share', 'commcare-export', 'examples'), glob.glob('examples/*.json') + glob.glob('examples/*.xlsx')), + packages=setuptools.find_packages(exclude=['tests*']), + data_files=[ + (os.path.join('share', 'commcare-export', 'examples'), + glob.glob('examples/*.json') + glob.glob('examples/*.xlsx')), ], include_package_data=True, - license = 'MIT', - install_requires = [ - 'alembic<1.7', - 'argparse', - 'jsonpath-ng~=1.5', - 'openpyxl==2.5.12', - 'python-dateutil', - 'requests', - 'ndg-httpsclient', - 'simplejson', - 'six', - 'sqlalchemy', - 'pytz', - 'sqlalchemy-migrate', - 'backoff', - 'csv342' + license='MIT', + install_requires=[ + 'alembic<1.7', 'argparse', 'jsonpath-ng~=1.5', 'openpyxl==2.5.12', + 'python-dateutil', 'requests', 'ndg-httpsclient', 'simplejson', 'six', + 'sqlalchemy', 'pytz', 'sqlalchemy-migrate', 'backoff', 'csv342' ], - tests_require = test_deps, - cmdclass = {'test': PyTest}, - classifiers = [ + tests_require=test_deps, + cmdclass={'test': PyTest}, + classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Console', 'Intended Audience :: Developers', @@ -103,5 +97,4 @@ def run_tests(self): 'Topic :: System :: Archiving', 'Topic :: System :: Distributed Computing', ], - extras_require={'test': test_deps} -) + extras_require={'test': test_deps}) From 6b66b43ac2a4bd20b61d92f44d8d30e40f6428b5 Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Wed, 10 Nov 2021 09:12:42 -0500 Subject: [PATCH 120/257] clean up imports --- setup.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 1067f68d..a01af952 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,9 @@ -from __future__ import print_function -import os.path -import sys import glob -import re import io -import subprocess +import os.path +import re +import sys + import setuptools from setuptools.command.test import test as TestCommand From 64f6da1880f537e33dcf9c4dfeef988984ab0c3e Mon Sep 17 00:00:00 2001 From: Farid Rener Date: Wed, 10 Nov 2021 09:25:55 -0500 Subject: [PATCH 121/257] Use extras_require for optional dependencies --- setup.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 
a01af952..36cfa9a9 100644
--- a/setup.py
+++ b/setup.py
@@ -46,6 +46,10 @@ def run_tests(self):
 test_deps = ['pytest', 'psycopg2', 'mock']
+base_sql_deps = ["SQLAlchemy", "alembic<1.7"]
+postgres = ["psycopg2"]
+mysql = ["pymysql"]
+odbc = ["pyodbc"]
 setuptools.setup(
     name="commcare-export",
@@ -75,7 +79,15 @@ def run_tests(self):
         'python-dateutil', 'requests', 'ndg-httpsclient', 'simplejson', 'six',
         'sqlalchemy', 'pytz', 'sqlalchemy-migrate', 'backoff', 'csv342'
     ],
-    tests_require=test_deps,
+    extras_require={
+        'test': test_deps,
+        'base_sql': base_sql_deps,
+        'postgres': base_sql_deps + postgres,
+        'mysql': base_sql_deps + mysql,
+        'odbc': base_sql_deps + odbc,
+        'xlsx': ["openpyxl"],
+        'xls': ["xlwt"],
+    },
     cmdclass={'test': PyTest},
     classifiers=[
         'Development Status :: 4 - Beta',
@@ -95,5 +107,5 @@ def run_tests(self):
         'Topic :: Software Development :: Interpreters',
         'Topic :: System :: Archiving',
         'Topic :: System :: Distributed Computing',
-    ],
-    extras_require={'test': test_deps})
+    ]
+)

From 0ccdac96a7ff9bc2333a8865c169d17b81949abd Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Wed, 10 Nov 2021 09:26:07 -0500
Subject: [PATCH 122/257] Pin python version to >=3.6

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 36cfa9a9..8fd3182e 100644
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@ def run_tests(self):
     ],
     include_package_data=True,
     license='MIT',
+    python_requires=">=3.6",
     install_requires=[
         'alembic<1.7', 'argparse', 'jsonpath-ng~=1.5', 'openpyxl==2.5.12',
         'python-dateutil', 'requests', 'ndg-httpsclient', 'simplejson', 'six',

From c0fb2d35a678986bc97bd373480973b20a49180a Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Wed, 10 Nov 2021 09:36:42 -0500
Subject: [PATCH 123/257] Update readme for optional deps

---
 README.md | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 766cddda..6f80d093 100644
--- a/README.md
+++ b/README.md
@@ -429,13 +429,23 @@ are optional. Here is how you might install them:
 ```
 # To export "xlsx"
-$ pip install openpyxl
+$ pip install commcare-export[xlsx]
 # To export "xls"
-$ pip install xlwt
+$ pip install commcare-export[xls]
-# To sync with a SQL database
-$ pip install SQLAlchemy "alembic<1.7" psycopg2 pymysql pyodbc
+# To sync with a Postgres database
+$ pip install commcare-export[postgres]
+
+# To sync with a mysql database
+$ pip install commcare-export[mysql]
+
+# To sync with a database which uses odbc (e.g. mssql)
+$ pip install commcare-export[odbc]
+
+# To sync with another SQL database supported by SQLAlchemy
+$ pip install commcare-export[base_sql]
+# Then install the python package for your database
 ```
 Contributing

From 6c63150dab1ea0cf17aa3c049450b9ee8a9bc572 Mon Sep 17 00:00:00 2001
From: Farid Rener
Date: Wed, 10 Nov 2021 10:30:08 -0500
Subject: [PATCH 124/257] Square brackets confuse my terminal emulator

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 6f80d093..aae74301 100644
--- a/README.md
+++ b/README.md
@@ -429,22 +429,22 @@ are optional.
Here is how you might install them: ``` # To export "xlsx" -$ pip install commcare-export[xlsx] +$ pip install "commcare-export[xlsx]" # To export "xls" -$ pip install commcare-export[xls] +$ pip install "commcare-export[xls]" # To sync with a Postgres database -$ pip install commcare-export[postgres] +$ pip install "commcare-export[postgres]" # To sync with a mysql database -$ pip install commcare-export[mysql] +$ pip install "commcare-export[mysql]" # To sync with a database which uses odbc (e.g. mssql) -$ pip install commcare-export[odbc] +$ pip install "commcare-export[odbc]" # To sync with another SQL database supported by SQLAlchemy -$ pip install commcare-export[base_sql] +$ pip install "commcare-export[base_sql]" # Then install the python package for your database ``` From 86a325414ff2977ca03241043d5a567aca984b5a Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 5 Apr 2022 16:04:54 +0200 Subject: [PATCH 125/257] Add DATA_TYPE_TEXT to DATA_TYPES_TO_SQLALCHEMY_TYPES --- commcare_export/data_types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/commcare_export/data_types.py b/commcare_export/data_types.py index 006a5e7b..b15bcf8e 100644 --- a/commcare_export/data_types.py +++ b/commcare_export/data_types.py @@ -8,6 +8,7 @@ DATA_TYPE_JSON = 'json' DATA_TYPES_TO_SQLALCHEMY_TYPES = { + DATA_TYPE_TEXT: sqlalchemy.Text(), DATA_TYPE_BOOLEAN: sqlalchemy.Boolean(), DATA_TYPE_DATETIME: sqlalchemy.DateTime(), DATA_TYPE_DATE: sqlalchemy.Date(), From 16fa85b1a12147aae20257c519a102bcac0f5124 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 12:44:17 +0100 Subject: [PATCH 126/257] Fix import for Alembic>=1.7 --- commcare_export/writers.py | 7 ++++--- setup.py | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index d6c7ad20..6d56c4d5 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -4,7 +4,8 @@ import zipfile from six.moves import zip_longest -import alembic +from alembic.migration import MigrationContext +from alembic.operations import Operations import csv342 as csv import six import sqlalchemy @@ -453,8 +454,8 @@ def least_upper_bound(self, source_type, dest_type): return sqlalchemy.UnicodeText(collation=self.collation) def make_table_compatible(self, table_name, row_dict, data_type_dict): - ctx = alembic.migration.MigrationContext.configure(self.connection) - op = alembic.operations.Operations(ctx) + ctx = MigrationContext.configure(self.connection) + op = Operations(ctx) if not table_name in self.metadata.tables: if self.strict_types: diff --git a/setup.py b/setup.py index 8fd3182e..97e0da08 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ def run_tests(self): test_deps = ['pytest', 'psycopg2', 'mock'] -base_sql_deps = ["SQLAlchemy", "alembic<1.7"] +base_sql_deps = ["SQLAlchemy", "alembic"] postgres = ["psycopg2"] mysql = ["pymysql"] odbc = ["pyodbc"] @@ -76,9 +76,20 @@ def run_tests(self): license='MIT', python_requires=">=3.6", install_requires=[ - 'alembic<1.7', 'argparse', 'jsonpath-ng~=1.5', 'openpyxl==2.5.12', - 'python-dateutil', 'requests', 'ndg-httpsclient', 'simplejson', 'six', - 'sqlalchemy', 'pytz', 'sqlalchemy-migrate', 'backoff', 'csv342' + 'alembic', + 'argparse', + 'backoff', + 'csv342', + 'jsonpath-ng~=1.5', + 'ndg-httpsclient', + 'openpyxl==2.5.12', + 'python-dateutil', + 'pytz', + 'requests', + 'simplejson', + 'six', + 'sqlalchemy', + 'sqlalchemy-migrate' ], extras_require={ 'test': test_deps, From 
dcf9ed2139a0b428967ed6dabd9ac3cc81c860b0 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 16:01:32 +0100 Subject: [PATCH 127/257] Try building off Ubuntu 18.04 Bionic --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 82166a58..34a9b784 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: python sudo: required -dist: "xenial" +dist: "bionic" python: - "3.6" - "3.7" @@ -19,7 +19,7 @@ before_install: - docker pull mcr.microsoft.com/mssql/server:2017-latest - docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PASSWORD" -p 1433:1433 --name mssql1 -d mcr.microsoft.com/mssql/server:2017-latest - curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - - - echo "deb [arch=amd64] https://packages.microsoft.com/ubuntu/14.04/prod trusty main" | sudo tee /etc/apt/sources.list.d/mssql-release.list + - curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list - sudo apt-get update -qq install: - sudo apt-get install pandoc From d2bef02c5aef4454ac678ce3763022707af348e8 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 16:52:21 +0100 Subject: [PATCH 128/257] Update classifiers --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 97e0da08..757de810 100644 --- a/setup.py +++ b/setup.py @@ -110,11 +110,10 @@ def run_tests(self): 'Intended Audience :: System Administrators', 'Intended Audience :: End Users/Desktop', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'Topic :: Database', 'Topic :: Software Development :: Interpreters', 'Topic :: System :: Archiving', From 14d48855f9127a660830f5f049a62e192bcbb700 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 17:00:13 +0100 Subject: [PATCH 129/257] isort --- .isort.cfg | 5 ++++ commcare_export/builtin_queries.py | 1 - commcare_export/checkpoint.py | 14 +++++++--- commcare_export/cli.py | 34 +++++++++++++++-------- commcare_export/commcare_hq_client.py | 20 ++++++++----- commcare_export/commcare_minilinq.py | 3 +- commcare_export/env.py | 20 +++++++------ commcare_export/excel_query.py | 24 +++++++++++----- commcare_export/location_info_provider.py | 4 +-- commcare_export/map_format.py | 2 +- commcare_export/minilinq.py | 12 ++++++-- commcare_export/misc.py | 13 +++++++-- commcare_export/repeatable_iterator.py | 1 - commcare_export/utils.py | 13 +++++++-- commcare_export/utils_cli.py | 11 ++++++-- commcare_export/version.py | 3 +- commcare_export/writers.py | 8 +++--- tests/conftest.py | 13 +++++++-- tests/test_checkpointmanager.py | 18 ++++++++++-- tests/test_cli.py | 24 ++++++++++------ tests/test_commcare_hq_client.py | 29 +++++++++++++------ tests/test_commcare_minilinq.py | 8 +++--- tests/test_env.py | 1 + tests/test_excel_query.py | 14 +++++++--- tests/test_map_format.py | 16 +++++++---- tests/test_minilinq.py | 2 +- tests/test_misc.py | 14 ++++++++-- tests/test_repeatable_iterator.py | 13 +++++++-- tests/test_writers.py | 20 ++++++++++--- 29 files changed, 254 insertions(+), 106 deletions(-) create mode 100644 .isort.cfg diff --git 
a/.isort.cfg b/.isort.cfg new file mode 100644 index 00000000..d09f55c8 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,5 @@ +# https://github.com/timothycrosley/isort/wiki/isort-Settings +[settings] +multi_line_output=3 +include_trailing_comma=true + diff --git a/commcare_export/builtin_queries.py b/commcare_export/builtin_queries.py index da2f2acd..beadc5d3 100644 --- a/commcare_export/builtin_queries.py +++ b/commcare_export/builtin_queries.py @@ -133,4 +133,3 @@ def column_to_require(self, data_source): return ColumnEnforcer.columns_to_require[data_source] else: return None - diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index dc68d348..f3dfc5ce 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -1,16 +1,22 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import datetime import logging -import uuid - import os +import uuid from contextlib import contextmanager from operator import attrgetter import dateutil.parser import six -from sqlalchemy import Column, String, Boolean, func, and_ +from sqlalchemy import Boolean, Column, String, and_, func from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 5ceb1bd7..d8d0c870 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -1,4 +1,11 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import argparse import getpass @@ -13,20 +20,25 @@ import sqlalchemy from six.moves import input -from commcare_export import excel_query -from commcare_export import writers +from commcare_export import builtin_queries, excel_query, writers from commcare_export.checkpoint import CheckpointManagerProvider -from commcare_export.misc import default_to_json -from commcare_export.utils import get_checkpoint_manager -from commcare_export.commcare_hq_client import CommCareHqClient, LATEST_KNOWN_VERSION, ResourceRepeatException +from commcare_export.commcare_hq_client import ( + LATEST_KNOWN_VERSION, + CommCareHqClient, + ResourceRepeatException, +) from commcare_export.commcare_minilinq import CommCareHqEnv -from commcare_export.env import BuiltInEnv, JsonPathEnv, EmitterEnv -from commcare_export.exceptions import LongFieldsException, DataExportException, MissingQueryFileException -from commcare_export.minilinq import MiniLinq, List +from commcare_export.env import BuiltInEnv, EmitterEnv, JsonPathEnv +from commcare_export.exceptions import ( + DataExportException, + MissingQueryFileException, +) +from commcare_export.location_info_provider import LocationInfoProvider +from commcare_export.minilinq import List, MiniLinq +from commcare_export.misc import default_to_json from commcare_export.repeatable_iterator import RepeatableIterator +from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ -from commcare_export import builtin_queries -from commcare_export.location_info_provider import LocationInfoProvider EXIT_STATUS_ERROR = 1 diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 03a90e8c..66f5d6d3 100644 --- 
a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -1,13 +1,22 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import copy import logging from collections import OrderedDict -import backoff import requests -from requests.auth import AuthBase -from requests.auth import HTTPDigestAuth +from requests.auth import AuthBase, HTTPDigestAuth + +import backoff +import commcare_export +from commcare_export.repeatable_iterator import RepeatableIterator AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' @@ -19,8 +28,6 @@ from urlparse import urlparse, parse_qs from urllib import urlopen, urlencode -import commcare_export -from commcare_export.repeatable_iterator import RepeatableIterator logger = logging.getLogger(__name__) @@ -266,4 +273,3 @@ def __ne__(self, other): def __call__(self, r): r.headers['Authorization'] = 'apikey %s:%s' % (self.username, self.apikey) return r - diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index cc23ee55..afb489c2 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -7,9 +7,10 @@ import json from enum import Enum +from dateutil.parser import ParserError, parse + from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap -from dateutil.parser import ParserError, parse try: from urllib.parse import parse_qs, urlparse diff --git a/commcare_export/env.py b/commcare_export/env.py index 9b03687c..28945363 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -1,21 +1,25 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import hashlib import json -import uuid -from datetime import datetime import operator +import uuid + import pytz import six -from itertools import chain - -from jsonpath_ng import jsonpath -from jsonpath_ng.parser import parse as parse_jsonpath from commcare_export.jsonpath_utils import split_leftmost from commcare_export.misc import unwrap, unwrap_val - from commcare_export.repeatable_iterator import RepeatableIterator +from jsonpath_ng import jsonpath +from jsonpath_ng.parser import parse as parse_jsonpath JSONPATH_CACHE = {} diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index b2f1de5a..34852389 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -1,17 +1,27 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes -import re +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) + from collections import defaultdict, namedtuple -from jsonpath_ng.lexer import JsonPathLexerError from six.moves import xrange -from jsonpath_ng import jsonpath -from jsonpath_ng.parser import parse as parse_jsonpath - -from commcare_export.exceptions import LongFieldsException, MissingColumnException, ReservedTableNameException +from commcare_export.exceptions import ( + LongFieldsException, + MissingColumnException, + ReservedTableNameException, +) from commcare_export.jsonpath_utils import split_leftmost from 
commcare_export.map_format import compile_map_format_via from commcare_export.minilinq import * +from jsonpath_ng import jsonpath +from jsonpath_ng.parser import parse as parse_jsonpath + def take_while(pred, iterator): for v in iterator: diff --git a/commcare_export/location_info_provider.py b/commcare_export/location_info_provider.py index 8911a523..78992926 100644 --- a/commcare_export/location_info_provider.py +++ b/commcare_export/location_info_provider.py @@ -1,7 +1,7 @@ import logging -from commcare_export.misc import unwrap_val from commcare_export.commcare_minilinq import SimplePaginator +from commcare_export.misc import unwrap_val logger = logging.getLogger(__name__) @@ -90,5 +90,3 @@ def get_location_hierarchy(self): loc_uri = loc_data['parent'] ancestors[resource_uri] = type_code_to_id return ancestors - - diff --git a/commcare_export/map_format.py b/commcare_export/map_format.py index 56a7c6e4..5a68bdcc 100644 --- a/commcare_export/map_format.py +++ b/commcare_export/map_format.py @@ -1,6 +1,6 @@ import re -from commcare_export.minilinq import Literal, Apply, Reference +from commcare_export.minilinq import Apply, Literal, Reference SELECTED_AT = 'selected-at' SELECTED = 'selected' diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 9db8ae52..cd9a9d4e 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,13 +1,19 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) + import logging import six from six.moves import map from commcare_export.misc import unwrap, unwrap_val - from commcare_export.repeatable_iterator import RepeatableIterator - from commcare_export.specs import TableSpec logger = logging.getLogger(__name__) diff --git a/commcare_export/misc.py b/commcare_export/misc.py index 1940959f..0ac1c196 100644 --- a/commcare_export/misc.py +++ b/commcare_export/misc.py @@ -1,10 +1,19 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) + import functools import hashlib import inspect import io -from jsonpath_ng import jsonpath + from commcare_export.repeatable_iterator import RepeatableIterator +from jsonpath_ng import jsonpath def digest_file(path): diff --git a/commcare_export/repeatable_iterator.py b/commcare_export/repeatable_iterator.py index 17285e44..596d6413 100644 --- a/commcare_export/repeatable_iterator.py +++ b/commcare_export/repeatable_iterator.py @@ -1,4 +1,3 @@ -from types import GeneratorType class RepeatableIterator(object): diff --git a/commcare_export/utils.py b/commcare_export/utils.py index 74cfceda..2efbad59 100644 --- a/commcare_export/utils.py +++ b/commcare_export/utils.py @@ -1,11 +1,18 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import sys -from commcare_export import misc -from commcare_export.checkpoint import CheckpointManager from six.moves import input +from commcare_export import misc +from commcare_export.checkpoint import CheckpointManager from commcare_export.writers import StreamingMarkdownTableWriter diff --git 
a/commcare_export/utils_cli.py b/commcare_export/utils_cli.py index b7e19e0d..6fe77fdc 100644 --- a/commcare_export/utils_cli.py +++ b/commcare_export/utils_cli.py @@ -1,4 +1,11 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import argparse import inspect @@ -6,7 +13,7 @@ import sys from commcare_export.cli import CLI_ARGS -from commcare_export.utils import get_checkpoint_manager, confirm, print_runs +from commcare_export.utils import confirm, get_checkpoint_manager, print_runs EXIT_STATUS_ERROR = 1 diff --git a/commcare_export/version.py b/commcare_export/version.py index d4e1d4e1..d2be2b9c 100644 --- a/commcare_export/version.py +++ b/commcare_export/version.py @@ -1,7 +1,8 @@ from __future__ import print_function, unicode_literals + import io -import subprocess import os.path +import subprocess __all__ = ['__version__', 'stored_version', 'git_version'] diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 6d56c4d5..5c9f3fdb 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -2,15 +2,15 @@ import io import logging import zipfile -from six.moves import zip_longest -from alembic.migration import MigrationContext -from alembic.operations import Operations -import csv342 as csv import six import sqlalchemy from six import u +from six.moves import zip_longest +import csv342 as csv +from alembic.migration import MigrationContext +from alembic.operations import Operations from commcare_export.data_types import UnknownDataType, get_sqlalchemy_type from commcare_export.specs import TableSpec diff --git a/tests/conftest.py b/tests/conftest.py index 365a9239..7d254b5b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,14 +1,22 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import logging import os import uuid -import pytest import sqlalchemy from sqlalchemy.exc import DBAPIError +import pytest + TEST_DB = 'test_commcare_export_%s' % uuid.uuid4().hex logging.getLogger().setLevel(logging.DEBUG) @@ -85,4 +93,3 @@ def db_params(request): ], ids=['postgres']) def pg_db_params(request): return _db_params(request, 'test_commcare_export_%s' % uuid.uuid4().hex) - diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index bfb750ba..a9f73e2e 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -1,13 +1,25 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import datetime import uuid -import pytest import sqlalchemy -from commcare_export.checkpoint import CheckpointManager, Checkpoint, session_scope, CheckpointManagerProvider +import pytest +from commcare_export.checkpoint import ( + Checkpoint, + CheckpointManager, + CheckpointManagerProvider, + session_scope, +) from commcare_export.commcare_minilinq import PaginationMode diff --git a/tests/test_cli.py b/tests/test_cli.py index d98ed122..dd843111 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,22 +1,30 @@ # -*- 
coding: utf-8 -*- -import csv342 as csv import os import re import unittest from argparse import Namespace from copy import copy +from unittest import mock -import pytest import sqlalchemy -from mock import mock +from tests.utils import SqlWriterWithTearDown -from commcare_export.checkpoint import CheckpointManager, session_scope, Checkpoint -from commcare_export.cli import CLI_ARGS, EXIT_STATUS_ERROR, main_with_args -from commcare_export.commcare_hq_client import MockCommCareHqClient, CommCareHqClient, _params_to_url +import csv342 as csv +import pytest +from commcare_export.checkpoint import ( + Checkpoint, + CheckpointManager, + session_scope, +) +from commcare_export.cli import CLI_ARGS, main_with_args +from commcare_export.commcare_hq_client import ( + CommCareHqClient, + MockCommCareHqClient, + _params_to_url, +) from commcare_export.commcare_minilinq import PaginationMode from commcare_export.specs import TableSpec -from commcare_export.writers import JValueTableWriter, SqlTableWriter -from tests.utils import SqlWriterWithTearDown +from commcare_export.writers import JValueTableWriter CLI_ARGS_BY_NAME = { arg.name: arg diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index fae0ef45..9063c4b5 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -1,19 +1,31 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) -import json import unittest from datetime import datetime -import simplejson - import requests +import simplejson import pytest - from commcare_export.checkpoint import CheckpointManagerWithDetails -from commcare_export.commcare_hq_client import CommCareHqClient, ResourceRepeatException -from commcare_export.commcare_minilinq import SimplePaginator, DatePaginator, get_paginator, \ - DATE_PARAMS, PaginationMode +from commcare_export.commcare_hq_client import ( + CommCareHqClient, + ResourceRepeatException, +) +from commcare_export.commcare_minilinq import ( + DATE_PARAMS, + DatePaginator, + PaginationMode, + SimplePaginator, + get_paginator, +) class FakeSession(object): @@ -188,4 +200,3 @@ def test_multi_field_sort(self): 's1': None, 's2': d2 }]}), datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ')) - diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index 5ae7c043..c24bd851 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -1,13 +1,13 @@ import unittest from itertools import * -from jsonpath_ng import jsonpath - from commcare_export.checkpoint import CheckpointManagerWithDetails -from commcare_export.minilinq import * -from commcare_export.env import * from commcare_export.commcare_hq_client import MockCommCareHqClient from commcare_export.commcare_minilinq import * +from commcare_export.env import * +from commcare_export.minilinq import * +from jsonpath_ng import jsonpath + class TestCommCareMiniLinq(unittest.TestCase): diff --git a/tests/test_env.py b/tests/test_env.py index 0e1ea718..1e63c295 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,4 +1,5 @@ import doctest + import commcare_export.env diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index a81e14ca..f23d7083 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -1,4 +1,11 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, 
generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import os.path import pprint @@ -6,11 +13,10 @@ import openpyxl -from commcare_export.env import BuiltInEnv -from commcare_export.env import JsonPathEnv +from commcare_export.builtin_queries import ColumnEnforcer +from commcare_export.env import BuiltInEnv, JsonPathEnv from commcare_export.excel_query import * from commcare_export.excel_query import _get_safe_source_field -from commcare_export.builtin_queries import ColumnEnforcer class TestExcelQuery(unittest.TestCase): diff --git a/tests/test_map_format.py b/tests/test_map_format.py index b8ba3e96..e3b2cee1 100644 --- a/tests/test_map_format.py +++ b/tests/test_map_format.py @@ -1,13 +1,17 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) import unittest -from commcare_export.map_format import ( - parse_template, - parse_function_arg, -) -from commcare_export.minilinq import Apply, Reference, Literal +from commcare_export.map_format import parse_function_arg, parse_template +from commcare_export.minilinq import Apply, Literal, Reference class TestMapFormats(unittest.TestCase): diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 2f549164..388587c7 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -3,9 +3,9 @@ import unittest from itertools import * -import pytest from six.moves import xrange +import pytest from commcare_export.env import * from commcare_export.excel_query import get_value_or_root_expression from commcare_export.minilinq import * diff --git a/tests/test_misc.py b/tests/test_misc.py index 136e2476..18deae53 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1,9 +1,17 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes -import unittest +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) + import hashlib -import tempfile import struct +import tempfile +import unittest from commcare_export import misc diff --git a/tests/test_repeatable_iterator.py b/tests/test_repeatable_iterator.py index 58098223..2660678b 100644 --- a/tests/test_repeatable_iterator.py +++ b/tests/test_repeatable_iterator.py @@ -1,9 +1,18 @@ -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes -from itertools import * +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) + import unittest +from itertools import * from commcare_export.repeatable_iterator import RepeatableIterator + class TestRepeatableIterator(unittest.TestCase): @classmethod diff --git a/tests/test_writers.py b/tests/test_writers.py index 679c3683..d3454124 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -1,18 +1,30 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes +from __future__ import ( + absolute_import, + division, + generators, + nested_scopes, + print_function, + unicode_literals, +) -import csv342 as csv import datetime import io import tempfile import zipfile import openpyxl 
-import pytest import sqlalchemy +import csv342 as csv +import pytest from commcare_export.specs import TableSpec -from commcare_export.writers import SqlTableWriter, JValueTableWriter, Excel2007TableWriter, CsvTableWriter +from commcare_export.writers import ( + CsvTableWriter, + Excel2007TableWriter, + JValueTableWriter, + SqlTableWriter, +) @pytest.fixture() From 1a87de8cf58a7150e06a893ff4408ac368c6475c Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 17:26:05 +0100 Subject: [PATCH 130/257] Drop six and imports for Python 2 --- commcare_export/checkpoint.py | 12 +----------- commcare_export/cli.py | 10 ---------- commcare_export/commcare_hq_client.py | 9 ++------- commcare_export/commcare_minilinq.py | 7 +------ commcare_export/data_types.py | 1 + commcare_export/env.py | 24 ++++++++---------------- commcare_export/excel_query.py | 18 +++++++++++------- commcare_export/minilinq.py | 15 ++------------- commcare_export/misc.py | 9 --------- commcare_export/utils.py | 11 ----------- commcare_export/utils_cli.py | 9 --------- commcare_export/version.py | 2 -- commcare_export/writers.py | 26 +++++++++++++------------- setup.py | 1 - tests/conftest.py | 10 ---------- tests/test_checkpointmanager.py | 10 ---------- tests/test_cli.py | 7 +------ tests/test_commcare_hq_client.py | 9 --------- tests/test_excel_query.py | 9 --------- tests/test_map_format.py | 10 ---------- tests/test_minilinq.py | 5 +---- tests/test_misc.py | 10 ---------- tests/test_repeatable_iterator.py | 9 --------- tests/test_writers.py | 10 ---------- 24 files changed, 41 insertions(+), 202 deletions(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index f3dfc5ce..6b5c92e1 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import datetime import logging import os @@ -15,7 +6,6 @@ from operator import attrgetter import dateutil.parser -import six from sqlalchemy import Boolean, Column, String, and_, func from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker @@ -130,7 +120,7 @@ def _set_checkpoint(self, checkpoint_time, pagination_mode, final, time_of_run=N raise DataExportException('Tried to set an empty checkpoint. 
This is not allowed.') self._validate_tables() - if isinstance(checkpoint_time, six.text_type): + if isinstance(checkpoint_time, str): since_param = checkpoint_time else: since_param = checkpoint_time.isoformat() diff --git a/commcare_export/cli.py b/commcare_export/cli.py index d8d0c870..31ca105d 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import argparse import getpass import io @@ -18,7 +9,6 @@ import dateutil.parser import requests import sqlalchemy -from six.moves import input from commcare_export import builtin_queries, excel_query, writers from commcare_export.checkpoint import CheckpointManagerProvider diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 66f5d6d3..1c26c919 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -10,6 +10,7 @@ import copy import logging from collections import OrderedDict +from urllib.parse import urlencode import requests from requests.auth import AuthBase, HTTPDigestAuth @@ -21,19 +22,13 @@ AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' -try: - from urllib.request import urlopen - from urllib.parse import urlparse, urlencode, parse_qs -except ImportError: - from urlparse import urlparse, parse_qs - from urllib import urlopen, urlencode - logger = logging.getLogger(__name__) LATEST_KNOWN_VERSION='0.5' RESOURCE_REPEAT_LIMIT=10 + def on_backoff(details): _log_backoff(details, 'Waiting for retry.') diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index afb489c2..89af619a 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -6,18 +6,13 @@ """ import json from enum import Enum +from urllib.parse import parse_qs, urlparse from dateutil.parser import ParserError, parse from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap -try: - from urllib.parse import parse_qs, urlparse -except ImportError: - from urlparse import parse_qs, urlparse - - SUPPORTED_RESOURCES = { 'form', 'case', 'user', 'location', 'application', 'web-user', 'messaging-event' } diff --git a/commcare_export/data_types.py b/commcare_export/data_types.py index b15bcf8e..8ffe3c8f 100644 --- a/commcare_export/data_types.py +++ b/commcare_export/data_types.py @@ -16,6 +16,7 @@ DATA_TYPE_JSON: sqlalchemy.JSON(), } + class UnknownDataType(Exception): pass diff --git a/commcare_export/env.py b/commcare_export/env.py index 28945363..76250e06 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -1,19 +1,9 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import hashlib import json import operator import uuid import pytz -import six from commcare_export.jsonpath_utils import split_leftmost from commcare_export.misc import unwrap, unwrap_val @@ -23,11 +13,13 @@ JSONPATH_CACHE = {} + class CannotBind(Exception): pass class CannotReplace(Exception): pass class CannotEmit(Exception): pass class NotFound(Exception): pass + class Env(object): """ An abstract model of an "environment" where data can be bound to @@ -198,7 +190,7 @@ def parse(self, jsonpath_string): def lookup(self, name): "str|JsonPath -> ??" 
- if isinstance(name, six.string_types): + if isinstance(name, str): jsonpath_expr = self.parse(name) elif isinstance(name, jsonpath.JSONPath): jsonpath_expr = name @@ -227,7 +219,7 @@ def bind(self, *args): new_bindings.update(args[0]) return self.__class__(new_bindings) - elif isinstance(args[0], six.string_types): + elif isinstance(args[0], str): new_bindings[args[0]] = args[1] return self.__class__(new_bindings) @@ -250,8 +242,8 @@ def _not_val(val): def _to_unicode(val): if isinstance(val, bytes): return val.decode('utf8') - elif not isinstance(val, six.text_type): - return six.text_type(val) + elif not isinstance(val, str): + return str(val) return val @@ -314,7 +306,7 @@ def sha1(val): return None if not isinstance(val, bytes): - val = six.text_type(val).encode('utf8') + val = str(val).encode('utf8') return hashlib.sha1(val).hexdigest() @@ -360,7 +352,7 @@ def count_selected(val): @unwrap('val') def json2str(val): - if isinstance(val, six.string_types): + if isinstance(val, str): return val try: return json.dumps(val) diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index 34852389..bd7063e9 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -9,8 +9,6 @@ from collections import defaultdict, namedtuple -from six.moves import xrange - from commcare_export.exceptions import ( LongFieldsException, MissingColumnException, @@ -30,6 +28,7 @@ def take_while(pred, iterator): else: return + def drop_while(pred, iterator): for v in iterator: if not pred(v): @@ -39,12 +38,14 @@ def drop_while(pred, iterator): for v in iterator: yield v + def without_empty_tail(cells): """ Returns the prefix of a column that is not entirely empty. """ return list(reversed(list(drop_while(lambda v: (not v) or (not v.value), reversed(cells))))) + def map_value(mappings_sheet, mapping_name, source_value): "From the mappings_sheet, replaces the source_value with appropriate output value" return source_value @@ -52,22 +53,22 @@ def map_value(mappings_sheet, mapping_name, source_value): def get_column_by_name(worksheet, column_name): # columns and rows are indexed from 1 - for col in xrange(1, worksheet.max_column + 1): + for col in range(1, worksheet.max_column + 1): value = worksheet.cell(row=1, column=col).value value = value.lower().strip() if value else value if column_name == value: return without_empty_tail([ - worksheet.cell(row=i, column=col) for i in xrange(2, worksheet.max_row + 1) + worksheet.cell(row=i, column=col) for i in range(2, worksheet.max_row + 1) ]) def get_columns_by_prefix(worksheet, column_prefix): # columns and rows are indexed from 1 - for col in xrange(1, worksheet.max_column + 1): + for col in range(1, worksheet.max_column + 1): value = worksheet.cell(row=1, column=col).value if value and value.lower().startswith(column_prefix): yield value, without_empty_tail([ - worksheet.cell(row=i, column=col) for i in xrange(2, worksheet.max_row + 1) + worksheet.cell(row=i, column=col) for i in range(2, worksheet.max_row + 1) ]) @@ -84,6 +85,7 @@ def compile_mappings(worksheet): return mappings + def compile_filters(worksheet, mappings=None): filter_names = [cell.value for cell in get_column_by_name(worksheet, 'filter name') or []] @@ -93,9 +95,10 @@ def compile_filters(worksheet, mappings=None): filter_values = extended_to_len(len(filter_names), [cell.value for cell in get_column_by_name(worksheet, 'filter value') or []]) return zip(filter_names, filter_values) + def extended_to_len(desired_len, some_list, value=None): return [some_list[i] 
if i < len(some_list) else value - for i in xrange(0, desired_len)] + for i in range(0, desired_len)] def _get_safe_source_field(source_field): @@ -316,6 +319,7 @@ def require_column_in_sheet(sheet_name, data_source, table_name, output_headings return (headings, body) + def parse_sheet(worksheet, mappings=None, column_enforcer=None, value_or_root=False): mappings = mappings or {} data_source, source_expr, root_doc_expr = compile_source(worksheet, value_or_root) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index cd9a9d4e..b6dda9fe 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,23 +1,12 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import logging -import six -from six.moves import map - from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.specs import TableSpec logger = logging.getLogger(__name__) + class MiniLinq(object): """ The abstract base class for MiniLinqs, and also the factory/registry @@ -54,7 +43,7 @@ def from_jvalue(cls, jvalue): if not issubclass(MiniLinq, cls): raise NotImplementedError() - if isinstance(jvalue, six.string_types): + if isinstance(jvalue, str): return jvalue elif isinstance(jvalue, list): diff --git a/commcare_export/misc.py b/commcare_export/misc.py index 0ac1c196..70aa41e3 100644 --- a/commcare_export/misc.py +++ b/commcare_export/misc.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import functools import hashlib import inspect diff --git a/commcare_export/utils.py b/commcare_export/utils.py index 2efbad59..85f4b045 100644 --- a/commcare_export/utils.py +++ b/commcare_export/utils.py @@ -1,16 +1,5 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import sys -from six.moves import input - from commcare_export import misc from commcare_export.checkpoint import CheckpointManager from commcare_export.writers import StreamingMarkdownTableWriter diff --git a/commcare_export/utils_cli.py b/commcare_export/utils_cli.py index 6fe77fdc..be84d0a6 100644 --- a/commcare_export/utils_cli.py +++ b/commcare_export/utils_cli.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import argparse import inspect import logging diff --git a/commcare_export/version.py b/commcare_export/version.py index d2be2b9c..f2e430ea 100644 --- a/commcare_export/version.py +++ b/commcare_export/version.py @@ -1,5 +1,3 @@ -from __future__ import print_function, unicode_literals - import io import os.path import subprocess diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 5c9f3fdb..bb2652cd 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -2,11 +2,9 @@ import io import logging import zipfile +from itertools import zip_longest -import six import sqlalchemy -from six import u -from six.moves import zip_longest import csv342 as csv from alembic.migration import MigrationContext @@ -23,27 +21,29 @@ def ensure_text(v, convert_none=False): if v is None: return '' if convert_none else v - if isinstance(v, six.text_type): + if isinstance(v, str): + return v + elif isinstance(v, bytes): return v - elif isinstance(v, six.binary_type): - return 
u(v) elif isinstance(v, datetime.datetime): return v.strftime('%Y-%m-%d %H:%M:%S') elif isinstance(v, datetime.date): return v.isoformat() else: - return u(str(v)) + return str(v) + def to_jvalue(v): if v is None: return None - if isinstance(v, (six.text_type,) + six.integer_types): + if isinstance(v, (str, int)): + return v + elif isinstance(v, bytes): return v - elif isinstance(v, six.binary_type): - return u(v) else: - return u(str(v)) + return str(v) + class TableWriter(object): """ @@ -364,7 +364,7 @@ def best_type_for(self, val): if isinstance(val, int): return sqlalchemy.Integer() - elif isinstance(val, six.string_types): + elif isinstance(val, str): if self.is_postgres: # PostgreSQL is the best; you can use TEXT everywhere and it works like a charm. return sqlalchemy.UnicodeText(collation=self.collation) @@ -436,7 +436,7 @@ def strict_types_compatibility_check(self, source_type, dest_type, val): return # Can't do anything elif dest_type.length is None: return # already a TEXT column - elif isinstance(val, six.string_types) and dest_type.length >= len(val): + elif isinstance(val, str) and dest_type.length >= len(val): return # no need to upgrade to TEXT column elif source_type.length is None: return sqlalchemy.UnicodeText(collation=self.collation) diff --git a/setup.py b/setup.py index 757de810..b01a087a 100644 --- a/setup.py +++ b/setup.py @@ -87,7 +87,6 @@ def run_tests(self): 'pytz', 'requests', 'simplejson', - 'six', 'sqlalchemy', 'sqlalchemy-migrate' ], diff --git a/tests/conftest.py b/tests/conftest.py index 7d254b5b..2c1fca04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,13 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import logging import os import uuid diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index a9f73e2e..1906d7e0 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -1,13 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import datetime import uuid diff --git a/tests/test_cli.py b/tests/test_cli.py index dd843111..7d0773c6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,9 +1,9 @@ -# -*- coding: utf-8 -*- import os import re import unittest from argparse import Namespace from copy import copy +from itertools import zip_longest from unittest import mock import sqlalchemy @@ -31,11 +31,6 @@ for arg in CLI_ARGS } -try: - from itertools import izip_longest as zip_longest -except ImportError: - # PY 3 - from itertools import zip_longest DEFAULT_BATCH_SIZE = 200 diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 9063c4b5..6337d371 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import unittest from datetime import datetime diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index f23d7083..526454b6 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import os.path import pprint import unittest diff --git a/tests/test_map_format.py b/tests/test_map_format.py 
index e3b2cee1..fb964b17 100644 --- a/tests/test_map_format.py +++ b/tests/test_map_format.py @@ -1,13 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import unittest from commcare_export.map_format import parse_function_arg, parse_template diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 388587c7..e2197194 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -1,10 +1,7 @@ -# -*- coding: utf-8 -*- import types import unittest from itertools import * -from six.moves import xrange - import pytest from commcare_export.env import * from commcare_export.excel_query import get_value_or_root_expression @@ -39,7 +36,7 @@ def test_eval_reference(self): assert Reference("foo").eval(DictEnv({'foo': 2})) == 2 assert Reference(Reference(Reference('a'))).eval(DictEnv({'a': 'b', 'b': 'c', 'c': 2})) == 2 self.check_case(Reference("foo[*]").eval(JsonPathEnv({'foo': [2]})), [2]) - self.check_case(Reference("foo[*]").eval(JsonPathEnv({'foo': xrange(0, 1)})), [0]) # Should work the same w/ iterators as with lists + self.check_case(Reference("foo[*]").eval(JsonPathEnv({'foo': range(0, 1)})), [0]) # Should work the same w/ iterators as with lists # Should be able to get back out to the root, as the JsonPathEnv actually passes the full datum around self.check_case(Reference("foo.$.baz").eval(JsonPathEnv({'foo': [2], 'baz': 3})), [3]) diff --git a/tests/test_misc.py b/tests/test_misc.py index 18deae53..3975a6ef 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -1,13 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import hashlib import struct import tempfile diff --git a/tests/test_repeatable_iterator.py b/tests/test_repeatable_iterator.py index 2660678b..b8a396c8 100644 --- a/tests/test_repeatable_iterator.py +++ b/tests/test_repeatable_iterator.py @@ -1,12 +1,3 @@ -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import unittest from itertools import * diff --git a/tests/test_writers.py b/tests/test_writers.py index d3454124..fabd719c 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -1,13 +1,3 @@ -# -*- coding: utf-8 -*- -from __future__ import ( - absolute_import, - division, - generators, - nested_scopes, - print_function, - unicode_literals, -) - import datetime import io import tempfile From e814ed1136516c33897bb1b9f9bc3084bc78aae3 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 17:26:26 +0100 Subject: [PATCH 131/257] Add missing import --- tests/test_minilinq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index e2197194..f9a0d15d 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -1,5 +1,6 @@ import types import unittest +from datetime import datetime from itertools import * import pytest From fd15691293c0c79b371c31295af4d4bd58d6ce4c Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 19 Apr 2022 20:20:46 +0100 Subject: [PATCH 132/257] Drop csv342 csv342 backports Python 3's csv to Python 2. We don't need it any more. 
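As a minimal sketch of why (illustrative, not part of the commit): on Python 3 the standard library's csv module accepts str rows and a text buffer directly, so no byte-encoding shim is needed. The sample values below are invented; the write-then-encode pattern matches what CsvTableWriter does when adding each CSV to its zip archive.

```python
import csv
import io

# Python 3's csv module writes text natively -- no csv342 wrapper needed.
buf = io.StringIO()
writer = csv.writer(buf, dialect=csv.excel)
writer.writerow(['name', 'village'])       # headings
writer.writerow(['Zoë', '日本'])           # non-ASCII str values just work
payload = buf.getvalue().encode('utf-8')   # encode once, at the zip boundary
```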
--- commcare_export/writers.py | 12 +++--------- setup.py | 1 - tests/test_cli.py | 2 +- tests/test_writers.py | 2 +- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index bb2652cd..682fa866 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -1,3 +1,4 @@ +import csv import datetime import io import logging @@ -6,7 +7,6 @@ import sqlalchemy -import csv342 as csv from alembic.migration import MigrationContext from alembic.operations import Operations from commcare_export.data_types import UnknownDataType, get_sqlalchemy_type @@ -88,17 +88,11 @@ def write_table(self, table): if self.archive is None: raise Exception('Attempt to write to a closed CsvWriter') - def _encode_row(row): - return [ - val.encode('utf-8') if isinstance(val, bytes) else val - for val in row - ] - tempfile = io.StringIO() writer = csv.writer(tempfile, dialect=csv.excel) - writer.writerow(_encode_row(table.headings)) + writer.writerow(table.headings) for row in table.rows: - writer.writerow(_encode_row(row)) + writer.writerow(row) # TODO: make this a polite zip and put everything in a subfolder with the same basename # as the zipfile diff --git a/setup.py b/setup.py index b01a087a..bc1b3586 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,6 @@ def run_tests(self): 'alembic', 'argparse', 'backoff', - 'csv342', 'jsonpath-ng~=1.5', 'ndg-httpsclient', 'openpyxl==2.5.12', diff --git a/tests/test_cli.py b/tests/test_cli.py index 7d0773c6..269cd67c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,3 +1,4 @@ +import csv import os import re import unittest @@ -9,7 +10,6 @@ import sqlalchemy from tests.utils import SqlWriterWithTearDown -import csv342 as csv import pytest from commcare_export.checkpoint import ( Checkpoint, diff --git a/tests/test_writers.py b/tests/test_writers.py index fabd719c..43f580fc 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -1,3 +1,4 @@ +import csv import datetime import io import tempfile @@ -6,7 +7,6 @@ import openpyxl import sqlalchemy -import csv342 as csv import pytest from commcare_export.specs import TableSpec from commcare_export.writers import ( From a33c4421b80d9008aae5dd919612c41fc8da0dc9 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 20 Apr 2022 23:46:27 +0100 Subject: [PATCH 133/257] Add yapf config. Clean commcare_export/writers.py --- .style.yapf | 10 ++ commcare_export/writers.py | 287 ++++++++++++++++++++++++------------- 2 files changed, 196 insertions(+), 101 deletions(-) create mode 100644 .style.yapf diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 00000000..4398b865 --- /dev/null +++ b/.style.yapf @@ -0,0 +1,10 @@ +[style] +based_on_style = yapf +column_limit = 79 +indent_width = 4 +coalesce_brackets = true +dedent_closing_brackets = true +spaces_before_comment = 2 +split_before_arithmetic_operator = true +split_before_bitwise_operator = true +split_before_logical_operator = true diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 682fa866..779f0a85 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -47,25 +47,25 @@ def to_jvalue(v): class TableWriter(object): """ - Interface for export writers: Usable in a "with" - statement, and while open one can call write_table. + Interface for export writers: Usable in a "with" statement, and + while open one can call write_table. 
- If the implementing class does not actually need any - set up, no-op defaults have been provided + If the implementing class does not actually need any set up, no-op + defaults have been provided. """ max_column_length = None support_checkpoints = False - # set to False if writer does not support writing to the same table multiple times + # set to False if writer does not support writing to the same table + # multiple times supports_multi_table_write = True required_columns = None def __enter__(self): return self - - def write_table(self, table): - "{'name': str, 'headings': [str], 'rows': [[str]]} -> ()" + + def write_table(self, table: TableSpec) -> None: raise NotImplementedError() def __exit__(self, exc_type, exc_val, exc_tb): @@ -79,7 +79,7 @@ def __init__(self, file, max_column_size=MAX_COLUMN_SIZE): self.file = file self.tables = [] self.archive = None - + def __enter__(self): self.archive = zipfile.ZipFile(self.file, 'w', zipfile.ZIP_DEFLATED) return self @@ -94,10 +94,12 @@ def write_table(self, table): for row in table.rows: writer.writerow(row) - # TODO: make this a polite zip and put everything in a subfolder with the same basename - # as the zipfile - self.archive.writestr('%s.csv' % self.zip_safe_name(table.name), - tempfile.getvalue().encode('utf-8')) + # TODO: make this a polite zip and put everything in a subfolder + # with the same basename as the zipfile + self.archive.writestr( + '%s.csv' % self.zip_safe_name(table.name), + tempfile.getvalue().encode('utf-8') + ) def __exit__(self, exc_type, exc_val, exc_tb): self.archive.close() @@ -108,14 +110,16 @@ def zip_safe_name(self, name): class Excel2007TableWriter(TableWriter): max_table_name_size = 31 - + def __init__(self, file): try: import openpyxl except ImportError: - raise Exception("It doesn't look like this machine is configured for " - "excel export. To export to excel you have to run the " - "command: pip install openpyxl") + raise Exception( + "It doesn't look like this machine is configured for " + "Excel export. To export to Excel you have to run the " + "command: pip install openpyxl" + ) self.file = file self.book = openpyxl.workbook.Workbook(write_only=True) @@ -150,9 +154,11 @@ def __init__(self, file): try: import xlwt except ImportError: - raise Exception("It doesn't look like this machine is configured for " - "excel export. To export to excel you have to run the " - "command: pip install xlwt") + raise Exception( + "It doesn't look like this machine is configured for " + "excel export. 
To export to excel you have to run the " + "command: pip install xlwt" + ) self.file = file self.book = xlwt.Workbook() @@ -179,10 +185,10 @@ def get_sheet(self, table): for colnum, val in enumerate(table.headings): sheet.write(0, colnum, ensure_text(val)) - self.sheets[name] = (sheet, 1) # start from row 1 + self.sheets[name] = (sheet, 1) # start from row 1 return self.sheets[name] - + def __exit__(self, exc_type, exc_val, exc_tb): self.book.save(self.file) @@ -194,7 +200,7 @@ class JValueTableWriter(TableWriter): def __init__(self): self.tables = {} - + def write_table(self, table): if table.name not in self.tables: self.tables[table.name] = TableSpec( @@ -205,39 +211,52 @@ def write_table(self, table): else: assert self.tables[table.name].headings == list(table.headings) - self.tables[table.name].rows = list(self.tables[table.name].rows) + [ - [to_jvalue(v) for v in row] for row in table.rows - ] + self.tables[table.name].rows = list( + self.tables[table.name].rows + ) + [[to_jvalue(v) for v in row] for row in table.rows] class StreamingMarkdownTableWriter(TableWriter): """ - Writes markdown to an output stream, where each table just comes one after the other + Writes markdown to an output stream, where each table just comes one + after the other """ supports_multi_table_write = False def __init__(self, output_stream, compute_widths=False): self.output_stream = output_stream self.compute_widths = compute_widths - - def write_table(self, table, ): + + def write_table( + self, + table, + ): col_widths = None if self.compute_widths: col_widths = self._get_column_widths(table) - row_template = ' | '.join(['{{:<{}}}'.format(width) for width in col_widths]) + row_template = ' | '.join([ + '{{:<{}}}'.format(width) for width in col_widths + ]) else: row_template = ' | '.join(['{}'] * len(table.headings)) if table.name: self.output_stream.write('\n# %s \n\n' % table.name) - self.output_stream.write('| %s |\n' % row_template.format(*table.headings)) + self.output_stream.write( + '| %s |\n' % row_template.format(*table.headings) + ) if col_widths: - self.output_stream.write('| %s |\n' % row_template.format(*['-' * width for width in col_widths])) + self.output_stream.write( + '| %s |\n' + % row_template.format(*['-' * width for width in col_widths]) + ) for row in table.rows: text_row = (ensure_text(val, convert_none=True) for val in row) - self.output_stream.write('| %s |\n' % row_template.format(*text_row)) + self.output_stream.write( + '| %s |\n' % row_template.format(*text_row) + ) def _get_column_widths(self, table): all_rows = [table.headings] + table.rows @@ -253,12 +272,16 @@ class SqlMixin(object): """ MIN_VARCHAR_LEN = 32 - MAX_VARCHAR_LEN = 255 # Arbitrary point at which we switch to TEXT; for postgres VARCHAR == TEXT anyhow + # Arbitrary point at which we switch to TEXT; for Postgres + # VARCHAR == TEXT anyhow + MAX_VARCHAR_LEN = 255 def __init__(self, db_url, poolclass=None, engine=None): self.db_url = db_url self.collation = 'utf8mb4_unicode_ci' if 'mysql' in db_url else None - self.engine = engine or sqlalchemy.create_engine(db_url, poolclass=poolclass) + self.engine = engine or sqlalchemy.create_engine( + db_url, poolclass=poolclass + ) def __enter__(self): self.connection = self.engine.connect() @@ -300,24 +323,33 @@ def max_column_length(self): @property def metadata(self): - if not hasattr(self, '_metadata') or self._metadata.bind.closed or self._metadata.bind.invalidated: + if ( + not hasattr(self, '_metadata') + or self._metadata.bind.closed + or 
self._metadata.bind.invalidated + ): if self.connection.closed: raise Exception('Tried to reflect via a closed connection') if self.connection.invalidated: - raise Exception('Tried to reflect via an invalidated connection') + raise Exception( + 'Tried to reflect via an invalidated connection' + ) self._metadata = sqlalchemy.MetaData() self._metadata.bind = self.connection self._metadata.reflect() return self._metadata def table(self, table_name): - return sqlalchemy.Table(table_name, self.metadata, autoload=True, autoload_with=self.connection) + return sqlalchemy.Table( + table_name, + self.metadata, + autoload=True, + autoload_with=self.connection + ) def get_id_column(self): return sqlalchemy.Column( - 'id', - sqlalchemy.Unicode(self.MAX_VARCHAR_LEN), - primary_key=True + 'id', sqlalchemy.Unicode(self.MAX_VARCHAR_LEN), primary_key=True ) @@ -343,9 +375,11 @@ def get_explicit_type(self, data_type): return get_sqlalchemy_type(data_type) except UnknownDataType: if data_type: - logger.warning("Found unknown data type '{data_type}'".format( - data_type=data_type, - )) + logger.warning( + "Found unknown data type '{data_type}'".format( + data_type=data_type, + ) + ) return self.best_type_for('') # todo: more explicit fallback def best_type_for(self, val): @@ -360,30 +394,45 @@ def best_type_for(self, val): return sqlalchemy.Integer() elif isinstance(val, str): if self.is_postgres: - # PostgreSQL is the best; you can use TEXT everywhere and it works like a charm. + # PostgreSQL is the best; you can use TEXT everywhere + # and it works like a charm. return sqlalchemy.UnicodeText(collation=self.collation) elif self.is_mysql: - # MySQL cannot build an index on TEXT due to the lack of a field length, so we - # try to use VARCHAR when possible. - if len(val) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? - return sqlalchemy.Unicode(max(len(val), self.MIN_VARCHAR_LEN), collation=self.collation) + # MySQL cannot build an index on TEXT due to the lack of + # a field length, so we try to use VARCHAR when + # possible. + if len( + val + ) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? 
+ return sqlalchemy.Unicode( + max(len(val), self.MIN_VARCHAR_LEN), + collation=self.collation + ) else: return sqlalchemy.UnicodeText(collation=self.collation) elif self.is_mssql: - # MSSQL (pre 2016) doesn't allow indices on columns longer than 900 bytes - # - https://docs.microsoft.com/en-us/sql/t-sql/statements/create-index-transact-sql - # If any of our data is bigger than this, then set the column to NVARCHAR(max) - # `length` here is the size in bytes - https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String.params.length + # MSSQL (pre 2016) doesn't allow indices on columns + # longer than 900 bytes + # https://docs.microsoft.com/en-us/sql/t-sql/statements/create-index-transact-sql + # If any of our data is bigger than this, then set the + # column to NVARCHAR(max) `length` here is the size in + # bytes + # https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String.params.length length_in_bytes = len(val.encode('utf-8')) column_length_in_bytes = None if length_in_bytes > 900 else 900 - return sqlalchemy.NVARCHAR(length=column_length_in_bytes, collation=self.collation) + return sqlalchemy.NVARCHAR( + length=column_length_in_bytes, collation=self.collation + ) if self.is_oracle: return sqlalchemy.Unicode(4000, collation=self.collation) else: - raise Exception("Unknown database dialect: {}".format(self.db_url)) + raise Exception( + "Unknown database dialect: {}".format(self.db_url) + ) else: - # We do not have a name for "bottom" in SQL aka the type whose least upper bound - # with any other type is the other type. + # We do not have a name for "bottom" in SQL aka the type + # whose least upper bound with any other type is the other + # type. return sqlalchemy.UnicodeText(collation=self.collation) def compatible(self, source_type, dest_type): @@ -394,29 +443,36 @@ def compatible(self, source_type, dest_type): if not isinstance(dest_type, sqlalchemy.String): return False elif source_type.length is None: - # The length being None means that we are looking at indefinite strings aka TEXT. - # This tool will never create strings with bounds, but if a target DB has one then - # we cannot insert to it. - # We will request that whomever uses this tool convert to TEXT type. + # The length being None means that we are looking at + # indefinite strings aka TEXT. This tool will never + # create strings with bounds, but if a target DB has one + # then we cannot insert to it. We will request that + # whoever uses this tool convert to TEXT type. 
return dest_type.length is None else: - return dest_type.length is None or (dest_type.length >= source_type.length) + return dest_type.length is None or ( + dest_type.length >= source_type.length + ) compatibility = { sqlalchemy.String: (sqlalchemy.Text,), sqlalchemy.Integer: (sqlalchemy.String, sqlalchemy.Text), - sqlalchemy.Boolean: (sqlalchemy.String, sqlalchemy.Text, sqlalchemy.Integer), - sqlalchemy.DateTime: (sqlalchemy.String, sqlalchemy.Text, sqlalchemy.Date), + sqlalchemy.Boolean: + (sqlalchemy.String, sqlalchemy.Text, sqlalchemy.Integer), + sqlalchemy.DateTime: + (sqlalchemy.String, sqlalchemy.Text, sqlalchemy.Date), sqlalchemy.Date: (sqlalchemy.String, sqlalchemy.Text), } # add dialect specific types try: - compatibility[sqlalchemy.JSON] = (sqlalchemy.dialects.postgresql.json.JSON,) + compatibility[sqlalchemy.JSON + ] = (sqlalchemy.dialects.postgresql.json.JSON,) except AttributeError: pass try: - compatibility[sqlalchemy.Boolean] += (sqlalchemy.dialects.mssql.base.BIT,) + compatibility[sqlalchemy.Boolean + ] += (sqlalchemy.dialects.mssql.base.BIT,) except AttributeError: pass @@ -439,9 +495,11 @@ def strict_types_compatibility_check(self, source_type, dest_type, val): def least_upper_bound(self, source_type, dest_type): """ - Returns the _coercion_ least uppper bound. + Returns the _coercion_ least upper bound. + Mostly just promotes everything to string if it is not already. - In fact, since this is only called when they are incompatible, it promotes to string right away. + In fact, since this is only called when they are incompatible, + it promotes to string right away. """ # FIXME: Don't be so silly @@ -453,21 +511,30 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): if not table_name in self.metadata.tables: if self.strict_types: - create_sql = sqlalchemy.schema.CreateTable(sqlalchemy.Table( - table_name, - sqlalchemy.MetaData(), - *self._get_columns_for_data(row_dict, data_type_dict) - )).compile(self.connection.engine) - logger.warning("Table '{table_name}' does not exist. Creating table with:\n{schema}".format( - table_name=table_name, - schema=create_sql - )) - empty_cols = [name for name, val in row_dict.items() - if val is None and name not in data_type_dict] + create_sql = sqlalchemy.schema.CreateTable( + sqlalchemy.Table( + table_name, sqlalchemy.MetaData(), + *self._get_columns_for_data(row_dict, data_type_dict) + ) + ).compile(self.connection.engine) + logger.warning( + f"Table '{table_name}' does not exist. 
Creating table " + f"with:\n{create_sql}" + ) + empty_cols = [ + name for name, val in row_dict.items() + if val is None and name not in data_type_dict + ] if empty_cols: - logger.warning("This schema does not include the following columns since we are unable " - "to determine the column type at this stage: {}".format(empty_cols)) - op.create_table(table_name, *self._get_columns_for_data(row_dict, data_type_dict)) + logger.warning( + "This schema does not include the following columns " + "since we are unable to determine the column type at " + f"this stage: {empty_cols}" + ) + op.create_table( + table_name, + *self._get_columns_for_data(row_dict, data_type_dict) + ) self.metadata.clear() self.metadata.reflect() return @@ -482,9 +549,13 @@ def get_current_table_columns(): continue ty = self.get_data_type(data_type_dict[column], val) - if not column in columns: - logger.warning("Adding column '{}.{} {}'".format(table_name, column, ty)) - op.add_column(table_name, sqlalchemy.Column(column, ty, nullable=True)) + if column not in columns: + logger.warning( + "Adding column '{}.{} {}'".format(table_name, column, ty) + ) + op.add_column( + table_name, sqlalchemy.Column(column, ty, nullable=True) + ) self.metadata.clear() self.metadata.reflect() columns = get_current_table_columns() @@ -492,36 +563,45 @@ def get_current_table_columns(): current_ty = columns[column].type new_type = None if self.strict_types: - # don't bother checking compatibility since we're not going to change anything - new_type = self.strict_types_compatibility_check(ty, current_ty, val) + # don't bother checking compatibility since we're + # not going to change anything + new_type = self.strict_types_compatibility_check( + ty, current_ty, val + ) elif not self.compatible(ty, current_ty): new_type = self.least_upper_bound(ty, current_ty) if new_type: - logger.warning('Altering column %s from %s to %s for value: "%s:%s"', columns[column], current_ty, new_type, type(val), val) + logger.warning( + 'Altering column %s from %s to %s for value: "%s:%s"', + columns[column], current_ty, new_type, type(val), val + ) op.alter_column(table_name, column, type_=new_type) self.metadata.clear() self.metadata.reflect() columns = get_current_table_columns() def upsert(self, table, row_dict): - # For atomicity "insert, catch, update" is slightly better than "select, insert or update". - # The latter may crash, while the former may overwrite data (which should be fine if whatever is - # racing against this is importing from the same source... if not you are busted anyhow - - # strip out values that are None since the column may not exist yet - row_dict = {col: val for col, val in row_dict.items() if val is not None} + # For atomicity "insert, catch, update" is slightly better than + # "select, insert or update". The latter may crash, while the + # former may overwrite data (which should be fine if whatever is + # racing against this is importing from the same source... 
if + # not you are busted anyhow + + # strip out values that are None since the column may not exist + # yet + row_dict = { + col: val for col, val in row_dict.items() if val is not None + } try: insert = table.insert().values(**row_dict) self.connection.execute(insert) except sqlalchemy.exc.IntegrityError: - update = table.update().where(table.c.id == row_dict['id']).values(**row_dict) + update = table.update().where(table.c.id == row_dict['id'] + ).values(**row_dict) self.connection.execute(update) - def write_table(self, table): - """ - :param table: a TableSpec - """ + def write_table(self, table: TableSpec) -> None: table_name = table.name headings = table.headings data_type_dict = dict(zip_longest(headings, table.data_types)) @@ -533,7 +613,12 @@ def write_table(self, table): def _get_columns_for_data(self, row_dict, data_type_dict): return [self.get_id_column()] + [ - sqlalchemy.Column(column_name, self.get_data_type(data_type_dict[column_name], val), nullable=True) + sqlalchemy.Column( + column_name, + self.get_data_type(data_type_dict[column_name], val), + nullable=True + ) for column_name, val in row_dict.items() - if (val is not None or data_type_dict[column_name]) and column_name != 'id' + if (val is not None or data_type_dict[column_name]) + and column_name != 'id' ] From d0f5f0664423aa2475e535c9ab36d9f3532fea78 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Fri, 22 Apr 2022 17:15:42 +0100 Subject: [PATCH 134/257] yapf commcare_export --- commcare_export/builtin_queries.py | 97 ++++-- commcare_export/checkpoint.py | 246 ++++++++++---- commcare_export/cli.py | 282 +++++++++++----- commcare_export/commcare_hq_client.py | 121 ++++--- commcare_export/commcare_minilinq.py | 143 +++++--- commcare_export/env.py | 216 +++++++----- commcare_export/excel_query.py | 380 +++++++++++++++------- commcare_export/exceptions.py | 28 +- commcare_export/jsonpath_utils.py | 4 +- commcare_export/location_info_provider.py | 27 +- commcare_export/map_format.py | 20 +- commcare_export/minilinq.py | 328 ++++++++++++------- commcare_export/misc.py | 4 +- commcare_export/repeatable_iterator.py | 8 +- commcare_export/specs.py | 2 - commcare_export/utils.py | 28 +- commcare_export/utils_cli.py | 57 ++-- commcare_export/version.py | 4 +- commcare_export/writers.py | 5 +- 19 files changed, 1334 insertions(+), 666 deletions(-) diff --git a/commcare_export/builtin_queries.py b/commcare_export/builtin_queries.py index beadc5d3..f47debde 100644 --- a/commcare_export/builtin_queries.py +++ b/commcare_export/builtin_queries.py @@ -11,6 +11,7 @@ class Column: + def __init__(self, name, source, map_function=None, *extra_args): self.name = Literal(name) self.source = source @@ -22,22 +23,27 @@ def mapped_source_field(self): if not self.map_function: return Reference(self.source) else: - return Apply(Reference(self.map_function), Reference(self.source), - *self.extra_args) + return Apply( + Reference(self.map_function), Reference(self.source), + *self.extra_args + ) def compile_query(columns, data_source, table_name): - source = Apply(Reference('api_data'), Literal(data_source), - Reference('checkpoint_manager')) - part = excel_query.SheetParts(table_name, [c.name for c in columns], source, - List([c.mapped_source_field for c in columns]), - None) + source = Apply( + Reference('api_data'), Literal(data_source), + Reference('checkpoint_manager') + ) + part = excel_query.SheetParts( + table_name, [c.name for c in columns], source, + List([c.mapped_source_field for c in columns]), None + ) return 
excel_query.compile_queries([part], None, False)[0] -# A MiniLinq query for internal CommCare user table. -# It reads every field produced by the /user/ API endpoint and -# writes the data to a table named "commcare_users" in a database. +# A MiniLinq query for internal CommCare user table. It reads every +# field produced by the /user/ API endpoint and writes the data to a +# table named "commcare_users" in a database. user_columns = [ Column('id', 'id'), @@ -50,26 +56,29 @@ def compile_query(columns, data_source, table_name): Column('resource_uri', 'resource_uri'), Column('commcare_location_id', 'user_data.commcare_location_id'), Column('commcare_location_ids', 'user_data.commcare_location_ids'), - Column('commcare_primary_case_sharing_id', - 'user_data.commcare_primary_case_sharing_id'), + Column( + 'commcare_primary_case_sharing_id', + 'user_data.commcare_primary_case_sharing_id' + ), Column('commcare_project', 'user_data.commcare_project'), Column('username', 'username') ] users_query = compile_query(user_columns, 'user', USERS_TABLE_NAME) +# A MiniLinq query for internal CommCare location table. It reads every +# field produced by the /location/ API endpoint and appends fields to +# hold parent locations using location_type information before writing +# the data to a table named "commcare_locations" in a database. -# A MiniLinq query for internal CommCare location table. -# It reads every field produced by the /location/ API endpoint and -# appends fields to hold parent locations using location_type information -# before writing the data to a table named "commcare_locations" in a database. def get_locations_query(lp): location_types = lp.location_types - # For test stability and clarity, we order location types from deepest - # to shallowest. + # For test stability and clarity, we order location types from + # deepest to shallowest. depth = {} + def set_depth(lt): if lt not in depth: parent = location_types[lt]['parent'] @@ -82,12 +91,15 @@ def set_depth(lt): for lt in location_types: set_depth(lt) - ordered_location_types = sorted(location_types.values(), - key=lambda lt: -depth[lt['resource_uri']]) + ordered_location_types = sorted( + location_types.values(), + key=lambda lt: -depth[lt['resource_uri']] + ) location_codes = [lt['code'] for lt in ordered_location_types] # The input names are codes produced by Django's slugify utility - # method. Replace hyphens with underscores to be easier to use in SQL. + # method. Replace hyphens with underscores to be easier to use in + # SQL. 
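+    # For example, a hypothetical slugified code 'health-center'
+    # becomes the SQL-friendly column name 'health_center'.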
def sql_column_name(code): return re.sub('-', '_', code) @@ -106,24 +118,37 @@ def sql_column_name(code): Column('parent', 'parent'), Column('resource_uri', 'resource_uri'), Column('site_code', 'site_code'), - Column('location_type_administrative', 'location_type', - 'get_location_info', Literal('administrative')), - Column('location_type_code', 'location_type', - 'get_location_info', Literal('code')), - Column('location_type_name', 'location_type', - 'get_location_info', Literal('name')), - Column('location_type_parent', 'location_type', - 'get_location_info', Literal('parent')), - ] + [Column(sql_column_name(code), - 'resource_uri', 'get_location_ancestor', - Literal(code)) for code in location_codes] - return compile_query(location_columns, 'location', - LOCATIONS_TABLE_NAME) + Column( + 'location_type_administrative', 'location_type', + 'get_location_info', Literal('administrative') + ), + Column( + 'location_type_code', 'location_type', 'get_location_info', + Literal('code') + ), + Column( + 'location_type_name', 'location_type', 'get_location_info', + Literal('name') + ), + Column( + 'location_type_parent', 'location_type', 'get_location_info', + Literal('parent') + ), + ] + [ + Column( + sql_column_name(code), 'resource_uri', 'get_location_ancestor', + Literal(code) + ) for code in location_codes + ] + return compile_query(location_columns, 'location', LOCATIONS_TABLE_NAME) + # Require specified columns in emitted tables. class ColumnEnforcer(): - columns_to_require = {'form': Column('commcare_userid', '$.metadata.userID'), - 'case': Column('commcare_userid', '$.user_id')} + columns_to_require = { + 'form': Column('commcare_userid', '$.metadata.userID'), + 'case': Column('commcare_userid', '$.user_id') + } def __init__(self): self._emitted_tables = set([]) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 6b5c92e1..5f615a74 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -38,8 +38,10 @@ class Checkpoint(Base): pagination_mode = Column(String) def get_pagination_mode(self): - """Get Enum from value stored in the checkpoint. Null or empty value defaults to - 'date_modified' mode to support legacy checkpoints. + """ + Get Enum from value stored in the checkpoint. Null or empty + value defaults to 'date_modified' mode to support legacy + checkpoints. """ if not self.pagination_mode: return PaginationMode.date_modified @@ -67,7 +69,9 @@ def __repr__(self): @contextmanager def session_scope(Session): - """Provide a transactional scope around a series of operations.""" + """ + Provide a transactional scope around a series of operations. 
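+
+    A typical use in this module (illustrative sketch):
+
+        with session_scope(self.Session) as session:
+            session.add(checkpoint)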
+ """ session = Session() try: yield session @@ -83,9 +87,22 @@ class CheckpointManager(SqlMixin): table_name = 'commcare_export_runs' migrations_repository = os.path.join(repo_root, 'migrations') - def __init__(self, db_url, query, query_md5, project, commcare, - key=None, table_names=None, poolclass=None, engine=None, data_source=None): - super(CheckpointManager, self).__init__(db_url, poolclass=poolclass, engine=engine) + def __init__( + self, + db_url, + query, + query_md5, + project, + commcare, + key=None, + table_names=None, + poolclass=None, + engine=None, + data_source=None + ): + super(CheckpointManager, self).__init__( + db_url, poolclass=poolclass, engine=engine + ) self.query = query self.query_md5 = query_md5 self.project = project @@ -97,27 +114,56 @@ def __init__(self, db_url, query, query_md5, project, commcare, def for_dataset(self, data_source, table_names): return CheckpointManager( - self.db_url, self.query, self.query_md5, self.project, self.commcare, self.key, - engine=self.engine, table_names=table_names, data_source=data_source + self.db_url, + self.query, + self.query_md5, + self.project, + self.commcare, + self.key, + engine=self.engine, + table_names=table_names, + data_source=data_source ) - def set_checkpoint(self, checkpoint_time, pagination_mode, is_final=False, doc_id=None): - self._set_checkpoint(checkpoint_time, pagination_mode, is_final, doc_id=doc_id) + def set_checkpoint( + self, + checkpoint_time, + pagination_mode, + is_final=False, + doc_id=None, + ): + self._set_checkpoint( + checkpoint_time, + pagination_mode, + is_final, + doc_id=doc_id, + ) if is_final: self._cleanup() - def _set_checkpoint(self, checkpoint_time, pagination_mode, final, time_of_run=None, doc_id=None): + def _set_checkpoint( + self, + checkpoint_time, + pagination_mode, + final, + time_of_run=None, + doc_id=None + ): logger.info( - 'Setting %s checkpoint: data_source: %s, tables: %s, pagination_mode: %s, checkpoint: %s:%s', + 'Setting %s checkpoint: data_source: %s, tables: %s, ' + 'pagination_mode: %s, checkpoint: %s:%s', + 'final' if final else 'batch', self.data_source, ', '.join(self.table_names), pagination_mode.name, checkpoint_time, - doc_id + doc_id, ) if not checkpoint_time: - raise DataExportException('Tried to set an empty checkpoint. This is not allowed.') + raise DataExportException( + 'Tried to set an empty checkpoint. This is not allowed.' 
+ ) self._validate_tables() if isinstance(checkpoint_time, str): @@ -137,7 +183,8 @@ def _set_checkpoint(self, checkpoint_time, pagination_mode, final, time_of_run=N project=self.project, commcare=self.commcare, since_param=since_param, - time_of_run=time_of_run or datetime.datetime.utcnow().isoformat(), + time_of_run=time_of_run + or datetime.datetime.utcnow().isoformat(), final=final, data_source=self.data_source, last_doc_id=doc_id, @@ -149,7 +196,9 @@ def _set_checkpoint(self, checkpoint_time, pagination_mode, final, time_of_run=N def create_checkpoint_table(self, revision='head'): from alembic import command, config - cfg = config.Config(os.path.join(self.migrations_repository, 'alembic.ini')) + cfg = config.Config( + os.path.join(self.migrations_repository, 'alembic.ini') + ) cfg.set_main_option('script_location', self.migrations_repository) with self.engine.begin() as connection: cfg.attributes['connection'] = connection @@ -159,65 +208,91 @@ def _cleanup(self): self._validate_tables() with session_scope(self.Session) as session: session.query(Checkpoint).filter_by( - final=False, query_file_md5=self.query_md5, - project=self.project, commcare=self.commcare - ).filter(Checkpoint.table_name.in_(self.table_names)).delete(synchronize_session='fetch') + final=False, + query_file_md5=self.query_md5, + project=self.project, + commcare=self.commcare + ).filter(Checkpoint.table_name.in_(self.table_names) + ).delete(synchronize_session='fetch') def get_time_of_last_checkpoint(self, log_warnings=True): - """Return the earliest time from the list of checkpoints that for the current - query file / key.""" + """ + Return the earliest time from the list of checkpoints that for + the current query file / key. + """ run = self.get_last_checkpoint() if run and log_warnings: self.log_warnings(run) return run.since_param if run else None def get_last_checkpoint(self): - """Return a single checkpoint such that it has the earliest `since_param` of all - checkpoints for the active tables.""" + """ + Return a single checkpoint such that it has the earliest + `since_param` of all checkpoints for the active tables. 
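+
+    For example (illustrative values): if 'table_a' last ran at
+    2020-01-05 and 'table_b' at 2020-01-02, the 'table_b' checkpoint
+    is returned, so no table misses data when resuming.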
+ """ self._validate_tables() table_runs = [] with session_scope(self.Session) as session: for table in self.table_names: if self.key: table_run = self._get_last_checkpoint( - session, table_name=table, - key=self.key, project=self.project, commcare=self.commcare + session, + table_name=table, + key=self.key, + project=self.project, + commcare=self.commcare ) else: table_run = self._get_last_checkpoint( - session, table_name=table, - query_file_md5=self.query_md5, project=self.project, commcare=self.commcare, key=self.key + session, + table_name=table, + query_file_md5=self.query_md5, + project=self.project, + commcare=self.commcare, + key=self.key ) if table_run: - table_runs.append(table_run) + table_runs.append(table_run) if not table_runs: table_runs = self.get_legacy_checkpoints() if table_runs: - sorted_runs = list(sorted(table_runs, key=attrgetter('time_of_run'))) + sorted_runs = list( + sorted(table_runs, key=attrgetter('time_of_run')) + ) return sorted_runs[0] def get_legacy_checkpoints(self): with session_scope(self.Session) as session: # check without table_name table_run = self._get_last_checkpoint( - session, query_file_md5=self.query_md5, table_name=None, - project=self.project, commcare=self.commcare, key=self.key + session, + query_file_md5=self.query_md5, + table_name=None, + project=self.project, + commcare=self.commcare, + key=self.key ) if table_run: return self._set_checkpoint( - table_run.since_param, PaginationMode.date_modified, table_run.final, table_run.time_of_run + table_run.since_param, PaginationMode.date_modified, + table_run.final, table_run.time_of_run ) # Check for run without the args table_run = self._get_last_checkpoint( - session, query_file_md5=self.query_md5, key=self.key, - project=None, commcare=None, table_name=None + session, + query_file_md5=self.query_md5, + key=self.key, + project=None, + commcare=None, + table_name=None ) if table_run: return self._set_checkpoint( - table_run.since_param, PaginationMode.date_modified, table_run.final, table_run.time_of_run + table_run.since_param, PaginationMode.date_modified, + table_run.final, table_run.time_of_run ) def _get_last_checkpoint(self, session, **kwarg_filters): @@ -226,8 +301,7 @@ def _get_last_checkpoint(self, session, **kwarg_filters): query = query.filter_by(**kwarg_filters) return query.order_by(Checkpoint.time_of_run.desc()).first() - def log_warnings(self, run): - # type: (Checkpoint) -> None + def log_warnings(self, run: Checkpoint) -> None: md5_mismatch = run.query_file_md5 != self.query_md5 name_mismatch = run.query_file_name != self.query if md5_mismatch or name_mismatch: @@ -235,12 +309,13 @@ def log_warnings(self, run): "Query differs from most recent checkpoint:\n" "From checkpoint: name=%s, md5=%s\n" "From command line args: name=%s, md5=%s\n", - run.query_file_name, run.query_file_md5, - self.query, self.query_md5 + run.query_file_name, run.query_file_md5, self.query, + self.query_md5 ) def list_checkpoints(self, limit=20): - """List all checkpoints filtered by: + """ + List all checkpoints filtered by: * file name * project * commcare @@ -264,24 +339,34 @@ def _filter_query(self, query): return query def get_latest_checkpoints(self): - """Returns the latest checkpoint for each table filtered by the fields set in the manager: + """ + Returns the latest checkpoint for each table filtered by the + fields set in the manager: * query_md5 * project * commcare * key """ with session_scope(self.Session) as session: - cols = [Checkpoint.project, Checkpoint.commcare, 
Checkpoint.query_file_md5, Checkpoint.table_name] + cols = [ + Checkpoint.project, Checkpoint.commcare, + Checkpoint.query_file_md5, Checkpoint.table_name + ] inner_query = self._filter_query( session.query( - *(cols + [func.max(Checkpoint.time_of_run).label('max_time_of_run')]) - ) - .filter(Checkpoint.query_file_md5 == self.query_md5) - .filter(Checkpoint.table_name.isnot(None)) + *( + cols + [ + func.max(Checkpoint.time_of_run + ).label('max_time_of_run') + ] + ) + ).filter(Checkpoint.query_file_md5 == self.query_md5 + ).filter(Checkpoint.table_name.isnot(None)) ).group_by(*cols).subquery() query = session.query(Checkpoint).join( - inner_query, and_( + inner_query, + and_( Checkpoint.project == inner_query.c.project, Checkpoint.commcare == inner_query.c.commcare, Checkpoint.query_file_md5 == inner_query.c.query_file_md5, @@ -294,15 +379,23 @@ def get_latest_checkpoints(self): # Keeping for future reference # # window_func = func.row_number().over( - # partition_by=Checkpoint.table_name, order_by=Checkpoint.time_of_run.desc() + # partition_by=Checkpoint.table_name, + # order_by=Checkpoint.time_of_run.desc() # ).label("row_number") - # inner_query = self._filter_query(session.query(Checkpoint, window_func)) - # inner_query = inner_query.filter(Checkpoint.query_file_md5 == self.query_md5) - # inner_query = inner_query.filter(Checkpoint.table_name.isnot(None)).subquery() + # inner_query = self._filter_query( + # session.query(Checkpoint, window_func) + # ) + # inner_query = inner_query.filter( + # Checkpoint.query_file_md5 == self.query_md5 + # ) + # inner_query = inner_query.filter( + # Checkpoint.table_name.isnot(None) + # ).subquery() # # query = session.query(Checkpoint).select_entity_from(inner_query)\ # .filter(inner_query.c.row_number == 1)\ # .order_by(Checkpoint.table_name.asc()) + # return list(query) def update_checkpoint(self, run): @@ -315,6 +408,7 @@ def _validate_tables(self): class CheckpointManagerWithDetails(object): + def __init__(self, manager, since_param, pagination_mode): self.manager = manager self.since_param = since_param @@ -322,11 +416,19 @@ def __init__(self, manager, since_param, pagination_mode): def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None): if self.manager: - self.manager.set_checkpoint(checkpoint_time, self.pagination_mode, is_final, doc_id=doc_id) + self.manager.set_checkpoint( + checkpoint_time, self.pagination_mode, is_final, doc_id=doc_id + ) class CheckpointManagerProvider(object): - def __init__(self, base_checkpoint_manager=None, since=None, start_over=None): + + def __init__( + self, + base_checkpoint_manager=None, + since=None, + start_over=None, + ): self.start_over = start_over self.since = since self.base_checkpoint_manager = base_checkpoint_manager @@ -343,8 +445,10 @@ def get_since(self, checkpoint_manager): return dateutil.parser.parse(since) if since else None def get_pagination_mode(self, checkpoint_manager): - """Always use the default pagination mode unless we are continuing from - a previous checkpoint in which case use the same pagination mode as before. + """ + Always use the default pagination mode unless we are continuing + from a previous checkpoint in which case use the same pagination + mode as before. 
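+
+    Decision sketch (names taken from the code below):
+
+        --start-over or --since given -> PaginationMode.date_indexed
+        no previous checkpoint found  -> PaginationMode.date_indexed
+        otherwise                     -> last_checkpoint.get_pagination_mode()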
""" if self.start_over or self.since or not checkpoint_manager: return PaginationMode.date_indexed @@ -356,31 +460,43 @@ def get_pagination_mode(self, checkpoint_manager): return last_checkpoint.get_pagination_mode() def get_checkpoint_manager(self, data_source, table_names): - """This get's called before each table is exported and set in the `env`. It is then - passed to the API client and used to set the checkpoints. + """ + This get's called before each table is exported and set in the + `env`. It is then passed to the API client and used to set the + checkpoints. :param data_source: Data source for this checkout e.g. 'form' - :param table_names: List of table names being exported to. This is a list since - multiple tables can be processed by a since API query. + :param table_names: List of table names being exported to. This + is a list since multiple tables can be processed by a + 'since' API query. """ manager = None if self.base_checkpoint_manager: - manager = self.base_checkpoint_manager.for_dataset(data_source, table_names) + manager = self.base_checkpoint_manager.for_dataset( + data_source, table_names + ) since = self.get_since(manager) pagination_mode = self.get_pagination_mode(manager) logger.info( - "Creating checkpoint manager for tables: %s, since: %s, pagination_mode: %s", - ', '.join(table_names), since, pagination_mode.name + "Creating checkpoint manager for tables: %s, since: %s, " + "pagination_mode: %s", + + ', '.join(table_names), + since, + pagination_mode.name, ) if pagination_mode != PaginationMode.date_indexed: logger.warning( "\n====================================\n" - "This export is using a deprecated pagination mode which will be removed in future versions.\n" - "To switch to the new mode you must re-sync your data using `--start-over`.\n" - "For more details see: %s" + "This export is using a deprecated pagination mode which will " + "be removed in\n" + "future versions. To switch to the new mode you must re-sync " + "your data using\n" + "`--start-over`. For more details see: %s" "\n====================================\n", + "https://github.com/dimagi/commcare-export/releases/tag/1.5.0" ) return CheckpointManagerWithDetails(manager, since, pagination_mode) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 31ca105d..bb70ee14 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -41,6 +41,7 @@ class Argument(object): + def __init__(self, name, *args, **kwargs): self.name = name.replace('-', '_') self._args = ['--{}'.format(name)] + list(args) @@ -56,51 +57,124 @@ def add_to_parser(self, parser, **additional_kwargs): CLI_ARGS = [ - Argument('version', default=False, action='store_true', - help='Print the current version of the commcare-export tool.'), - Argument('query', required=False, help='JSON or Excel query file'), - Argument('dump-query', default=False, action='store_true'), - Argument('commcare-hq', default='prod', - help='Base url for the CommCare HQ instance e.g. https://www.commcarehq.org'), - Argument('api-version', default=LATEST_KNOWN_VERSION), - Argument('project'), - Argument('username'), - Argument('password', help='Enter password, or if using apikey auth-mode, enter the api key.'), - Argument('auth-mode', default='password', choices=['password', 'apikey'], - help='Use "digest" auth, or "apikey" auth (for two factor enabled domains).'), - Argument('since', help='Export all data after this date. Format YYYY-MM-DD or YYYY-MM-DDTHH:mm:SS'), - Argument('until', help='Export all data up until this date. 
Format YYYY-MM-DD or YYYY-MM-DDTHH:mm:SS'), - Argument('start-over', default=False, action='store_true', - help='When saving to a SQL database; the default is to pick up since the last success. This disables that.'), - Argument('profile'), - Argument('verbose', default=False, action='store_true'), - Argument('output-format', default='json', choices=['json', 'csv', 'xls', 'xlsx', 'sql', 'markdown'], - help='Output format'), - Argument('output', metavar='PATH', default='reports.zip', help='Path to output; defaults to `reports.zip`.'), - Argument('strict-types', default=False, action='store_true', - help="When saving to a SQL database don't allow changing column types once they are created."), - Argument('missing-value', default=None, help="Value to use when a field is missing from the form / case."), - Argument('batch-size', default=200, help="Number of records to process per batch."), - Argument('checkpoint-key', help="Use this key for all checkpoints instead of the query file MD5 hash " - "in order to prevent table rebuilds after a query file has been edited."), - Argument('users', default=False, action='store_true', - help="Export a table containing data about this project's " - "mobile workers"), - Argument('locations', default=False, action='store_true', - help="Export a table containing data about this project's " - "locations"), - Argument('with-organization', default=False, action='store_true', - help="Export tables containing mobile worker data and " - "location data and add a commcare_userid field to any " - "exported form or case"), - Argument('export-root-if-no-subdocument', default=False, action='store_true', help=( - "Use this when you are exporting a nested document e.g. form.form..case, messaging-event.messages.[*]" - " And you want to have a record exported even if the nested document does not exist or is empty.")) - ] + Argument( + 'version', + default=False, + action='store_true', + help='Print the current version of the commcare-export tool.' + ), + Argument('query', required=False, help='JSON or Excel query file'), + Argument('dump-query', default=False, action='store_true'), + Argument( + 'commcare-hq', + default='prod', + help='Base url for the CommCare HQ instance e.g. ' + 'https://www.commcarehq.org' + ), + Argument('api-version', default=LATEST_KNOWN_VERSION), + Argument('project'), + Argument('username'), + Argument( + 'password', + help='Enter password, or if using apikey auth-mode, enter the api key.' + ), + Argument( + 'auth-mode', + default='password', + choices=['password', 'apikey'], + help='Use "digest" auth, or "apikey" auth (for two factor enabled ' + 'domains).' + ), + Argument( + 'since', + help='Export all data after this date. Format YYYY-MM-DD or ' + 'YYYY-MM-DDTHH:mm:SS' + ), + Argument( + 'until', + help='Export all data up until this date. Format YYYY-MM-DD or ' + 'YYYY-MM-DDTHH:mm:SS' + ), + Argument( + 'start-over', + default=False, + action='store_true', + help='When saving to a SQL database; the default is to pick up ' + 'since the last success. This disables that.' + ), + Argument('profile'), + Argument('verbose', default=False, action='store_true'), + Argument( + 'output-format', + default='json', + choices=['json', 'csv', 'xls', 'xlsx', 'sql', 'markdown'], + help='Output format' + ), + Argument( + 'output', + metavar='PATH', + default='reports.zip', + help='Path to output; defaults to `reports.zip`.' 
+ ), + Argument( + 'strict-types', + default=False, + action='store_true', + help="When saving to a SQL database don't allow changing column types " + "once they are created." + ), + Argument( + 'missing-value', + default=None, + help="Value to use when a field is missing from the form / case." + ), + Argument( + 'batch-size', + default=200, + help="Number of records to process per batch." + ), + Argument( + 'checkpoint-key', + help="Use this key for all checkpoints instead of the query file MD5 " + "hash in order to prevent table rebuilds after a query file has " + "been edited." + ), + Argument( + 'users', + default=False, + action='store_true', + help="Export a table containing data about this project's mobile " + "workers" + ), + Argument( + 'locations', + default=False, + action='store_true', + help="Export a table containing data about this project's locations" + ), + Argument( + 'with-organization', + default=False, + action='store_true', + help="Export tables containing mobile worker data and location data " + "and add a commcare_userid field to any exported form or case" + ), + Argument( + 'export-root-if-no-subdocument', + default=False, + action='store_true', + help="Use this when you are exporting a nested document e.g. " + "form.form..case, messaging-event.messages.[*] And you want to " + "have a record exported even if the nested document does not " + "exist or is empty.", + ) +] def main(argv): - parser = argparse.ArgumentParser('commcare-export', 'Output a customized export of CommCareHQ data.') + parser = argparse.ArgumentParser( + 'commcare-export', 'Output a customized export of CommCareHQ data.' + ) for arg in CLI_ARGS: arg.add_to_parser(parser) @@ -111,16 +185,24 @@ def main(argv): try: arg.encode('utf-8') except UnicodeDecodeError: - print(u"ERROR: Argument '%s' contains unicode characters. " - u"Only ASCII characters are supported.\n" % unicode(arg, 'utf-8'), file=sys.stderr) + print( + u"ERROR: Argument '%s' contains unicode characters. 
" + u"Only ASCII characters are supported.\n" + % unicode(arg, 'utf-8'), + file=sys.stderr + ) sys.exit(1) if args.verbose: - logging.basicConfig(level=logging.DEBUG, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + ) else: - logging.basicConfig(level=logging.WARN, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + logging.basicConfig( + level=logging.WARN, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + ) logging.getLogger('alembic').setLevel(logging.WARN) logging.getLogger('backoff').setLevel(logging.FATAL) @@ -131,7 +213,10 @@ def main(argv): exit(0) if not args.project: - print('commcare-export: error: argument --project is required', file=sys.stderr) + print( + 'commcare-export: error: argument --project is required', + file=sys.stderr + ) exit(1) if args.profile: @@ -154,32 +239,29 @@ def main(argv): def _get_query(args, writer, column_enforcer=None): return _get_query_from_file( - args.query, - args.missing_value, - writer.supports_multi_table_write, - writer.max_column_length, - writer.required_columns, - column_enforcer, + args.query, args.missing_value, writer.supports_multi_table_write, + writer.max_column_length, writer.required_columns, column_enforcer, args.export_root_if_no_subdocument ) -def _get_query_from_file(query_arg, missing_value, combine_emits, - max_column_length, required_columns, column_enforcer, - value_or_root): +def _get_query_from_file( + query_arg, missing_value, combine_emits, max_column_length, + required_columns, column_enforcer, value_or_root +): if os.path.exists(query_arg): if os.path.splitext(query_arg)[1] in ['.xls', '.xlsx']: import openpyxl workbook = openpyxl.load_workbook(query_arg) return excel_query.get_queries_from_excel( - workbook, missing_value, combine_emits, - max_column_length, required_columns, column_enforcer, - value_or_root + workbook, missing_value, combine_emits, max_column_length, + required_columns, column_enforcer, value_or_root ) else: with io.open(query_arg, encoding='utf-8') as fh: return MiniLinq.from_jvalue(json.loads(fh.read())) + def get_queries(args, writer, lp, column_enforcer=None): query_list = [] if args.query is not None: @@ -207,22 +289,29 @@ def _get_writer(output_format, output, strict_types): return writers.Excel2003TableWriter(output) elif output_format == 'csv': if not output.endswith(".zip"): - print("WARNING: csv output is a zip file, but " - "will be written to %s" % output) - print("Consider appending .zip to the file name to avoid confusion.") + print( + "WARNING: csv output is a zip file, but " + "will be written to %s" % output + ) + print( + "Consider appending .zip to the file name to avoid confusion." + ) return writers.CsvTableWriter(output) elif output_format == 'json': return writers.JValueTableWriter() elif output_format == 'markdown': return writers.StreamingMarkdownTableWriter(sys.stdout) elif output_format == 'sql': - # Output should be a connection URL - # Writer had bizarre issues so we use a full connection instead of passing in a URL or engine + # Output should be a connection URL. Writer had bizarre issues + # so we use a full connection instead of passing in a URL or + # engine. if output.startswith('mysql'): charset_split = output.split('charset=') if len(charset_split) > 1 and charset_split[1] != 'utf8mb4': - raise Exception(f"The charset '{charset_split[1]}' might cause problems with the export. 
" - f"It is recommended that you use 'utf8mb4' instead.") + raise Exception( + f"The charset '{charset_split[1]}' might cause problems with the export. " + f"It is recommended that you use 'utf8mb4' instead." + ) return writers.SqlTableWriter(output, strict_types) else: @@ -247,11 +336,17 @@ def _get_api_client(args, commcarehq_base_url): def _get_checkpoint_manager(args): - if not args.users and not args.locations and not os.path.exists(args.query): - logger.warning("Checkpointing disabled for non builtin, " - "non file-based query") + if not args.users and not args.locations and not os.path.exists( + args.query + ): + logger.warning( + "Checkpointing disabled for non builtin, " + "non file-based query" + ) elif args.since or args.until: - logger.warning("Checkpointing disabled when using '--since' or '--until'") + logger.warning( + "Checkpointing disabled when using '--since' or '--until'" + ) else: checkpoint_manager = get_checkpoint_manager(args) checkpoint_manager.create_checkpoint_table() @@ -275,7 +370,10 @@ def evaluate_query(env, query): return 0 except requests.exceptions.RequestException as e: if e.response and e.response.status_code == 401: - print("\nAuthentication failed. Please check your credentials.", file=sys.stderr) + print( + "\nAuthentication failed. Please check your credentials.", + file=sys.stderr + ) return EXIT_STATUS_ERROR else: raise @@ -284,8 +382,10 @@ def evaluate_query(env, query): print(e.message) print('Try increasing --batch-size to overcome the error') return EXIT_STATUS_ERROR - except (sqlalchemy.exc.DataError, sqlalchemy.exc.InternalError, - sqlalchemy.exc.ProgrammingError) as e: + except ( + sqlalchemy.exc.DataError, sqlalchemy.exc.InternalError, + sqlalchemy.exc.ProgrammingError + ) as e: print('Stopping because of database error:\n', e) return EXIT_STATUS_ERROR except KeyboardInterrupt: @@ -298,8 +398,11 @@ def main_with_args(args): writer = _get_writer(args.output_format, args.output, args.strict_types) if args.query is None and args.users is False and args.locations is False: - print('At least one the following arguments is required: ' - '--query, --users, --locations', file=sys.stderr) + print( + 'At least one the following arguments is required: ' + '--query, --users, --locations', + file=sys.stderr + ) return EXIT_STATUS_ERROR if not args.username: @@ -313,7 +416,9 @@ def main_with_args(args): if args.with_organization: column_enforcer = builtin_queries.ColumnEnforcer() - commcarehq_base_url = commcare_hq_aliases.get(args.commcare_hq, args.commcare_hq) + commcarehq_base_url = commcare_hq_aliases.get( + args.commcare_hq, args.commcare_hq + ) api_client = _get_api_client(args, commcarehq_base_url) lp = LocationInfoProvider(api_client, page_size=args.batch_size) try: @@ -333,7 +438,10 @@ def main_with_args(args): since, until = get_date_params(args) if args.start_over: if checkpoint_manager: - logger.warning('Ignoring all checkpoints and re-fetching all data from CommCare.') + logger.warning( + 'Ignoring all checkpoints and re-fetching all data from ' + 'CommCare.' 
+ ) elif since: logger.debug('Starting from %s', args.since) @@ -345,16 +453,22 @@ def main_with_args(args): 'get_location_ancestor': lp.get_location_ancestor } env = ( - BuiltInEnv(static_env) - | CommCareHqEnv(api_client, until=until, page_size=args.batch_size) - | JsonPathEnv({}) - | EmitterEnv(writer) + BuiltInEnv(static_env) + | CommCareHqEnv(api_client, until=until, page_size=args.batch_size) + | JsonPathEnv({}) + | EmitterEnv(writer) ) exit_status = evaluate_query(env, query) if args.output_format == 'json': - print(json.dumps(list(writer.tables.values()), indent=4, default=default_to_json)) + print( + json.dumps( + list(writer.tables.values()), + indent=4, + default=default_to_json + ) + ) return exit_status diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 1c26c919..53ddb2d3 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -22,11 +22,10 @@ AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' - logger = logging.getLogger(__name__) -LATEST_KNOWN_VERSION='0.5' -RESOURCE_REPEAT_LIMIT=10 +LATEST_KNOWN_VERSION = '0.5' +RESOURCE_REPEAT_LIMIT = 10 def on_backoff(details): @@ -39,7 +38,10 @@ def on_giveup(details): def _log_backoff(details, action_message): details['__suffix'] = action_message - logger.warning("Request failed after {tries} attempts ({elapsed:.1f}s). {__suffix}".format(**details)) + logger.warning( + "Request failed after {tries} attempts ({elapsed:.1f}s). {__suffix}" + .format(**details) + ) def is_client_error(ex): @@ -53,6 +55,7 @@ def is_client_error(ex): class ResourceRepeatException(Exception): + def __init__(self, message): self.message = message @@ -65,8 +68,16 @@ class CommCareHqClient(object): A connection to CommCareHQ for a particular version, project, and user. """ - def __init__(self, url, project, username, password, - auth_mode=AUTH_MODE_PASSWORD, version=LATEST_KNOWN_VERSION, checkpoint_manager=None): + def __init__( + self, + url, + project, + username, + password, + auth_mode=AUTH_MODE_PASSWORD, + version=LATEST_KNOWN_VERSION, + checkpoint_manager=None + ): self.version = version self.url = url self.project = project @@ -86,7 +97,7 @@ def session(self): if self.__session == None: self.__session = requests.Session() self.__session.headers.update({ - 'User-Agent': 'commcare-export/%s' % commcare_export.__version__ + 'User-Agent': f'commcare-export/{commcare_export.__version__}' }) return self.__session @@ -100,31 +111,43 @@ def api_url(self): return '%s/a/%s/api/v%s' % (self.url, self.project, self.version) @backoff.on_exception( - backoff.expo, requests.exceptions.RequestException, - max_time=300, giveup=is_client_error, - on_backoff=on_backoff, on_giveup=on_giveup + backoff.expo, + requests.exceptions.RequestException, + max_time=300, + giveup=is_client_error, + on_backoff=on_backoff, + on_giveup=on_giveup ) def get(self, resource, params=None): """ Gets the named resource. - Currently a bit of a vulnerable stub that works - for this particular use case in the hands of a trusted user; would likely + Currently a bit of a vulnerable stub that works for this + particular use case in the hands of a trusted user; would likely want this to work like (or via) slumber. 
""" logger.debug("Fetching '%s' batch: %s", resource, params) - resource_url = '%s/%s/' % (self.api_url, resource) - response = self.session.get(resource_url, params=params, auth=self.__auth, timeout=60) + resource_url = '{self.api_url}/{resource}/' + response = self.session.get( + resource_url, params=params, auth=self.__auth, timeout=60 + ) response.raise_for_status() return response.json() - - def iterate(self, resource, paginator, params=None, checkpoint_manager=None): + + def iterate( + self, + resource, + paginator, + params=None, + checkpoint_manager=None, + ): """ Assumes the endpoint is a list endpoint, and iterates over it making a lot of assumptions that it is like a tastypie endpoint. """ UNKNOWN_COUNT = 'unknown' params = dict(params or {}) + def iterate_resource(resource=resource, params=params): more_to_fetch = True last_batch_ids = set() @@ -139,7 +162,10 @@ def iterate_resource(resource=resource, params=params): else: repeat_counter = 0 if repeat_counter >= RESOURCE_REPEAT_LIMIT: - raise ResourceRepeatException("Requested resource '{}' {} times with same parameters".format(resource, repeat_counter)) + raise ResourceRepeatException( + f"Requested resource '{resource}' {repeat_counter} " + "times with same parameters" + ) batch = self.get(resource, params) last_params = copy.copy(params) @@ -173,16 +199,18 @@ def iterate_resource(resource=resource, params=params): limit = batch_meta.get('limit') if more_to_fetch: - # Handle the case where API is 'non-counting' and repeats the last batch + # Handle the case where API is 'non-counting' + # and repeats the last batch repeated_last_page_of_non_counting_resource = ( - not got_new_data - and total_count == UNKNOWN_COUNT + not got_new_data and total_count == UNKNOWN_COUNT and (limit and len(batch_objects) < limit) ) more_to_fetch = not repeated_last_page_of_non_counting_resource - self.checkpoint(checkpoint_manager, paginator, batch, not more_to_fetch) - + self.checkpoint( + checkpoint_manager, paginator, batch, not more_to_fetch + ) + return RepeatableIterator(iterate_resource) def checkpoint(self, checkpoint_manager, paginator, batch, is_final): @@ -194,19 +222,22 @@ def checkpoint(self, checkpoint_manager, paginator, batch, is_final): last_obj = batch['objects'][-1] except IndexError: last_obj = {} - checkpoint_manager.set_checkpoint(since_date, is_final, doc_id=last_obj.get("id", None)) + checkpoint_manager.set_checkpoint( + since_date, is_final, doc_id=last_obj.get("id", None) + ) else: - logger.warning('Failed to get a checkpoint date from a batch of data.') + logger.warning( + 'Failed to get a checkpoint date from a batch of data.' + ) class MockCommCareHqClient(object): """ - An in-memory mock of the hq client, instantiated - with a simple mapping of resource and params to results. + An in-memory mock of the hq client, instantiated with a simple + mapping of resource and params to results. - Since dictionaries are not hashable, the mapping is - written as a pair of tuples, handled appropriately - internally. + Since dictionaries are not hashable, the mapping is written as a + pair of tuples, handled appropriately internally. 
MockCommCareHqClient({ 'forms': [ @@ -218,28 +249,39 @@ class MockCommCareHqClient(object): ), ] }) - """ + """ + def __init__(self, mock_data): self.mock_data = { resource: { _params_to_url(params): result for params, result in resource_results - } - for resource, resource_results in mock_data.items() + } for resource, resource_results in mock_data.items() } - def iterate(self, resource, paginator, params=None, checkpoint_manager=None): - logger.debug('Mock client call to resource "%s" with params "%s"', resource, params) + def iterate( + self, resource, paginator, params=None, checkpoint_manager=None + ): + logger.debug( + 'Mock client call to resource "%s" with params "%s"', resource, + params + ) return self.mock_data[resource][_params_to_url(params)] def get(self, resource): logger.debug('Mock client call to get resource "%s"', resource) objects = self.mock_data[resource][_params_to_url({'get': True})] if objects: - return {'meta': {'limit': len(objects), 'next': None, - 'offset': 0, 'previous': None, - 'total_count': len(objects)}, - 'objects': objects} + return { + 'meta': { + 'limit': len(objects), + 'next': None, + 'offset': 0, + 'previous': None, + 'total_count': len(objects) + }, + 'objects': objects + } else: return None @@ -249,6 +291,7 @@ def _params_to_url(params): class ApiKeyAuth(AuthBase): + def __init__(self, username, apikey): self.username = username self.apikey = apikey @@ -266,5 +309,5 @@ def __ne__(self, other): return not self == other def __call__(self, r): - r.headers['Authorization'] = 'apikey %s:%s' % (self.username, self.apikey) + r.headers['Authorization'] = f'apikey {self.username}:{self.apikey}' return r diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 89af619a..9e990432 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -14,7 +14,8 @@ from commcare_export.misc import unwrap SUPPORTED_RESOURCES = { - 'form', 'case', 'user', 'location', 'application', 'web-user', 'messaging-event' + 'form', 'case', 'user', 'location', 'application', 'web-user', + 'messaging-event' } @@ -24,6 +25,7 @@ class PaginationMode(Enum): class SimpleSinceParams(object): + def __init__(self, start, end): self.start_param = start self.end_param = end @@ -38,6 +40,7 @@ def __call__(self, since, until): class FormFilterSinceParams(object): + def __call__(self, since, until): range_expression = {} if since: @@ -46,49 +49,56 @@ def __call__(self, since, until): if until: range_expression['lte'] = until.isoformat() - server_modified_missing = {"missing": { - "field": "server_modified_on", "null_value": True, "existence": True} + server_modified_missing = { + "missing": { + "field": "server_modified_on", + "null_value": True, + "existence": True + } } query = json.dumps({ 'filter': { - "or": [ - { - "and": [ - { - "not": server_modified_missing - }, - { - "range": { - "server_modified_on": range_expression - } - } - ] - }, - { - "and": [ - server_modified_missing, - { - "range": { - "received_on": range_expression - } + "or": [{ + "and": [{ + "not": server_modified_missing + }, { + "range": { + "server_modified_on": range_expression + } + }] + }, { + "and": [ + server_modified_missing, { + "range": { + "received_on": range_expression } - ] - } - ] - }}) + } + ] + }] + } + }) return {'_search': query} DATE_PARAMS = { - 'indexed_on': SimpleSinceParams('indexed_on_start', 'indexed_on_end'), - 'server_date_modified': SimpleSinceParams('server_date_modified_start', 'server_date_modified_end'), + 
'indexed_on': + SimpleSinceParams('indexed_on_start', 'indexed_on_end'), + 'server_date_modified': + SimpleSinceParams( + 'server_date_modified_start', 'server_date_modified_end' + ), # used by messaging-events - 'date_last_activity': SimpleSinceParams('date_last_activity.gte', 'date_last_activity.lt'), + 'date_last_activity': + SimpleSinceParams('date_last_activity.gte', 'date_last_activity.lt'), } -def get_paginator(resource, page_size=1000, pagination_mode=PaginationMode.date_indexed): +def get_paginator( + resource, + page_size=1000, + pagination_mode=PaginationMode.date_indexed, +): return { PaginationMode.date_indexed: { 'form': DatePaginator('indexed_on', page_size), @@ -96,9 +106,16 @@ def get_paginator(resource, page_size=1000, pagination_mode=PaginationMode.date_ 'messaging-event': DatePaginator('date_last_activity', page_size), }, PaginationMode.date_modified: { - 'form': DatePaginator(['server_modified_on', 'received_on'], page_size, params=FormFilterSinceParams()), - 'case': DatePaginator('server_date_modified', page_size), - 'messaging-event': DatePaginator('date_last_activity', page_size), + 'form': + DatePaginator( + ['server_modified_on', 'received_on'], + page_size, + params=FormFilterSinceParams(), + ), + 'case': + DatePaginator('server_date_modified', page_size), + 'messaging-event': + DatePaginator('date_last_activity', page_size), } }[pagination_mode].get(resource, SimplePaginator(page_size)) @@ -113,21 +130,31 @@ def __init__(self, commcare_hq_client, until=None, page_size=1000): self.commcare_hq_client = commcare_hq_client self.until = until self.page_size = page_size - super(CommCareHqEnv, self).__init__({ - 'api_data' : self.api_data - }) + super(CommCareHqEnv, self).__init__({'api_data': self.api_data}) @unwrap('checkpoint_manager') - def api_data(self, resource, checkpoint_manager, payload=None, include_referenced_items=None): + def api_data( + self, + resource, + checkpoint_manager, + payload=None, + include_referenced_items=None + ): if resource not in SUPPORTED_RESOURCES: raise ValueError('Unknown API resource "%s' % resource) - paginator = get_paginator(resource, self.page_size, checkpoint_manager.pagination_mode) + paginator = get_paginator( + resource, self.page_size, checkpoint_manager.pagination_mode + ) paginator.init(payload, include_referenced_items, self.until) - initial_params = paginator.next_page_params_since(checkpoint_manager.since_param) + initial_params = paginator.next_page_params_since( + checkpoint_manager.since_param + ) return self.commcare_hq_client.iterate( - resource, paginator, - params=initial_params, checkpoint_manager=checkpoint_manager + resource, + paginator, + params=initial_params, + checkpoint_manager=checkpoint_manager ) def bind(self, name, value): @@ -141,6 +168,7 @@ class SimplePaginator(object): """ Paginate based on the 'next' URL provided in the API response. 
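# ----------------------------------------------------------------------
# [Editor's note: illustrative usage, not part of the patch.]
# `get_paginator` above picks a date-based paginator keyed on resource
# and pagination mode, and falls back to a SimplePaginator for
# resources that have no date field to page on. A sketch of how
# `api_data` wires one up (argument values here are made up):

from commcare_export.commcare_minilinq import PaginationMode, get_paginator

paginator = get_paginator('case', 500, PaginationMode.date_indexed)
paginator.init(None, None, None)  # payload, include_referenced_items, until
params = paginator.next_page_params_since(None)  # params for the first page
# ----------------------------------------------------------------------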
""" + def __init__(self, page_size=1000, params=None): self.page_size = page_size self.params = params @@ -155,12 +183,13 @@ def next_page_params_since(self, since=None): params['limit'] = self.page_size if (since or self.until) and self.params: - params.update( - self.params(since, self.until) - ) + params.update(self.params(since, self.until)) if self.include_referenced_items: - params.update([('%s__full' % referenced_item, 'true') for referenced_item in self.include_referenced_items]) + params.update([ + (f'{referenced_item}__full', 'true') + for referenced_item in self.include_referenced_items + ]) return params @@ -171,13 +200,15 @@ def next_page_params_from_batch(self, batch): class DatePaginator(SimplePaginator): """ - This paginator is designed to get around the issue of deep paging where the deeper the page the longer - the query takes. + This paginator is designed to get around the issue of deep paging + where the deeper the page the longer the query takes. - Paginate records according to a date in the record. The params for the next batch will include a filter - for the date of the last record in the previous batch. + Paginate records according to a date in the record. The params for + the next batch will include a filter for the date of the last record + in the previous batch. - This also adds an ordering parameter to ensure that the records are ordered by the date field in ascending order. + This also adds an ordering parameter to ensure that the records are + ordered by the date field in ascending order. :param since_field: The name of the date field to use for pagination. :param page_size: Number of results to request in each page @@ -186,7 +217,8 @@ class DatePaginator(SimplePaginator): DEFAULT_PARAMS = object() def __init__(self, since_field, page_size=1000, params=DEFAULT_PARAMS): - params = DATE_PARAMS[since_field] if params is DatePaginator.DEFAULT_PARAMS else params + params = DATE_PARAMS[ + since_field] if params is DatePaginator.DEFAULT_PARAMS else params super(DatePaginator, self).__init__(page_size, params) self.since_field = since_field @@ -217,6 +249,11 @@ def get_since_date(self, batch): if since: try: - return parse(since, ignoretz=True) # ignoretz since we assume utc, and use naive datetimes everywhere + return parse( + since, + # ignoretz since we assume utc, and use naive + # datetimes everywhere + ignoretz=True + ) except ParserError: return None diff --git a/commcare_export/env.py b/commcare_export/env.py index 76250e06..15d0fdd4 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -14,10 +14,20 @@ JSONPATH_CACHE = {} -class CannotBind(Exception): pass -class CannotReplace(Exception): pass -class CannotEmit(Exception): pass -class NotFound(Exception): pass +class CannotBind(Exception): + pass + + +class CannotReplace(Exception): + pass + + +class CannotEmit(Exception): + pass + + +class NotFound(Exception): + pass class Env(object): @@ -25,8 +35,8 @@ class Env(object): An abstract model of an "environment" where data can be bound to names and later looked up. Not simply a dictionary as lookup in our case may support JsonPath, or may be a chaining of other - environments, so the abstract interface will - allow experimentation and customization. + environments, so the abstract interface will allow experimentation + and customization. """ # @@ -36,11 +46,9 @@ def bind(self, name, value): """ (key, ??) -> Env - Returns a new environment that is equivalent - to the current except the provided key is - bound to the value passed in. 
If the environment - does not support such a binding, raises - CannotBind + Returns a new environment that is equivalent to the current + except the provided key is bound to the value passed in. If the + environment does not support such a binding, raises CannotBind """ raise NotImplementedError() @@ -48,10 +56,9 @@ def lookup(self, key): """ key -> ?? - Note that the ?? may be None which may mean - the value was unbound or may mean it was - found and was None. This may need revisiting. - This may also raise NotFound if it is the + Note that the return value may be ``None`` which may mean the + value was unbound or may mean it was found and was None. This + may need revisiting. This may also raise NotFound if it is the sort of environment that does that. """ raise NotImplementedError() @@ -60,21 +67,19 @@ def replace(self, data): """ data -> Env - Completely replace the environment with new - data (somewhat like "this"-based Map functions a la jQuery). - Could be the same as creating a new empty env - and binding "@" in JsonPath. + Completely replace the environment with new data (somewhat like + "this"-based Map functions a la jQuery). Could be the same as + creating a new empty env and binding "@" in JsonPath. - May raise CannotReplace if this environment does - not support the input replacement + May raise CannotReplace if this environment does not support the + input replacement """ raise NotImplementedError() - # Minor impurity of the idea of a binding env: - # also allow `Emit` to directly call into - # the environment. It is up to the env - # whether to store it, write it immediately, - # or do something clever with iterators, etc. + # Minor impurity of the idea of a binding env: also allow `Emit` to + # directly call into the environment. It is up to the env whether to + # store it, write it immediately, or do something clever with + # iterators, etc. def emit_table(self, table_spec): raise CannotEmit() @@ -86,50 +91,62 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): pass - + # # Fluent interface to combinators # def __or__(self, other): return OrElse(self, other) + # # Combinators # + class OrElse(Env): """ - An environment that chains together a left environment - and a right environment. Note that this differes from - just a bunch of bindings, as the two envs might have - entirely different mechanisms (for example a magic - environment for special operators vs a JsonPathEnv - that always returns a list and operates only on - simple data) + An environment that chains together a left environment and a right + environment. Note that this differs from just a bunch of bindings, + as the two envs might have entirely different mechanisms (for + example a magic environment for special operators vs a JsonPathEnv + that always returns a list and operates only on simple data) """ + def __init__(self, left, right): self.left = left self.right = right - + def bind(self, name, value): - try: return OrElse(self.left.bind(name, value), self.right) - except CannotBind: return OrElse(self.left, self.right.bind(name, value)) + try: + return OrElse(self.left.bind(name, value), self.right) + except CannotBind: + return OrElse(self.left, self.right.bind(name, value)) def lookup(self, name): - try: return self.left.lookup(name) - except NotFound: return self.right.lookup(name) + try: + return self.left.lookup(name) + except NotFound: + return self.right.lookup(name) def replace(self, data): # A bit sketchy... 
-        try: return OrElse(self.left.replace(data), self.right)
-        except CannotReplace: return OrElse(self.left, self.right.replace(data))
+        try:
+            return OrElse(self.left.replace(data), self.right)
+        except CannotReplace:
+            return OrElse(self.left, self.right.replace(data))

     def emit_table(self, table_spec):
-        try: return self.left.emit_table(table_spec)
-        except CannotEmit: return self.right.emit_table(table_spec)
+        try:
+            return self.left.emit_table(table_spec)
+        except CannotEmit:
+            return self.right.emit_table(table_spec)

     def has_emitted_tables(self):
-        return any([self.left.has_emitted_tables(), self.right.has_emitted_tables()])
+        return any([
+            self.left.has_emitted_tables(),
+            self.right.has_emitted_tables()
+        ])

     def __enter__(self):
         self.left.__enter__()
@@ -144,40 +161,47 @@ def __exit__(self, exc_type, exc_val, exc_tb):

 #
 # Concrete environment classes
-#
+#
+

 class DictEnv(Env):
     """
     A simple dictionary environment; more-or-less boring!
     """
+
     def __init__(self, d=None):
         self.d = d or {}

     def bind(self, name, value):
         return DictEnv(dict(list(self.d.items()) + [(name, value)]))
-
+
     def lookup(self, name):
-        try: return self.d[name]
-        except KeyError: raise NotFound(unwrap_val(name))
+        try:
+            return self.d[name]
+        except KeyError:
+            raise NotFound(unwrap_val(name))

     def replace(self, data):
-        if isinstance(data, dict): return DictEnv(data)
-        else: raise CannotReplace()
+        if isinstance(data, dict):
+            return DictEnv(data)
+        else:
+            raise CannotReplace()


 class JsonPathEnv(Env):
     """
-    An environment like those that map names
-    to variables, but supporting dereferencing
-    an JsonPath expression. Note that it never
-    fails a lookup, but always returns an empty
-    list.
+    An environment like those that map names to variables, but
+    supporting dereferencing a JsonPath expression. Note that it never
+    fails a lookup, but always returns an empty list.

     It also interns all parsed expressions
     """
+
    def __init__(self, bindings=None):
         self.__bindings = bindings or {}
-        self.__restrict_to_root = bool(jsonpath.Fields("__root_only").find(self.__bindings))
+        self.__restrict_to_root = bool(
+            jsonpath.Fields("__root_only").find(self.__bindings)
+        )

         # Currently hardcoded because it is a global is jsonpath-ng
         # Probably not widely used, but will require refactor if so
@@ -187,7 +211,7 @@ def parse(self, jsonpath_string):
         if jsonpath_string not in JSONPATH_CACHE:
             JSONPATH_CACHE[jsonpath_string] = parse_jsonpath(jsonpath_string)
         return JSONPATH_CACHE[jsonpath_string]
-    
+
     def lookup(self, name):
         "str|JsonPath -> ??"
if isinstance(name, str): @@ -197,28 +221,31 @@ def lookup(self, name): else: raise NotFound(unwrap_val(name)) - if self.__restrict_to_root and str(jsonpath_expr) != 'id': # special case for 'id' + # special case for 'id' + if self.__restrict_to_root and str(jsonpath_expr) != 'id': expr, _ = split_leftmost(jsonpath_expr) if not isinstance(expr, jsonpath.Root): - return RepeatableIterator(lambda : iter(())) + return RepeatableIterator(lambda: iter(())) - def iterator(jsonpath_expr=jsonpath_expr): # Capture closure + def iterator(jsonpath_expr=jsonpath_expr): # Capture closure for datum in jsonpath_expr.find(self.__bindings): - # HACK: The auto id from jsonpath_ng is good, but we lose it when we do .value here, - # so just slap it on if not present + # HACK: The auto id from jsonpath_ng is good, but we + # lose it when we do .value here, so just slap it on if + # not present if isinstance(datum.value, dict) and 'id' not in datum.value: datum.value['id'] = jsonpath.AutoIdForDatum(datum).value yield datum + return RepeatableIterator(iterator) def bind(self, *args): "(str, ??) -> Env | ({str: ??}) -> Env" - + new_bindings = dict(self.__bindings) if isinstance(args[0], dict): new_bindings.update(args[0]) return self.__class__(new_bindings) - + elif isinstance(args[0], str): new_bindings[args[0]] = args[1] return self.__class__(new_bindings) @@ -295,6 +322,7 @@ def str2date(val): return date.replace(microsecond=0, tzinfo=None) + @unwrap('val') def bool2int(val): return int(str2bool(val)) @@ -381,7 +409,7 @@ def format_uuid(val): def join(*args): - args = [unwrap_val(arg)for arg in args] + args = [unwrap_val(arg) for arg in args] try: return args[0].join(args[1:]) except TypeError: @@ -401,12 +429,9 @@ def attachment_url(val): return None from commcare_export.minilinq import Apply, Reference, Literal return Apply( - Reference('template'), - Literal('{}/a/{}/api/form/attachment/{}/{}'), - Reference('commcarehq_base_url'), - Reference('$.domain'), - Reference('$.id'), - Literal(val) + Reference('template'), Literal('{}/a/{}/api/form/attachment/{}/{}'), + Reference('commcarehq_base_url'), Reference('$.domain'), + Reference('$.id'), Literal(val) ) @@ -424,7 +449,7 @@ def _doc_url(url_path): from commcare_export.minilinq import Apply, Reference, Literal return Apply( Reference('template'), - Literal('{}/a/{}/reports/'+ url_path + '/{}/'), + Literal('{}/a/{}/reports/' + url_path + '/{}/'), Reference('commcarehq_base_url'), Reference('$.domain'), Reference('$.id'), @@ -441,6 +466,7 @@ def _or(*args): def _or_raw(*args): + def unwrap_iter(arg): if isinstance(arg, RepeatableIterator): return list(arg) @@ -480,14 +506,14 @@ def unique(val): class BuiltInEnv(DictEnv): """ - A built-in environment of operators and functions - which does not support replacement or bindings. + A built-in environment of operators and functions which does not + support replacement or bindings. - For convenience, this environment has been chosen to - queue up tables to be written out, since it will be - the first env involved in almost any situation. + For convenience, this environment has been chosen to queue up tables + to be written out, since it will be the first env involved in almost + any situation. 
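# ----------------------------------------------------------------------
# [Editor's note: illustrative usage, not part of the patch.]
# Because lookups in an `OrElse` chain fall through from left to right,
# a `BuiltInEnv` placed first resolves function names, while document
# references fall through to the `JsonPathEnv` on the right:

from commcare_export.env import BuiltInEnv, JsonPathEnv

env = BuiltInEnv() | JsonPathEnv({'form': {'name': 'registration'}})
unique_fn = env.lookup('unique')  # found in BuiltInEnv
name_data = list(env.lookup('form.name'))  # found via the JsonPathEnv
# (each JsonPath result is a jsonpath-ng datum; use .value for the raw value)
# ----------------------------------------------------------------------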
""" - + def __init__(self, d=None): self.__tables = [] d = d or {} @@ -525,13 +551,17 @@ def __init__(self, d=None): '_or_raw': _or_raw, # for internal use, 'unique': unique }) - return super(BuiltInEnv, self).__init__(d) + super(BuiltInEnv, self).__init__(d) - def bind(self, name, value): raise CannotBind() - def replace(self, data): raise CannotReplace() + def bind(self, name, value): + raise CannotBind() + + def replace(self, data): + raise CannotReplace() class EmitterEnv(Env): + def __init__(self, writer): self.writer = writer self.emitted = False @@ -542,9 +572,14 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.writer.__exit__(exc_type, exc_val, exc_tb) - def bind(self, name, value): raise CannotBind() - def replace(self, data): raise CannotReplace() - def lookup(self, key): raise NotFound() + def bind(self, name, value): + raise CannotBind() + + def replace(self, data): + raise CannotReplace() + + def lookup(self, key): + raise NotFound() def emit_table(self, table_spec): self.emitted = True @@ -556,16 +591,21 @@ def has_emitted_tables(self): @staticmethod def _unwrap_row_vals(rows): - """The XMLtoJSON conversion in CommCare can result in a field being a JSON object - instead of a simple field (if the XML tag has attributes or different namespace from - the default). In this case the actual value of the XML element is stored in a '#text' field. """ + The XMLtoJSON conversion in CommCare can result in a field being + a JSON object instead of a simple field (if the XML tag has + attributes or different namespace from the default). In this + case the actual value of the XML element is stored in a '#text' + field. + """ + def _unwrap_val(val): if isinstance(val, dict): if '#text' in val: return val.get('#text') elif all(key == 'id' or key.startswith('@') for key in val): - # this implies the XML element was empty since all keys are from attributes + # this implies the XML element was empty since all + # keys are from attributes return '' return val diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index bd7063e9..bb4baa84 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -43,11 +43,22 @@ def without_empty_tail(cells): """ Returns the prefix of a column that is not entirely empty. 
""" - return list(reversed(list(drop_while(lambda v: (not v) or (not v.value), reversed(cells))))) + return list( + reversed( + list( + drop_while( + lambda v: (not v) or (not v.value), reversed(cells) + ) + ) + ) + ) def map_value(mappings_sheet, mapping_name, source_value): - "From the mappings_sheet, replaces the source_value with appropriate output value" + """ + From the mappings_sheet, replaces the source_value with appropriate + output value + """ return source_value @@ -58,7 +69,8 @@ def get_column_by_name(worksheet, column_name): value = value.lower().strip() if value else value if column_name == value: return without_empty_tail([ - worksheet.cell(row=i, column=col) for i in range(2, worksheet.max_row + 1) + worksheet.cell(row=i, column=col) + for i in range(2, worksheet.max_row + 1) ]) @@ -68,40 +80,59 @@ def get_columns_by_prefix(worksheet, column_prefix): value = worksheet.cell(row=1, column=col).value if value and value.lower().startswith(column_prefix): yield value, without_empty_tail([ - worksheet.cell(row=i, column=col) for i in range(2, worksheet.max_row + 1) + worksheet.cell(row=i, column=col) + for i in range(2, worksheet.max_row + 1) ]) def compile_mappings(worksheet): mapping_names = get_column_by_name(worksheet, "mapping name") - sources = extended_to_len(len(mapping_names), get_column_by_name(worksheet, "source")) - destinations = extended_to_len(len(mapping_names), get_column_by_name(worksheet, "destination")) + sources = extended_to_len( + len(mapping_names), get_column_by_name(worksheet, "source") + ) + destinations = extended_to_len( + len(mapping_names), get_column_by_name(worksheet, "destination") + ) mappings = defaultdict(lambda: defaultdict(lambda: None)) - - for mapping_name, source, dest in zip(mapping_names, sources, destinations): + + for mapping_name, source, dest in zip( + mapping_names, sources, destinations + ): if mapping_name and source: - mappings[mapping_name.value][source.value] = dest.value if dest else None + mappings[mapping_name.value][source.value + ] = dest.value if dest else None return mappings def compile_filters(worksheet, mappings=None): - filter_names = [cell.value for cell in get_column_by_name(worksheet, 'filter name') or []] + filter_names = [ + cell.value + for cell in get_column_by_name(worksheet, 'filter name') or [] + ] if not filter_names: return [] - filter_values = extended_to_len(len(filter_names), [cell.value for cell in get_column_by_name(worksheet, 'filter value') or []]) + filter_values = extended_to_len( + len(filter_names), [ + cell.value + for cell in get_column_by_name(worksheet, 'filter value') or [] + ] + ) return zip(filter_names, filter_values) def extended_to_len(desired_len, some_list, value=None): - return [some_list[i] if i < len(some_list) else value - for i in range(0, desired_len)] + return [ + some_list[i] if i < len(some_list) else value + for i in range(0, desired_len) + ] def _get_safe_source_field(source_field): + def _safe_node(node): try: parse_jsonpath(node) @@ -124,11 +155,21 @@ def _safe_node(node): return Reference(source_field) -def compile_field(field, source_field, alternate_source_fields=None, map_via=None, format_via=None, mappings=None): +def compile_field( + field, + source_field, + alternate_source_fields=None, + map_via=None, + format_via=None, + mappings=None +): expr = _get_safe_source_field(source_field) if alternate_source_fields: - expr = Apply(Reference('or'), expr, *[Reference(alt_field) for alt_field in alternate_source_fields]) + expr = Apply( + Reference('or'), expr, + 
*[Reference(alt_field) for alt_field in alternate_source_fields] + ) if map_via: expr = compile_map_format_via(expr, map_via) @@ -143,15 +184,25 @@ def compile_field(field, source_field, alternate_source_fields=None, map_via=Non def compile_mapped_field(field_mappings, field_expression): # quote the ref in case it has special chars - quoted_field = Apply(Reference('join'), Literal(''), Literal('"'), field_expression, Literal('"')) + quoted_field = Apply( + Reference('join'), Literal(''), Literal('"'), field_expression, + Literal('"') + ) # produce the mapping reference i.e. 'mapping."X"' - mapping_ref = Apply(Reference('join'), Literal('.'), Literal('mapping'), quoted_field) + mapping_ref = Apply( + Reference('join'), Literal('.'), Literal('mapping'), quoted_field + ) # apply the reference to the field mappings to get the final value - mapped_value = FlatMap(source=Literal([field_mappings]), body=Reference(mapping_ref), name='mapping') + mapped_value = FlatMap( + source=Literal([field_mappings]), + body=Reference(mapping_ref), + name='mapping' + ) return Apply(Reference('default'), mapped_value, field_expression) def _get_alternate_source_fields_from_csv(worksheet, num_fields): + def _clean_csv_field(field): if field and field.value: return [val.strip() for val in field.value.split(',')] @@ -164,10 +215,14 @@ def _clean_csv_field(field): def _get_alternate_source_fields_from_columns(worksheet, num_fields): - matching_columns = sorted(get_columns_by_prefix(worksheet, 'alternate source field'), key=lambda x: x[0]) + matching_columns = sorted( + get_columns_by_prefix(worksheet, 'alternate source field'), + key=lambda x: x[0] + ) alt_source_cols = [ - extended_to_len(num_fields, [cell.value if cell else cell for cell in alt_col]) - for col_name, alt_col in matching_columns + extended_to_len( + num_fields, [cell.value if cell else cell for cell in alt_col] + ) for col_name, alt_col in matching_columns ] # transpose columns to rows alt_srouce_fields = map(list, zip(*alt_source_cols)) @@ -188,13 +243,26 @@ def compile_fields(worksheet, mappings=None): if not fields: return [] - source_fields = extended_to_len(len(fields), get_column_by_name(worksheet, 'source field') or []) - map_vias = extended_to_len(len(fields), get_column_by_name(worksheet, 'map via') or []) - format_vias = extended_to_len(len(fields), get_column_by_name(worksheet, 'format via') or []) + source_fields = extended_to_len( + len(fields), + get_column_by_name(worksheet, 'source field') or [] + ) + map_vias = extended_to_len( + len(fields), + get_column_by_name(worksheet, 'map via') or [] + ) + format_vias = extended_to_len( + len(fields), + get_column_by_name(worksheet, 'format via') or [] + ) - alternate_source_fields = get_alternate_source_fields(worksheet, len(fields)) + alternate_source_fields = get_alternate_source_fields( + worksheet, len(fields) + ) - args = zip(fields, source_fields, alternate_source_fields, map_vias, format_vias) + args = zip( + fields, source_fields, alternate_source_fields, map_vias, format_vias + ) return [ compile_field( field=field.value, @@ -210,23 +278,29 @@ def compile_fields(worksheet, mappings=None): def compile_source(worksheet, value_or_root=False): """ - Compiles just the part of the Excel Spreadsheet that - indicates the API endpoint to hit along with optional filters - and an optional JSONPath within that endpoint, + Compiles just the part of the Excel Spreadsheet that indicates the + API endpoint to hit along with optional filters and an optional + JSONPath within that endpoint, For 
example, this spreadsheet - + Data Source Filter Name Filter Value Include Referenced Items ----------------------------- ------------ ------------------ -------------------------- form[*].form.child_questions app_id cases xmlns.exact - Should fetch from api/form?app_id=&xmlns.exact=&cases__full=true - and then iterate (FlatMap) over all child questions. + Should fetch from api/form?app_id=&xmlns.exact=&cases__full=true and then iterate (FlatMap) over all child + questions. + + :return: tuple of the 'data source' expression and the 'root doc + expression'. - :return: tuple of the 'data source' expression and the 'root doc expression'. - 'data source': The MiniLinq that calls 'api_data' function to get data from CommCare - 'root doc expression': The MiniLinq that is applied to each doc, can be None. + 'data source': The MiniLinq that calls 'api_data' function to + get data from CommCare + + 'root doc expression': The MiniLinq that is applied to each doc, + can be None. """ data_source_column = get_column_by_name(worksheet, 'data source') @@ -234,13 +308,22 @@ def compile_source(worksheet, value_or_root=False): raise Exception('Sheet has no "Data Source" column.') data_source_str = data_source_column[0].value filters = compile_filters(worksheet) - include_referenced_items = [cell.value for cell in (get_column_by_name(worksheet, 'include referenced items') or [])] + include_referenced_items = [ + cell.value for cell in + (get_column_by_name(worksheet, 'include referenced items') or []) + ] - data_source, data_source_jsonpath = split_leftmost(parse_jsonpath(data_source_str)) - maybe_redundant_slice, remaining_jsonpath = split_leftmost(data_source_jsonpath) + data_source, data_source_jsonpath = split_leftmost( + parse_jsonpath(data_source_str) + ) + maybe_redundant_slice, remaining_jsonpath = split_leftmost( + data_source_jsonpath + ) - # The leftmost _must_ be of type Fields with one field and will pull out the first field - if not isinstance(data_source, jsonpath.Fields) or len(data_source.fields) > 1: + # The leftmost _must_ be of type Fields with one field and will pull + # out the first field + if not isinstance(data_source, + jsonpath.Fields) or len(data_source.fields) > 1: raise Exception('Bad value for data source: %s' % str(data_source)) data_source = data_source.fields[0] @@ -248,11 +331,18 @@ def compile_source(worksheet, value_or_root=False): if isinstance(maybe_redundant_slice, jsonpath.Slice): data_source_jsonpath = remaining_jsonpath - api_query_args = [Reference("api_data"), Literal(data_source), Reference('checkpoint_manager')] - + api_query_args = [ + Reference("api_data"), + Literal(data_source), + Reference('checkpoint_manager') + ] + if not filters: if include_referenced_items: - api_query_args.append(Literal(None)) # Pad the argument list if we have further args; keeps tests and user code more readable at the expense of this conditional + # Pad the argument list if we have further args; keeps tests + # and user code more readable at the expense of this + # conditional + api_query_args.append(Literal(None)) else: api_query_args.append(Literal(dict(filters))) @@ -261,7 +351,11 @@ def compile_source(worksheet, value_or_root=False): api_query = Apply(*api_query_args) - if data_source_jsonpath is None or isinstance(data_source_jsonpath, jsonpath.This) or isinstance(data_source_jsonpath, jsonpath.Root): + if ( + data_source_jsonpath is None + or isinstance(data_source_jsonpath, jsonpath.This) + or isinstance(data_source_jsonpath, jsonpath.Root) + ): return data_source, 
api_query, None else: if value_or_root: @@ -273,12 +367,16 @@ def compile_source(worksheet, value_or_root=False): def get_value_or_root_expression(value_expression): - """Return expression used when iterating over a nested document but also wanting - a record if the value expression returns an empty result.""" + """ + Return expression used when iterating over a nested document but + also wanting a record if the value expression returns an empty + result. + """ - # We add a bind here so that in JsonPathEnv we can restrict expressions to only those that reference - # the root. That prevents us from mistakenly getting values from the root that happen to have the - # same name as those in the child. + # We add a bind here so that in JsonPathEnv we can restrict + # expressions to only those that reference the root. That prevents + # us from mistakenly getting values from the root that happen to + # have the same name as those in the child. root_expr = Bind("__root_only", Literal(True), Reference("$")) return Apply( Reference('_or_raw'), Reference(str(value_expression)), root_expr @@ -288,8 +386,14 @@ def get_value_or_root_expression(value_expression): # If the source is expected to provide a column, then require that it is # already present or can be added without conflicting with an existing # column. -def require_column_in_sheet(sheet_name, data_source, table_name, output_headings, - output_fields, column_enforcer): +def require_column_in_sheet( + sheet_name, + data_source, + table_name, + output_headings, + output_fields, + column_enforcer, +): # Check for conflicting use of column name. extend_fields = True @@ -304,25 +408,42 @@ def require_column_in_sheet(sheet_name, data_source, table_name, output_headings extend_fields = False continue else: - raise Exception('Field name "{}" conflicts with an internal name.'.format(required_column.name.v)) + raise Exception( + 'Field name "{}" conflicts with an internal name.' 
+ .format(required_column.name.v) + ) if extend_fields: - headings = [Literal(output_heading.value) - for output_heading in output_headings] + [required_column.name] - body = List(output_fields + - [compile_field(field=required_column.name, - source_field=required_column.source)]) + headings = [ + Literal(output_heading.value) for output_heading in output_headings + ] + [required_column.name] + body = List( + output_fields + [ + compile_field( + field=required_column.name, + source_field=required_column.source + ) + ] + ) else: - headings = [Literal(output_heading.value) - for output_heading in output_headings] + headings = [ + Literal(output_heading.value) for output_heading in output_headings + ] body = List(output_fields) return (headings, body) -def parse_sheet(worksheet, mappings=None, column_enforcer=None, value_or_root=False): +def parse_sheet( + worksheet, + mappings=None, + column_enforcer=None, + value_or_root=False, +): mappings = mappings or {} - data_source, source_expr, root_doc_expr = compile_source(worksheet, value_or_root) + data_source, source_expr, root_doc_expr = compile_source( + worksheet, value_or_root + ) table_name_column = get_column_by_name(worksheet, 'table name') if table_name_column: @@ -339,20 +460,21 @@ def parse_sheet(worksheet, mappings=None, column_enforcer=None, value_or_root=Fa source = source_expr body = None else: - # note: if we want to add data types to the columns added by the column_enforcer - # this will have to conditionally move into the if/else below + # note: if we want to add data types to the columns added by the + # column_enforcer this will have to conditionally move into the + # if/else below data_types = [Literal(data_type.value) for data_type in output_types] if column_enforcer is not None: - (headings, body) = require_column_in_sheet(worksheet.title, - data_source, - output_table_name, - output_headings, - output_fields, - column_enforcer) + (headings, body) = require_column_in_sheet( + worksheet.title, data_source, output_table_name, + output_headings, output_fields, column_enforcer + ) source = source_expr else: - headings = [Literal(output_heading.value) - for output_heading in output_headings] + headings = [ + Literal(output_heading.value) + for output_heading in output_headings + ] source = source_expr body = List(output_fields) @@ -367,16 +489,32 @@ def parse_sheet(worksheet, mappings=None, column_enforcer=None, value_or_root=Fa ) -class SheetParts(namedtuple('SheetParts', 'name headings source body root_expr data_types data_source')): - def __new__(cls, name, headings, source, body, root_expr=None, data_types=None, data_source=None): +class SheetParts( + namedtuple( + 'SheetParts', + 'name headings source body root_expr data_types data_source' + ) +): + + def __new__( + cls, + name, + headings, + source, + body, + root_expr=None, + data_types=None, + data_source=None + ): data_types = data_types or [] - return super(SheetParts, cls).__new__(cls, name, headings, source, body, root_expr, data_types, data_source) + return super(SheetParts, cls).__new__( + cls, name, headings, source, body, root_expr, data_types, + data_source + ) @property def columns(self): - return [ - col.v for col in self.headings - ] + return [col.v for col in self.headings] def parse_workbook(workbook, column_enforcer=None, value_or_root=False): @@ -384,7 +522,8 @@ def parse_workbook(workbook, column_enforcer=None, value_or_root=False): Returns a MiniLinq corresponding to the Excel configuration, which consists of the following sheets: - 1. 
"Mappings" a sheet with three columns that defines simple lookup table functions + 1. "Mappings" a sheet with three columns that defines simple lookup + table functions: A. MappingName - the name by which this mapping is referenced B. Source - the value to match C. Destination - the value to return @@ -397,12 +536,17 @@ def parse_workbook(workbook, column_enforcer=None, value_or_root=False): mappings_sheet = None mappings = compile_mappings(mappings_sheet) if mappings_sheet else None - emit_sheets = [sheet_name for sheet_name in workbook.sheetnames if sheet_name != 'Mappings'] + emit_sheets = [ + sheet_name for sheet_name in workbook.sheetnames + if sheet_name != 'Mappings' + ] parsed_sheets = [] for sheet in emit_sheets: try: - sheet_parts = parse_sheet(workbook[sheet], mappings, column_enforcer, value_or_root) + sheet_parts = parse_sheet( + workbook[sheet], mappings, column_enforcer, value_or_root + ) except Exception as e: msg = 'Ignoring sheet "{}": {}'.format(sheet, str(e)) if logger.isEnabledFor(logging.DEBUG): @@ -420,7 +564,8 @@ def compile_queries(parsed_sheets, missing_value, combine_emits): # group sheets by source sheets_by_source = [] for sheet in parsed_sheets: - # Not easy to implement hashing on MiniLinq objects so can't use a dict + # Not easy to implement hashing on MiniLinq objects so can't use + # a dict for source, sheets in sheets_by_source: if sheet.source == source: sheets.append(sheet) @@ -432,7 +577,9 @@ def compile_queries(parsed_sheets, missing_value, combine_emits): for source, sheets in sheets_by_source: if len(sheets) > 1: if combine_emits: - queries.append(get_multi_emit_query(source, sheets, missing_value)) + queries.append( + get_multi_emit_query(source, sheets, missing_value) + ) else: queries.extend([ get_single_emit_query(sheet, missing_value) @@ -444,12 +591,15 @@ def compile_queries(parsed_sheets, missing_value, combine_emits): def get_multi_emit_query(source, sheets, missing_value): - """Multiple `Emit` expressions using the same data source. - For this we reverse the `Map` so that we apply each `Emit` - repeatedly for each doc produced by the data source. + """ + Multiple `Emit` expressions using the same data source. For this we + reverse the `Map` so that we apply each `Emit` repeatedly for each + doc produced by the data source. 
""" emits = [] - multi_query = Filter( # the filter here is to prevent accumulating a `[None]` value for each doc + # the filter here is to prevent accumulating a `[None]` value for + # each doc + multi_query = Filter( predicate=Apply( Reference("filter_empty"), Reference("$") @@ -461,39 +611,40 @@ def get_multi_emit_query(source, sheets, missing_value): ) for sheet in sheets: - # if there is no root expression then we just reference the whole document with `this` + # if there is no root expression then we just reference the + # whole document with `this` root_expr = sheet.root_expr or Reference("`this`") emits.append( Emit( table=sheet.name, headings=sheet.headings, - source=Map( - source=root_expr, - body=sheet.body - ), + source=Map(source=root_expr, body=sheet.body), missing_value=missing_value, data_types=sheet.data_types, ) ) table_names = [sheet.name for sheet in sheets] - data_source = sheets[0].data_source # sheets will all have the same datasource - return Bind('checkpoint_manager', Apply( - Reference('get_checkpoint_manager'), Literal(data_source), Literal(table_names) - ), multi_query) + data_source = sheets[ + 0].data_source # sheets will all have the same datasource + return Bind( + 'checkpoint_manager', + Apply( + Reference('get_checkpoint_manager'), Literal(data_source), + Literal(table_names) + ), multi_query + ) def get_single_emit_query(sheet, missing_value): - """Single `Emit` for the data source to we can just - apply the `Emit` once with the source expression being - the data source. """ + Single `Emit` for the data source to we can just apply the `Emit` + once with the source expression being the data source. + """ + def _get_source(source, root_expr): if root_expr: - return FlatMap( - source=source, - body=root_expr - ) + return FlatMap(source=source, body=root_expr) else: return source @@ -501,15 +652,18 @@ def _get_source(source, root_expr): table=sheet.name, headings=sheet.headings, source=Map( - source=_get_source(sheet.source, sheet.root_expr), - body=sheet.body + source=_get_source(sheet.source, sheet.root_expr), body=sheet.body ), missing_value=missing_value, data_types=sheet.data_types, ) - return Bind('checkpoint_manager', Apply( - Reference('get_checkpoint_manager'), Literal(sheet.data_source), Literal([sheet.name]) - ), emit) + return Bind( + 'checkpoint_manager', + Apply( + Reference('get_checkpoint_manager'), Literal(sheet.data_source), + Literal([sheet.name]) + ), emit + ) def check_field_length(parsed_sheets, max_column_length): @@ -541,9 +695,15 @@ def blacklist(table_name): blacklisted_tables.append(table_name) -def get_queries_from_excel(workbook, missing_value=None, combine_emits=False, - max_column_length=None, required_columns=None, - column_enforcer=None, value_or_root=False): +def get_queries_from_excel( + workbook, + missing_value=None, + combine_emits=False, + max_column_length=None, + required_columns=None, + column_enforcer=None, + value_or_root=False +): parsed_sheets = parse_workbook(workbook, column_enforcer, value_or_root) for sheet in parsed_sheets: if sheet.name in blacklisted_tables: diff --git a/commcare_export/exceptions.py b/commcare_export/exceptions.py index 0e924fe4..cfa775f5 100644 --- a/commcare_export/exceptions.py +++ b/commcare_export/exceptions.py @@ -3,6 +3,7 @@ class DataExportException(Exception): class LongFieldsException(DataExportException): + def __init__(self, long_fields, max_length): self.long_fields = long_fields self.max_length = max_length @@ -12,43 +13,52 @@ def message(self): message = '' for table, 
headers in self.long_fields.items(): message += ( - 'Table "{}" has field names longer than the maximum allowed for this database ({}):\n'.format( - table, self.max_length - )) + f'Table "{table}" has field names longer than the maximum ' + f'allowed for this database ({self.max_length}):\n' + ) for header in headers: message += ' {}\n'.format(header) - message += '\nPlease adjust field names to be within the maximum length limit of {}'.format(self.max_length) + message += ( + '\nPlease adjust field names to be within the maximum length ' + f'limit of {self.max_length}' + ) return message class MissingColumnException(DataExportException): + def __init__(self, errors_by_sheet): self.errors_by_sheet = errors_by_sheet @property def message(self): lines = [ - 'Sheet "{}" is missing definitions for required fields: "{}"'.format( - sheet, '", "'.join(missing_cols) - ) for sheet, missing_cols in self.errors_by_sheet.items() + 'Sheet "{}" is missing definitions for required fields: "{}"' + .format(sheet, '", "'.join(missing_cols)) + for sheet, missing_cols in self.errors_by_sheet.items() ] return '\n'.join(lines) class MissingQueryFileException(DataExportException): + def __init__(self, query_file): self.query_file = query_file @property def message(self): - return 'Query file not found: {}'.format(self.query_file) + return f'Query file not found: {self.query_file}' class ReservedTableNameException(DataExportException): + def __init__(self, conflicting_name): self.conflicting_name = conflicting_name @property def message(self): - return 'Table name "{}" conflicts with an internal table name. Please export to a different table.'.format(self.conflicting_name) + return ( + f'Table name "{self.conflicting_name}" conflicts with an internal ' + f'table name. Please export to a different table.' + ) diff --git a/commcare_export/jsonpath_utils.py b/commcare_export/jsonpath_utils.py index defc9a58..e694956e 100644 --- a/commcare_export/jsonpath_utils.py +++ b/commcare_export/jsonpath_utils.py @@ -7,6 +7,8 @@ def split_leftmost(jsonpath_expr): return further_leftmost, rest.child(jsonpath_expr.right) elif isinstance(jsonpath_expr, jsonpath.Descendants): further_leftmost, rest = split_leftmost(jsonpath_expr.left) - return further_leftmost, jsonpath.Descendants(rest, jsonpath_expr.right) + return further_leftmost, jsonpath.Descendants( + rest, jsonpath_expr.right + ) else: return jsonpath_expr, jsonpath.This() diff --git a/commcare_export/location_info_provider.py b/commcare_export/location_info_provider.py index 78992926..c9cebac1 100644 --- a/commcare_export/location_info_provider.py +++ b/commcare_export/location_info_provider.py @@ -5,12 +5,14 @@ logger = logging.getLogger(__name__) -# LocationInfoProvider uses the /location_type/ endpoint of the API -# to retrieve location type data, stores that information in a dictionary +# LocationInfoProvider uses the /location_type/ endpoint of the API to +# retrieve location type data, stores that information in a dictionary # keyed by resource URI and provides the method 'get_location_info' to # extract values from the dictionary. 
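# ----------------------------------------------------------------------
# [Editor's note: illustrative sketch, not part of the patch.]
# The ancestor map that `get_location_hierarchy` below builds, in
# miniature: walk each location's `parent` chain upward, recording one
# ancestor id per location-type code. (Simplified: type codes are
# inlined here, whereas the real code resolves them via location_types;
# all data is made up.)

locations = {
    '/loc/3': {'location_id': 'c3', 'type_code': 'village',
               'parent': '/loc/2'},
    '/loc/2': {'location_id': 'b2', 'type_code': 'district',
               'parent': '/loc/1'},
    '/loc/1': {'location_id': 'a1', 'type_code': 'state',
               'parent': None},
}

ancestors = {}  # includes the location itself, as in the real code
for resource_uri in locations:
    type_code_to_id = {}
    loc_uri = resource_uri
    while loc_uri is not None:
        loc_data = locations[loc_uri]
        type_code_to_id[loc_data['type_code']] = loc_data['location_id']
        loc_uri = loc_data['parent']
    ancestors[resource_uri] = type_code_to_id

assert ancestors['/loc/3'] == {
    'village': 'c3', 'district': 'b2', 'state': 'a1'
}
# ----------------------------------------------------------------------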
+ class LocationInfoProvider: + def __init__(self, api_client, page_size): self._api_client = api_client self._page_size = page_size @@ -43,8 +45,9 @@ def get_location_types(self): paginator = SimplePaginator('location_type', self._page_size) paginator.init(None, False, None) location_type_dict = {} - for row in self._api_client.iterate('location_type', paginator, - {'limit': self._page_size}): + for row in self._api_client.iterate( + 'location_type', paginator, {'limit': self._page_size} + ): location_type_dict[row['resource_uri']] = row return location_type_dict @@ -60,8 +63,9 @@ def get_location_hierarchy(self): # Extract every location, its type and its parent location_data = {} - for row in self._api_client.iterate('location', paginator, - {'limit': self._page_size}): + for row in self._api_client.iterate( + 'location', paginator, {'limit': self._page_size} + ): location_data[row['resource_uri']] = { 'location_id': row['location_id'], 'location_type': row['location_type'], @@ -70,19 +74,24 @@ def get_location_hierarchy(self): # Build a map from location resource_uri to a map from # location_type_code to ancestor location id. - ancestors = {} # includes location itself + ancestors = {} # includes location itself for resource_uri in location_data: loc_uri = resource_uri type_code_to_id = {} while loc_uri is not None: if loc_uri not in location_data: - logger.warning('Unknown location referenced: {}'.format(loc_uri)) + logger.warning( + 'Unknown location referenced: {}'.format(loc_uri) + ) break loc_data = location_data[loc_uri] loc_type = loc_data['location_type'] if loc_type not in self.location_types: - logger.warning('Unknown location type referenced: {}'.format(loc_type)) + logger.warning( + 'Unknown location type referenced: {}' + .format(loc_type) + ) break type_code = self.location_types[loc_type]['code'] diff --git a/commcare_export/map_format.py b/commcare_export/map_format.py index 5a68bdcc..8f413426 100644 --- a/commcare_export/map_format.py +++ b/commcare_export/map_format.py @@ -9,6 +9,7 @@ class ParsingException(Exception): + def __init__(self, message): self.message = message @@ -25,7 +26,9 @@ def parse_function_arg(slug, expr_string): matches = re.match(regex, expr_string) if not matches: - raise ParsingException('Error: Unable to parse: {}'.format(expr_string)) + raise ParsingException( + 'Error: Unable to parse: {}'.format(expr_string) + ) return matches.groups()[0] @@ -35,7 +38,10 @@ def parse_selected_at(value_expr, selected_at_expr_string): try: index = int(index) except ValueError: - return Literal('Error: selected-at index must be an integer: {}'.format(selected_at_expr_string)) + return Literal( + 'Error: selected-at index must be an integer: {}' + .format(selected_at_expr_string) + ) return Apply(Reference(SELECTED_AT), value_expr, Literal(index)) @@ -49,7 +55,10 @@ def parse_template(value_expr, format_expr_string): args_string = parse_function_arg(TEMPLATE, format_expr_string) args = [arg.strip() for arg in args_string.split(',') if arg.strip()] if len(args) < 1: - return Literal('Error: template function requires the format template: {}'.format(format_expr_string)) + return Literal( + 'Error: template function requires the format template: ' + f'{format_expr_string}' + ) template = args.pop(0) if args: args = [Reference(arg) for arg in args] @@ -63,7 +72,10 @@ def parse_substr(value_expr, substr_expr_string): regex = r'^\s*(\d+)\s*,\s*(\d+)\s*$' matches = re.match(regex, args_string) if not matches or len(matches.groups()) != 2: - raise 
ParsingException('Error: both substr arguments must be non-negative integers: {}'.format(substr_expr_string)) + raise ParsingException( + 'Error: both substr arguments must be non-negative integers: ' + f'{substr_expr_string}' + ) # These conversions should always succeed after a pattern match. start = int(matches.groups()[0]) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index b6dda9fe..45ae9b94 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,4 +1,5 @@ import logging +from typing import List as ListType from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator @@ -19,7 +20,7 @@ def __init__(self, *args, **kwargs): def eval(self, env): "( env: object(bindings: {str: ??}, writer: Writer) )-> ??" raise NotImplementedError() - + #### Factory methods #### _node_classes = {} @@ -31,21 +32,23 @@ def register(cls, clazz, slug=None): @classmethod def from_jvalue(cls, jvalue): """ - The term `jvalue` is code for "the output of a JSON deserialization". This - module does not actually care about JSON, which is concrete syntax, but - only the corresponding data model of lists and string-indexed dictionaries. + The term `jvalue` is code for "the output of a JSON + deserialization". This module does not actually care about + JSON, which is concrete syntax, but only the corresponding data + model of lists and string-indexed dictionaries. - (since this data might never actually be a string, that layer is handled elsewhere) + (since this data might never actually be a string, that layer is + handled elsewhere) """ - # This is a bit wonky, but this method really should not be inherited. - # So if we end up here from a subclass, it is broken. + # This is a bit wonky, but this method really should not be + # inherited. So if we end up here from a subclass, it is broken. if not issubclass(MiniLinq, cls): raise NotImplementedError() - + if isinstance(jvalue, str): return jvalue - + elif isinstance(jvalue, list): # Leverage for literal lists of data in the code return [MiniLinq.from_jvalue(v) for v in jvalue] @@ -54,25 +57,31 @@ def from_jvalue(cls, jvalue): # Dictionaries are reserved; they must always have exactly # one entry and it must be the AST node class if len(jvalue.values()) != 1: - raise ValueError('JValue serialization of AST contains dict with number of slugs != 1') + raise ValueError( + 'JValue serialization of AST contains dict with number of slugs != 1' + ) slug = list(jvalue.keys())[0] if slug not in cls._node_classes: - raise ValueError('JValue serialization of AST contains unknown node type: %s' % slug) + raise ValueError( + 'JValue serialization of AST contains unknown node type: %s' + % slug + ) return cls._node_classes[slug].from_jvalue(jvalue) class Reference(MiniLinq): """ - An MiniLinq referencing a datum or data. It is flexible - about what the type of the environment is, but it must - support using these as keys. + An MiniLinq referencing a datum or data. It is flexible about what + the type of the environment is, but it must support using these as + keys. """ + def __init__(self, ref): - self.ref = ref #parse_jsonpath(ref) #ref + self.ref = ref #parse_jsonpath(ref) #ref self.nested = isinstance(self.ref, MiniLinq) - + def eval(self, env): if self.nested: ref = self.ref.eval(env) @@ -95,14 +104,15 @@ def __repr__(self): class Literal(MiniLinq): """ - An MiniLinq wrapper around a python value. Returns exactly the - value given to it. 
Note: when going to/from jvalue the - contents are left alone, so it can be _used_ with a non-JSON - encodable value, but cannot be encoded. + An MiniLinq wrapper around a python value. Returns exactly the value + given to it. Note: when going to/from jvalue the contents are left + alone, so it can be _used_ with a non-JSON encodable value, but + cannot be encoded. """ + def __init__(self, v): self.v = v - + def eval(self, env): return self.v @@ -122,11 +132,10 @@ def to_jvalue(self): class Bind(MiniLinq): """ - Binds the results of an expression to a new name. Will be useful - in writing exports by hand or debugging, and maybe for efficiency - if it de-dupes computation (but generally exports will be - expected to be too large to store, so it'll be re-run on each - access. + Binds the results of an expression to a new name. Will be useful in + writing exports by hand or debugging, and maybe for efficiency if it + de-dupes computation (but generally exports will be expected to be + too large to store, so it'll be re-run on each access. """ def __init__(self, name, value, body): @@ -139,25 +148,37 @@ def eval(self, env): return self.body.eval(env.bind(self.name, self.value.eval(env))) def __eq__(self, other): - return isinstance(other, Bind) and self.name == other.name and self.value == other.value and self.body == other.body + return isinstance( + other, Bind + ) and self.name == other.name and self.value == other.value and self.body == other.body def __repr__(self): - return '%s(name=%r, value=%r, body=%r)' % (self.__class__.__name__, self.name, self.value, self.body) + return '%s(name=%r, value=%r, body=%r)' % ( + self.__class__.__name__, self.name, self.value, self.body + ) @classmethod def from_jvalue(cls, jvalue): fields = jvalue['Bind'] - return cls(name=fields['name'], - value=MiniLinq.from_jvalue(fields['value']), - body=MiniLinq.from_jvalue(fields['body'])) + return cls( + name=fields['name'], + value=MiniLinq.from_jvalue(fields['value']), + body=MiniLinq.from_jvalue(fields['body']) + ) def to_jvalue(self): - return {'Bind':{'name': self.name, - 'value': self.value.to_jvalue(), - 'body': self.body.to_jvalue()}} + return { + 'Bind': { + 'name': self.name, + 'value': self.value.to_jvalue(), + 'body': self.body.to_jvalue() + } + } def __repr__(self): - return '%s(name=%r, value=%r, body=%r)' % (self.__class__.__name__, self.name, self.value, self.body) + return '%s(name=%r, value=%r, body=%r)' % ( + self.__class__.__name__, self.name, self.value, self.body + ) class Filter(MiniLinq): @@ -174,7 +195,9 @@ def __init__(self, source, predicate, name=None): def eval(self, env): source_result = self.source.eval(env) - def iterate(env=env, source_result=source_result): # Python closure workaround + def iterate( + env=env, source_result=source_result + ): # Python closure workaround if self.name: for item in source_result: if self.predicate.eval(env.bind(self.name, item)): @@ -187,34 +210,48 @@ def iterate(env=env, source_result=source_result): # Python closure workaround return RepeatableIterator(iterate) def __eq__(self, other): - return isinstance(other, Filter) and self.source == other.source and self.name == other.name and self.predicate == other.predicate + return ( + isinstance(other, Filter) + and self.source == other.source + and self.name == other.name + and self.predicate == other.predicate + ) @classmethod def from_jvalue(cls, jvalue): fields = jvalue['Filter'] # TODO: catch errors and give informative error messages - return cls(predicate = 

 class Filter(MiniLinq):
     """
@@ -174,7 +195,9 @@ def __init__(self, source, predicate, name=None):

     def eval(self, env):
         source_result = self.source.eval(env)

-        def iterate(env=env, source_result=source_result): # Python closure workaround
+        def iterate(
+            env=env, source_result=source_result
+        ):  # Python closure workaround
             if self.name:
                 for item in source_result:
                     if self.predicate.eval(env.bind(self.name, item)):
                         yield item
             else:
                 for item in source_result:
                     if self.predicate.eval(env.replace(item)):
                         yield item

         return RepeatableIterator(iterate)

     def __eq__(self, other):
-        return isinstance(other, Filter) and self.source == other.source and self.name == other.name and self.predicate == other.predicate
+        return (
+            isinstance(other, Filter)
+            and self.source == other.source
+            and self.name == other.name
+            and self.predicate == other.predicate
+        )

     @classmethod
     def from_jvalue(cls, jvalue):
         fields = jvalue['Filter']
         # TODO: catch errors and give informative error messages
-        return cls(predicate = MiniLinq.from_jvalue(fields['predicate']),
-                   source = MiniLinq.from_jvalue(fields['source']),
-                   name = fields.get('name'))
+        return cls(
+            predicate=MiniLinq.from_jvalue(fields['predicate']),
+            source=MiniLinq.from_jvalue(fields['source']),
+            name=fields.get('name')
+        )

     def to_jvalue(self):
-        return {'Filter': {'predicate': self.predicate.to_jvalue(),
-                           'source': self.source.to_jvalue(),
-                           'name': self.name}}
+        return {
+            'Filter': {
+                'predicate': self.predicate.to_jvalue(),
+                'source': self.source.to_jvalue(),
+                'name': self.name
+            }
+        }

     def __repr__(self):
-        return '%s(source=%r, name=%r, predicate=%r)' % (self.__class__.__name__, self.source, self.name, self.predicate)
+        return '%s(source=%r, name=%r, predicate=%r)' % (
+            self.__class__.__name__, self.source, self.name, self.predicate
+        )
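
# A self-contained sketch of Filter evaluation. FakeEnv is hypothetical
# and models only the environment methods exercised here: bind() for
# name scoping (called above) and a lookup() used by Reference, which is
# assumed from the environment API rather than shown in this hunk.
class FakeEnv:
    def __init__(self, bindings=None):
        self.bindings = bindings or {}

    def bind(self, name, value):
        return FakeEnv({**self.bindings, name: value})

    def lookup(self, ref):
        return self.bindings[ref]

kept = Filter(
    source=Literal([0, 1, 2]),
    predicate=Reference('n'),  # truthy items survive the predicate
    name='n',
).eval(FakeEnv())
assert list(kept) == [1, 2]
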

 class List(MiniLinq):
     """
-    A list of expressions, embeds the [ ... ] syntax into the
-    MiniLinq meta-leval
+    A list of expressions, embeds the [ ... ] syntax into the MiniLinq
+    meta-level
     """
+
     def __init__(self, items):
         self.items = items
-    
+
     def eval(self, env):
         return [item.eval(env) for item in self.items]
@@ -234,14 +271,12 @@ def to_jvalue(self):

 class Map(MiniLinq):
     """
-    Like the `FROM` clause of a SQL `SELECT` or jQuery's map,
-    binds each item from its `source` and evaluates
-    the body MiniLinq.
-
-    If `name` is provided to the constructor, then instead of
-    replacing the environment with each row, it will just
-    bind the row to `name`, enabling references to the
-    rest of the env.
+    Like the `FROM` clause of a SQL `SELECT` or jQuery's map, binds each
+    item from its `source` and evaluates the body MiniLinq.
+
+    If `name` is provided to the constructor, then instead of replacing
+    the environment with each row, it will just bind the row to `name`,
+    enabling references to the rest of the env.
     """

     def __init__(self, source, body, name=None):
@@ -249,11 +284,13 @@ def __init__(self, source, body, name=None):
         self.source = source
         self.name = name
         self.body = body
-    
+
     def eval(self, env):
         source_result = self.source.eval(env)

-        def iterate(env=env, source_result=source_result): # Python closure workaround
+        def iterate(
+            env=env, source_result=source_result
+        ):  # Python closure workaround
             if self.name:
                 for item in source_result:
                     yield self.body.eval(env.bind(self.name, item))
             else:
                 for item in source_result:
                     yield self.body.eval(env.replace(item))

         return RepeatableIterator(iterate)

     def __eq__(self, other):
-        return isinstance(other, Map) and self.name == other.name and self.source == other.source and self.body == other.body
+        return (
+            isinstance(other, Map)
+            and self.name == other.name
+            and self.source == other.source
+            and self.body == other.body
+        )

     @classmethod
     def from_jvalue(cls, jvalue):
         fields = jvalue['Map']
         # TODO: catch errors and give informative error messages
-        return cls(body = MiniLinq.from_jvalue(fields['body']),
-                   source = MiniLinq.from_jvalue(fields['source']),
-                   name = fields.get('name'))
+        return cls(
+            body=MiniLinq.from_jvalue(fields['body']),
+            source=MiniLinq.from_jvalue(fields['source']),
+            name=fields.get('name')
+        )

     def to_jvalue(self):
-        return {'Map': {'body': self.body.to_jvalue(),
-                        'source': self.source.to_jvalue(),
-                        'name': self.name}}
+        return {
+            'Map': {
+                'body': self.body.to_jvalue(),
+                'source': self.source.to_jvalue(),
+                'name': self.name
+            }
+        }
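
# Map evaluation with the same hypothetical FakeEnv from the Filter
# sketch: each source item is bound to 'n' and the body is evaluated
# once per item.
mapped = Map(
    source=Literal([1, 2, 3]),
    body=Reference('n'),
    name='n',
).eval(FakeEnv())
assert list(mapped) == [1, 2, 3]
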

 class FlatMap(MiniLinq):
     """
-    Somewhat like a JOIN, but not quite. Called `SelectMany`
-    in LINQ and `flatMap` other languages. Obvious equivalence:
-    `flatMap f = flatten . map f` but so common it is useful to
-    have around.
-
-    If `name` is provided to the constructor, then instead of
-    replacing the environment with each row, it will just
-    bind the row to `name`, enabling references to the
-    rest of the env.
+    Somewhat like a JOIN, but not quite. Called `SelectMany` in LINQ and
+    `flatMap` in other languages. Obvious equivalence: `flatMap f =
+    flatten . map f` but so common it is useful to have around.
+
+    If `name` is provided to the constructor, then instead of replacing
+    the environment with each row, it will just bind the row to `name`,
+    enabling references to the rest of the env.
     """

     def __init__(self, source, body, name=None):
@@ -299,14 +345,19 @@ def __init__(self, source, body, name=None):
         self.source = source
         self.name = name
         self.body = body
-    
+
     def eval(self, env):
         source_result = self.source.eval(env)

-        def iterate(env=env, source_result=source_result): # Python closure workaround
+        def iterate(
+            env=env,
+            source_result=source_result
+        ):  # Python closure workaround
             if self.name:
                 for item in source_result:
-                    for result_item in self.body.eval(env.bind(self.name, item)):
+                    for result_item in self.body.eval(
+                        env.bind(self.name, item)
+                    ):
                         yield result_item
             else:
                 for item in source_result:
                     for result_item in self.body.eval(env.replace(item)):
                         yield result_item

         return RepeatableIterator(iterate)

     def __eq__(self, other):
-        return isinstance(other, FlatMap) and self.name == other.name and self.source == other.source and self.body == other.body
-
+        return (
+            isinstance(other, FlatMap)
+            and self.name == other.name
+            and self.source == other.source
+            and self.body == other.body
+        )

     @classmethod
     def from_jvalue(cls, jvalue):
         fields = jvalue['FlatMap']
         # TODO: catch errors and give informative error messages
-        return cls(body = MiniLinq.from_jvalue(fields['body']),
-                   source = MiniLinq.from_jvalue(fields['source']),
-                   name = fields.get('name'))
+        return cls(
+            body=MiniLinq.from_jvalue(fields['body']),
+            source=MiniLinq.from_jvalue(fields['source']),
+            name=fields.get('name')
+        )

     def to_jvalue(self):
-        return {'FlatMap': {'body': self.body.to_jvalue(),
-                            'source': self.source.to_jvalue(),
-                            'name': self.name}}
+        return {
+            'FlatMap': {
+                'body': self.body.to_jvalue(),
+                'source': self.source.to_jvalue(),
+                'name': self.name
+            }
+        }
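
# FlatMap with the same hypothetical FakeEnv: each body result is itself
# iterated, flattening one level, which is the `flatMap f = flatten . map f`
# equivalence from the docstring.
flattened = FlatMap(
    source=Literal([[1, 2], [3]]),
    body=Reference('chunk'),
    name='chunk',
).eval(FakeEnv())
assert list(flattened) == [1, 2, 3]
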
""" - def __init__(self, table, headings, source, missing_value=None, data_types=None): - "(str, [str], [MiniLinq]) -> MiniLinq" + def __init__( + self, + table: str, + headings: ListType[str], + source: ListType[MiniLinq], + missing_value=None, + data_types=None, + ): self.table = table self.headings = headings self.source = source @@ -418,20 +495,24 @@ def coerce_cell(self, cell): try: return self.coerce_cell_blithely(cell) except Exception: - logger.exception('Error converting value to exportable form: %r' % cell) + logger.exception( + 'Error converting value to exportable form: %r' % cell + ) return '' - + def coerce_row(self, row): return [self.coerce_cell(cell) for cell in row] def eval(self, env): rows = self.source.eval(env) - env.emit_table(TableSpec( - name=self.table, - headings=[heading.eval(env) for heading in self.headings], - rows=map(self.coerce_row, rows), - data_types=[lit.v for lit in self.data_types] - )) + env.emit_table( + TableSpec( + name=self.table, + headings=[heading.eval(env) for heading in self.headings], + rows=map(self.coerce_row, rows), + data_types=[lit.v for lit in self.data_types] + ) + ) @classmethod def from_jvalue(cls, jvalue): @@ -439,30 +520,41 @@ def from_jvalue(cls, jvalue): return cls( table=fields['table'], source=MiniLinq.from_jvalue(fields['source']), - headings=[MiniLinq.from_jvalue(heading) for heading in fields['headings']], + headings=[ + MiniLinq.from_jvalue(heading) for heading in fields['headings'] + ], missing_value=fields.get('missing_value'), data_types=fields.get('data_types'), ) def to_jvalue(self): - return {'Emit': {'table': self.table, - 'headings': [heading.to_jvalue() for heading in self.headings], - 'source': self.source.to_jvalue(), - 'missing_value': self.missing_value, - 'data_types': [heading.to_jvalue() for heading in self.headings]}} + return { + 'Emit': { + 'table': + self.table, + 'headings': [heading.to_jvalue() for heading in self.headings], + 'source': + self.source.to_jvalue(), + 'missing_value': + self.missing_value, + 'data_types': [ + heading.to_jvalue() for heading in self.headings + ] + } + } def __eq__(self, other): return ( isinstance(other, Emit) and self.table == other.table - and self.headings == other.headings - and self.source == other.source + and self.headings == other.headings and self.source == other.source and self.missing_value == other.missing_value and self.data_types == other.data_types ) def __repr__(self): return '%s(table=%r, headings=%r, source=%r, missing_value=%r)' % ( - self.__class__.__name__, self.table, self.headings, self.source, self.missing_value + self.__class__.__name__, self.table, self.headings, self.source, + self.missing_value ) diff --git a/commcare_export/misc.py b/commcare_export/misc.py index 70aa41e3..f399e605 100644 --- a/commcare_export/misc.py +++ b/commcare_export/misc.py @@ -11,7 +11,8 @@ def digest_file(path): with io.open(path, 'rb') as filehandle: digest = hashlib.md5() while True: - chunk = filehandle.read(4096) # Arbitrary choice of size to be ~filesystem block size friendly + # Arbitrary choice of size to be ~filesystem block size friendly + chunk = filehandle.read(4096) if not chunk: break digest.update(chunk) @@ -21,6 +22,7 @@ def digest_file(path): def unwrap(arg_name): def unwrapper(fn): + @functools.wraps(fn) def _inner(*args): callargs = inspect.getcallargs(fn, *args) diff --git a/commcare_export/repeatable_iterator.py b/commcare_export/repeatable_iterator.py index 596d6413..75b22d3e 100644 --- a/commcare_export/repeatable_iterator.py +++ 
diff --git a/commcare_export/repeatable_iterator.py b/commcare_export/repeatable_iterator.py
index 596d6413..75b22d3e 100644
--- a/commcare_export/repeatable_iterator.py
+++ b/commcare_export/repeatable_iterator.py
@@ -1,11 +1,9 @@
-
-
 class RepeatableIterator(object):
     """
-    Pass something iterable into this and,
-    unless it has crufty issues, voila.
+    Pass something iterable into this and, unless it has crufty issues,
+    voila.
     """
-    
+
     def __init__(self, generator):
         self.generator = generator
         self.__val = None
diff --git a/commcare_export/specs.py b/commcare_export/specs.py
index a121e36e..f4ee0216 100644
--- a/commcare_export/specs.py
+++ b/commcare_export/specs.py
@@ -1,5 +1,3 @@
-
-
 class TableSpec:

     def __init__(self, name, headings, rows, data_types=None):
diff --git a/commcare_export/utils.py b/commcare_export/utils.py
index 85f4b045..7154e86f 100644
--- a/commcare_export/utils.py
+++ b/commcare_export/utils.py
@@ -14,17 +14,15 @@ def get_checkpoint_manager(args, require_query=True):
         raise

     return CheckpointManager(
-        args.output, args.query, md5,
-        args.project, args.commcare_hq, args.checkpoint_key
+        args.output, args.query, md5, args.project, args.commcare_hq,
+        args.checkpoint_key
     )


 def confirm(message):
-    confirm = input(
-        """
-        {}? [y/N]
-        """.format(message)
-    )
+    confirm = input(f"""
+    {message}? [y/N]
+    """)
     return confirm == "y"
@@ -34,18 +32,18 @@ def print_runs(runs):
     for run in runs:
         rows.append([
             run.time_of_run, run.since_param, "True" if run.final else "False",
-            run.project, run.query_file_name, run.query_file_md5, run.key, run.table_name, run.commcare
+            run.project, run.query_file_name, run.query_file_md5, run.key,
+            run.table_name, run.commcare
         ])

-    rows = [
-        [val if val is not None else '' for val in row]
-        for row in rows
-    ]
+    rows = [[val if val is not None else '' for val in row] for row in rows]

-    StreamingMarkdownTableWriter(sys.stdout, compute_widths=True).write_table({
+    StreamingMarkdownTableWriter(
+        sys.stdout, compute_widths=True
+    ).write_table({
         'headings': [
-            "Checkpoint Time", "Batch end date", "Export Complete",
-            "Project", "Query Filename", "Query MD5", "Key", "Table", "CommCare HQ"
+            "Checkpoint Time", "Batch end date", "Export Complete", "Project",
+            "Query Filename", "Query MD5", "Key", "Table", "CommCare HQ"
         ],
         'rows': rows
     })
diff --git a/commcare_export/utils_cli.py b/commcare_export/utils_cli.py
index be84d0a6..3322409d 100644
--- a/commcare_export/utils_cli.py
+++ b/commcare_export/utils_cli.py
@@ -26,14 +26,18 @@ def run(self, args):
 class ListHistoryCommand(BaseCommand):
     slug = 'history'
     help = """List export history. History will be filtered by arguments provided.
-
-    This command only applies when exporting to a SQL database. The command lists
-    the checkpoints that have been created by the command.
+
+    This command only applies when exporting to a SQL database. The command
+    lists the checkpoints that have been created by the command.
     """

     @classmethod
     def add_arguments(cls, parser):
-        parser.add_argument('--limit', default=10, help="Limit the number of export runs to display")
+        parser.add_argument(
+            '--limit',
+            default=10,
+            help="Limit the number of export runs to display"
+        )
         parser.add_argument('--output', required=True, help='SQL Database URL')
         shared_args = {'project', 'query', 'checkpoint_key', 'commcare_hq'}
         for arg in CLI_ARGS:
@@ -62,22 +66,27 @@ class SetKeyCommand(BaseCommand):
     slug = 'set-checkpoint-key'
     help = """Set the key for a particular checkpoint.

-    This command is used to migrate an non-keyed checkpoint to a keyed checkpoint.
+    This command is used to migrate a non-keyed checkpoint to a keyed
+    checkpoint.
- This is useful if you already have a populated export database and do not wish to trigger - rebuilds after editing the query file. + This is useful if you already have a populated export database and do + not wish to trigger rebuilds after editing the query file. - For example, you've been running the export tool with query file A.xlsx and have a fully populated - database. Now you need to add an extra column to the table but only want to populate it with new data. + For example, you've been running the export tool with query file A.xlsx + and have a fully populated database. Now you need to add an extra column + to the table but only want to populate it with new data. - What you need to do is update your current checkpoint with a key that you can then use when running - the command from now on. + What you need to do is update your current checkpoint with a key that + you can then use when running the command from now on. - $ commcare-export-utils set-key --project X --query A.xlsx --output [SQL URL] --checkpoint-key my-key + $ commcare-export-utils set-key --project X --query A.xlsx \\ + --output [SQL URL] --checkpoint-key my-key Now when you run the export tool in future you can use this key: - $ commcare-export --project X --query A.xlsx --output [SQL URL] --checkpoint-key my-key ... + $ commcare-export --project X --query A.xlsx --output [SQL URL] \\ + --checkpoint-key my-key ... + """ @classmethod @@ -109,7 +118,9 @@ def run(self, args): return print_runs(runs_no_key) - if confirm("Do you want to set the key for this checkpoint to '{}'".format(key)): + if confirm( + f"Do you want to set the key for this checkpoint to '{key}'" + ): for checkpoint in runs_no_key: checkpoint.key = key manager.update_checkpoint(checkpoint) @@ -118,10 +129,7 @@ def run(self, args): print_runs(runs_no_key) -COMMANDS = [ - ListHistoryCommand, - SetKeyCommand -] +COMMANDS = [ListHistoryCommand, SetKeyCommand] def main(argv): @@ -143,12 +151,17 @@ def main(argv): try: arg.encode('utf-8') except UnicodeDecodeError: - sys.stderr.write(u"ERROR: Argument '%s' contains unicode characters. " - u"Only ASCII characters are supported.\n" % unicode(arg, 'utf-8')) + sys.stderr.write( + u"ERROR: Argument '%s' contains unicode characters. 
" + u"Only ASCII characters are supported.\n" + % unicode(arg, 'utf-8') + ) sys.exit(1) - logging.basicConfig(level=logging.WARN, - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s') + logging.basicConfig( + level=logging.WARN, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + ) exit(main_with_args(args)) diff --git a/commcare_export/version.py b/commcare_export/version.py index f2e430ea..bfa32bb9 100644 --- a/commcare_export/version.py +++ b/commcare_export/version.py @@ -16,7 +16,9 @@ def stored_version(): def git_version(): - described_version_bytes = subprocess.Popen(['git', 'describe'], stdout=subprocess.PIPE).communicate()[0].strip() + described_version_bytes = subprocess.Popen( + ['git', 'describe'], stdout=subprocess.PIPE + ).communicate()[0].strip() return described_version_bytes.decode('ascii') diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 779f0a85..f176f354 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -227,10 +227,7 @@ def __init__(self, output_stream, compute_widths=False): self.output_stream = output_stream self.compute_widths = compute_widths - def write_table( - self, - table, - ): + def write_table(self, table): col_widths = None if self.compute_widths: col_widths = self._get_column_widths(table) From 84191db429b8a841b86dd6dc27c5bdec130e9cff Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Fri, 22 Apr 2022 15:03:43 +0100 Subject: [PATCH 135/257] yapf tests --- tests/conftest.py | 91 ++- tests/test_checkpointmanager.py | 195 +++++-- tests/test_cli.py | 918 +++++++++++++++++++----------- tests/test_commcare_hq_client.py | 254 +++++++-- tests/test_commcare_minilinq.py | 257 ++++++--- tests/test_excel_query.py | 867 ++++++++++++++++++---------- tests/test_map_format.py | 34 +- tests/test_minilinq.py | 755 +++++++++++++++++------- tests/test_misc.py | 13 +- tests/test_repeatable_iterator.py | 13 +- tests/test_writers.py | 714 +++++++++++++++-------- tests/utils.py | 1 + 12 files changed, 2761 insertions(+), 1351 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2c1fca04..decb3cb7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,14 +7,16 @@ import pytest -TEST_DB = 'test_commcare_export_%s' % uuid.uuid4().hex +TEST_DB = f'test_commcare_export_{uuid.uuid4().hex}' logging.getLogger().setLevel(logging.DEBUG) logging.getLogger().addHandler(logging.StreamHandler()) def pytest_configure(config): - config.addinivalue_line("markers", "dbtest: mark test that requires database access") + config.addinivalue_line( + "markers", "dbtest: mark test that requires database access" + ) config.addinivalue_line("markers", "postgres: mark PostgreSQL test") config.addinivalue_line("markers", "mysql: mark MySQL test") config.addinivalue_line("markers", "mssql: mark MSSQL test") @@ -22,7 +24,10 @@ def pytest_configure(config): def _db_params(request, db_name): db_url = request.param['url'] - sudo_engine = sqlalchemy.create_engine(db_url % request.param.get('admin_db', ''), poolclass=sqlalchemy.pool.NullPool) + sudo_engine = sqlalchemy.create_engine( + db_url % request.param.get('admin_db', ''), + poolclass=sqlalchemy.pool.NullPool + ) db_connection_url = db_url % db_name def tear_down(): @@ -36,7 +41,10 @@ def tear_down(): try: with sqlalchemy.create_engine(db_connection_url).connect(): pass - except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InternalError, DBAPIError): + except ( + sqlalchemy.exc.OperationalError, sqlalchemy.exc.InternalError, + DBAPIError + ): with 
From 84191db429b8a841b86dd6dc27c5bdec130e9cff Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Fri, 22 Apr 2022 15:03:43 +0100
Subject: [PATCH 135/257] yapf tests

---
 tests/conftest.py                 |  91 ++-
 tests/test_checkpointmanager.py   | 195 +++++--
 tests/test_cli.py                 | 918 +++++++++++++++++++-----------
 tests/test_commcare_hq_client.py  | 254 +++++++--
 tests/test_commcare_minilinq.py   | 257 ++++++---
 tests/test_excel_query.py         | 867 ++++++++++++++++++----------
 tests/test_map_format.py          |  34 +-
 tests/test_minilinq.py            | 755 +++++++++++++++++-------
 tests/test_misc.py                |  13 +-
 tests/test_repeatable_iterator.py |  13 +-
 tests/test_writers.py             | 714 +++++++++++++++--------
 tests/utils.py                    |   1 +
 12 files changed, 2761 insertions(+), 1351 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 2c1fca04..decb3cb7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,14 +7,16 @@
 import pytest

-TEST_DB = 'test_commcare_export_%s' % uuid.uuid4().hex
+TEST_DB = f'test_commcare_export_{uuid.uuid4().hex}'

 logging.getLogger().setLevel(logging.DEBUG)
 logging.getLogger().addHandler(logging.StreamHandler())


 def pytest_configure(config):
-    config.addinivalue_line("markers", "dbtest: mark test that requires database access")
+    config.addinivalue_line(
+        "markers", "dbtest: mark test that requires database access"
+    )
     config.addinivalue_line("markers", "postgres: mark PostgreSQL test")
     config.addinivalue_line("markers", "mysql: mark MySQL test")
     config.addinivalue_line("markers", "mssql: mark MSSQL test")
@@ -22,7 +24,10 @@ def pytest_configure(config):

 def _db_params(request, db_name):
     db_url = request.param['url']
-    sudo_engine = sqlalchemy.create_engine(db_url % request.param.get('admin_db', ''), poolclass=sqlalchemy.pool.NullPool)
+    sudo_engine = sqlalchemy.create_engine(
+        db_url % request.param.get('admin_db', ''),
+        poolclass=sqlalchemy.pool.NullPool
+    )
     db_connection_url = db_url % db_name

     def tear_down():
@@ -36,7 +41,10 @@ def tear_down():
     try:
         with sqlalchemy.create_engine(db_connection_url).connect():
             pass
-    except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.InternalError, DBAPIError):
+    except (
+        sqlalchemy.exc.OperationalError, sqlalchemy.exc.InternalError,
+        DBAPIError
+    ):
         with sudo_engine.connect() as conn:
             if 'postgres' in db_url:
                 conn.execute('rollback')
@@ -44,7 +52,9 @@ def tear_down():
                 conn.connection.connection.autocommit = True
             conn.execute('create database %s' % db_name)
     else:
-        raise Exception('Database %s already exists; refusing to overwrite' % db_name)
+        raise Exception(
+            'Database %s already exists; refusing to overwrite' % db_name
+        )

     request.addfinalizer(tear_down)
@@ -53,33 +63,56 @@ def tear_down():
     return params

-postgres_base = os.environ.get('POSTGRES_URL', 'postgresql://postgres@localhost/')
+postgres_base = os.environ.get(
+    'POSTGRES_URL', 'postgresql://postgres@localhost/'
+)
 mysql_base = os.environ.get('MYSQL_URL', 'mysql+pymysql://travis@/')
-mssql_base = os.environ.get('MSSQL_URL', 'mssql+pyodbc://SA:Password-123@localhost/')
+mssql_base = os.environ.get(
+    'MSSQL_URL', 'mssql+pyodbc://SA:Password-123@localhost/'
+)


-@pytest.fixture(scope="class", params=[
-    pytest.param({
-        'url': "{}%s".format(postgres_base),
-        'admin_db': 'postgres'
-    }, marks=pytest.mark.postgres),
-    pytest.param({
-        'url': '{}%s?charset=utf8mb4'.format(mysql_base),
-    }, marks=pytest.mark.mysql),
-    pytest.param({
-        'url': '{}%s?driver=ODBC+Driver+17+for+SQL+Server'.format(mssql_base),
-        'admin_db': 'master'
-    }, marks=pytest.mark.mssql)
-], ids=['postgres', 'mysql', 'mssql'])
+@pytest.fixture(
+    scope="class",
+    params=[
+        pytest.param(
+            {
+                'url': f"{postgres_base}%s",
+                'admin_db': 'postgres'
+            },
+            marks=pytest.mark.postgres,
+        ),
+        pytest.param(
+            {
+                'url': f'{mysql_base}%s?charset=utf8mb4',
+            },
+            marks=pytest.mark.mysql,
+        ),
+        pytest.param(
+            {
+                'url':
+                    f'{mssql_base}%s?driver=ODBC+Driver+17+for+SQL+Server',
+                'admin_db':
+                    'master'
+            },
+            marks=pytest.mark.mssql,
+        )
+    ],
+    ids=['postgres', 'mysql', 'mssql']
+)
 def db_params(request):
     return _db_params(request, TEST_DB)


-@pytest.fixture(scope="class", params=[
-    {
-        'url': "{}%s".format(postgres_base),
-        'admin_db': 'postgres'
-    },
-], ids=['postgres'])
+@pytest.fixture(
+    scope="class",
+    params=[
+        {
+            'url': f"{postgres_base}%s",
+            'admin_db': 'postgres'
+        },
+    ],
+    ids=['postgres']
+)
 def pg_db_params(request):
-    return _db_params(request, 'test_commcare_export_%s' % uuid.uuid4().hex)
+    return _db_params(request, f'test_commcare_export_{uuid.uuid4().hex}')
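
# A hypothetical consumer of the db_params fixture above, illustrating
# the shape _db_params returns: the request's params plus a 'url' for
# the freshly created database. Uses pytest and sqlalchemy as imported
# at the top of conftest.py.
@pytest.mark.dbtest
def test_can_connect(db_params):
    engine = sqlalchemy.create_engine(db_params['url'])
    with engine.connect():
        pass
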
diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py
index 1906d7e0..ab26469e 100644
--- a/tests/test_checkpointmanager.py
+++ b/tests/test_checkpointmanager.py
@@ -15,13 +15,25 @@

 @pytest.fixture()
 def manager(db_params):
-    manager = CheckpointManager(db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool)
+    manager = CheckpointManager(
+        db_params['url'],
+        'query',
+        '123',
+        'test',
+        'hq',
+        poolclass=sqlalchemy.pool.NullPool
+    )
     try:
         yield manager
     finally:
         with manager:
-            manager.connection.execute(sqlalchemy.sql.text('DROP TABLE IF EXISTS commcare_export_runs'))
-            manager.connection.execute(sqlalchemy.sql.text('DROP TABLE IF EXISTS alembic_version'))
+            manager.connection.execute(
+                sqlalchemy.sql
+                .text('DROP TABLE IF EXISTS commcare_export_runs')
+            )
+            manager.connection.execute(
+                sqlalchemy.sql.text('DROP TABLE IF EXISTS alembic_version')
+            )


 @pytest.fixture()
 def configured_manager(manager):
     manager.create_checkpoint_table()
@@ -32,6 +44,7 @@ def configured_manager(manager):

 @pytest.mark.dbtest
 class TestCheckpointManager(object):
+
     def test_create_checkpoint_table(self, manager, revision='head'):
         manager.create_checkpoint_table(revision)
         with manager:

     def test_checkpoint_table_exists(self, manager):
         # This test can be removed at some point in the future.
         self.test_create_checkpoint_table(manager, '9945abb4ec70')
         with manager:
-            manager.connection.execute(sqlalchemy.sql.text('DROP TABLE alembic_version'))
+            manager.connection.execute(
+                sqlalchemy.sql.text('DROP TABLE alembic_version')
+            )
         manager.create_checkpoint_table()

     def test_get_time_of_last_checkpoint(self, configured_manager):
         manager = configured_manager.for_dataset('form', ['t1'])
-        manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed)
+        manager.set_checkpoint(
+            datetime.datetime.utcnow(), PaginationMode.date_indexed
+        )
         second_run = datetime.datetime.utcnow()
         manager.set_checkpoint(second_run, PaginationMode.date_indexed)

     def test_get_last_checkpoint_no_args(self, configured_manager):
         # test that we can still get the time of last run with no project and commcare args
         with session_scope(configured_manager.Session) as session:
             since_param = datetime.datetime.utcnow().isoformat()
-            session.add(Checkpoint(
-                id=uuid.uuid4().hex,
-                query_file_name=configured_manager.query,
-                query_file_md5=configured_manager.query_md5,
-                project=None,
-                commcare=None,
-                since_param=since_param,
-                time_of_run=datetime.datetime.utcnow().isoformat(),
-                final=True
-            ))
+            session.add(
+                Checkpoint(
+                    id=uuid.uuid4().hex,
+                    query_file_name=configured_manager.query,
+                    query_file_md5=configured_manager.query_md5,
+                    project=None,
+                    commcare=None,
+                    since_param=since_param,
+                    time_of_run=datetime.datetime.utcnow().isoformat(),
+                    final=True
+                )
+            )
         manager = configured_manager.for_dataset('form', ['t1', 't2'])
         checkpoint = manager.get_last_checkpoint()
         assert checkpoint.since_param == since_param

     def test_get_last_checkpoint_no_table(self, configured_manager):
         # also tests that new checkpoints are created with the tables
         with session_scope(configured_manager.Session) as session:
             since_param = datetime.datetime.utcnow().isoformat()
-            session.add(Checkpoint(
-                id=uuid.uuid4().hex,
-                query_file_name=configured_manager.query,
-                query_file_md5=configured_manager.query_md5,
-                project=None,
-                commcare=None,
-                since_param=since_param,
-                time_of_run=datetime.datetime.utcnow().isoformat(),
-                final=True
-            ))
-
-            session.add(Checkpoint(
-                id=uuid.uuid4().hex,
-                query_file_name=configured_manager.query,
-                query_file_md5=configured_manager.query_md5,
-                project=configured_manager.project,
-                commcare=configured_manager.commcare,
-                since_param=since_param,
-                time_of_run=datetime.datetime.utcnow().isoformat(),
-                final=True
-            ))
+            session.add(
+                Checkpoint(
+                    id=uuid.uuid4().hex,
+                    query_file_name=configured_manager.query,
+                    query_file_md5=configured_manager.query_md5,
+                    project=None,
+                    commcare=None,
+                    since_param=since_param,
+                    time_of_run=datetime.datetime.utcnow().isoformat(),
+                    final=True
+                )
+            )
+
+            session.add(
+                Checkpoint(
+                    id=uuid.uuid4().hex,
+                    query_file_name=configured_manager.query,
+                    query_file_md5=configured_manager.query_md5,
+                    project=configured_manager.project,
+                    commcare=configured_manager.commcare,
+                    since_param=since_param,
+                    time_of_run=datetime.datetime.utcnow().isoformat(),
+                    final=True
+                )
+            )
         manager = configured_manager.for_dataset('form', ['t1', 't2'])
         checkpoint = manager.get_last_checkpoint()
         assert checkpoint.since_param == since_param

     def test_clean_on_final_run(self, configured_manager):
         manager = configured_manager.for_dataset('form', ['t1'])
-        manager.set_checkpoint(datetime.datetime.utcnow(),
PaginationMode.date_indexed, doc_id="1") - manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed, doc_id="2") + manager.set_checkpoint( + datetime.datetime.utcnow(), + PaginationMode.date_indexed, + doc_id="1" + ) + manager.set_checkpoint( + datetime.datetime.utcnow(), + PaginationMode.date_indexed, + doc_id="2" + ) def _get_non_final_rows_count(): with session_scope(manager.Session) as session: return session.query(Checkpoint).filter_by(final=False).count() assert _get_non_final_rows_count() == 2 - manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed, True, doc_id="3") + manager.set_checkpoint( + datetime.datetime.utcnow(), + PaginationMode.date_indexed, + True, + doc_id="3" + ) assert _get_non_final_rows_count() == 0 def test_get_time_of_last_checkpoint_with_key(self, configured_manager): @@ -128,7 +164,8 @@ def test_get_time_of_last_checkpoint_with_key(self, configured_manager): last_run_time = datetime.datetime.utcnow() manager.set_checkpoint(last_run_time, PaginationMode.date_indexed) - assert manager.get_time_of_last_checkpoint() == last_run_time.isoformat() + assert manager.get_time_of_last_checkpoint( + ) == last_run_time.isoformat() manager.key = None assert manager.get_time_of_last_checkpoint() is None @@ -138,23 +175,36 @@ def test_multiple_tables(self, configured_manager): manager = configured_manager.for_dataset('form', [t1, t2]) last_run_time = datetime.datetime.utcnow() doc_id = uuid.uuid4().hex - manager.set_checkpoint(last_run_time, PaginationMode.date_indexed, doc_id=doc_id) - - assert manager.for_dataset('form', [t1]).get_time_of_last_checkpoint() == last_run_time.isoformat() - assert manager.for_dataset('form', [t2]).get_time_of_last_checkpoint() == last_run_time.isoformat() - assert manager.for_dataset('form', ['t3']).get_last_checkpoint() is None + manager.set_checkpoint( + last_run_time, PaginationMode.date_indexed, doc_id=doc_id + ) + + assert manager.for_dataset('form', [ + t1 + ]).get_time_of_last_checkpoint() == last_run_time.isoformat() + assert manager.for_dataset('form', [ + t2 + ]).get_time_of_last_checkpoint() == last_run_time.isoformat() + assert manager.for_dataset('form', + ['t3']).get_last_checkpoint() is None checkpoints = manager.list_checkpoints() assert len(checkpoints) == 2 - assert {checkpoints[0].table_name, checkpoints[1].table_name} == {t1, t2} - assert {checkpoints[0].last_doc_id, checkpoints[1].last_doc_id} == {doc_id} + assert {checkpoints[0].table_name, + checkpoints[1].table_name} == {t1, t2} + assert {checkpoints[0].last_doc_id, + checkpoints[1].last_doc_id} == {doc_id} def test_get_latest_checkpoints(self, configured_manager): manager = configured_manager.for_dataset('form', ['t1', 't2']) - manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed) + manager.set_checkpoint( + datetime.datetime.utcnow(), PaginationMode.date_indexed + ) manager.query_md5 = '456' - manager.set_checkpoint(datetime.datetime.utcnow(), PaginationMode.date_indexed) + manager.set_checkpoint( + datetime.datetime.utcnow(), PaginationMode.date_indexed + ) latest_time = datetime.datetime.utcnow() manager.set_checkpoint(latest_time, PaginationMode.date_indexed) @@ -162,15 +212,23 @@ def test_get_latest_checkpoints(self, configured_manager): assert len(checkpoints) == 2 assert [c.table_name for c in checkpoints] == ['t1', 't2'] assert {c.query_file_md5 for c in checkpoints} == {'456'} - assert {c.since_param for c in checkpoints} == {latest_time.isoformat()} + assert {c.since_param for c in 
checkpoints + } == {latest_time.isoformat()} -@pytest.mark.parametrize('since, start_over, expected_since, expected_paginator', [ - (None, True, None, PaginationMode.date_indexed), - ('since', False, 'since', PaginationMode.date_indexed), - (None, False, None, PaginationMode.date_indexed), -]) -def test_checkpoint_details_static(since, start_over, expected_since, expected_paginator): +@pytest.mark.parametrize( + 'since, start_over, expected_since, expected_paginator', [ + (None, True, None, PaginationMode.date_indexed), + ('since', False, 'since', PaginationMode.date_indexed), + (None, False, None, PaginationMode.date_indexed), + ] +) +def test_checkpoint_details_static( + since, + start_over, + expected_since, + expected_paginator, +): cmp = CheckpointManagerProvider(None, since, start_over) assert expected_since == cmp.get_since(None) assert expected_paginator == cmp.get_pagination_mode(None) @@ -178,19 +236,32 @@ def test_checkpoint_details_static(since, start_over, expected_since, expected_p @pytest.mark.dbtest class TestCheckpointManagerProvider(object): + def test_checkpoint_details_no_checkpoint(self, configured_manager): manager = configured_manager.for_dataset('form', ['t1']) assert None is CheckpointManagerProvider().get_since(manager) - assert PaginationMode.date_indexed == CheckpointManagerProvider().get_pagination_mode(manager) + assert PaginationMode.date_indexed == CheckpointManagerProvider( + ).get_pagination_mode(manager) def test_checkpoint_details_latest_from_db(self, configured_manager): manager = configured_manager.for_dataset('form', ['t1']) - self._test_checkpoint_details(manager, datetime.datetime.utcnow(), PaginationMode.date_modified) - self._test_checkpoint_details(manager, datetime.datetime.utcnow(), PaginationMode.date_indexed) - self._test_checkpoint_details(manager, datetime.datetime.utcnow(), PaginationMode.date_modified) - - def _test_checkpoint_details(self, manager, checkpoint_date, pagination_mode): + self._test_checkpoint_details( + manager, datetime.datetime.utcnow(), PaginationMode.date_modified + ) + self._test_checkpoint_details( + manager, datetime.datetime.utcnow(), PaginationMode.date_indexed + ) + self._test_checkpoint_details( + manager, datetime.datetime.utcnow(), PaginationMode.date_modified + ) + + def _test_checkpoint_details( + self, + manager, + checkpoint_date, + pagination_mode, + ): manager.set_checkpoint(checkpoint_date, pagination_mode) cmp = CheckpointManagerProvider() diff --git a/tests/test_cli.py b/tests/test_cli.py index 269cd67c..a6b54430 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -26,12 +26,7 @@ from commcare_export.specs import TableSpec from commcare_export.writers import JValueTableWriter -CLI_ARGS_BY_NAME = { - arg.name: arg - for arg in CLI_ARGS -} - - +CLI_ARGS_BY_NAME = {arg.name: arg for arg in CLI_ARGS} DEFAULT_BATCH_SIZE = 200 @@ -55,253 +50,291 @@ def make_args(project='test', username='test', password='test', **kwargs): def mock_hq_client(include_parent): return MockCommCareHqClient({ - 'form': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, - [ - {'id': 1, 'form': {'name': 'f1', 'case': {'@case_id': 'c1'}}, - 'metadata': {'userID': 'id1'}}, - {'id': 2, 'form': {'name': 'f2', 'case': {'@case_id': 'c2'}}, - 'metadata': {'userID': 'id2'}}, - ] - ), - ], - 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, - [ - {'id': 'case1'}, - {'id': 'case2'}, - ] - ) - ], - 'user': [ - ( - {'limit': DEFAULT_BATCH_SIZE}, - [ - {'id': 'id1', 'email': 'em1', 'first_name': 'fn1', 
- 'last_name': 'ln1', - 'user_data': {'commcare_location_id': 'lid1', - 'commcare_location_ids': ['lid1', 'lid2'], - 'commcare_project': 'p1'}, - 'username': 'u1'}, - {'id': 'id2', 'default_phone_number': 'pn2', 'email': 'em2', - 'first_name': 'fn2', 'last_name': 'ln2', - 'resource_uri': 'ru0', - 'user_data': {'commcare_location_id': 'lid2', - 'commcare_project': 'p2'}, - 'username': 'u2'} - ] - ) - ], - 'location_type': [ - ( - {'limit': DEFAULT_BATCH_SIZE}, - [ - {'administrative': True, 'code': 'hq', 'domain': 'd1', 'id': 1, - 'name': 'HQ', 'parent': None, 'resource_uri': 'lt1', - 'shares_cases': False, 'view_descendants': True}, - {'administrative': False, 'code': 'local', 'domain': 'd1', - 'id': 2, 'name': 'Local', - 'parent': 'lt1', 'resource_uri': 'lt2', - 'shares_cases': True, 'view_descendants': True} - ] - ) - ], - 'location': [ - ( - {'limit': DEFAULT_BATCH_SIZE}, - [ - {'id': 'id1', 'created_at': '2020-04-01T21:57:26.403053', - 'domain': 'd1', 'external_id': 'eid1', - 'last_modified': '2020-04-01T21:58:23.88343', - 'latitude': '11.2', 'location_data': {'p1': 'ld1'}, - 'location_id': 'lid1', 'location_type': 'lt1', - 'longitude': '-20.5', 'name': 'n1', - 'resource_uri': 'ru1', 'site_code': 'sc1'}, - {'id': 'id2', 'created_at': '2020-04-01T21:58:47.627371', - 'domain': 'd2', 'last_modified': '2020-04-01T21:59:16.018411', - 'latitude': '-56.3', 'location_data': {'p1': 'ld2'}, - 'location_id': 'lid2', 'location_type': 'lt2', - 'longitude': '18.7', 'name': 'n2', - 'parent': 'ru1' if include_parent else None, - 'resource_uri': 'ru2', 'site_code': 'sc2'} - ] - ) - ], + 'form': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on' + }, [ + { + 'id': 1, + 'form': { + 'name': 'f1', + 'case': { + '@case_id': 'c1' + } + }, + 'metadata': { + 'userID': 'id1' + } + }, + { + 'id': 2, + 'form': { + 'name': 'f2', + 'case': { + '@case_id': 'c2' + } + }, + 'metadata': { + 'userID': 'id2' + } + }, + ]),], + 'case': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on' + }, [ + { + 'id': 'case1' + }, + { + 'id': 'case2' + }, + ])], + 'user': [({ + 'limit': DEFAULT_BATCH_SIZE + }, [{ + 'id': 'id1', + 'email': 'em1', + 'first_name': 'fn1', + 'last_name': 'ln1', + 'user_data': { + 'commcare_location_id': 'lid1', + 'commcare_location_ids': ['lid1', 'lid2'], + 'commcare_project': 'p1' + }, + 'username': 'u1' + }, { + 'id': 'id2', + 'default_phone_number': 'pn2', + 'email': 'em2', + 'first_name': 'fn2', + 'last_name': 'ln2', + 'resource_uri': 'ru0', + 'user_data': { + 'commcare_location_id': 'lid2', + 'commcare_project': 'p2' + }, + 'username': 'u2' + }])], + 'location_type': [({ + 'limit': DEFAULT_BATCH_SIZE + }, [{ + 'administrative': True, + 'code': 'hq', + 'domain': 'd1', + 'id': 1, + 'name': 'HQ', + 'parent': None, + 'resource_uri': 'lt1', + 'shares_cases': False, + 'view_descendants': True + }, { + 'administrative': False, + 'code': 'local', + 'domain': 'd1', + 'id': 2, + 'name': 'Local', + 'parent': 'lt1', + 'resource_uri': 'lt2', + 'shares_cases': True, + 'view_descendants': True + }])], + 'location': [({ + 'limit': DEFAULT_BATCH_SIZE + }, [{ + 'id': 'id1', + 'created_at': '2020-04-01T21:57:26.403053', + 'domain': 'd1', + 'external_id': 'eid1', + 'last_modified': '2020-04-01T21:58:23.88343', + 'latitude': '11.2', + 'location_data': { + 'p1': 'ld1' + }, + 'location_id': 'lid1', + 'location_type': 'lt1', + 'longitude': '-20.5', + 'name': 'n1', + 'resource_uri': 'ru1', + 'site_code': 'sc1' + }, { + 'id': 'id2', + 'created_at': '2020-04-01T21:58:47.627371', + 'domain': 'd2', + 
'last_modified': '2020-04-01T21:59:16.018411', + 'latitude': '-56.3', + 'location_data': { + 'p1': 'ld2' + }, + 'location_id': 'lid2', + 'location_type': 'lt2', + 'longitude': '18.7', + 'name': 'n2', + 'parent': 'ru1' if include_parent else None, + 'resource_uri': 'ru2', + 'site_code': 'sc2' + }])], }) -EXPECTED_MULTIPLE_TABLES_RESULTS = [ - { - "name": "Forms", - "headings": ["id", "name"], - "rows": [ - ["1", "f1"], - ["2", "f2"] - ], - }, - { - "name": "Other cases", - "headings": ["id"], - "rows": [ - ["case1"], - ["case2"] - ], - }, - { - "name": "Cases", - "headings": ["case_id"], - "rows": [ - ["c1"], - ["c2"] - ], - } -] - -EXPECTED_USERS_RESULTS = [ - { - "name": "commcare_users", - "headings": [ - "id", - "default_phone_number", - "email", - "first_name", - "groups", - "last_name", - "phone_numbers", - "resource_uri", - "commcare_location_id", - "commcare_location_ids", - "commcare_primary_case_sharing_id", - "commcare_project", - "username" - ], - "rows": [ - ["id1", None, "em1", "fn1", None, "ln1", None, None, "lid1", - "lid1,lid2", None, "p1", "u1"], - ["id2", "pn2", "em2", "fn2", None, "ln2", None, "ru0", "lid2", - None, None, "p2", "u2"] - ] - } -] +EXPECTED_MULTIPLE_TABLES_RESULTS = [{ + "name": "Forms", + "headings": ["id", "name"], + "rows": [["1", "f1"], ["2", "f2"]], +}, { + "name": "Other cases", + "headings": ["id"], + "rows": [["case1"], ["case2"]], +}, { + "name": "Cases", + "headings": ["case_id"], + "rows": [["c1"], ["c2"]], +}] + +EXPECTED_USERS_RESULTS = [{ + "name": + "commcare_users", + "headings": [ + "id", "default_phone_number", "email", "first_name", "groups", + "last_name", "phone_numbers", "resource_uri", "commcare_location_id", + "commcare_location_ids", "commcare_primary_case_sharing_id", + "commcare_project", "username" + ], + "rows": [[ + "id1", None, "em1", "fn1", None, "ln1", None, None, "lid1", + "lid1,lid2", None, "p1", "u1" + ], + [ + "id2", "pn2", "em2", "fn2", None, "ln2", None, "ru0", "lid2", + None, None, "p2", "u2" + ]] +}] def get_expected_locations_results(include_parent): - return [ - { - "name": "commcare_locations", - "headings": [ - "id", - "created_at", - "domain", - "external_id", - "last_modified", - "latitude", - "location_data", - "location_id", - "location_type", - "longitude", - "name", - "parent", - "resource_uri", - "site_code", - "location_type_administrative", - "location_type_code", - "location_type_name", - "location_type_parent", - "local", - "hq" - ], - "rows": [ - ["id1", "2020-04-01 21:57:26", "d1", "eid1", - "2020-04-01 21:58:23", "11.2", '{"p1": "ld1", "id": "id1.location_data"}', "lid1", "lt1", - "-20.5", "n1", None, "ru1", "sc1", True, "hq", "HQ", None, - None, "lid1"], - ["id2", "2020-04-01 21:58:47", "d2", None, - "2020-04-01 21:59:16", "-56.3", '{"p1": "ld2", "id": "id2.location_data"}', "lid2", "lt2", - "18.7", "n2", ("ru1" if include_parent else None), "ru2", - "sc2", False, "local", "Local", "lt1", - "lid2", ("lid1" if include_parent else None)] - ] - } - ] + return [{ + "name": + "commcare_locations", + "headings": [ + "id", "created_at", "domain", "external_id", "last_modified", + "latitude", "location_data", "location_id", "location_type", + "longitude", "name", "parent", "resource_uri", "site_code", + "location_type_administrative", "location_type_code", + "location_type_name", "location_type_parent", "local", "hq" + ], + "rows": [[ + "id1", "2020-04-01 21:57:26", "d1", "eid1", "2020-04-01 21:58:23", + "11.2", '{"p1": "ld1", "id": "id1.location_data"}', "lid1", "lt1", + "-20.5", "n1", None, "ru1", "sc1", 
True, "hq", "HQ", None, None, + "lid1" + ], + [ + "id2", "2020-04-01 21:58:47", "d2", None, + "2020-04-01 21:59:16", "-56.3", + '{"p1": "ld2", "id": "id2.location_data"}', + "lid2", "lt2", "18.7", "n2", + ("ru1" if include_parent else None), "ru2", "sc2", False, + "local", "Local", "lt1", "lid2", + ("lid1" if include_parent else None) + ]] + }] class TestCli(unittest.TestCase): def _test_cli(self, args, expected): writer = JValueTableWriter() - with mock.patch('commcare_export.cli._get_writer', return_value=writer): + with mock.patch( + 'commcare_export.cli._get_writer', return_value=writer + ): main_with_args(args) for table in expected: assert writer.tables[table['name']] == TableSpec(**table) - - @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) + @mock.patch( + 'commcare_export.cli._get_api_client', + return_value=mock_hq_client(True) + ) def test_cli(self, mock_client): args = make_args( - query='tests/008_multiple-tables.xlsx', - output_format='json' + query='tests/008_multiple-tables.xlsx', output_format='json' ) self._test_cli(args, EXPECTED_MULTIPLE_TABLES_RESULTS) - @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) + @mock.patch( + 'commcare_export.cli._get_api_client', + return_value=mock_hq_client(True) + ) def test_cli_just_users(self, mock_client): - args = make_args( - output_format='json', - users=True - ) + args = make_args(output_format='json', users=True) self._test_cli(args, EXPECTED_USERS_RESULTS) - @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) + @mock.patch( + 'commcare_export.cli._get_api_client', + return_value=mock_hq_client(True) + ) def test_cli_table_plus_users(self, mock_client): args = make_args( query='tests/008_multiple-tables.xlsx', output_format='json', users=True ) - self._test_cli(args, EXPECTED_MULTIPLE_TABLES_RESULTS + - EXPECTED_USERS_RESULTS) + self._test_cli( + args, EXPECTED_MULTIPLE_TABLES_RESULTS + EXPECTED_USERS_RESULTS + ) - @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) + @mock.patch( + 'commcare_export.cli._get_api_client', + return_value=mock_hq_client(True) + ) def test_cli_just_locations(self, mock_client): - args = make_args( - output_format='json', - locations=True - ) + args = make_args(output_format='json', locations=True) self._test_cli(args, get_expected_locations_results(True)) - @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(False)) + @mock.patch( + 'commcare_export.cli._get_api_client', + return_value=mock_hq_client(False) + ) def test_cli_locations_without_parents(self, mock_client): - args = make_args( - output_format='json', - locations=True - ) + args = make_args(output_format='json', locations=True) self._test_cli(args, get_expected_locations_results(False)) - @mock.patch('commcare_export.cli._get_api_client', return_value=mock_hq_client(True)) + @mock.patch( + 'commcare_export.cli._get_api_client', + return_value=mock_hq_client(True) + ) def test_cli_table_plus_locations(self, mock_client): args = make_args( query='tests/008_multiple-tables.xlsx', output_format='json', locations=True ) - self._test_cli(args, EXPECTED_MULTIPLE_TABLES_RESULTS + - get_expected_locations_results(True)) + self._test_cli( + args, EXPECTED_MULTIPLE_TABLES_RESULTS + + get_expected_locations_results(True) + ) @pytest.fixture(scope='function') def writer(pg_db_params): - writer = SqlWriterWithTearDown(pg_db_params['url'], poolclass=sqlalchemy.pool.NullPool) + writer = 
SqlWriterWithTearDown( + pg_db_params['url'], poolclass=sqlalchemy.pool.NullPool + ) yield writer writer.tear_down() @pytest.fixture(scope='function') def checkpoint_manager(pg_db_params): - cm = CheckpointManager(pg_db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool) + cm = CheckpointManager( + pg_db_params['url'], + 'query', + '123', + 'test', + 'hq', + poolclass=sqlalchemy.pool.NullPool + ) cm.create_checkpoint_table() return cm @@ -323,24 +356,32 @@ def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10): # set this so that it gets written to the checkpoints checkpoint_manager.query = query - # have to mock these to override the pool class otherwise they hold the db connection open - writer_patch = mock.patch('commcare_export.cli._get_writer', return_value=writer) - checkpoint_patch = mock.patch('commcare_export.cli._get_checkpoint_manager', return_value=checkpoint_manager) + # have to mock these to override the pool class otherwise they hold + # the db connection open + writer_patch = mock.patch( + 'commcare_export.cli._get_writer', return_value=writer + ) + checkpoint_patch = mock.patch( + 'commcare_export.cli._get_checkpoint_manager', + return_value=checkpoint_manager + ) with writer_patch, checkpoint_patch: main_with_args(args) def _check_data(writer, expected, table_name, columns): actual = [ - list(row) for row in - writer.engine.execute(f'SELECT {", ".join(columns)} FROM "{table_name}"') + list(row) for row in writer.engine + .execute(f'SELECT {", ".join(columns)} FROM "{table_name}"') ] message = '' if actual != expected: message += 'Data not equal to expected:\n' if len(actual) != len(expected): - message += ' {} rows compared to {} expected\n'.format(len(actual), len(expected)) + message += ' {} rows compared to {} expected\n'.format( + len(actual), len(expected) + ) message += 'Diff:\n' for i, rows in enumerate(zip_longest(actual, expected)): if rows[0] != rows[1]: @@ -350,26 +391,46 @@ def _check_data(writer, expected, table_name, columns): @pytest.mark.dbtest class TestCLIIntegrationTests(object): - def test_write_to_sql_with_checkpoints(self, writer, checkpoint_manager, caplog): + + def test_write_to_sql_with_checkpoints( + self, writer, checkpoint_manager, caplog + ): with open('tests/009_expected_form_data.csv', 'r') as f: reader = csv.reader(f) expected_form_data = list(reader)[1:] - _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', '2012-01-01', '2017-08-29') + _pull_data( + writer, checkpoint_manager, 'tests/009_integration.xlsx', + '2012-01-01', '2017-08-29' + ) self._check_checkpoints(caplog, ['forms', 'batch', 'final']) self._check_data(writer, expected_form_data[:13], 'forms') caplog.clear() - _pull_data(writer, checkpoint_manager, 'tests/009_integration.xlsx', None, '2020-10-11', batch_size=8) + _pull_data( + writer, + checkpoint_manager, + 'tests/009_integration.xlsx', + None, + '2020-10-11', + batch_size=8 + ) self._check_data(writer, expected_form_data, 'forms') self._check_checkpoints(caplog, ['forms', 'batch', 'final']) - runs = list(writer.engine.execute( - 'SELECT * from commcare_export_runs where query_file_name = %s', 'tests/009_integration.xlsx' - )) + runs = list( + writer.engine.execute( + 'SELECT * FROM commcare_export_runs ' + 'WHERE query_file_name = %s', + + 'tests/009_integration.xlsx' + ) + ) assert len(runs) == 2, runs - def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_manager, caplog): + def 
test_write_to_sql_with_checkpoints_multiple_tables( + self, writer, checkpoint_manager, caplog + ): with open('tests/009b_expected_form_1_data.csv', 'r') as f: reader = csv.reader(f) expected_form_1_data = list(reader)[1:] @@ -378,16 +439,29 @@ def test_write_to_sql_with_checkpoints_multiple_tables(self, writer, checkpoint_ reader = csv.reader(f) expected_form_2_data = list(reader)[1:] - _pull_data(writer, checkpoint_manager, 'tests/009b_integration_multiple.xlsx', None, '2020-10-11') - self._check_checkpoints(caplog, ['forms_1', 'batch', 'batch', 'final', 'forms_2', 'final']) - self._check_checkpoints(caplog, ['forms_1', 'forms_1', 'forms_1', 'forms_1', 'forms_2', 'forms_2']) + _pull_data( + writer, checkpoint_manager, 'tests/009b_integration_multiple.xlsx', + None, '2020-10-11' + ) + self._check_checkpoints( + caplog, ['forms_1', 'batch', 'batch', 'final', 'forms_2', 'final'] + ) + self._check_checkpoints( + caplog, + ['forms_1', 'forms_1', 'forms_1', 'forms_1', 'forms_2', 'forms_2'] + ) self._check_data(writer, expected_form_1_data, 'forms_1') self._check_data(writer, expected_form_2_data, 'forms_2') - runs = list(writer.engine.execute( - 'SELECT table_name, since_param from commcare_export_runs where query_file_name = %s', - 'tests/009b_integration_multiple.xlsx' - )) + runs = list( + writer.engine.execute( + 'SELECT table_name, since_param ' + 'FROM commcare_export_runs ' + 'WHERE query_file_name = %s', + + 'tests/009b_integration_multiple.xlsx' + ) + ) assert {r[0]: r[1] for r in runs} == { 'forms_1': '2017-09-02T20:05:35.459547', 'forms_2': '2020-06-01T17:43:26.107701', @@ -397,9 +471,11 @@ def _check_data(self, writer, expected, table_name): _check_data(writer, expected, table_name, ['id', 'name', 'indexed_on']) def _check_checkpoints(self, caplog, expected): - # Depends on the logging in the CheckpointManager._set_checkpoint method + # Depends on the logging in the CheckpointManager._set_checkpoint + # method log_messages = [ - record[2] for record in caplog.record_tuples + record[2] + for record in caplog.record_tuples if record[0] == 'commcare_export.checkpoint' ] fail = False @@ -413,36 +489,42 @@ def _check_checkpoints(self, caplog, expected): assert not fail, 'Checkpoint comparison failed:\n' + message -# Conflicting types for 'count' will cause errors when inserting into database.' +# Conflicting types for 'count' will cause errors when inserting into +# database. CONFLICTING_TYPES_CLIENT = MockCommCareHqClient({ - 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, - [ - {'id': 1, 'name': 'n1', 'count': 10}, - {'id': 2, 'name': 'f2', 'count': 'abc'} - ] - ), - ], + 'case': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on' + }, [{ + 'id': 1, + 'name': 'n1', + 'count': 10 + }, { + 'id': 2, + 'name': 'f2', + 'count': 'abc' + }]),], }) class MockCheckpointingClient(CommCareHqClient): - """Mock client that uses the main client for iteration but overrides the data request - to return mocked data. + """ + Mock client that uses the main client for iteration but overrides + the data request to return mocked data. + + Note this client needs to be re-initialized after use. 
+ """ - Note this client needs to be re-initialized after use.""" def __init__(self, mock_data): self.mock_data = { resource: { _params_to_url(params): result for params, result in resource_results - } - for resource, resource_results in mock_data.items() + } for resource, resource_results in mock_data.items() } self.totals = { - resource: sum(len(results) for _, results in resource_results) - for resource, resource_results in mock_data.items() + resource: sum(len(results) for _, results in resource_results + ) for resource, resource_results in mock_data.items() } def get(self, resource, params=None): @@ -450,10 +532,16 @@ def get(self, resource, params=None): key = _params_to_url(params) objects = mock_requests.pop(key) if objects: - return {'meta': {'limit': len(objects), 'next': bool(mock_requests), - 'offset': 0, 'previous': None, - 'total_count': self.totals[resource]}, - 'objects': objects} + return { + 'meta': { + 'limit': len(objects), + 'next': bool(mock_requests), + 'offset': 0, + 'previous': None, + 'total_count': self.totals[resource] + }, + 'objects': objects + } else: return None @@ -461,124 +549,193 @@ def get(self, resource, params=None): def get_conflicting_types_checkpoint_client(): return MockCheckpointingClient({ 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, - [ - {'id': "doc 1", 'name': 'n1', 'count': 10, 'indexed_on': '2012-04-23T05:13:01.000000Z'}, - {'id': "doc 2", 'name': 'f2', 'count': 123, 'indexed_on': '2012-04-24T05:13:01.000000Z'} - ] - ), - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on', 'indexed_on_start': '2012-04-24T05:13:01'}, - [ - {'id': "doc 3", 'name': 'n1', 'count': 10, 'indexed_on': '2012-04-25T05:13:01.000000Z'}, - {'id': "doc 4", 'name': 'f2', 'count': 'abc', 'indexed_on': '2012-04-26T05:13:01.000000Z'} - ] - ), + ({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on' + }, [{ + 'id': "doc 1", + 'name': 'n1', + 'count': 10, + 'indexed_on': '2012-04-23T05:13:01.000000Z' + }, { + 'id': "doc 2", + 'name': 'f2', + 'count': 123, + 'indexed_on': '2012-04-24T05:13:01.000000Z' + }]), + ({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on', + 'indexed_on_start': '2012-04-24T05:13:01' + }, [{ + 'id': "doc 3", + 'name': 'n1', + 'count': 10, + 'indexed_on': '2012-04-25T05:13:01.000000Z' + }, { + 'id': "doc 4", + 'name': 'f2', + 'count': 'abc', + 'indexed_on': '2012-04-26T05:13:01.000000Z' + }]), ], }) @pytest.fixture(scope='function') def strict_writer(db_params): - writer = SqlWriterWithTearDown(db_params['url'], poolclass=sqlalchemy.pool.NullPool, strict_types=True) + writer = SqlWriterWithTearDown( + db_params['url'], + poolclass=sqlalchemy.pool.NullPool, + strict_types=True + ) yield writer writer.tear_down() @pytest.fixture(scope='function') def all_db_checkpoint_manager(db_params): - cm = CheckpointManager(db_params['url'], 'query', '123', 'test', 'hq', poolclass=sqlalchemy.pool.NullPool) + cm = CheckpointManager( + db_params['url'], + 'query', + '123', + 'test', + 'hq', + poolclass=sqlalchemy.pool.NullPool + ) cm.create_checkpoint_table() yield cm with session_scope(cm.Session) as session: session.query(Checkpoint).delete(synchronize_session='fetch') -def _pull_mock_data(writer, checkpoint_manager, api_client, query, start_over=None, since=None): +def _pull_mock_data( + writer, + checkpoint_manager, + api_client, + query, + start_over=None, + since=None +): args = make_args( query=query, output_format='sql', start_over=start_over, - since=since + since=since, ) - assert not (checkpoint_manager and 
since), "'checkpoint_manager' must be None when using 'since'" + assert not (checkpoint_manager and since), \ + "'checkpoint_manager' must be None when using 'since'" if checkpoint_manager: # set this so that it gets written to the checkpoints checkpoint_manager.query = query - # have to mock these to override the pool class otherwise they hold the db connection open - api_client_patch = mock.patch('commcare_export.cli._get_api_client', - return_value=api_client) - writer_patch = mock.patch('commcare_export.cli._get_writer', return_value=writer) - checkpoint_patch = mock.patch('commcare_export.cli._get_checkpoint_manager', return_value=checkpoint_manager) + # have to mock these to override the pool class otherwise they hold + # the db connection open + api_client_patch = mock.patch( + 'commcare_export.cli._get_api_client', return_value=api_client + ) + writer_patch = mock.patch( + 'commcare_export.cli._get_writer', return_value=writer + ) + checkpoint_patch = mock.patch( + 'commcare_export.cli._get_checkpoint_manager', + return_value=checkpoint_manager + ) with api_client_patch, writer_patch, checkpoint_patch: return main_with_args(args) @pytest.mark.dbtest class TestCLIWithDatabaseErrors(object): - def test_cli_database_error(self, strict_writer, all_db_checkpoint_manager, capfd): - _pull_mock_data(strict_writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, 'tests/013_ConflictingTypes.xlsx') + + def test_cli_database_error( + self, strict_writer, all_db_checkpoint_manager, capfd + ): + _pull_mock_data( + strict_writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, + 'tests/013_ConflictingTypes.xlsx' + ) out, err = capfd.readouterr() expected_re = re.compile('Stopping because of database error') assert re.search(expected_re, out) - def test_cli_database_error_checkpoint(self, strict_writer, all_db_checkpoint_manager, capfd): + def test_cli_database_error_checkpoint( + self, strict_writer, all_db_checkpoint_manager, capfd + ): _pull_mock_data( strict_writer, all_db_checkpoint_manager, - get_conflicting_types_checkpoint_client(), 'tests/013_ConflictingTypes.xlsx' + get_conflicting_types_checkpoint_client(), + 'tests/013_ConflictingTypes.xlsx' ) out, err = capfd.readouterr() expected_re = re.compile('Stopping because of database error') assert re.search(expected_re, out), out - # expect checkpoint to have the date from the first batch and not the 2nd - runs = list(strict_writer.engine.execute( - sqlalchemy.text('SELECT table_name, since_param, last_doc_id from commcare_export_runs where query_file_name = :file'), - file='tests/013_ConflictingTypes.xlsx' - )) + # expect checkpoint to have the date from the first batch and + # not the 2nd + runs = list( + strict_writer.engine.execute( + sqlalchemy.text( + 'SELECT table_name, since_param, last_doc_id ' + 'FROM commcare_export_runs ' + 'WHERE query_file_name = :file' + ), + file='tests/013_ConflictingTypes.xlsx' + ) + ) assert runs == [ ('Case', '2012-04-24T05:13:01', 'doc 2'), ] -# An input where missing fields should be added due to declared data types. +# An input where missing fields should be added due to declared data +# types. 
DATA_TYPES_CLIENT = MockCommCareHqClient({ - 'form': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, - [ - {'id': 1, 'form': {}}, - {'id': 2, 'form': {}} - ] - ), - ], + 'form': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on' + }, [{ + 'id': 1, + 'form': {} + }, { + 'id': 2, + 'form': {} + }]),], }) @pytest.mark.dbtest class TestCLIWithDataTypes(object): - def test_cli_data_types_add_columns(self, writer, all_db_checkpoint_manager, capfd): - _pull_mock_data(writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, 'tests/014_ExportWithDataTypes.xlsx') + + def test_cli_data_types_add_columns( + self, + writer, + all_db_checkpoint_manager, + capfd, + ): + _pull_mock_data( + writer, all_db_checkpoint_manager, CONFLICTING_TYPES_CLIENT, + 'tests/014_ExportWithDataTypes.xlsx' + ) metadata = sqlalchemy.schema.MetaData(bind=writer.engine) metadata.reflect() cols = metadata.tables['forms'].c - assert sorted([c.name for c in cols]) == sorted([u'id', u'a_bool', u'an_int', u'a_date', u'a_datetime', u'a_text']) + assert sorted([c.name for c in cols]) == sorted([ + u'id', u'a_bool', u'an_int', u'a_date', u'a_datetime', u'a_text' + ]) - # We intentionally don't check the types because SQLAlchemy doesn't - # support type comparison, and even if we convert to strings, the - # values are backend specific. + # We intentionally don't check the types because SQLAlchemy + # doesn't support type comparison, and even if we convert to + # strings, the values are backend specific. values = [ - list(row) for row in - writer.engine.execute('SELECT * FROM forms') + list(row) for row in writer.engine.execute('SELECT * FROM forms') ] assert values == [['1', None, None, None, None, None], @@ -587,90 +744,165 @@ def test_cli_data_types_add_columns(self, writer, all_db_checkpoint_manager, cap def get_indexed_on_client(page): p1 = MockCheckpointingClient({ - 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on'}, - [ - {'id': "doc 1", 'name': 'n1', 'indexed_on': '2012-04-23T05:13:01.000000Z'}, - {'id': "doc 2", 'name': 'n2', 'indexed_on': '2012-04-24T05:13:01.000000Z'} - ] - ) - ] + 'case': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on' + }, [{ + 'id': "doc 1", + 'name': 'n1', + 'indexed_on': '2012-04-23T05:13:01.000000Z' + }, { + 'id': "doc 2", + 'name': 'n2', + 'indexed_on': '2012-04-24T05:13:01.000000Z' + }])] }) p2 = MockCheckpointingClient({ - 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'indexed_on', 'indexed_on_start': '2012-04-24T05:13:01'}, - [ - {'id': "doc 3", 'name': 'n3', 'indexed_on': '2012-04-25T05:13:01.000000Z'}, - {'id': "doc 4", 'name': 'n4', 'indexed_on': '2012-04-26T05:13:01.000000Z'} - ] - ) - ] + 'case': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'indexed_on', + 'indexed_on_start': '2012-04-24T05:13:01' + }, [{ + 'id': "doc 3", + 'name': 'n3', + 'indexed_on': '2012-04-25T05:13:01.000000Z' + }, { + 'id': "doc 4", + 'name': 'n4', + 'indexed_on': '2012-04-26T05:13:01.000000Z' + }])] }) return [p1, p2][page] @pytest.mark.dbtest class TestCLIPaginationMode(object): + def test_cli_pagination_fresh(self, writer, all_db_checkpoint_manager): - checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"]) + checkpoint_manager = all_db_checkpoint_manager.for_dataset( + "case", ["Case"] + ) - _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(0), 'tests/013_ConflictingTypes.xlsx') + _pull_mock_data( + writer, all_db_checkpoint_manager, get_indexed_on_client(0), + 
'tests/013_ConflictingTypes.xlsx' + ) self._check_data(writer, [["doc 1"], ["doc 2"]], "Case") - self._check_checkpoint(checkpoint_manager, '2012-04-24T05:13:01', 'doc 2') + self._check_checkpoint( + checkpoint_manager, '2012-04-24T05:13:01', 'doc 2' + ) - _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(1), 'tests/013_ConflictingTypes.xlsx') - self._check_data(writer, [["doc 1"], ["doc 2"], ["doc 3"], ["doc 4"]], "Case") - self._check_checkpoint(checkpoint_manager, '2012-04-26T05:13:01', 'doc 4') + _pull_mock_data( + writer, all_db_checkpoint_manager, get_indexed_on_client(1), + 'tests/013_ConflictingTypes.xlsx' + ) + self._check_data( + writer, [["doc 1"], ["doc 2"], ["doc 3"], ["doc 4"]], "Case" + ) + self._check_checkpoint( + checkpoint_manager, '2012-04-26T05:13:01', 'doc 4' + ) def test_cli_pagination_legacy(self, writer, all_db_checkpoint_manager): - """Test that we continue with the same pagination mode as was already in use""" + """ + Test that we continue with the same pagination mode as was + already in use + """ - checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"]) + checkpoint_manager = all_db_checkpoint_manager.for_dataset( + "case", ["Case"] + ) # simulate previous run with legacy pagination mode - checkpoint_manager.set_checkpoint('2012-04-24T05:13:01', PaginationMode.date_modified, is_final=True) + checkpoint_manager.set_checkpoint( + '2012-04-24T05:13:01', PaginationMode.date_modified, is_final=True + ) client = MockCheckpointingClient({ - 'case': [ - ( - {'limit': DEFAULT_BATCH_SIZE, 'order_by': 'server_date_modified', 'server_date_modified_start': '2012-04-24T05:13:01'}, - [ - {'id': "doc 1", 'name': 'n1', 'server_date_modified': '2012-04-25T05:13:01.000000Z'}, - {'id': "doc 2", 'name': 'n2', 'server_date_modified': '2012-04-26T05:13:01.000000Z'} - ] - ) - ] + 'case': [({ + 'limit': DEFAULT_BATCH_SIZE, + 'order_by': 'server_date_modified', + 'server_date_modified_start': '2012-04-24T05:13:01' + }, [{ + 'id': "doc 1", + 'name': 'n1', + 'server_date_modified': '2012-04-25T05:13:01.000000Z' + }, { + 'id': "doc 2", + 'name': 'n2', + 'server_date_modified': '2012-04-26T05:13:01.000000Z' + }])] }) - _pull_mock_data(writer, all_db_checkpoint_manager, client, 'tests/013_ConflictingTypes.xlsx') + _pull_mock_data( + writer, all_db_checkpoint_manager, client, + 'tests/013_ConflictingTypes.xlsx' + ) self._check_data(writer, [["doc 1"], ["doc 2"]], "Case") - self._check_checkpoint(checkpoint_manager, '2012-04-26T05:13:01', 'doc 2', PaginationMode.date_modified.name) + self._check_checkpoint( + checkpoint_manager, '2012-04-26T05:13:01', 'doc 2', + PaginationMode.date_modified.name + ) - def test_cli_pagination_start_over(self, writer, all_db_checkpoint_manager): - """Test that we switch to the new pagination mode when using 'start_over'""" - checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"]) + def test_cli_pagination_start_over( + self, writer, all_db_checkpoint_manager + ): + """ + Test that we switch to the new pagination mode when using + 'start_over' + """ + checkpoint_manager = all_db_checkpoint_manager.for_dataset( + "case", ["Case"] + ) # simulate previous run with legacy pagination mode - checkpoint_manager.set_checkpoint('2012-04-24T05:13:01', PaginationMode.date_modified, is_final=True) + checkpoint_manager.set_checkpoint( + '2012-04-24T05:13:01', PaginationMode.date_modified, is_final=True + ) - _pull_mock_data(writer, all_db_checkpoint_manager, get_indexed_on_client(0), 
'tests/013_ConflictingTypes.xlsx', start_over=True)
+        _pull_mock_data(
+            writer,
+            all_db_checkpoint_manager,
+            get_indexed_on_client(0),
+            'tests/013_ConflictingTypes.xlsx',
+            start_over=True
+        )
         self._check_data(writer, [["doc 1"], ["doc 2"]], "Case")
-        self._check_checkpoint(checkpoint_manager, '2012-04-24T05:13:01', 'doc 2')
+        self._check_checkpoint(
+            checkpoint_manager, '2012-04-24T05:13:01', 'doc 2'
+        )
 
     def test_cli_pagination_since(self, writer, all_db_checkpoint_manager):
-        """Test that we use to the new pagination mode when using 'since'"""
-        checkpoint_manager = all_db_checkpoint_manager.for_dataset("case", ["Case"])
+        """
+        Test that we use the new pagination mode when using 'since'
+        """
+        checkpoint_manager = all_db_checkpoint_manager.for_dataset(
+            "case", ["Case"]
+        )
         # simulate previous run with legacy pagination mode
-        checkpoint_manager.set_checkpoint('2012-04-28T05:13:01', PaginationMode.date_modified, is_final=True)
+        checkpoint_manager.set_checkpoint(
+            '2012-04-28T05:13:01', PaginationMode.date_modified, is_final=True
+        )
 
-        # this will fail if it doesn't use the 'date_indexed' pagination mode due to how the mock client is setup
-        _pull_mock_data(writer, None, get_indexed_on_client(1), 'tests/013_ConflictingTypes.xlsx', since='2012-04-24T05:13:01')
+        # this will fail if it doesn't use the 'date_indexed' pagination
+        # mode due to how the mock client is set up
+        _pull_mock_data(
+            writer,
+            None,
+            get_indexed_on_client(1),
+            'tests/013_ConflictingTypes.xlsx',
+            since='2012-04-24T05:13:01'
+        )
         self._check_data(writer, [["doc 3"], ["doc 4"]], "Case")
 
     def _check_data(self, writer, expected, table_name):
         _check_data(writer, expected, table_name, ['id'])
 
-    def _check_checkpoint(self, checkpoint_manager, since_param, doc_id, pagination_mode=PaginationMode.date_indexed.name):
+    def _check_checkpoint(
+        self,
+        checkpoint_manager,
+        since_param,
+        doc_id,
+        pagination_mode=PaginationMode.date_indexed.name
+    ):
         checkpoint = checkpoint_manager.get_last_checkpoint()
         assert checkpoint.pagination_mode == pagination_mode
         assert checkpoint.since_param == since_param
diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py
index 6337d371..a9273a97 100644
--- a/tests/test_commcare_hq_client.py
+++ b/tests/test_commcare_hq_client.py
@@ -20,6 +20,7 @@ class FakeSession(object):
+
     def get(self, resource_url, params=None, auth=None, timeout=None):
         result = self._get_results(params)
         # Mutatey construction method required by requests.Response
@@ -32,62 +33,139 @@ def _get_results(self, params):
         if params:
             assert params['offset'][0] == '1'
             return {
-                'meta': { 'next': None, 'offset': params['offset'][0], 'limit': 1, 'total_count': 2 },
-                'objects': [ {'id': 1, 'foo': 2} ]
+                'meta': {
+                    'next': None,
+                    'offset': params['offset'][0],
+                    'limit': 1,
+                    'total_count': 2
+                },
+                'objects': [{
+                    'id': 1,
+                    'foo': 2
+                }]
             }
         else:
             return {
-                'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 2 },
-                'objects': [ {'id': 2, 'foo': 1} ]
+                'meta': {
+                    'next': '?offset=1',
+                    'offset': 0,
+                    'limit': 1,
+                    'total_count': 2
+                },
+                'objects': [{
+                    'id': 2,
+                    'foo': 1
+                }]
             }
 
 
 class FakeDateCaseSession(FakeSession):
+
     def _get_results(self, params):
         if not params:
             return {
-                'meta': {'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 2},
-                'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}]
+                'meta': {
+                    'next': '?offset=1',
+                    'offset': 0,
+                    'limit': 1,
+                    'total_count': 2
+                },
+                'objects': [{
+                    'id': 1,
+                    'foo': 1,
+                    'indexed_on': 
'2017-01-01T15:36:22Z' + }] } else: since_query_param = DATE_PARAMS['indexed_on'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' # include ID=1 again to make sure it gets filtered out return { - 'meta': { 'next': None, 'offset': 1, 'limit': 1, 'total_count': 2 }, - 'objects': [ {'id': 1, 'foo': 1}, {'id': 2, 'foo': 2} ] + 'meta': { + 'next': None, + 'offset': 1, + 'limit': 1, + 'total_count': 2 + }, + 'objects': [{ + 'id': 1, + 'foo': 1 + }, { + 'id': 2, + 'foo': 2 + }] } class FakeRepeatedDateCaseSession(FakeSession): # Model the case where there are as many or more cases with the same - # indexed_on than the batch size (2), so the client requests - # the same set of cases in a loop. + # indexed_on than the batch size (2), so the client requests the + # same set of cases in a loop. def _get_results(self, params): if not params: return { - 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, - 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}, - {'id': 2, 'foo': 2, 'indexed_on': '2017-01-01T15:36:22Z'}] + 'meta': { + 'next': '?offset=1', + 'offset': 0, + 'limit': 2, + 'total_count': 4 + }, + 'objects': [{ + 'id': 1, + 'foo': 1, + 'indexed_on': '2017-01-01T15:36:22Z' + }, { + 'id': 2, + 'foo': 2, + 'indexed_on': '2017-01-01T15:36:22Z' + }] } else: since_query_param = DATE_PARAMS['indexed_on'].start_param assert params[since_query_param] == '2017-01-01T15:36:22' return { - 'meta': { 'next': '?offset=1', 'offset': 0, 'limit': 2, 'total_count': 4}, - 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '2017-01-01T15:36:22Z'}, - {'id': 2, 'foo': 2, 'indexed_on': '2017-01-01T15:36:22Z'}] + 'meta': { + 'next': '?offset=1', + 'offset': 0, + 'limit': 2, + 'total_count': 4 + }, + 'objects': [{ + 'id': 1, + 'foo': 1, + 'indexed_on': '2017-01-01T15:36:22Z' + }, { + 'id': 2, + 'foo': 2, + 'indexed_on': '2017-01-01T15:36:22Z' + }] } class FakeMessageLogSession(FakeSession): + def _get_results(self, params): - obj_1 = {'id': 1, 'foo': 1, 'date_last_activity': '2017-01-01T15:36:22Z'} - obj_2 = {'id': 2, 'foo': 2, 'date_last_activity': '2017-01-01T15:37:22Z'} - obj_3 = {'id': 3, 'foo': 3, 'date_last_activity': '2017-01-01T15:38:22Z'} + obj_1 = { + 'id': 1, + 'foo': 1, + 'date_last_activity': '2017-01-01T15:36:22Z' + } + obj_2 = { + 'id': 2, + 'foo': 2, + 'date_last_activity': '2017-01-01T15:37:22Z' + } + obj_3 = { + 'id': 3, + 'foo': 3, + 'date_last_activity': '2017-01-01T15:38:22Z' + } if not params: return { - 'meta': {'next': '?cursor=xyz', 'limit': 2}, + 'meta': { + 'next': '?cursor=xyz', + 'limit': 2 + }, 'objects': [obj_1, obj_2] } else: @@ -95,7 +173,10 @@ def _get_results(self, params): since = params[since_query_param] if since == '2017-01-01T15:37:22': return { - 'meta': {'next': '?cursor=xyz', 'limit': 2}, + 'meta': { + 'next': '?cursor=xyz', + 'limit': 2 + }, 'objects': [obj_3] } if since == '2017-01-01T15:38:22': @@ -105,13 +186,23 @@ def _get_results(self, params): class FakeDateFormSession(FakeSession): + def _get_results(self, params): since1 = '2017-01-01T15:36:22' since2 = '2017-01-01T16:00:00' if not params: return { - 'meta': {'next': '?offset=1', 'offset': 0, 'limit': 1, 'total_count': 3}, - 'objects': [{'id': 1, 'foo': 1, 'indexed_on': '{}Z'.format(since1)}] + 'meta': { + 'next': '?offset=1', + 'offset': 0, + 'limit': 1, + 'total_count': 3 + }, + 'objects': [{ + 'id': 1, + 'foo': 1, + 'indexed_on': '{}Z'.format(since1) + }] } else: since_query_param = DATE_PARAMS['indexed_on'].start_param @@ -119,13 +210,33 @@ def 
_get_results(self, params): if indexed_on == since1: # include ID=1 again to make sure it gets filtered out return { - 'meta': { 'next': '?offset=2', 'offset': 0, 'limit': 1, 'total_count': 3 }, - 'objects': [{'id': 1, 'foo': 1}, {'id': 2, 'foo': 2, 'indexed_on': '{}Z'.format(since2)}] + 'meta': { + 'next': '?offset=2', + 'offset': 0, + 'limit': 1, + 'total_count': 3 + }, + 'objects': [{ + 'id': 1, + 'foo': 1 + }, { + 'id': 2, + 'foo': 2, + 'indexed_on': '{}Z'.format(since2) + }] } elif indexed_on == since2: return { - 'meta': { 'next': None, 'offset': 0, 'limit': 1, 'total_count': 3 }, - 'objects': [{'id': 3, 'foo': 3}] + 'meta': { + 'next': None, + 'offset': 0, + 'limit': 1, + 'total_count': 3 + }, + 'objects': [{ + 'id': 3, + 'foo': 3 + }] } else: raise Exception(indexed_on) @@ -134,13 +245,22 @@ def _get_results(self, params): class TestCommCareHqClient(unittest.TestCase): def _test_iterate(self, session, paginator, expected_count, expected_vals): - client = CommCareHqClient('/fake/commcare-hq/url', 'fake-project', None, None) + client = CommCareHqClient( + '/fake/commcare-hq/url', 'fake-project', None, None + ) client.session = session - # Iteration should do two "gets" because the first will have something in the "next" metadata field + # Iteration should do two "gets" because the first will have + # something in the "next" metadata field paginator.init() - checkpoint_manager = CheckpointManagerWithDetails(None, None, PaginationMode.date_indexed) - results = list(client.iterate('/fake/uri', paginator, checkpoint_manager=checkpoint_manager)) + checkpoint_manager = CheckpointManagerWithDetails( + None, None, PaginationMode.date_indexed + ) + results = list( + client.iterate( + '/fake/uri', paginator, checkpoint_manager=checkpoint_manager + ) + ) self.assertEqual(len(results), expected_count) self.assertEqual([result['foo'] for result in results], expected_vals) @@ -148,16 +268,28 @@ def test_iterate_simple(self): self._test_iterate(FakeSession(), SimplePaginator('fake'), 2, [1, 2]) def test_iterate_date(self): - self._test_iterate(FakeDateFormSession(), get_paginator('form'), 3, [1, 2, 3]) - self._test_iterate(FakeDateCaseSession(), get_paginator('case'), 2, [1, 2]) + self._test_iterate( + FakeDateFormSession(), get_paginator('form'), 3, [1, 2, 3] + ) + self._test_iterate( + FakeDateCaseSession(), get_paginator('case'), 2, [1, 2] + ) def test_repeat_limit(self): - with pytest.raises(ResourceRepeatException, - match="Requested resource '/fake/uri' 10 times with same parameters"): - self._test_iterate(FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, [1, 2]) + with pytest.raises( + ResourceRepeatException, + match="Requested resource '/fake/uri' 10 times with same parameters" + ): + self._test_iterate( + FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, + [1, 2] + ) def test_message_log(self): - self._test_iterate(FakeMessageLogSession(), get_paginator('messaging-event', 2), 3, [1, 2, 3]) + self._test_iterate( + FakeMessageLogSession(), get_paginator('messaging-event', 2), 3, + [1, 2, 3] + ) class TestDatePaginator(unittest.TestCase): @@ -167,27 +299,41 @@ def setup_class(cls): pass def test_empty_batch(self): - self.assertIsNone(DatePaginator('since', params=SimplePaginator()).next_page_params_from_batch({'objects': []})) + self.assertIsNone( + DatePaginator('since', params=SimplePaginator() + ).next_page_params_from_batch({'objects': []}) + ) def test_bad_date(self): - self.assertIsNone(DatePaginator('since', 
params=SimplePaginator()).next_page_params_from_batch({'objects': [{ - 'since': 'not a date' - }]})) + self.assertIsNone( + DatePaginator('since', params=SimplePaginator() + ).next_page_params_from_batch({ + 'objects': [{ + 'since': 'not a date' + }] + }) + ) def test_multi_field_sort(self): d1 = '2017-01-01T15:36:22Z' d2 = '2017-01-01T18:36:22Z' paginator = DatePaginator(['s1', 's2'], params=SimplePaginator()) - self.assertEqual(paginator.get_since_date({'objects': [{ - 's1': d1, - 's2': d2 - }]}), datetime.strptime(d1, '%Y-%m-%dT%H:%M:%SZ')) - - self.assertEqual(paginator.get_since_date({'objects': [{ - 's2': d2 - }]}), datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ')) - - self.assertEqual(paginator.get_since_date({'objects': [{ - 's1': None, - 's2': d2 - }]}), datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ')) + self.assertEqual( + paginator.get_since_date({'objects': [{ + 's1': d1, + 's2': d2 + }]}), datetime.strptime(d1, '%Y-%m-%dT%H:%M:%SZ') + ) + + self.assertEqual( + paginator.get_since_date({'objects': [{ + 's2': d2 + }]}), datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ') + ) + + self.assertEqual( + paginator.get_since_date({'objects': [{ + 's1': None, + 's2': d2 + }]}), datetime.strptime(d2, '%Y-%m-%dT%H:%M:%SZ') + ) diff --git a/tests/test_commcare_minilinq.py b/tests/test_commcare_minilinq.py index c24bd851..516ab303 100644 --- a/tests/test_commcare_minilinq.py +++ b/tests/test_commcare_minilinq.py @@ -17,7 +17,11 @@ def setup_class(cls): def check_case(self, val, result): if isinstance(result, list): - assert [datum.value if isinstance(datum, jsonpath.DatumInContext) else datum for datum in val] == result + assert [ + datum.value + if isinstance(datum, jsonpath.DatumInContext) else datum + for datum in val + ] == result def test_eval_indexed_on(self): self._test_eval(PaginationMode.date_indexed) @@ -26,106 +30,171 @@ def test_eval_modified_on(self): self._test_eval(PaginationMode.date_modified) def _test_eval(self, pagination_mode): - form_order_by = get_paginator('form', pagination_mode=pagination_mode).since_field - case_order_by = get_paginator('case', pagination_mode=pagination_mode).since_field + form_order_by = get_paginator( + 'form', pagination_mode=pagination_mode + ).since_field + case_order_by = get_paginator( + 'case', pagination_mode=pagination_mode + ).since_field - def die(msg): raise Exception(msg) + def die(msg): + raise Exception(msg) client = MockCommCareHqClient({ 'form': [ ( - {'limit': 1000, 'filter': 'test1', 'order_by': form_order_by}, + { + 'limit': 1000, + 'filter': 'test1', + 'order_by': form_order_by + }, [1, 2, 3], ), - ( - {'limit': 1000, 'filter': 'test2', 'order_by': form_order_by}, - [ - { 'x': [{ 'y': 1 }, {'y': 2}] }, - { 'x': [{ 'y': 3 }, {'z': 4}] }, - { 'x': [{ 'y': 5 }] } - ] - ), - ( - {'limit': 1000, 'filter': 'laziness-test', 'order_by': form_order_by}, - (i if i < 5 else die('Not lazy enough') for i in range(12)) - ), - ( - {'limit': 1000, 'cases__full': 'true', 'order_by': form_order_by}, - [1, 2, 3, 4, 5] - ), - ], - - 'case': [ - ( - {'limit': 1000, 'type': 'foo', 'order_by': case_order_by}, - [ - { 'x': 1 }, - { 'x': 2 }, - { 'x': 3 }, - ] - ) + ({ + 'limit': 1000, + 'filter': 'test2', + 'order_by': form_order_by + }, [{ + 'x': [{ + 'y': 1 + }, { + 'y': 2 + }] + }, { + 'x': [{ + 'y': 3 + }, { + 'z': 4 + }] + }, { + 'x': [{ + 'y': 5 + }] + }]), + ({ + 'limit': 1000, + 'filter': 'laziness-test', + 'order_by': form_order_by + }, + (i if i < 5 else die('Not lazy enough') for i in range(12))), + ({ + 'limit': 1000, + 'cases__full': 'true', + 
'order_by': form_order_by + }, [1, 2, 3, 4, 5]), ], - - 'user': [ - ( - {'limit': 1000}, - [ - { 'x': 1 }, - { 'x': 2 }, - { 'x': 3 }, - ] - ) - ] + 'case': [({ + 'limit': 1000, + 'type': 'foo', + 'order_by': case_order_by + }, [ + { + 'x': 1 + }, + { + 'x': 2 + }, + { + 'x': 3 + }, + ])], + 'user': [({ + 'limit': 1000 + }, [ + { + 'x': 1 + }, + { + 'x': 2 + }, + { + 'x': 3 + }, + ])] }) - env = BuiltInEnv() | CommCareHqEnv(client) | JsonPathEnv({}) # {'form': api_client.iterate('form')}) - - checkpoint_manager = CheckpointManagerWithDetails(None, None, pagination_mode) - assert list(Apply(Reference('api_data'), - Literal('form'), - Literal(checkpoint_manager), - Literal({"filter": 'test1'})).eval(env)) == [1, 2, 3] - - # just check that we can still apply some deeper xpath by mapping; first ensure the basics work - assert list(Apply(Reference('api_data'), - Literal('form'), - Literal(checkpoint_manager), - Literal({"filter": 'test2'})).eval(env)) == [ - { 'x': [{ 'y': 1 }, {'y': 2}] }, - { 'x': [{ 'y': 3 }, {'z': 4}] }, - { 'x': [{ 'y': 5 }] } - ] - - self.check_case(FlatMap(source=Apply(Reference('api_data'), - Literal('form'), - Literal(checkpoint_manager), - Literal({"filter": 'test2'})), - body=Reference('x[*].y')).eval(env), - [1, 2, 3, 5]) - - self.check_case(islice(Apply(Reference('api_data'), - Literal('form'), - Literal(checkpoint_manager), - Literal({"filter": "laziness-test"})).eval(env), 5), - [0, 1, 2, 3, 4]) - - self.check_case(Apply(Reference('api_data'), - Literal('form'), - Literal(checkpoint_manager), - Literal(None), - Literal(['cases'])).eval(env), - [1, 2, 3, 4, 5]) - - self.check_case(FlatMap(source=Apply(Reference('api_data'), - Literal('case'), - Literal(checkpoint_manager), - Literal({'type': 'foo'})), - body=Reference('x')).eval(env), - [1, 2, 3]) - - self.check_case(FlatMap(source=Apply(Reference('api_data'), - Literal('user'), - Literal(checkpoint_manager), - Literal(None)), - body=Reference('x')).eval(env), - [1, 2, 3]) + env = BuiltInEnv() | CommCareHqEnv(client) | JsonPathEnv( + {} + ) # {'form': api_client.iterate('form')}) + + checkpoint_manager = CheckpointManagerWithDetails( + None, None, pagination_mode + ) + assert list( + Apply( + Reference('api_data'), Literal('form'), + Literal(checkpoint_manager), Literal({"filter": 'test1'}) + ).eval(env) + ) == [1, 2, 3] + + # just check that we can still apply some deeper xpath by + # mapping; first ensure the basics work + assert list( + Apply( + Reference('api_data'), Literal('form'), + Literal(checkpoint_manager), Literal({"filter": 'test2'}) + ).eval(env) + ) == [{ + 'x': [{ + 'y': 1 + }, { + 'y': 2 + }] + }, { + 'x': [{ + 'y': 3 + }, { + 'z': 4 + }] + }, { + 'x': [{ + 'y': 5 + }] + }] + + self.check_case( + FlatMap( + source=Apply( + Reference('api_data'), Literal('form'), + Literal(checkpoint_manager), Literal({"filter": 'test2'}) + ), + body=Reference('x[*].y') + ).eval(env), [1, 2, 3, 5] + ) + + self.check_case( + islice( + Apply( + Reference('api_data'), Literal('form'), + Literal(checkpoint_manager), + Literal({"filter": "laziness-test"}) + ).eval(env), 5 + ), [0, 1, 2, 3, 4] + ) + + self.check_case( + Apply( + Reference('api_data'), Literal('form'), + Literal(checkpoint_manager), Literal(None), Literal(['cases']) + ).eval(env), [1, 2, 3, 4, 5] + ) + + self.check_case( + FlatMap( + source=Apply( + Reference('api_data'), Literal('case'), + Literal(checkpoint_manager), Literal({'type': 'foo'}) + ), + body=Reference('x') + ).eval(env), [1, 2, 3] + ) + + self.check_case( + FlatMap( + source=Apply( + 
Reference('api_data'), Literal('user'), + Literal(checkpoint_manager), Literal(None) + ), + body=Reference('x') + ).eval(env), [1, 2, 3] + ) diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index 526454b6..319b958f 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -17,19 +17,35 @@ def setup_class(cls): pass def test_split_leftmost(self): - assert split_leftmost(parse_jsonpath('foo')) == (jsonpath.Fields('foo'), jsonpath.This()) - assert split_leftmost(parse_jsonpath('foo.baz')) == (jsonpath.Fields('foo'), jsonpath.Fields('baz')) - assert split_leftmost(parse_jsonpath('foo.baz.bar')) == (jsonpath.Fields('foo'), jsonpath.Fields('baz').child(jsonpath.Fields('bar'))) - assert split_leftmost(parse_jsonpath('[*].baz')) == (jsonpath.Slice(), jsonpath.Fields('baz')) - assert split_leftmost(parse_jsonpath('foo[*].baz')) == (jsonpath.Fields('foo'), jsonpath.Slice().child(jsonpath.Fields('baz'))) + assert split_leftmost( + parse_jsonpath('foo') + ) == (jsonpath.Fields('foo'), jsonpath.This()) + assert split_leftmost( + parse_jsonpath('foo.baz') + ) == (jsonpath.Fields('foo'), jsonpath.Fields('baz')) + assert split_leftmost(parse_jsonpath('foo.baz.bar')) == ( + jsonpath.Fields('foo'), + jsonpath.Fields('baz').child(jsonpath.Fields('bar')) + ) + assert split_leftmost( + parse_jsonpath('[*].baz') + ) == (jsonpath.Slice(), jsonpath.Fields('baz')) + assert split_leftmost(parse_jsonpath('foo[*].baz')) == ( + jsonpath.Fields('foo'), + jsonpath.Slice().child(jsonpath.Fields('baz')) + ) def test_get_safe_source_field(self): - assert _get_safe_source_field('foo.bar.baz') == Reference('foo.bar.baz') + assert _get_safe_source_field( + 'foo.bar.baz') == Reference('foo.bar.baz') assert _get_safe_source_field('foo[*].baz') == Reference('foo[*].baz') - assert _get_safe_source_field('foo..baz[*]') == Reference('foo..baz[*]') + assert _get_safe_source_field( + 'foo..baz[*]') == Reference('foo..baz[*]') assert _get_safe_source_field('foo.#baz') == Reference('foo."#baz"') - assert _get_safe_source_field('foo.bar[*]..%baz') == Reference('foo.bar[*].."%baz"') - assert _get_safe_source_field('foo.bar:1.baz') == Reference('foo."bar:1".baz') + assert _get_safe_source_field( + 'foo.bar[*]..%baz') == Reference('foo.bar[*].."%baz"') + assert _get_safe_source_field( + 'foo.bar:1.baz') == Reference('foo."bar:1".baz') try: assert _get_safe_source_field('foo.bar.') @@ -39,28 +55,29 @@ def test_get_safe_source_field(self): def test_compile_mappings(self): test_cases = [ - ('mappings.xlsx', - { - 'a': { - 'w': 12, - 'x': 13, - 'y': 14, - 'z': 15, - 'q': 16, - 'r': 17, - }, - 'b': { - 'www': 'hello', - 'xxx': 'goodbye', - 'yyy': 'what is up', - }, - 'c': { - 1: 'foo', - 2: 'bar', - 3: 'biz', - 4: 'bizzle', - } - }), + ( + 'mappings.xlsx', { + 'a': { + 'w': 12, + 'x': 13, + 'y': 14, + 'z': 15, + 'q': 16, + 'r': 17, + }, + 'b': { + 'www': 'hello', + 'xxx': 'goodbye', + 'yyy': 'what is up', + }, + 'c': { + 1: 'foo', + 2: 'bar', + 3: 'biz', + 4: 'bizzle', + } + } + ), ] def flatten(dd): @@ -71,7 +88,9 @@ def flatten(dd): for filename, mappings in test_cases: abs_path = os.path.join(os.path.dirname(__file__), filename) - compiled = compile_mappings(openpyxl.load_workbook(abs_path)['Mappings']) + compiled = compile_mappings( + openpyxl.load_workbook(abs_path)['Mappings'] + ) # Print will be suppressed by pytest unless it fails if not (flatten(compiled) == mappings): print('In %s:' % filename) @@ -83,104 +102,171 @@ def flatten(dd): def test_parse_sheet(self): test_cases = [ - 
('001_JustDataSource.xlsx', SheetParts( - name='Forms', - headings=[], - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=None, - data_source="form"), + ( + '001_JustDataSource.xlsx', + SheetParts( + name='Forms', + headings=[], + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=None, + data_source="form" + ), + ), + # ( + # '001a_JustDataSource_LibreOffice.xlsx', + # Emit( + # table='Forms', + # headings=[], + # source=Apply(Reference("api_data"), Literal("form")) + # ) + # ), + ( + '002_DataSourceAndFilters.xlsx', + SheetParts( + name='Forms', + headings=[], + source=Apply( + Reference("api_data"), Literal("form"), + Reference("checkpoint_manager"), + Literal({ + 'app_id': 'foobizzle', + 'type': 'intake', + }) + ), + body=None, + data_source="form" + ) + ), + ( + '003_DataSourceAndEmitColumns.xlsx', + SheetParts( + name='Forms', + headings=[ + Literal('Form Type'), + Literal('Fecha de Nacimiento'), + Literal('Sexo'), + Literal('Danger 0'), + Literal('Danger 1'), + Literal('Danger Fever'), + Literal('Danger error'), + Literal('Danger error'), + Literal('special'), + Literal('Danger substring 1'), + Literal('Danger substring 2'), + Literal('Danger substring error 3'), + Literal('Danger substring error 4'), + Literal('Danger substring error 5') + ], + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=List([ + Reference("type"), + Apply( + Reference("FormatDate"), + Reference("date_of_birth") + ), + Apply(Reference("sexo"), Reference("gender")), + Apply( + Reference("selected-at"), Reference("dangers"), + Literal(0) + ), + Apply( + Reference("selected-at"), Reference("dangers"), + Literal(1) + ), + Apply( + Reference("selected"), Reference("dangers"), + Literal('fever') + ), + Literal( + 'Error: selected-at index must be an integer: ' + 'selected-at(abc)' + ), + Literal('Error: Unable to parse: selected(fever'), + Reference('path."#text"'), + Apply( + Reference("substr"), Reference("dangers"), + Literal(0), Literal(10) + ), + Apply( + Reference("substr"), Reference("dangers"), + Literal(4), Literal(3) + ), + Literal( + 'Error: both substr arguments must be ' + 'non-negative integers: substr(a, b)' + ), + Literal( + 'Error: both substr arguments must be ' + 'non-negative integers: substr(-1, 10)' + ), + Literal( + 'Error: both substr arguments must be ' + 'non-negative integers: substr(3, -4)' + ) + ]), + data_source="form" + ) + ), + ( + '005_DataSourcePath.xlsx', + SheetParts( + name='Forms', + headings=[], + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=None, + root_expr=Reference( + 'form.delivery_information.child_questions.[*]' + ), + data_source="form" + ) + ), + ( + '006_IncludeReferencedItems.xlsx', + SheetParts( + name='Forms', + headings=[], + source=Apply( + Reference("api_data"), Literal("form"), + Reference("checkpoint_manager"), Literal(None), + Literal(['foo', 'bar', 'bizzle']) + ), + body=None, + data_source="form" + ) + ), + ( + '010_JustDataSourceTableName.xlsx', + SheetParts( + name='my_table', + headings=[], + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=None, + data_source="form" + ) ), - #('001a_JustDataSource_LibreOffice.xlsx', Emit(table='Forms', headings=[], source=Apply(Reference("api_data"), Literal("form")))), - - ('002_DataSourceAndFilters.xlsx', - SheetParts( - name='Forms', - headings=[], - 
source=Apply( - Reference("api_data"), - Literal("form"), - Reference("checkpoint_manager"), - Literal({ - 'app_id': 'foobizzle', - 'type': 'intake', - }) - ), - body=None, - data_source="form" - )), - - ('003_DataSourceAndEmitColumns.xlsx', - SheetParts( - name='Forms', - headings = [ - Literal('Form Type'), Literal('Fecha de Nacimiento'), Literal('Sexo'), - Literal('Danger 0'), Literal('Danger 1'), Literal('Danger Fever'), - Literal('Danger error'), Literal('Danger error'), Literal('special'), - Literal('Danger substring 1'), Literal('Danger substring 2'), - Literal('Danger substring error 3'), Literal('Danger substring error 4'), - Literal('Danger substring error 5') - ], - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=List([ - Reference("type"), - Apply(Reference("FormatDate"), Reference("date_of_birth")), - Apply(Reference("sexo"), Reference("gender")), - Apply(Reference("selected-at"), Reference("dangers"), Literal(0)), - Apply(Reference("selected-at"), Reference("dangers"), Literal(1)), - Apply(Reference("selected"), Reference("dangers"), Literal('fever')), - Literal('Error: selected-at index must be an integer: selected-at(abc)'), - Literal('Error: Unable to parse: selected(fever'), - Reference('path."#text"'), - Apply(Reference("substr"), Reference("dangers"), Literal(0), Literal(10)), - Apply(Reference("substr"), Reference("dangers"), Literal(4), Literal(3)), - Literal('Error: both substr arguments must be non-negative integers: substr(a, b)'), - Literal('Error: both substr arguments must be non-negative integers: substr(-1, 10)'), - Literal('Error: both substr arguments must be non-negative integers: substr(3, -4)') - ]), - data_source="form" - )), - - ('005_DataSourcePath.xlsx', - SheetParts( - name='Forms', - headings = [], - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=None, - root_expr=Reference('form.delivery_information.child_questions.[*]'), - data_source="form" - )), - - ('006_IncludeReferencedItems.xlsx', - SheetParts( - name='Forms', - headings=[], - source=Apply( - Reference("api_data"), - Literal("form"), - Reference("checkpoint_manager"), - Literal(None), - Literal(['foo', 'bar', 'bizzle']) - ), - body=None, - data_source="form" - )), - - ('010_JustDataSourceTableName.xlsx', SheetParts( - name='my_table', - headings=[], - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=None, - data_source="form" - )), ] for filename, minilinq in test_cases: - print('Compiling sheet %s' % filename) # This output will be captured by pytest and printed in case of failure; helpful to isolate which test case + # This output will be captured by pytest and printed in case + # of failure; helpful to isolate which test case + print(f'Compiling sheet {filename}') abs_path = os.path.join(os.path.dirname(__file__), filename) compiled = parse_sheet(openpyxl.load_workbook(abs_path).active) # Print will be suppressed by pytest unless it fails if not (compiled == minilinq): - print('In %s:' % filename) + print(f'In {filename}:') pprint.pprint(compiled) print('!=') pprint.pprint(minilinq) @@ -189,36 +275,54 @@ def test_parse_sheet(self): def test_parse_workbook(self): field_mappings = {'t1': 'Form 1', 't2': 'Form 2'} test_cases = [ - ('004_TwoDataSources.xlsx', - [ - SheetParts( - name='Forms', - headings=[], - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=None, - data_source="form"), - SheetParts( - 
name='Cases', - headings=[], - source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), - body=None, - data_source="case") - ]), - ('007_Mappings.xlsx', - [ - SheetParts( - name='Forms', - headings=[Literal('Form Type')], - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=List([compile_mapped_field(field_mappings, Reference("type"))]), - data_source="form" - ) - ]), - + ( + '004_TwoDataSources.xlsx', [ + SheetParts( + name='Forms', + headings=[], + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=None, + data_source="form" + ), + SheetParts( + name='Cases', + headings=[], + source=Apply( + Reference("api_data"), Literal("case"), + Reference('checkpoint_manager') + ), + body=None, + data_source="case" + ) + ] + ), + ( + '007_Mappings.xlsx', [ + SheetParts( + name='Forms', + headings=[Literal('Form Type')], + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=List([ + compile_mapped_field( + field_mappings, Reference("type") + ) + ]), + data_source="form" + ) + ] + ), ] for filename, minilinq in test_cases: - print('Compiling workbook %s' % filename) # This output will be captured by pytest and printed in case of failure; helpful to isolate which test case + # This output will be captured by pytest and printed in case + # of failure; helpful to isolate which test case + print(f'Compiling workbook {filename}') abs_path = os.path.join(os.path.dirname(__file__), filename) compiled = parse_workbook(openpyxl.load_workbook(abs_path)) # Print will be suppressed by pytest unless it fails @@ -231,98 +335,179 @@ def test_parse_workbook(self): def test_compile_mapped_field(self): env = BuiltInEnv() | JsonPathEnv({'foo': {'bar': 'a', 'baz': 'b'}}) - expression = compile_mapped_field({'a': 'mapped from a'}, Reference('foo.bar')) + expression = compile_mapped_field({'a': 'mapped from a'}, + Reference('foo.bar')) assert expression.eval(env) == 'mapped from a' - expression = compile_mapped_field({'a': 'mapped from a'}, Reference('foo.baz')) + expression = compile_mapped_field({'a': 'mapped from a'}, + Reference('foo.baz')) assert list(expression.eval(env))[0].value == 'b' - expression = compile_mapped_field({'a': 'mapped from a'}, Reference('foo.boo')) + expression = compile_mapped_field({'a': 'mapped from a'}, + Reference('foo.boo')) assert list(expression.eval(env)) == [] def test_get_queries_from_excel(self): - minilinq = Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), + minilinq = Bind( + 'checkpoint_manager', + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms"]) + ), Emit( - table='Forms', - missing_value='---', - headings =[ - Literal('Form Type'), Literal('Fecha de Nacimiento'), Literal('Sexo'), - Literal('Danger 0'), Literal('Danger 1'), Literal('Danger Fever'), - Literal('Danger error'), Literal('Danger error'), Literal('special'), - Literal('Danger substring 1'), Literal('Danger substring 2'), - Literal('Danger substring error 3'), Literal('Danger substring error 4'), - Literal('Danger substring error 5') - ], - source = Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body = List([ - Reference("type"), - Apply(Reference("FormatDate"), Reference("date_of_birth")), - Apply(Reference("sexo"), Reference("gender")), - Apply(Reference("selected-at"), Reference("dangers"), Literal(0)), - 
Apply(Reference("selected-at"), Reference("dangers"), Literal(1)), - Apply(Reference("selected"), Reference("dangers"), Literal('fever')), - Literal('Error: selected-at index must be an integer: selected-at(abc)'), - Literal('Error: Unable to parse: selected(fever'), - Reference('path."#text"'), - Apply(Reference("substr"), Reference("dangers"), Literal(0), Literal(10)), - Apply(Reference("substr"), Reference("dangers"), Literal(4), Literal(3)), - Literal('Error: both substr arguments must be non-negative integers: substr(a, b)'), - Literal('Error: both substr arguments must be non-negative integers: substr(-1, 10)'), - Literal('Error: both substr arguments must be non-negative integers: substr(3, -4)') - ])) + table='Forms', + missing_value='---', + headings=[ + Literal('Form Type'), + Literal('Fecha de Nacimiento'), + Literal('Sexo'), + Literal('Danger 0'), + Literal('Danger 1'), + Literal('Danger Fever'), + Literal('Danger error'), + Literal('Danger error'), + Literal('special'), + Literal('Danger substring 1'), + Literal('Danger substring 2'), + Literal('Danger substring error 3'), + Literal('Danger substring error 4'), + Literal('Danger substring error 5') + ], + source=Map( + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=List([ + Reference("type"), + Apply( + Reference("FormatDate"), + Reference("date_of_birth") + ), + Apply(Reference("sexo"), Reference("gender")), + Apply( + Reference("selected-at"), Reference("dangers"), + Literal(0) + ), + Apply( + Reference("selected-at"), Reference("dangers"), + Literal(1) + ), + Apply( + Reference("selected"), Reference("dangers"), + Literal('fever') + ), + Literal( + 'Error: selected-at index must be an integer: ' + 'selected-at(abc)' + ), + Literal('Error: Unable to parse: selected(fever'), + Reference('path."#text"'), + Apply( + Reference("substr"), Reference("dangers"), + Literal(0), Literal(10) + ), + Apply( + Reference("substr"), Reference("dangers"), + Literal(4), Literal(3) + ), + Literal( + 'Error: both substr arguments must be ' + 'non-negative integers: substr(a, b)' + ), + Literal( + 'Error: both substr arguments must be ' + 'non-negative integers: substr(-1, 10)' + ), + Literal( + 'Error: both substr arguments must be ' + 'non-negative integers: substr(3, -4)' + ) + ]) + ) ) ) - self._compare_minilinq_to_compiled(minilinq, '003_DataSourceAndEmitColumns.xlsx') + self._compare_minilinq_to_compiled( + minilinq, '003_DataSourceAndEmitColumns.xlsx' + ) def test_alternate_source_fields(self): minilinq = List([ # First sheet uses a CSV column and also tests combining "Map Via" - Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), + Bind( + 'checkpoint_manager', + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms"]) + ), Emit( - table='Forms', missing_value='---', - headings =[ + table='Forms', + missing_value='---', + headings=[ Literal('dob'), ], - source = Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body = List([ + source=Map( + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=List([ Apply( Reference("str2date"), Apply( - Reference("or"), - Reference("dob"), Reference("date_of_birth"), Reference("d_o_b") + Reference("or"), Reference("dob"), + Reference("date_of_birth"), + Reference("d_o_b") ) ), - ])) + ]) + ) ) ), # Second sheet uses multiple alternate source field columns (listed out of 
order) - Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms1"])), + Bind( + 'checkpoint_manager', + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms1"]) + ), Emit( - table='Forms1', missing_value='---', + table='Forms1', + missing_value='---', headings=[ - Literal('dob'), Literal('Sex'), + Literal('dob'), + Literal('Sex'), ], source=Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), body=List([ Reference("dob"), Apply( - Reference("or"), - Reference("gender"), Reference("sex"), Reference("sex0") + Reference("or"), Reference("gender"), + Reference("sex"), Reference("sex0") ) - ])) + ]) + ) ) ), ]) - self._compare_minilinq_to_compiled(minilinq, '011_AlternateSourceFields.xlsx') + self._compare_minilinq_to_compiled( + minilinq, '011_AlternateSourceFields.xlsx' + ) def test_columns_with_data_types(self): - minilinq = Bind('checkpoint_manager', Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), + minilinq = Bind( + 'checkpoint_manager', + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms"]) + ), Emit( table='Forms', missing_value='---', @@ -334,7 +519,10 @@ def test_columns_with_data_types(self): Literal('Bad Type'), ], source=Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), body=List([ Reference("name"), Reference("date_of_birth"), @@ -352,22 +540,30 @@ def test_columns_with_data_types(self): ], ), ) - self._compare_minilinq_to_compiled(minilinq, '012_ColumnsWithTypes.xlsx') + self._compare_minilinq_to_compiled( + minilinq, '012_ColumnsWithTypes.xlsx' + ) def test_multi_emit(self): minilinq = List([ - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms", "Cases"])), + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms", "Cases"]) + ), Filter( - predicate=Apply( - Reference("filter_empty"), - Reference("$") - ), + predicate=Apply(Reference("filter_empty"), Reference("$")), source=Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), body=List([ Emit( table="Forms", - headings=[Literal("id"), Literal("name")], + headings=[Literal("id"), + Literal("name")], missing_value='---', source=Map( source=Reference("`this`"), @@ -394,40 +590,59 @@ def test_multi_emit(self): ), Bind( 'checkpoint_manager', - Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), + Apply( + Reference('get_checkpoint_manager'), Literal("case"), + Literal(["Other cases"]) + ), Emit( table="Other cases", headings=[Literal("id")], missing_value='---', source=Map( - source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), - body=List([ - Reference("id") - ]) + source=Apply( + Reference("api_data"), Literal("case"), + Reference('checkpoint_manager') + ), + body=List([Reference("id")]) ) ) ) ]) - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=True) + self._compare_minilinq_to_compiled( + minilinq, '008_multiple-tables.xlsx', combine_emits=True + ) def 
test_multi_emit_no_combine(self): minilinq = List([ - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), - Emit( + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms"]) + ), + Emit( table="Forms", headings=[Literal("id"), Literal("name")], missing_value='---', source=Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), body=List([ Reference("id"), Reference("form.name"), ]), ) - ) + ) ), - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Cases"])), + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Cases"]) + ), Emit( table="Cases", headings=[Literal("case_id")], @@ -435,7 +650,10 @@ def test_multi_emit_no_combine(self): source=Map( source=FlatMap( body=Reference("form..case"), - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')) + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ) ), body=List([ Reference("@case_id"), @@ -443,37 +661,54 @@ def test_multi_emit_no_combine(self): ) ) ), - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("case"), + Literal(["Other cases"]) + ), Emit( table="Other cases", headings=[Literal("id")], missing_value='---', source=Map( - source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), - body=List([ - Reference("id") - ]) + source=Apply( + Reference("api_data"), Literal("case"), + Reference('checkpoint_manager') + ), + body=List([Reference("id")]) ) ) ) ]) - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=False) + self._compare_minilinq_to_compiled( + minilinq, '008_multiple-tables.xlsx', combine_emits=False + ) def test_multi_emit_with_organization(self): minilinq = List([ - Bind("checkpoint_manager", Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms", "Cases"])), + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms", "Cases"]) + ), Filter( - predicate=Apply( - Reference("filter_empty"), - Reference("$") - ), + predicate=Apply(Reference("filter_empty"), Reference("$")), source=Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), body=List([ Emit( table="Forms", - headings=[Literal("id"), Literal("name"), Literal("commcare_userid")], + headings=[ + Literal("id"), + Literal("name"), + Literal("commcare_userid") + ], missing_value='---', source=Map( source=Reference("`this`"), @@ -486,7 +721,10 @@ def test_multi_emit_with_organization(self): ), Emit( table="Cases", - headings=[Literal("case_id"), Literal("commcare_userid")], + headings=[ + Literal("case_id"), + Literal("commcare_userid") + ], missing_value='---', source=Map( source=Reference("form..case"), @@ -502,84 +740,121 @@ def test_multi_emit_with_organization(self): ), Bind( 'checkpoint_manager', - Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), + Apply( + Reference('get_checkpoint_manager'), 
Literal("case"), + Literal(["Other cases"]) + ), Emit( table="Other cases", - headings=[Literal("id"), Literal("commcare_userid")], + headings=[Literal("id"), + Literal("commcare_userid")], missing_value='---', source=Map( - source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), - body=List([ - Reference("id"), - Reference("$.user_id") - ]) + source=Apply( + Reference("api_data"), Literal("case"), + Reference('checkpoint_manager') + ), + body=List([Reference("id"), + Reference("$.user_id")]) ) ) ) ]) column_enforcer = ColumnEnforcer() - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=True, - column_enforcer=column_enforcer) + self._compare_minilinq_to_compiled( + minilinq, + '008_multiple-tables.xlsx', + combine_emits=True, + column_enforcer=column_enforcer + ) def test_value_or_root(self): minilinq = List([ - Bind("checkpoint_manager", - Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Forms"])), - Emit( - table="Forms", - headings=[Literal("id"), Literal("name")], - missing_value='---', - source=Map( - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')), - body=List([ - Reference("id"), - Reference("form.name"), - ]), - ) - ) - ), - Bind("checkpoint_manager", - Apply(Reference('get_checkpoint_manager'), Literal("form"), Literal(["Cases"])), - Emit( - table="Cases", - headings=[Literal("case_id")], - missing_value='---', - source=Map( - source=FlatMap( - body=Apply( - Reference("_or_raw"), - Reference("form..case"), - Bind("__root_only", Literal(True), Reference("$")) - ), - source=Apply(Reference("api_data"), Literal("form"), Reference('checkpoint_manager')) - ), - body=List([ - Reference("@case_id"), - ]), - ) - ) - ), - Bind("checkpoint_manager", - Apply(Reference('get_checkpoint_manager'), Literal("case"), Literal(["Other cases"])), - Emit( - table="Other cases", - headings=[Literal("id")], - missing_value='---', - source=Map( - source=Apply(Reference("api_data"), Literal("case"), Reference('checkpoint_manager')), - body=List([ - Reference("id") - ]) - ) - ) - ) + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Forms"]) + ), + Emit( + table="Forms", + headings=[Literal("id"), Literal("name")], + missing_value='---', + source=Map( + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ), + body=List([ + Reference("id"), + Reference("form.name"), + ]), + ) + ) + ), + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("form"), + Literal(["Cases"]) + ), + Emit( + table="Cases", + headings=[Literal("case_id")], + missing_value='---', + source=Map( + source=FlatMap( + body=Apply( + Reference("_or_raw"), Reference("form..case"), + Bind( + "__root_only", Literal(True), + Reference("$") + ) + ), + source=Apply( + Reference("api_data"), Literal("form"), + Reference('checkpoint_manager') + ) + ), + body=List([ + Reference("@case_id"), + ]), + ) + ) + ), + Bind( + "checkpoint_manager", + Apply( + Reference('get_checkpoint_manager'), Literal("case"), + Literal(["Other cases"]) + ), + Emit( + table="Other cases", + headings=[Literal("id")], + missing_value='---', + source=Map( + source=Apply( + Reference("api_data"), Literal("case"), + Reference('checkpoint_manager') + ), + body=List([Reference("id")]) + ) + ) + ) ]) - self._compare_minilinq_to_compiled(minilinq, '008_multiple-tables.xlsx', combine_emits=False, value_or_root=True) + 
self._compare_minilinq_to_compiled( + minilinq, + '008_multiple-tables.xlsx', + combine_emits=False, + value_or_root=True + ) def _compare_minilinq_to_compiled(self, minilinq, filename, **kwargs): print("Parsing {}".format(filename)) abs_path = os.path.join(os.path.dirname(__file__), filename) - compiled = get_queries_from_excel(openpyxl.load_workbook(abs_path), missing_value='---', **kwargs) + compiled = get_queries_from_excel( + openpyxl.load_workbook(abs_path), missing_value='---', **kwargs + ) assert compiled.to_jvalue() == minilinq.to_jvalue(), filename diff --git a/tests/test_map_format.py b/tests/test_map_format.py index fb964b17..d662aec8 100644 --- a/tests/test_map_format.py +++ b/tests/test_map_format.py @@ -5,13 +5,24 @@ class TestMapFormats(unittest.TestCase): + def test_parse_template_no_args(self): - expected = Apply(Reference('template'), Literal('my name is {}'), Reference('form.question1')) - assert parse_template(Reference('form.question1'), 'template(my name is {})') == expected + expected = Apply( + Reference('template'), Literal('my name is {}'), + Reference('form.question1') + ) + assert parse_template( + Reference('form.question1'), 'template(my name is {})' + ) == expected def test_parse_template_args(self): - expected = Apply(Reference('template'), Literal('my name is {}'), Reference('form.question2')) - assert parse_template('form.question1', 'template(my name is {}, form.question2)') == expected + expected = Apply( + Reference('template'), Literal('my name is {}'), + Reference('form.question2') + ) + assert parse_template( + 'form.question1', 'template(my name is {}, form.question2)' + ) == expected def test_parse_template_args_long(self): expected = Apply( @@ -20,14 +31,23 @@ def test_parse_template_args_long(self): Reference('$.domain'), Reference('$.id'), ) - assert parse_template('form.id', 'template(https://www.commcarehq.org/a/{}/reports/form_data/{}/, $.domain, $.id)') == expected + assert parse_template( + 'form.id', + + 'template(https://www.commcarehq.org/a/{}/reports/form_data/{}/, ' + '$.domain, $.id)' + ) == expected def test_parse_template_no_template(self): - expected = Literal('Error: template function requires the format template: template()') + expected = Literal( + 'Error: template function requires the format template: template()' + ) assert parse_template('form.question1', 'template()') == expected def test_parse_function_arg_with_brackets(self): - value_returned = parse_function_arg('selected', 'selected(Other_(Specify))') + value_returned = parse_function_arg( + 'selected', 'selected(Other_(Specify))' + ) assert value_returned == 'Other_(Specify)' def test_parse_function_arg_empty_returns(self): diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index f9a0d15d..9677b509 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -10,8 +10,14 @@ from commcare_export.writers import JValueTableWriter -class LazinessException(Exception): pass -def die(msg): raise LazinessException(msg) # Hack: since "raise" is a statement not an expression, need a funcall wrapping it +class LazinessException(Exception): + pass + + +def die(msg): + # Hack: since "raise" is a statement not an expression, need a + # funcall wrapping it + raise LazinessException(msg) class TestMiniLinq(unittest.TestCase): @@ -35,49 +41,93 @@ def test_eval_literal(self): def test_eval_reference(self): env = BuiltInEnv() assert Reference("foo").eval(DictEnv({'foo': 2})) == 2 - assert Reference(Reference(Reference('a'))).eval(DictEnv({'a': 'b', 'b': 'c', 'c': 2})) == 
2 - self.check_case(Reference("foo[*]").eval(JsonPathEnv({'foo': [2]})), [2]) - self.check_case(Reference("foo[*]").eval(JsonPathEnv({'foo': range(0, 1)})), [0]) # Should work the same w/ iterators as with lists + assert Reference(Reference(Reference('a')) + ).eval(DictEnv({ + 'a': 'b', + 'b': 'c', + 'c': 2 + })) == 2 + self.check_case( + Reference("foo[*]").eval(JsonPathEnv({'foo': [2]})), [2] + ) + # Should work the same w/ iterators as with lists + self.check_case( + Reference("foo[*]").eval(JsonPathEnv({'foo': range(0, 1)})), [0] + ) - # Should be able to get back out to the root, as the JsonPathEnv actually passes the full datum around - self.check_case(Reference("foo.$.baz").eval(JsonPathEnv({'foo': [2], 'baz': 3})), [3]) + # Should be able to get back out to the root, as the JsonPathEnv + # actually passes the full datum around + self.check_case( + Reference("foo.$.baz").eval(JsonPathEnv({ + 'foo': [2], + 'baz': 3 + })), [3] + ) def test_eval_auto_id_reference(self): - "Test that we have turned on the jsonpath_ng.jsonpath.auto_id field properly" + """ + Test that we have turned on the jsonpath_ng.jsonpath.auto_id + field properly + """ env = BuiltInEnv() - self.check_case(Reference("foo.id").eval(JsonPathEnv({'foo': [2]})), ['foo']) + self.check_case( + Reference("foo.id").eval(JsonPathEnv({'foo': [2]})), ['foo'] + ) # When auto id is on, this always becomes a string. Sorry! - self.check_case(Reference("foo.id").eval(JsonPathEnv({'foo': {'id': 2}})), ['2']) + self.check_case( + Reference("foo.id").eval(JsonPathEnv({'foo': { + 'id': 2 + }})), ['2'] + ) def test_eval_auto_id_reference_nested(self): # this test is documentation of existing (weird) functionality - # that results from a combination of jsonpath_ng auto_id feature and - # JsonPathEnv.lookup (which adds an additional auto ID for some reason). + # that results from a combination of jsonpath_ng auto_id feature + # and JsonPathEnv.lookup (which adds an additional auto ID for + # some reason). 
env = JsonPathEnv({}) - flatmap = FlatMap(source=Literal([{ - "id": 1, - "foo": {'id': 'bid', 'name': 'bob'}, - "bar": [ - {'baz': 'a1'}, {'baz': 'a2', 'id': 'bazzer'} - ] - }]), body=Reference('bar.[*]')) - mmap = Map(source=flatmap, body=List([ - Reference("id"), Reference('baz'), Reference('$.id'), Reference('$.foo.id'), Reference('$.foo.name') - ])) - self.check_case(mmap.eval(env), [ - ['1.bar.1.bar.[0]', 'a1', '1', '1.bid', 'bob'], - ['1.bar.bazzer', 'a2', '1', '1.bid', 'bob'] - ]) + flatmap = FlatMap( + source=Literal([{ + "id": 1, + "foo": { + 'id': 'bid', + 'name': 'bob' + }, + "bar": [{ + 'baz': 'a1' + }, { + 'baz': 'a2', + 'id': 'bazzer' + }] + }]), + body=Reference('bar.[*]') + ) + mmap = Map( + source=flatmap, + body=List([ + Reference("id"), + Reference('baz'), + Reference('$.id'), + Reference('$.foo.id'), + Reference('$.foo.name') + ]) + ) + self.check_case( + mmap.eval(env), [['1.bar.1.bar.[0]', 'a1', '1', '1.bid', 'bob'], + ['1.bar.bazzer', 'a2', '1', '1.bid', 'bob']] + ) - # Without the additional auto id field added in JsonPathEnv the result for Reference("id") changes - # as follows: + # Without the additional auto id field added in JsonPathEnv the + # result for Reference("id") changes as follows: # '1.bar.1.bar.[0]' -> '1.bar.[0]' - # With the change above AND a change to jsonpath_ng to prevent converting IDs that exist into - # auto IDs (see https://github.com/kennknowles/python-jsonpath-rw/pull/96) we get the following: + # With the change above AND a change to jsonpath_ng to prevent + # converting IDs that exist into auto IDs (see + # https://github.com/kennknowles/python-jsonpath-rw/pull/96) we + # get the following: # Reference("id"): # '1.bar.bazzer' -> 'bazzer' # @@ -85,17 +135,16 @@ def test_eval_auto_id_reference_nested(self): # '1.bid' -> 'bid' def test_value_or_root(self): - """Test that when accessing a child object the child data is used if it exists (normal case).""" - data = { - "id": 1, - "bar": [ - {'baz': 'a1'}, {'baz': 'a2'} - ] - } - self._test_value_or_root([Reference('id'), Reference('baz')], data, [ - ['1.bar.1.bar.[0]', 'a1'], - ['1.bar.1.bar.[1]', 'a2'], - ]) + """ + Test that when accessing a child object the child data is used + if it exists (normal case). 
+ """ + data = {"id": 1, "bar": [{'baz': 'a1'}, {'baz': 'a2'}]} + self._test_value_or_root([Reference('id'), + Reference('baz')], data, [ + ['1.bar.1.bar.[0]', 'a1'], + ['1.bar.1.bar.[1]', 'a2'], + ]) def test_value_or_root_empty_list(self): """Should use the root object if the child is an empty list""" @@ -104,7 +153,11 @@ def test_value_or_root_empty_list(self): "foo": "I am foo", "bar": [], } - self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ + self._test_value_or_root([ + Reference('id'), + Reference('baz'), + Reference('$.foo') + ], data, [ ['1', [], "I am foo"], ]) @@ -115,7 +168,11 @@ def test_value_or_root_empty_dict(self): "foo": "I am foo", "bar": {}, } - self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ + self._test_value_or_root([ + Reference('id'), + Reference('baz'), + Reference('$.foo') + ], data, [ ['1', [], "I am foo"], ]) @@ -125,9 +182,10 @@ def test_value_or_root_None(self): "id": 1, "bar": None, } - self._test_value_or_root([Reference('id'), Reference('baz')], data, [ - ['1', []], - ]) + self._test_value_or_root([Reference('id'), + Reference('baz')], data, [ + ['1', []], + ]) def test_value_or_root_missing(self): """Should use the root object if the child does not exist""" @@ -136,20 +194,28 @@ def test_value_or_root_missing(self): "foo": "I am foo", # 'bar' is missing } - self._test_value_or_root([Reference('id'), Reference('baz'), Reference('$.foo')], data, [ + self._test_value_or_root([ + Reference('id'), + Reference('baz'), + Reference('$.foo') + ], data, [ ['1', [], 'I am foo'], ]) def test_value_or_root_ignore_field_in_root(self): - """Test that a child reference is ignored if we are using the root doc even if there is a field - wit that name. (this doesn't apply to 'id')""" + """ + Test that a child reference is ignored if we are using the root + doc even if there is a field with that name. 
(this doesn't apply + to 'id') + """ data = { "id": 1, "foo": "I am foo", } - self._test_value_or_root([Reference('id'), Reference('foo')], data, [ - ['1', []], - ]) + self._test_value_or_root([Reference('id'), + Reference('foo')], data, [ + ['1', []], + ]) def _test_value_or_root(self, columns, data, expected): """Low level test case for 'value-or-root'""" @@ -161,12 +227,14 @@ def _test_value_or_root(self, columns, data, expected): def test_eval_collapsed_list(self): """ - Special case to handle XML -> JSON conversion where there just happened to be a single value at save time + Special case to handle XML -> JSON conversion where there just + happened to be a single value at save time """ env = BuiltInEnv() self.check_case(Reference("foo[*]").eval(JsonPathEnv({'foo': 2})), [2]) assert Apply(Reference("*"), Literal(2), Literal(3)).eval(env) == 6 - assert Apply(Reference(">"), Literal(56), Literal(23.5)).eval(env) == True + assert Apply(Reference(">"), Literal(56), + Literal(23.5)).eval(env) == True assert Apply(Reference("len"), Literal([1, 2, 3])).eval(env) == 3 assert Apply(Reference("bool"), Literal('a')).eval(env) == True assert Apply(Reference("bool"), Literal('')).eval(env) == False @@ -174,158 +242,315 @@ def test_eval_collapsed_list(self): assert Apply(Reference("str2bool"), Literal('t')).eval(env) == True assert Apply(Reference("str2bool"), Literal('1')).eval(env) == True assert Apply(Reference("str2bool"), Literal('0')).eval(env) == False - assert Apply(Reference("str2bool"), Literal('false')).eval(env) == False + assert Apply(Reference("str2bool"), + Literal('false')).eval(env) == False assert Apply(Reference("str2bool"), Literal(u'日本')).eval(env) == False assert Apply(Reference("str2num"), Literal('10')).eval(env) == 10 assert Apply(Reference("str2num"), Literal('10.56')).eval(env) == 10.56 assert Apply(Reference("str2num"), Literal('')).eval(env) == None - assert Apply(Reference("str2date"), Literal('2015-01-01')).eval(env) == datetime(2015, 1, 1) - assert Apply(Reference("str2date"), Literal('2015-01-01T18:32:57')).eval(env) == datetime(2015, 1, 1, 18, 32, 57) - assert Apply(Reference("str2date"), Literal('2015-01-01T18:32:57.001200')).eval(env) == datetime(2015, 1, 1, 18, 32, 57) - assert Apply(Reference("str2date"), Literal('2015-01-01T18:32:57.001200Z')).eval(env) == datetime(2015, 1, 1, 18, 32, 57) - assert Apply(Reference("str2date"), Literal(u'日'.encode('utf8'))).eval(env) == None + assert Apply(Reference("str2date"), + Literal('2015-01-01')).eval(env) == datetime(2015, 1, 1) + assert Apply(Reference("str2date"), Literal('2015-01-01T18:32:57') + ).eval(env) == datetime(2015, 1, 1, 18, 32, 57) + assert Apply( + Reference("str2date"), Literal('2015-01-01T18:32:57.001200') + ).eval(env) == datetime(2015, 1, 1, 18, 32, 57) + assert Apply( + Reference("str2date"), Literal('2015-01-01T18:32:57.001200Z') + ).eval(env) == datetime(2015, 1, 1, 18, 32, 57) + assert Apply(Reference("str2date"), + Literal(u'日'.encode('utf8'))).eval(env) == None assert Apply(Reference("str2date"), Literal(u'日')).eval(env) == None - assert Apply(Reference("format-uuid"), Literal(0xf00)).eval(env) == None - assert Apply(Reference("format-uuid"), Literal('f00')).eval(env) == None - assert Apply(Reference("format-uuid"), Literal('00a3e019-4ce1-4587-94c5-0971dee2de22')).eval(env) == '00a3e019-4ce1-4587-94c5-0971dee2de22' - assert Apply(Reference("selected-at"), Literal('a b c'), Literal('1')).eval(env) == 'b' - assert Apply(Reference("selected-at"), Literal(u'a b 日'), Literal('-1')).eval(env) == u'日' - 
assert Apply(Reference("selected-at"), Literal('a b c'), Literal('5')).eval(env) is None - assert Apply(Reference("selected"), Literal('a b c'), Literal('b')).eval(env) is True - assert Apply(Reference("selected"), Literal(u'a b 日本'), Literal('d')).eval(env) is False - assert Apply(Reference("selected"), Literal(u'a bb 日本'), Literal('b')).eval(env) is False - assert Apply(Reference("selected"), Literal(u'a bb 日本'), Literal(u'日本')).eval(env) is True - assert Apply(Reference("join"), Literal('.'), Literal('a'), Literal('b'), Literal('c')).eval(env) == 'a.b.c' - assert Apply(Reference("default"), Literal(None), Literal('a')).eval(env) == 'a' - assert Apply(Reference("default"), Literal('b'), Literal('a')).eval(env) == 'b' - assert Apply(Reference("count-selected"), Literal(u'a bb 日本')).eval(env) == 3 - assert Apply(Reference("sha1"), Literal(u'a bb 日本')).eval(env) == 'e25a54025417b06d88d40baa8c71f6eee9c07fb1' - assert Apply(Reference("sha1"), Literal(b'2015')).eval(env) == '9cdda67ded3f25811728276cefa76b80913b4c54' - assert Apply(Reference("sha1"), Literal(2015)).eval(env) == '9cdda67ded3f25811728276cefa76b80913b4c54' + assert Apply(Reference("format-uuid"), + Literal(0xf00)).eval(env) == None + assert Apply(Reference("format-uuid"), + Literal('f00')).eval(env) == None + assert Apply( + Reference("format-uuid"), + Literal('00a3e019-4ce1-4587-94c5-0971dee2de22') + ).eval(env) == '00a3e019-4ce1-4587-94c5-0971dee2de22' + assert Apply(Reference("selected-at"), Literal('a b c'), + Literal('1')).eval(env) == 'b' + assert Apply( + Reference("selected-at"), Literal(u'a b 日'), Literal('-1') + ).eval(env) == u'日' + assert Apply(Reference("selected-at"), Literal('a b c'), + Literal('5')).eval(env) is None + assert Apply(Reference("selected"), Literal('a b c'), + Literal('b')).eval(env) is True + assert Apply(Reference("selected"), Literal(u'a b 日本'), + Literal('d')).eval(env) is False + assert Apply(Reference("selected"), Literal(u'a bb 日本'), + Literal('b')).eval(env) is False + assert Apply( + Reference("selected"), Literal(u'a bb 日本'), Literal(u'日本') + ).eval(env) is True + assert Apply( + Reference("join"), Literal('.'), Literal('a'), Literal('b'), + Literal('c') + ).eval(env) == 'a.b.c' + assert Apply(Reference("default"), Literal(None), + Literal('a')).eval(env) == 'a' + assert Apply(Reference("default"), Literal('b'), + Literal('a')).eval(env) == 'b' + assert Apply(Reference("count-selected"), + Literal(u'a bb 日本')).eval(env) == 3 + assert Apply(Reference("sha1"), Literal(u'a bb 日本') + ).eval(env) == 'e25a54025417b06d88d40baa8c71f6eee9c07fb1' + assert Apply(Reference("sha1"), Literal(b'2015') + ).eval(env) == '9cdda67ded3f25811728276cefa76b80913b4c54' + assert Apply(Reference("sha1"), Literal(2015) + ).eval(env) == '9cdda67ded3f25811728276cefa76b80913b4c54' def test_or(self): env = BuiltInEnv() assert Apply(Reference("or"), Literal(None), Literal(2)).eval(env) == 2 - laziness_iterator = RepeatableIterator(lambda: (i if i < 1 else die('Not lazy enough') for i in range(2))) - assert Apply(Reference("or"), Literal(1), Literal(laziness_iterator)).eval(env) == 1 - assert Apply(Reference("or"), Literal(''), Literal(laziness_iterator)).eval(env) == '' - assert Apply(Reference("or"), Literal(0), Literal(laziness_iterator)).eval(env) == 0 + laziness_iterator = RepeatableIterator( + lambda: (i if i < 1 else die('Not lazy enough') for i in range(2)) + ) + assert Apply(Reference("or"), Literal(1), + Literal(laziness_iterator)).eval(env) == 1 + assert Apply(Reference("or"), Literal(''), + 
Literal(laziness_iterator)).eval(env) == '' + assert Apply(Reference("or"), Literal(0), + Literal(laziness_iterator)).eval(env) == 0 with pytest.raises(LazinessException): - Apply(Reference("or"), Literal(None), Literal(laziness_iterator)).eval(env) + Apply(Reference("or"), Literal(None), + Literal(laziness_iterator)).eval(env) env = env | JsonPathEnv({'a': {'c': 'c val'}}) - assert Apply(Reference("or"), Reference('a.b'), Reference('a.c')).eval(env) == 'c val' - assert Apply(Reference("or"), Reference('a.b'), Reference('a.d')).eval(env) is None + assert Apply(Reference("or"), Reference('a.b'), + Reference('a.c')).eval(env) == 'c val' + assert Apply(Reference("or"), Reference('a.b'), + Reference('a.d')).eval(env) is None env = env.replace({'a': [], 'b': [1, 2], 'c': 2}) - self.check_case(Apply(Reference("or"), Reference('a.[*]'), Reference('b')).eval(env), [1, 2]) - self.check_case(Apply(Reference("or"), Reference('b.[*]'), Reference('c')).eval(env), [1, 2]) self.check_case( - Apply(Reference("or"), Reference('a.[*]'), Reference('$')).eval(env), - {'a': [], 'b': [1, 2], 'c': 2, 'id': '$'} + Apply(Reference("or"), Reference('a.[*]'), + Reference('b')).eval(env), [1, 2] + ) + self.check_case( + Apply(Reference("or"), Reference('b.[*]'), + Reference('c')).eval(env), [1, 2] + ) + self.check_case( + Apply(Reference("or"), Reference('a.[*]'), + Reference('$')).eval(env), { + 'a': [], + 'b': [1, 2], + 'c': 2, + 'id': '$' + } ) def test_attachment_url(self): - env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv({'id': '123', 'domain': 'd1', 'photo': 'a.jpg'}) + env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'} + ) | JsonPathEnv({ + 'id': '123', + 'domain': 'd1', + 'photo': 'a.jpg' + }) expected = 'https://www.commcarehq.org/a/d1/api/form/attachment/123/a.jpg' - assert Apply(Reference('attachment_url'), Reference('photo')).eval(env) == expected + assert Apply(Reference('attachment_url'), + Reference('photo')).eval(env) == expected def test_attachment_url_repeat(self): - env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv({ - 'id': '123', 'domain': 'd1', 'repeat': [ - {'photo': 'a.jpg'}, {'photo': 'b.jpg'} - ] - }) + env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'} + ) | JsonPathEnv({ + 'id': '123', + 'domain': 'd1', + 'repeat': [{ + 'photo': 'a.jpg' + }, { + 'photo': 'b.jpg' + }] + }) expected = [ 'https://www.commcarehq.org/a/d1/api/form/attachment/123/a.jpg', 'https://www.commcarehq.org/a/d1/api/form/attachment/123/b.jpg', ] - result = unwrap_val(Map( - source=Reference('repeat.[*]'), - body=Apply(Reference('attachment_url'), Reference('photo')) - ).eval(env)) + result = unwrap_val( + Map( + source=Reference('repeat.[*]'), + body=Apply(Reference('attachment_url'), Reference('photo')) + ).eval(env) + ) assert result == expected def test_form_url(self): - env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv( - {'id': '123', 'domain': 'd1'}) + env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'} + ) | JsonPathEnv({ + 'id': '123', + 'domain': 'd1' + }) expected = 'https://www.commcarehq.org/a/d1/reports/form_data/123/' - assert Apply(Reference('form_url'), Reference('id')).eval(env) == expected + assert Apply(Reference('form_url'), + Reference('id')).eval(env) == expected def test_case_url(self): - env = BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'}) | JsonPathEnv( - {'id': '123', 'domain': 'd1'}) + env = 
BuiltInEnv({'commcarehq_base_url': 'https://www.commcarehq.org'} + ) | JsonPathEnv({ + 'id': '123', + 'domain': 'd1' + }) expected = 'https://www.commcarehq.org/a/d1/reports/case_data/123/' - assert Apply(Reference('case_url'), Reference('id')).eval(env) == expected + assert Apply(Reference('case_url'), + Reference('id')).eval(env) == expected def test_unique(self): - env = BuiltInEnv() | JsonPathEnv( - {"list": [{"a": 1}, {"a": 2}, {"a": 3}, {"a": 2}]}) - assert Apply(Reference('unique'), Reference('list[*].a')).eval(env) == [1, 2, 3] + env = BuiltInEnv() | JsonPathEnv({ + "list": [{ + "a": 1 + }, { + "a": 2 + }, { + "a": 3 + }, { + "a": 2 + }] + }) + assert Apply(Reference('unique'), + Reference('list[*].a')).eval(env) == [1, 2, 3] def test_template(self): env = BuiltInEnv() | JsonPathEnv({'a': '1', 'b': '2'}) - assert Apply(Reference('template'), Literal('{}.{}'), Reference('a'), Reference('b')).eval(env) == '1.2' + assert Apply( + Reference('template'), Literal('{}.{}'), Reference('a'), + Reference('b') + ).eval(env) == '1.2' def test_substr(self): - env = BuiltInEnv({'single_byte_chars': u'abcdefghijklmnopqrstuvwxyz', - 'multi_byte_chars': u'αβγδεζηθικλμνξοπρςστυφχψω', - 'an_integer': 123456 + env = BuiltInEnv({ + 'single_byte_chars': u'abcdefghijklmnopqrstuvwxyz', + 'multi_byte_chars': u'αβγδεζηθικλμνξοπρςστυφχψω', + 'an_integer': 123456 }) - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(-4), Literal(30)).eval(env) == None - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(0), Literal(26)).eval(env) == u'abcdefghijklmnopqrstuvwxyz' - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(10), Literal(16)).eval(env) == u'klmnop' - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(13), Literal(14)).eval(env) == u'n' - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(13), Literal(13)).eval(env) == u'' - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(14), Literal(13)).eval(env) == u'' - assert Apply(Reference('substr'), Reference('single_byte_chars'), - Literal(5), Literal(-1)).eval(env) == None - - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(-4), Literal(30)).eval(env) == None - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(0), Literal(25)).eval(env) == u'αβγδεζηθικλμνξοπρςστυφχψω' - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(10), Literal(15)).eval(env) == u'λμνξο' - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(13), Literal(14)).eval(env) == u'ξ' - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(13), Literal(12)).eval(env) == u'' - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(14), Literal(13)).eval(env) == u'' - assert Apply(Reference('substr'), Reference('multi_byte_chars'), - Literal(5), Literal(-1)).eval(env) == None - - assert Apply(Reference('substr'), Reference('an_integer'), - Literal(-1), Literal(3)).eval(env) == None - assert Apply(Reference('substr'), Reference('an_integer'), - Literal(0), Literal(6)).eval(env) == u'123456' - assert Apply(Reference('substr'), Reference('an_integer'), - Literal(2), Literal(4)).eval(env) == u'34' - assert Apply(Reference('substr'), Reference('an_integer'), - Literal(4), Literal(2)).eval(env) == u'' - assert Apply(Reference('substr'), Reference('an_integer'), - Literal(5), Literal(-1)).eval(env) == None + assert 
Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(-4), + Literal(30) + ).eval(env) == None + assert Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(0), + Literal(26) + ).eval(env) == u'abcdefghijklmnopqrstuvwxyz' + assert Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(10), + Literal(16) + ).eval(env) == u'klmnop' + assert Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(13), + Literal(14) + ).eval(env) == u'n' + assert Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(13), + Literal(13) + ).eval(env) == u'' + assert Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(14), + Literal(13) + ).eval(env) == u'' + assert Apply( + Reference('substr'), Reference('single_byte_chars'), Literal(5), + Literal(-1) + ).eval(env) == None + + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(-4), + Literal(30) + ).eval(env) == None + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(0), + Literal(25) + ).eval(env) == u'αβγδεζηθικλμνξοπρςστυφχψω' + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(10), + Literal(15) + ).eval(env) == u'λμνξο' + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(13), + Literal(14) + ).eval(env) == u'ξ' + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(13), + Literal(12) + ).eval(env) == u'' + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(14), + Literal(13) + ).eval(env) == u'' + assert Apply( + Reference('substr'), Reference('multi_byte_chars'), Literal(5), + Literal(-1) + ).eval(env) == None + + assert Apply( + Reference('substr'), Reference('an_integer'), Literal(-1), + Literal(3) + ).eval(env) == None + assert Apply( + Reference('substr'), Reference('an_integer'), Literal(0), + Literal(6) + ).eval(env) == u'123456' + assert Apply( + Reference('substr'), Reference('an_integer'), Literal(2), + Literal(4) + ).eval(env) == u'34' + assert Apply( + Reference('substr'), Reference('an_integer'), Literal(4), + Literal(2) + ).eval(env) == u'' + assert Apply( + Reference('substr'), Reference('an_integer'), Literal(5), + Literal(-1) + ).eval(env) == None def test_map(self): env = BuiltInEnv() | DictEnv({}) - laziness_iterator = RepeatableIterator(lambda: ({'a':i} if i < 5 else die('Not lazy enough') for i in range(12))) - - assert list(Map(source=Literal([{'a':1}, {'a':2}, {'a':3}]), body=Literal(1)).eval(env)) == [1, 1, 1] - assert list(Map(source=Literal([{'a':1}, {'a':2}, {'a':3}]), body=Reference('a')).eval(env)) == [1, 2, 3] + laziness_iterator = RepeatableIterator( + lambda: ({ + 'a': i + } if i < 5 else die('Not lazy enough') for i in range(12)) + ) - assert list(islice(Map(source=Literal(laziness_iterator), body=Reference('a')).eval(env), 5)) == [0, 1, 2, 3, 4] + assert list( + Map( + source=Literal([{ + 'a': 1 + }, { + 'a': 2 + }, { + 'a': 3 + }]), + body=Literal(1) + ).eval(env) + ) == [1, 1, 1] + assert list( + Map( + source=Literal([{ + 'a': 1 + }, { + 'a': 2 + }, { + 'a': 3 + }]), + body=Reference('a') + ).eval(env) + ) == [1, 2, 3] + + assert list( + islice( + Map(source=Literal(laziness_iterator), + body=Reference('a')).eval(env), 5 + ) + ) == [0, 1, 2, 3, 4] try: - list(Map(source=Literal(laziness_iterator), body=Reference('a')).eval(env)) + list( + Map(source=Literal(laziness_iterator), + body=Reference('a')).eval(env) + ) raise Exception('Should have failed') except LazinessException: 
pass @@ -333,29 +558,76 @@ def test_map(self): def test_flatmap(self): env = BuiltInEnv() | DictEnv({}) - laziness_iterator = RepeatableIterator(lambda: ({'a':range(i)} if i < 4 else die('Not lazy enough') for i in range(12))) - - assert list(FlatMap(source=Literal([{'a':[1]}, {'a':'foo'}, {'a':[3, 4]}]), body=Literal([1, 2, 3])).eval(env)) == [1, 2, 3, 1, 2, 3, 1, 2, 3] - assert list(FlatMap(source=Literal([{'a':[1]}, {'a':[2]}, {'a':[3, 4]}]), body=Reference('a')).eval(env)) == [1, 2, 3, 4] + laziness_iterator = RepeatableIterator( + lambda: ({ + 'a': range(i) + } if i < 4 else die('Not lazy enough') for i in range(12)) + ) - assert list(islice(FlatMap(source=Literal(laziness_iterator), body=Reference('a')).eval(env), 6)) == [0, - 0, 1, - 0, 1, 2] + assert list( + FlatMap( + source=Literal([{ + 'a': [1] + }, { + 'a': 'foo' + }, { + 'a': [3, 4] + }]), + body=Literal([1, 2, 3]) + ).eval(env) + ) == [1, 2, 3, 1, 2, 3, 1, 2, 3] + assert list( + FlatMap( + source=Literal([{ + 'a': [1] + }, { + 'a': [2] + }, { + 'a': [3, 4] + }]), + body=Reference('a') + ).eval(env) + ) == [1, 2, 3, 4] + + assert list( + islice( + FlatMap( + source=Literal(laziness_iterator), body=Reference('a') + ).eval(env), 6 + ) + ) == [0, 0, 1, 0, 1, 2] try: - list(FlatMap(source=Literal(laziness_iterator), body=Reference('a')).eval(env)) + list( + FlatMap( + source=Literal(laziness_iterator), body=Reference('a') + ).eval(env) + ) raise Exception('Should have failed') except LazinessException: pass def _setup_emit_test(self, emitter_env): - env = BuiltInEnv() | JsonPathEnv({'foo': {'baz': 3, 'bar': True, 'boo': None}}) | emitter_env - Emit(table='Foo', - headings=[Literal('foo')], - source=List([ - List([Reference('foo.baz'), Reference('foo.bar'), Reference('foo.foo'), Reference('foo.boo')]) - ]), - missing_value='---').eval(env) + env = BuiltInEnv() | JsonPathEnv({ + 'foo': { + 'baz': 3, + 'bar': True, + 'boo': None + } + }) | emitter_env + Emit( + table='Foo', + headings=[Literal('foo')], + source=List([ + List([ + Reference('foo.baz'), + Reference('foo.bar'), + Reference('foo.foo'), + Reference('foo.boo') + ]) + ]), + missing_value='---' + ).eval(env) def test_emit(self): writer = JValueTableWriter() @@ -363,16 +635,22 @@ def test_emit(self): assert list(writer.tables['Foo'].rows) == [[3, True, '---', None]] def test_emit_generator(self): + class TestWriter(JValueTableWriter): + def write_table(self, table): self.tables[table.name] = table writer = TestWriter() self._setup_emit_test(EmitterEnv(writer)) - assert isinstance(writer.tables['Foo'].rows, (map, filter, types.GeneratorType)) + assert isinstance( + writer.tables['Foo'].rows, (map, filter, types.GeneratorType) + ) def test_emit_env_generator(self): + class TestEmitterEnv(EmitterEnv): + def emit_table(self, table_spec): self.table = table_spec @@ -380,35 +658,44 @@ def emit_table(self, table_spec): self._setup_emit_test(env) assert isinstance(env.table.rows, (map, filter, types.GeneratorType)) - def test_emit_multi_same_query(self): - """Test that we can emit multiple tables from the same set of source data. - This is useful if you need to generate multiple tables from the same datasource. + """ + Test that we can emit multiple tables from the same set of + source data. This is useful if you need to generate multiple + tables from the same datasource. 
""" writer = JValueTableWriter() env = BuiltInEnv() | JsonPathEnv() | EmitterEnv(writer) result = Map( source=Literal([ - {'foo': {'baz': 3, 'bar': True, 'boo': None}}, - {'foo': {'baz': 4, 'bar': False, 'boo': 1}}, + { + 'foo': { + 'baz': 3, + 'bar': True, + 'boo': None + } + }, + { + 'foo': { + 'baz': 4, + 'bar': False, + 'boo': 1 + } + }, ]), body=List([ Emit( table='FooBaz', headings=[Literal('foo')], - source=List([ - List([ Reference('foo.baz')]) - ]), + source=List([List([Reference('foo.baz')])]), ), Emit( table='FooBar', headings=[Literal('foo')], - source=List([ - List([Reference('foo.bar')]) - ]), + source=List([List([Reference('foo.bar')])]), ) - ]), + ]), ).eval(env) # evaluate result @@ -418,10 +705,13 @@ def test_emit_multi_same_query(self): assert writer.tables['FooBar'].rows == [[True], [False]] def test_emit_mutli_different_query(self): - """Test that we can emit multiple tables from the same set of source data even - if the emitted table have different 'root doc' expressions. + """ + Test that we can emit multiple tables from the same set of + source data even if the emitted table have different 'root doc' + expressions. - Example use case could be emitting cases and case actions, or form data and repeats. + Example use case could be emitting cases and case actions, or + form data and repeats. """ writer = JValueTableWriter() env = BuiltInEnv() | JsonPathEnv() | EmitterEnv(writer) @@ -466,37 +756,76 @@ def test_emit_mutli_different_query(self): # evaluate result list(result) assert writer.tables['t1'].rows == [['1'], ['2']] - assert writer.tables['t2'].rows == [['1', 3], ['1', 4], ['2', 5], ['2', 6]] + assert writer.tables['t2'].rows == [['1', 3], ['1', 4], ['2', 5], + ['2', 6]] def test_from_jvalue(self): - assert MiniLinq.from_jvalue({"Ref": "form.log_subreport"}) == Reference("form.log_subreport") - assert (MiniLinq.from_jvalue({"Apply": {"fn": {"Ref":"len"}, "args": [{"Ref": "form.log_subreport"}]}}) - == Apply(Reference("len"), Reference("form.log_subreport"))) - assert MiniLinq.from_jvalue([{"Ref": "form.log_subreport"}]) == [Reference("form.log_subreport")] + assert MiniLinq.from_jvalue({"Ref": "form.log_subreport"} + ) == Reference("form.log_subreport") + assert ( + MiniLinq.from_jvalue({ + "Apply": { + "fn": { + "Ref": "len" + }, + "args": [{ + "Ref": "form.log_subreport" + }] + } + }) == Apply(Reference("len"), Reference("form.log_subreport")) + ) + assert MiniLinq.from_jvalue([{ + "Ref": "form.log_subreport" + }]) == [Reference("form.log_subreport")] def test_filter(self): env = BuiltInEnv() | DictEnv({}) named = [{'n': n} for n in range(1, 5)] - assert list(Filter(Literal(named), Apply(Reference('>'), Reference('n'), Literal(2))).eval(env)) == [{'n': 3}, {'n': 4}] - assert list(Filter(Literal([1, 2, 3, 4]), Apply(Reference('>'), Reference('n'), Literal(2)), 'n').eval(env)) == [3, 4] + assert list( + Filter( + Literal(named), + Apply(Reference('>'), Reference('n'), Literal(2)) + ).eval(env) + ) == [{ + 'n': 3 + }, { + 'n': 4 + }] + assert list( + Filter( + Literal([1, 2, 3, 4]), + Apply(Reference('>'), Reference('n'), Literal(2)), 'n' + ).eval(env) + ) == [3, 4] def test_emit_table_unwrap_dicts(self): writer = JValueTableWriter() env = EmitterEnv(writer) - env.emit_table(TableSpec(**{ - 'name': 't1', - 'headings': ['a'], - 'rows':[ - ['hi'], - [{'#text': 'test_text','@case_type': 'person','@relationship': 'child','id': 'nothing'}], - [{'@case_type': '', '@relationship': 'child', 'id': 'some_id'}], - [{'t': 123}], - ] - })) - - writer.tables['t1'].rows = [ - 
['hi'], - ['test_text'], - [''], - [{'t': 123}] - ] + env.emit_table( + TableSpec( + **{ + 'name': + 't1', + 'headings': ['a'], + 'rows': [ + ['hi'], + [{ + '#text': 'test_text', + '@case_type': 'person', + '@relationship': 'child', + 'id': 'nothing' + }], + [{ + '@case_type': '', + '@relationship': 'child', + 'id': 'some_id' + }], + [{ + 't': 123 + }], + ] + } + ) + ) + + writer.tables['t1'].rows = [['hi'], ['test_text'], [''], [{'t': 123}]] diff --git a/tests/test_misc.py b/tests/test_misc.py index 3975a6ef..47ffd73a 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -9,15 +9,18 @@ class TestDigestFile(unittest.TestCase): def check_digest(self, contents): - with tempfile.NamedTemporaryFile(prefix='commcare-export-test-', mode='wb') as file: - file.write(contents) + with tempfile.NamedTemporaryFile( + prefix='commcare-export-test-', mode='wb' + ) as file: + file.write(contents) file.flush() file_digest = misc.digest_file(file.name) - assert file_digest == hashlib.md5(contents).hexdigest() # Make sure the chunking does not mess with stuff - + # Make sure the chunking does not mess with stuff + assert file_digest == hashlib.md5(contents).hexdigest() + def test_digest_file_ascii(self): - self.check_digest('Hello'.encode('utf-8')) # Even a call to `write` requires encoding (as it should) in Python 3 + self.check_digest('Hello'.encode('utf-8')) def test_digest_file_long(self): self.check_digest(('Hello' * 100000).encode('utf-8')) diff --git a/tests/test_repeatable_iterator.py b/tests/test_repeatable_iterator.py index b8a396c8..89dc601d 100644 --- a/tests/test_repeatable_iterator.py +++ b/tests/test_repeatable_iterator.py @@ -12,10 +12,11 @@ def setup_class(cls): def test_iteration(self): - class LazinessException(Exception): pass + class LazinessException(Exception): + pass - def test1(): - for i in range(1, 100): + def test1(): + for i in range(1, 100): yield i def test2(): @@ -24,12 +25,12 @@ def test2(): raise LazinessException('Not lazy enough') yield i - # First make sure that we've properly set up a situation that fails - # without RepeatableIterator + # First make sure that we've properly set up a situation that + # fails without RepeatableIterator iterator = test1() assert list(iterator) == list(range(1, 100)) assert list(iterator) == [] - + # Now test that the RepeatableIterator restores functionality iterator = RepeatableIterator(test1) assert list(iterator) == list(range(1, 100)) diff --git a/tests/test_writers.py b/tests/test_writers.py index 43f580fc..45f08009 100644 --- a/tests/test_writers.py +++ b/tests/test_writers.py @@ -24,7 +24,11 @@ def writer(db_params): @pytest.fixture() def strict_writer(db_params): - return SqlTableWriter(db_params['url'], poolclass=sqlalchemy.pool.NullPool, strict_types=True) + return SqlTableWriter( + db_params['url'], + poolclass=sqlalchemy.pool.NullPool, + strict_types=True + ) TYPE_MAP = { @@ -35,97 +39,127 @@ def strict_writer(db_params): class TestWriters(object): + def test_JValueTableWriter(self): writer = JValueTableWriter() - writer.write_table(TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c', 'd'], - 'rows': [ - [1, '2', 3, datetime.date(2015, 1, 1)], - [4, '日本', 6, datetime.date(2015, 1, 2)], - ] - })) - - writer.write_table(TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c', 'd'], - 'rows': [ - [5, 'bob', 9, datetime.date(2018, 1, 2)], - ] - })) + writer.write_table( + TableSpec( + **{ + 'name': + 'foo', + 'headings': ['a', 'bjørn', 'c', 'd'], + 'rows': [ + [1, '2', 3, datetime.date(2015, 1, 1)], + [4, 
'日本', 6, datetime.date(2015, 1, 2)], + ] + } + ) + ) + + writer.write_table( + TableSpec( + **{ + 'name': 'foo', + 'headings': ['a', 'bjørn', 'c', 'd'], + 'rows': [[5, 'bob', 9, + datetime.date(2018, 1, 2)],] + } + ) + ) assert writer.tables == { - 'foo': TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c', 'd'], - 'rows': [ - [1, '2', 3, '2015-01-01'], - [4, '日本', 6, '2015-01-02'], - [5, 'bob', 9, '2018-01-02'], - ], - }) + 'foo': + TableSpec( + **{ + 'name': + 'foo', + 'headings': ['a', 'bjørn', 'c', 'd'], + 'rows': [ + [1, '2', 3, '2015-01-01'], + [4, '日本', 6, '2015-01-02'], + [5, 'bob', 9, '2018-01-02'], + ], + } + ) } def test_Excel2007TableWriter(self): with tempfile.NamedTemporaryFile(suffix='.xlsx') as file: with Excel2007TableWriter(file=file) as writer: - writer.write_table(TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c'], - 'rows': [ - [1, '2', 3], - [4, '日本', 6], - ] - })) + writer.write_table( + TableSpec( + **{ + 'name': 'foo', + 'headings': ['a', 'bjørn', 'c'], + 'rows': [ + [1, '2', 3], + [4, '日本', 6], + ] + } + ) + ) self._check_Excel2007TableWriter_output(file.name) def test_Excel2007TableWriter_write_mutli(self): with tempfile.NamedTemporaryFile(suffix='.xlsx') as file: with Excel2007TableWriter(file=file) as writer: - writer.write_table(TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c'], - 'rows': [ - [1, '2', 3], - ] - })) - - writer.write_table(TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c'], - 'rows': [ - [4, '日本', 6], - ] - })) + writer.write_table( + TableSpec( + **{ + 'name': 'foo', + 'headings': ['a', 'bjørn', 'c'], + 'rows': [[1, '2', 3],] + } + ) + ) + + writer.write_table( + TableSpec( + **{ + 'name': 'foo', + 'headings': ['a', 'bjørn', 'c'], + 'rows': [[4, '日本', 6],] + } + ) + ) self._check_Excel2007TableWriter_output(file.name) def _check_Excel2007TableWriter_output(self, filename): - output_wb = openpyxl.load_workbook(filename) - - assert output_wb.sheetnames == ['foo'] - foo_sheet = output_wb['foo'] - assert [ [cell.value for cell in row] for row in foo_sheet['A1:C3']] == [ - ['a', 'bjørn', 'c'], - ['1', '2', '3'], # Note how pyxl does some best-effort parsing to *whatever* type - ['4', '日本', '6'], - ] + output_wb = openpyxl.load_workbook(filename) + + assert output_wb.sheetnames == ['foo'] + foo_sheet = output_wb['foo'] + assert [ + [cell.value for cell in row] for row in foo_sheet['A1:C3'] + ] == [ + ['a', 'bjørn', 'c'], + ['1', '2', '3' + ], # Note how pyxl does some best-effort parsing to *whatever* type + ['4', '日本', '6'], + ] def test_CsvTableWriter(self): with tempfile.NamedTemporaryFile() as file: with CsvTableWriter(file=file) as writer: - writer.write_table(TableSpec(**{ - 'name': 'foo', - 'headings': ['a', 'bjørn', 'c'], - 'rows': [ - [1, '2', 3], - [4, '日本', 6], - ] - })) + writer.write_table( + TableSpec( + **{ + 'name': 'foo', + 'headings': ['a', 'bjørn', 'c'], + 'rows': [ + [1, '2', 3], + [4, '日本', 6], + ] + } + ) + ) with zipfile.ZipFile(file.name, 'r') as output_zip: with output_zip.open('foo.csv') as csv_file: - output = csv.reader(io.TextIOWrapper(csv_file, encoding='utf-8')) + output = csv.reader( + io.TextIOWrapper(csv_file, encoding='utf-8') + ) assert [row for row in output] == [ ['a', 'bjørn', 'c'], @@ -136,11 +170,13 @@ def test_CsvTableWriter(self): @pytest.mark.dbtest class TestSQLWriters(object): + def _type_convert(self, connection, row): """ - Different databases store and return values differently so convert the values - in the expected row to match the DB. 
+ Different databases store and return values differently so + convert the values in the expected row to match the DB. """ + def convert(type_map, value): func = type_map.get(value.__class__, None) return func(value) if func else value @@ -153,129 +189,233 @@ def convert(type_map, value): def test_insert(self, writer): with writer: - writer.write_table(TableSpec(**{ - 'name': 'foo_insert', - 'headings': ['id', 'a', 'b', 'c'], - 'rows': [ - ['bizzle', 1, 2, 3], - ['bazzle', 4, 5, 6], - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above + writer.write_table( + TableSpec( + **{ + 'name': 'foo_insert', + 'headings': ['id', 'a', 'b', 'c'], + 'rows': [ + ['bizzle', 1, 2, 3], + ['bazzle', 4, 5, 6], + ] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with writer: - result = dict([(row['id'], row) for row in writer.connection.execute('SELECT id, a, b, c FROM foo_insert')]) + result = dict([(row['id'], row) for row in writer.connection + .execute('SELECT id, a, b, c FROM foo_insert')]) assert len(result) == 2 - assert dict(result['bizzle']) == {'id': 'bizzle', 'a': 1, 'b': 2, 'c': 3} - assert dict(result['bazzle']) == {'id': 'bazzle', 'a': 4, 'b': 5, 'c': 6} + assert dict(result['bizzle']) == { + 'id': 'bizzle', + 'a': 1, + 'b': 2, + 'c': 3 + } + assert dict(result['bazzle']) == { + 'id': 'bazzle', + 'a': 4, + 'b': 5, + 'c': 6 + } def test_upsert(self, writer): with writer: - writer.write_table(TableSpec(**{ - 'name': 'foo_upsert', - 'headings': ['id', 'a', 'b', 'c'], - 'rows': [ - ['zing', 3, None, 5] - ] - })) + writer.write_table( + TableSpec( + **{ + 'name': 'foo_upsert', + 'headings': ['id', 'a', 'b', 'c'], + 'rows': [['zing', 3, None, 5]] + } + ) + ) # don't select column 'b' since it hasn't been created yet with writer: - result = dict([(row['id'], row) for row in writer.connection.execute('SELECT id, a, c FROM foo_upsert')]) + result = dict([ + (row['id'], row) for row in + writer.connection.execute('SELECT id, a, c FROM foo_upsert') + ]) assert len(result) == 1 assert dict(result['zing']) == {'id': 'zing', 'a': 3, 'c': 5} with writer: - writer.write_table(TableSpec(**{ - 'name': 'foo_upsert', - 'headings': ['id', 'a', 'b', 'c'], - 'rows': [ - ['bizzle', 1, 'yo', 3], - ['bazzle', 4, '日本', 6], - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above + writer.write_table( + TableSpec( + **{ + 'name': + 'foo_upsert', + 'headings': ['id', 'a', 'b', 'c'], + 'rows': [ + ['bizzle', 1, 'yo', 3], + ['bazzle', 4, '日本', 6], + ] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with writer: - result = dict([(row['id'], row) for row in writer.connection.execute('SELECT id, a, b, c FROM foo_upsert')]) + result = dict([(row['id'], row) for row in writer.connection + .execute('SELECT id, a, b, c FROM foo_upsert')]) assert len(result) == 3 - assert dict(result['bizzle']) == {'id': 'bizzle', 'a': 1, 'b': 'yo', 'c': 3} - assert dict(result['bazzle']) == {'id': 'bazzle', 'a': 4, 'b': '日本', 'c': 6} + assert dict(result['bizzle']) == { + 'id': 'bizzle', + 'a': 1, + 'b': 'yo', + 'c': 3 + } + assert dict(result['bazzle']) == { + 'id': 'bazzle', + 'a': 4, + 'b': '日本', + 'c': 6 + } with writer: - writer.write_table(TableSpec(**{ - 'name': 'foo_upsert', - 'headings': ['id', 'a', 'b', 'c'], - 'rows': [ - ['bizzle', 7, '本', 9], - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB 
above + writer.write_table( + TableSpec( + **{ + 'name': 'foo_upsert', + 'headings': ['id', 'a', 'b', 'c'], + 'rows': [['bizzle', 7, '本', 9],] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with writer: - result = dict([(row['id'], row) for row in writer.connection.execute('SELECT id, a, b, c FROM foo_upsert')]) + result = dict([(row['id'], row) for row in writer.connection + .execute('SELECT id, a, b, c FROM foo_upsert')]) assert len(result) == 3 - assert dict(result['bizzle']) == {'id': 'bizzle', 'a': 7, 'b': '本', 'c': 9} - assert dict(result['bazzle']) == {'id': 'bazzle', 'a': 4, 'b': '日本', 'c': 6} + assert dict(result['bizzle']) == { + 'id': 'bizzle', + 'a': 7, + 'b': '本', + 'c': 9 + } + assert dict(result['bazzle']) == { + 'id': 'bazzle', + 'a': 4, + 'b': '日本', + 'c': 6 + } def test_types(self, writer): self._test_types(writer, 'foo_fancy_types') def _test_types(self, writer, table_name): with writer: - writer.write_table(TableSpec(**{ - 'name': table_name, - 'headings': ['id', 'a', 'b', 'c', 'd', 'e'], - 'rows': [ - ['bizzle', 1, 'yo', True, datetime.date(2015, 1, 1), datetime.datetime(2014, 4, 2, 18, 56, 12)], - ['bazzle', 4, '日本', False, datetime.date(2015, 1, 2), datetime.datetime(2014, 5, 1, 11, 16, 45)], - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above + writer.write_table( + TableSpec( + **{ + 'name': + table_name, + 'headings': ['id', 'a', 'b', 'c', 'd', 'e'], + 'rows': [ + [ + 'bizzle', 1, 'yo', True, + datetime.date(2015, 1, 1), + datetime.datetime(2014, 4, 2, 18, 56, 12) + ], + [ + 'bazzle', 4, '日本', False, + datetime.date(2015, 1, 2), + datetime.datetime(2014, 5, 1, 11, 16, 45) + ], + ] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with writer: connection = writer.connection - result = dict( - [(row['id'], row) for row in connection.execute('SELECT id, a, b, c, d, e FROM %s' % table_name)]) + result = dict([ + (row['id'], row) for row in connection + .execute('SELECT id, a, b, c, d, e FROM %s' % table_name) + ]) assert len(result) == 2 expected = { - 'bizzle': {'id': 'bizzle', 'a': 1, 'b': 'yo', 'c': True, - 'd': datetime.date(2015, 1, 1), 'e': datetime.datetime(2014, 4, 2, 18, 56, 12)}, - 'bazzle': {'id': 'bazzle', 'a': 4, 'b': '日本', 'c': False, - 'd': datetime.date(2015, 1, 2), 'e': datetime.datetime(2014, 5, 1, 11, 16, 45)} + 'bizzle': { + 'id': 'bizzle', + 'a': 1, + 'b': 'yo', + 'c': True, + 'd': datetime.date(2015, 1, 1), + 'e': datetime.datetime(2014, 4, 2, 18, 56, 12) + }, + 'bazzle': { + 'id': 'bazzle', + 'a': 4, + 'b': '日本', + 'c': False, + 'd': datetime.date(2015, 1, 2), + 'e': datetime.datetime(2014, 5, 1, 11, 16, 45) + } } for id, row in result.items(): assert id in expected - assert dict(row) == self._type_convert(connection, expected[id]) + assert dict(row + ) == self._type_convert(connection, expected[id]) def test_change_type(self, writer): self._test_types(writer, 'foo_fancy_type_changes') with writer: - writer.write_table(TableSpec(**{ - 'name': 'foo_fancy_type_changes', - 'headings': ['id', 'a', 'b', 'c', 'd', 'e'], - 'rows': [ - ['bizzle', 'yo dude', '本', 'true', datetime.datetime(2015, 2, 13), '2014-08-01T11:23:45:00.0000Z'], - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above + writer.write_table( + TableSpec( + **{ + 'name': + 'foo_fancy_type_changes', + 'headings': ['id', 'a', 'b', 'c', 'd', 'e'], + 'rows': [[ + 'bizzle', 'yo dude', '本', 
'true', + datetime.datetime(2015, 2, 13), + '2014-08-01T11:23:45:00.0000Z' + ],] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with writer: - result = dict([(row['id'], row) for row in - writer.connection.execute('SELECT id, a, b, c, d, e FROM foo_fancy_type_changes')]) + result = dict([ + (row['id'], row) for row in writer.connection.execute( + 'SELECT id, a, b, c, d, e FROM foo_fancy_type_changes' + ) + ]) assert len(result) == 2 expected = { - 'bizzle': {'id': 'bizzle', 'a': 'yo dude', 'b': '本', 'c': 'true', - 'd': datetime.date(2015, 2, 13), 'e': '2014-08-01T11:23:45:00.0000Z'}, - 'bazzle': {'id': 'bazzle', 'a': '4', 'b': '日本', 'c': 'false', - 'd': datetime.date(2015, 1, 2), 'e': '2014-05-01 11:16:45'} + 'bizzle': { + 'id': 'bizzle', + 'a': 'yo dude', + 'b': '本', + 'c': 'true', + 'd': datetime.date(2015, 2, 13), + 'e': '2014-08-01T11:23:45:00.0000Z' + }, + 'bazzle': { + 'id': 'bazzle', + 'a': '4', + 'b': '日本', + 'c': 'false', + 'd': datetime.date(2015, 1, 2), + 'e': '2014-05-01 11:16:45' + } } if 'mysql' in writer.connection.engine.driver: @@ -283,15 +423,15 @@ def test_change_type(self, writer): expected['bazzle']['c'] = '0' if 'pyodbc' in writer.connection.engine.driver: expected['bazzle']['c'] = '0' - # couldn't figure out how to make SQL Server convert date to ISO8601 - # see https://docs.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-2017#date-and-time-styles + # couldn't figure out how to make SQL Server convert date to + # ISO8601 see + # https://docs.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-2017#date-and-time-styles expected['bazzle']['e'] = 'May 1 2014 11:16AM' for id, row in result.items(): assert id in expected assert dict(row) == expected[id] - def test_json_type(self, writer): complex_object = { 'poke1': { @@ -325,92 +465,156 @@ def test_json_type(self, writer): with writer: if not writer.is_postgres: return - writer.write_table(TableSpec(**{ - 'name': 'foo_with_json', - 'headings': ['id', 'json_col'], - 'rows': [ - ['simple', {'k1': 'v1', 'k2': 'v2'}], - ['with_lists', {'l1': ['i1', 'i2']}], - ['complex', complex_object], - ], - 'data_types': [ - 'text', - 'json', - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above + writer.write_table( + TableSpec( + **{ + 'name': 'foo_with_json', + 'headings': ['id', 'json_col'], + 'rows': [ + ['simple', { + 'k1': 'v1', + 'k2': 'v2' + }], + ['with_lists', { + 'l1': ['i1', 'i2'] + }], + ['complex', complex_object], + ], + 'data_types': [ + 'text', + 'json', + ] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with writer: - result = dict([(row['id'], row) for row in writer.connection.execute( - 'SELECT id, json_col FROM foo_with_json' - )]) + result = dict([(row['id'], row) for row in writer.connection + .execute('SELECT id, json_col FROM foo_with_json')]) assert len(result) == 3 - assert dict(result['simple']) == {'id': 'simple', 'json_col': {'k1': 'v1', 'k2': 'v2'}} - assert dict(result['with_lists']) == {'id': 'with_lists', 'json_col': {'l1': ['i1', 'i2']}} - assert dict(result['complex']) == {'id': 'complex', 'json_col': complex_object} + assert dict(result['simple']) == { + 'id': 'simple', + 'json_col': { + 'k1': 'v1', + 'k2': 'v2' + } + } + assert dict(result['with_lists']) == { + 'id': 'with_lists', + 'json_col': { + 'l1': ['i1', 'i2'] + } + } + assert dict(result['complex']) == { + 'id': 
'complex', + 'json_col': complex_object + } def test_explicit_types(self, strict_writer): with strict_writer: - strict_writer.write_table(TableSpec(**{ - 'name': 'foo_explicit_types', - 'headings': ['id', 'a', 'b', 'c', 'd'], - 'rows': [ - ['bizzle', '1', 2, 3, '7'], - ['bazzle', '4', 5, 6, '8'], - ], - 'data_types': [ - 'text', - 'integer', - 'text', - None, - ] - })) - - # We can use raw SQL instead of SqlAlchemy expressions because we built the DB above + strict_writer.write_table( + TableSpec( + **{ + 'name': 'foo_explicit_types', + 'headings': ['id', 'a', 'b', 'c', 'd'], + 'rows': [ + ['bizzle', '1', 2, 3, '7'], + ['bazzle', '4', 5, 6, '8'], + ], + 'data_types': [ + 'text', + 'integer', + 'text', + None, + ] + } + ) + ) + + # We can use raw SQL instead of SqlAlchemy expressions because + # we built the DB above with strict_writer: - result = dict([(row['id'], row) for row in strict_writer.connection.execute( - 'SELECT id, a, b, c, d FROM foo_explicit_types' - )]) + result = dict([ + (row['id'], row) for row in strict_writer.connection + .execute('SELECT id, a, b, c, d FROM foo_explicit_types') + ]) assert len(result) == 2 # a casts strings to ints, b casts ints to text, c default falls back to ints, d default falls back to text - assert dict(result['bizzle']) == {'id': 'bizzle', 'a': 1, 'b': '2', 'c': 3, 'd': '7'} - assert dict(result['bazzle']) == {'id': 'bazzle', 'a': 4, 'b': '5', 'c': 6, 'd': '8'} + assert dict(result['bizzle']) == { + 'id': 'bizzle', + 'a': 1, + 'b': '2', + 'c': 3, + 'd': '7' + } + assert dict(result['bazzle']) == { + 'id': 'bazzle', + 'a': 4, + 'b': '5', + 'c': 6, + 'd': '8' + } def test_mssql_nvarchar_length_upsize(self, writer): with writer: if 'odbc' not in writer.connection.engine.driver: return - # Initialize a table with columns where we expect the "some_data" - # column to be of length 900 bytes, and the "big_data" column to be - # of nvarchar(max) - writer.write_table(TableSpec(**{ - 'name': 'mssql_nvarchar_length', - 'headings': ['id', 'some_data', 'big_data'], - 'rows': [ - ['bizzle', (b'\0' * 800).decode('utf-8'), (b'\0' * 901).decode('utf-8')], - ['bazzle', (b'\0' * 500).decode('utf-8'), (b'\0' * 800).decode('utf-8')], - ] - })) + # Initialize a table with columns where we expect the + # "some_data" column to be of length 900 bytes, and the + # "big_data" column to be of nvarchar(max) + writer.write_table( + TableSpec( + **{ + 'name': + 'mssql_nvarchar_length', + 'headings': ['id', 'some_data', 'big_data'], + 'rows': [ + [ + 'bizzle', (b'\0' * 800).decode('utf-8'), + (b'\0' * 901).decode('utf-8') + ], + [ + 'bazzle', (b'\0' * 500).decode('utf-8'), + (b'\0' * 800).decode('utf-8') + ], + ] + } + ) + ) connection = writer.connection - result = self._get_column_lengths(connection, 'mssql_nvarchar_length') + result = self._get_column_lengths( + connection, 'mssql_nvarchar_length' + ) assert result['some_data'] == ('some_data', 'nvarchar', 900) - assert result['big_data'] == ('big_data', 'nvarchar', -1) # nvarchar(max) is listed as -1 - - # put bigger data into "some_column" to ensure it is resized properly - writer.write_table(TableSpec(**{ - 'name': 'mssql_nvarchar_length', - 'headings': ['id', 'some_data', 'big_data'], - 'rows': [ - ['sizzle', (b'\0' * 901).decode('utf-8'), (b'\0' * 901).decode('utf-8')], - ] - })) - - result = self._get_column_lengths(connection, 'mssql_nvarchar_length') + # nvarchar(max) is listed as -1 + assert result['big_data'] == ('big_data', 'nvarchar', -1) + + # put bigger data into "some_column" to ensure it is resized + # 
properly
+            writer.write_table(
+                TableSpec(
+                    **{
+                        'name':
+                            'mssql_nvarchar_length',
+                        'headings': ['id', 'some_data', 'big_data'],
+                        'rows': [[
+                            'sizzle', (b'\0' * 901).decode('utf-8'),
+                            (b'\0' * 901).decode('utf-8')
+                        ],]
+                    }
+                )
+            )
+
+            result = self._get_column_lengths(
+                connection, 'mssql_nvarchar_length'
+            )
             assert result['some_data'] == ('some_data', 'nvarchar', -1)
             assert result['big_data'] == ('big_data', 'nvarchar', -1)

@@ -419,43 +623,69 @@ def test_mssql_nvarchar_length_downsize(self, writer):
             if 'odbc' not in writer.connection.engine.driver:
                 return

-            # Initialize a table with NVARCHAR(max), and make sure smaller data
-            # doesn't reduce the size of the column
+            # Initialize a table with NVARCHAR(max), and make sure
+            # smaller data doesn't reduce the size of the column
             metadata = sqlalchemy.MetaData()
-            create_sql = sqlalchemy.schema.CreateTable(sqlalchemy.Table(
-                'mssql_nvarchar_length_downsize',
-                metadata,
-                sqlalchemy.Column('id', sqlalchemy.NVARCHAR(length=100), primary_key=True),
-                sqlalchemy.Column('some_data', sqlalchemy.NVARCHAR(length=None)),
-            )).compile(writer.connection.engine)
+            create_sql = sqlalchemy.schema.CreateTable(
+                sqlalchemy.Table(
+                    'mssql_nvarchar_length_downsize',
+                    metadata,
+                    sqlalchemy.Column(
+                        'id',
+                        sqlalchemy.NVARCHAR(length=100),
+                        primary_key=True
+                    ),
+                    sqlalchemy.Column(
+                        'some_data', sqlalchemy.NVARCHAR(length=None)
+                    ),
+                )
+            ).compile(writer.connection.engine)
             metadata.create_all(writer.connection.engine)
-            writer.write_table(TableSpec(**{
-                'name': 'mssql_nvarchar_length',
-                'headings': ['id', 'some_data'],
-                'rows': [
-                    ['bizzle', (b'\0' * 800).decode('utf-8'), (b'\0' * 800).decode('utf-8')],
-                    ['bazzle', (b'\0' * 500).decode('utf-8'), (b'\0' * 800).decode('utf-8')],
-                ]
-            }))
-            result = self._get_column_lengths(writer.connection, 'mssql_nvarchar_length_downsize')
+            writer.write_table(
+                TableSpec(
+                    **{
+                        'name':
+                            'mssql_nvarchar_length',
+                        'headings': ['id', 'some_data'],
+                        'rows': [
+                            [
+                                'bizzle', (b'\0' * 800).decode('utf-8'),
+                                (b'\0' * 800).decode('utf-8')
+                            ],
+                            [
+                                'bazzle', (b'\0' * 500).decode('utf-8'),
+                                (b'\0' * 800).decode('utf-8')
+                            ],
+                        ]
+                    }
+                )
+            )
+            result = self._get_column_lengths(
+                writer.connection, 'mssql_nvarchar_length_downsize'
+            )
             assert result['some_data'] == ('some_data', 'nvarchar', -1)

     def test_big_lump_of_poo(self, writer):
         with writer:
-            writer.write_table(TableSpec(**{
-                'name': 'foo_with_emoji',
-                'headings': ['id', 'fun_to_be_had'],
-                'rows': [
-                    ['A steaming poo', '💩'],
-                    ['2020', '😷'],
-                ],
-            }))
+            writer.write_table(
+                TableSpec(
+                    **{
+                        'name': 'foo_with_emoji',
+                        'headings': ['id', 'fun_to_be_had'],
+                        'rows': [
+                            ['A steaming poo', '💩'],
+                            ['2020', '😷'],
+                        ],
+                    }
+                )
+            )

     def _get_column_lengths(self, connection, table_name):
         return {
             row['COLUMN_NAME']: row for row in connection.execute(
                 "SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH "
                 "FROM INFORMATION_SCHEMA.COLUMNS "
-                "WHERE TABLE_NAME = '{}';".format(table_name))
+                "WHERE TABLE_NAME = '{}';".format(table_name)
+            )
         }
diff --git a/tests/utils.py b/tests/utils.py
index 4a5035a9..70295d3c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -2,6 +2,7 @@


 class SqlWriterWithTearDown(SqlTableWriter):
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.tables = set()
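The SQL-writer tests in the patch above all drive the same small API
surface: construct a writer with a SQLAlchemy URL, enter it as a context
manager, and hand it TableSpec objects. A minimal sketch of that flow
outside pytest, assuming TableSpec lives in commcare_export.specs and using
an invented SQLite URL (the suite itself runs against PostgreSQL, MySQL and
MSSQL):

    from commcare_export.specs import TableSpec  # assumed module path
    from commcare_export.writers import SqlTableWriter

    # Any SQLAlchemy connection URL should work here; sqlite is just
    # the easiest thing to demo with, not what the suite tests.
    writer = SqlTableWriter('sqlite:///example.db')
    with writer:
        writer.write_table(TableSpec(
            name='fruit',
            headings=['id', 'name'],
            rows=[['1', 'apple'], ['2', 'pear']],
        ))

Re-running the same write with changed rows updates on the 'id' column
rather than duplicating rows, which is the behaviour test_upsert above
exercises.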
From 9d0eaf88f57a04a3010ff0abaae26b3e4cc5edf5 Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Sat, 23 Apr 2022 11:53:05 +0100
Subject: [PATCH 136/257] Use split_all_top_level_comma_separated_values

---
 .style.yapf                           |  1 +
 commcare_export/builtin_queries.py    | 37 ++++++++++++------
 commcare_export/checkpoint.py         | 30 +++++++++------
 commcare_export/cli.py                | 55 +++++++++++++++++----------
 commcare_export/commcare_hq_client.py | 12 ++++--
 commcare_export/commcare_minilinq.py  | 29 ++++++++------
 commcare_export/env.py                |  9 +++--
 commcare_export/excel_query.py        | 54 ++++++++++++++++----------
 commcare_export/exceptions.py         |  2 +-
 commcare_export/minilinq.py           | 29 ++++++--------
 commcare_export/utils.py              | 29 +++++++++++---
 commcare_export/version.py            |  3 +-
 commcare_export/writers.py            | 20 ++++++----
 13 files changed, 195 insertions(+), 115 deletions(-)

diff --git a/.style.yapf b/.style.yapf
index 4398b865..d7bee203 100644
--- a/.style.yapf
+++ b/.style.yapf
@@ -8,3 +8,4 @@ spaces_before_comment = 2
 split_before_arithmetic_operator = true
 split_before_bitwise_operator = true
 split_before_logical_operator = true
+split_all_top_level_comma_separated_values = true
diff --git a/commcare_export/builtin_queries.py b/commcare_export/builtin_queries.py
index f47debde..b1167ed8 100644
--- a/commcare_export/builtin_queries.py
+++ b/commcare_export/builtin_queries.py
@@ -24,19 +24,23 @@ def mapped_source_field(self):
             return Reference(self.source)
         else:
             return Apply(
-                Reference(self.map_function), Reference(self.source),
+                Reference(self.map_function),
+                Reference(self.source),
                 *self.extra_args
             )


 def compile_query(columns, data_source, table_name):
     source = Apply(
-        Reference('api_data'), Literal(data_source),
+        Reference('api_data'),
+        Literal(data_source),
         Reference('checkpoint_manager')
     )
     part = excel_query.SheetParts(
-        table_name, [c.name for c in columns], source,
-        List([c.mapped_source_field for c in columns]), None
+        table_name, [c.name for c in columns],
+        source,
+        List([c.mapped_source_field for c in columns]),
+        None
     )
     return excel_query.compile_queries([part], None, False)[0]

@@ -92,8 +96,7 @@ def set_depth(lt):
         set_depth(lt)

     ordered_location_types = sorted(
-        location_types.values(),
-        key=lambda lt: -depth[lt['resource_uri']]
+        location_types.values(), key=lambda lt: -depth[lt['resource_uri']]
     )
     location_codes = [lt['code'] for lt in ordered_location_types]

@@ -119,24 +122,34 @@ def sql_column_name(code):
     Column('resource_uri', 'resource_uri'),
     Column('site_code', 'site_code'),
     Column(
-        'location_type_administrative', 'location_type',
-        'get_location_info', Literal('administrative')
+        'location_type_administrative',
+        'location_type',
+        'get_location_info',
+        Literal('administrative')
     ),
     Column(
-        'location_type_code', 'location_type', 'get_location_info',
+        'location_type_code',
+        'location_type',
+        'get_location_info',
         Literal('code')
     ),
     Column(
-        'location_type_name', 'location_type', 'get_location_info',
+        'location_type_name',
+        'location_type',
+        'get_location_info',
         Literal('name')
     ),
     Column(
-        'location_type_parent', 'location_type', 'get_location_info',
+        'location_type_parent',
+        'location_type',
+        'get_location_info',
         Literal('parent')
     ),
 ] + [
     Column(
-        sql_column_name(code), 'resource_uri', 'get_location_ancestor',
+        sql_column_name(code),
+        'resource_uri',
+        'get_location_ancestor',
         Literal(code)
     ) for code in location_codes
 ]
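Before the checkpoint changes, a note on what the builtin_queries hunk
above is reformatting. The behaviour of Column is unchanged by the patch:
mapped_source_field returns a bare Reference when no map function is given,
and wraps the source in an Apply otherwise. A small sketch using only names
visible in the hunk (the printed forms are approximate):

    from commcare_export.builtin_queries import Column
    from commcare_export.minilinq import Literal

    plain = Column('site_code', 'site_code')
    print(plain.mapped_source_field)
    # roughly: Reference('site_code')

    typed = Column(
        'location_type_code',
        'location_type',
        'get_location_info',
        Literal('code')
    )
    print(typed.mapped_source_field)
    # roughly: Apply(Reference('get_location_info'),
    #                Reference('location_type'), Literal('code'))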
self.data_source, ', '.join(self.table_names), @@ -276,8 +276,10 @@ def get_legacy_checkpoints(self): ) if table_run: return self._set_checkpoint( - table_run.since_param, PaginationMode.date_modified, - table_run.final, table_run.time_of_run + table_run.since_param, + PaginationMode.date_modified, + table_run.final, + table_run.time_of_run ) # Check for run without the args @@ -291,8 +293,10 @@ def get_legacy_checkpoints(self): ) if table_run: return self._set_checkpoint( - table_run.since_param, PaginationMode.date_modified, - table_run.final, table_run.time_of_run + table_run.since_param, + PaginationMode.date_modified, + table_run.final, + table_run.time_of_run ) def _get_last_checkpoint(self, session, **kwarg_filters): @@ -309,7 +313,10 @@ def log_warnings(self, run: Checkpoint) -> None: "Query differs from most recent checkpoint:\n" "From checkpoint: name=%s, md5=%s\n" "From command line args: name=%s, md5=%s\n", - run.query_file_name, run.query_file_md5, self.query, + # + run.query_file_name, + run.query_file_md5, + self.query, self.query_md5 ) @@ -349,8 +356,10 @@ def get_latest_checkpoints(self): """ with session_scope(self.Session) as session: cols = [ - Checkpoint.project, Checkpoint.commcare, - Checkpoint.query_file_md5, Checkpoint.table_name + Checkpoint.project, + Checkpoint.commcare, + Checkpoint.query_file_md5, + Checkpoint.table_name ] inner_query = self._filter_query( session.query( @@ -482,7 +491,7 @@ def get_checkpoint_manager(self, data_source, table_names): logger.info( "Creating checkpoint manager for tables: %s, since: %s, " "pagination_mode: %s", - + # ', '.join(table_names), since, pagination_mode.name, @@ -495,8 +504,7 @@ def get_checkpoint_manager(self, data_source, table_names): "future versions. To switch to the new mode you must re-sync " "your data using\n" "`--start-over`. For more details see: %s" - "\n====================================\n", - + "\n====================================\n", # "https://github.com/dimagi/commcare-export/releases/tag/1.5.0" ) return CheckpointManagerWithDetails(manager, since, pagination_mode) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index bb70ee14..d685a06a 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -69,7 +69,7 @@ def add_to_parser(self, parser, **additional_kwargs): 'commcare-hq', default='prod', help='Base url for the CommCare HQ instance e.g. ' - 'https://www.commcarehq.org' + 'https://www.commcarehq.org' ), Argument('api-version', default=LATEST_KNOWN_VERSION), Argument('project'), @@ -83,24 +83,24 @@ def add_to_parser(self, parser, **additional_kwargs): default='password', choices=['password', 'apikey'], help='Use "digest" auth, or "apikey" auth (for two factor enabled ' - 'domains).' + 'domains).' ), Argument( 'since', help='Export all data after this date. Format YYYY-MM-DD or ' - 'YYYY-MM-DDTHH:mm:SS' + 'YYYY-MM-DDTHH:mm:SS' ), Argument( 'until', help='Export all data up until this date. Format YYYY-MM-DD or ' - 'YYYY-MM-DDTHH:mm:SS' + 'YYYY-MM-DDTHH:mm:SS' ), Argument( 'start-over', default=False, action='store_true', help='When saving to a SQL database; the default is to pick up ' - 'since the last success. This disables that.' + 'since the last success. This disables that.' ), Argument('profile'), Argument('verbose', default=False, action='store_true'), @@ -121,7 +121,7 @@ def add_to_parser(self, parser, **additional_kwargs): default=False, action='store_true', help="When saving to a SQL database don't allow changing column types " - "once they are created." 
+ "once they are created." ), Argument( 'missing-value', @@ -136,15 +136,15 @@ def add_to_parser(self, parser, **additional_kwargs): Argument( 'checkpoint-key', help="Use this key for all checkpoints instead of the query file MD5 " - "hash in order to prevent table rebuilds after a query file has " - "been edited." + "hash in order to prevent table rebuilds after a query file has " + "been edited." ), Argument( 'users', default=False, action='store_true', help="Export a table containing data about this project's mobile " - "workers" + "workers" ), Argument( 'locations', @@ -157,16 +157,16 @@ def add_to_parser(self, parser, **additional_kwargs): default=False, action='store_true', help="Export tables containing mobile worker data and location data " - "and add a commcare_userid field to any exported form or case" + "and add a commcare_userid field to any exported form or case" ), Argument( 'export-root-if-no-subdocument', default=False, action='store_true', help="Use this when you are exporting a nested document e.g. " - "form.form..case, messaging-event.messages.[*] And you want to " - "have a record exported even if the nested document does not " - "exist or is empty.", + "form.form..case, messaging-event.messages.[*] And you want to " + "have a record exported even if the nested document does not " + "exist or is empty.", ) ] @@ -239,23 +239,37 @@ def main(argv): def _get_query(args, writer, column_enforcer=None): return _get_query_from_file( - args.query, args.missing_value, writer.supports_multi_table_write, - writer.max_column_length, writer.required_columns, column_enforcer, + args.query, + args.missing_value, + writer.supports_multi_table_write, + writer.max_column_length, + writer.required_columns, + column_enforcer, args.export_root_if_no_subdocument ) def _get_query_from_file( - query_arg, missing_value, combine_emits, max_column_length, - required_columns, column_enforcer, value_or_root + query_arg, + missing_value, + combine_emits, + max_column_length, + required_columns, + column_enforcer, + value_or_root ): if os.path.exists(query_arg): if os.path.splitext(query_arg)[1] in ['.xls', '.xlsx']: import openpyxl workbook = openpyxl.load_workbook(query_arg) return excel_query.get_queries_from_excel( - workbook, missing_value, combine_emits, max_column_length, - required_columns, column_enforcer, value_or_root + workbook, + missing_value, + combine_emits, + max_column_length, + required_columns, + column_enforcer, + value_or_root ) else: with io.open(query_arg, encoding='utf-8') as fh: @@ -383,7 +397,8 @@ def evaluate_query(env, query): print('Try increasing --batch-size to overcome the error') return EXIT_STATUS_ERROR except ( - sqlalchemy.exc.DataError, sqlalchemy.exc.InternalError, + sqlalchemy.exc.DataError, + sqlalchemy.exc.InternalError, sqlalchemy.exc.ProgrammingError ) as e: print('Stopping because of database error:\n', e) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 53ddb2d3..78cea343 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -208,7 +208,10 @@ def iterate_resource(resource=resource, params=params): more_to_fetch = not repeated_last_page_of_non_counting_resource self.checkpoint( - checkpoint_manager, paginator, batch, not more_to_fetch + checkpoint_manager, + paginator, + batch, + not more_to_fetch ) return RepeatableIterator(iterate_resource) @@ -255,15 +258,16 @@ def __init__(self, mock_data): self.mock_data = { resource: { _params_to_url(params): result - for 
params, result in resource_results - } for resource, resource_results in mock_data.items() + for (params, result) in resource_results + } for (resource, resource_results) in mock_data.items() } def iterate( self, resource, paginator, params=None, checkpoint_manager=None ): logger.debug( - 'Mock client call to resource "%s" with params "%s"', resource, + 'Mock client call to resource "%s" with params "%s"', + resource, params ) return self.mock_data[resource][_params_to_url(params)] diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 9e990432..5d193209 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -14,7 +14,12 @@ from commcare_export.misc import unwrap SUPPORTED_RESOURCES = { - 'form', 'case', 'user', 'location', 'application', 'web-user', + 'form', + 'case', + 'user', + 'location', + 'application', + 'web-user', 'messaging-event' } @@ -66,15 +71,16 @@ def __call__(self, since, until): "server_modified_on": range_expression } }] - }, { - "and": [ - server_modified_missing, { - "range": { - "received_on": range_expression - } - } - ] - }] + }, + { + "and": [ + server_modified_missing, { + "range": { + "received_on": range_expression + } + } + ] + }] } }) @@ -87,8 +93,7 @@ def __call__(self, since, until): 'server_date_modified': SimpleSinceParams( 'server_date_modified_start', 'server_date_modified_end' - ), - # used by messaging-events + ), # used by messaging-events 'date_last_activity': SimpleSinceParams('date_last_activity.gte', 'date_last_activity.lt'), } diff --git a/commcare_export/env.py b/commcare_export/env.py index 15d0fdd4..d1c8e3f9 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -429,9 +429,12 @@ def attachment_url(val): return None from commcare_export.minilinq import Apply, Reference, Literal return Apply( - Reference('template'), Literal('{}/a/{}/api/form/attachment/{}/{}'), - Reference('commcarehq_base_url'), Reference('$.domain'), - Reference('$.id'), Literal(val) + Reference('template'), + Literal('{}/a/{}/api/form/attachment/{}/{}'), + Reference('commcarehq_base_url'), + Reference('$.domain'), + Reference('$.id'), + Literal(val) ) diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index bb4baa84..ea31ceb5 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -116,7 +116,8 @@ def compile_filters(worksheet, mappings=None): return [] filter_values = extended_to_len( - len(filter_names), [ + len(filter_names), + [ cell.value for cell in get_column_by_name(worksheet, 'filter value') or [] ] @@ -167,7 +168,8 @@ def compile_field( if alternate_source_fields: expr = Apply( - Reference('or'), expr, + Reference('or'), + expr, *[Reference(alt_field) for alt_field in alternate_source_fields] ) if map_via: @@ -185,7 +187,10 @@ def compile_field( def compile_mapped_field(field_mappings, field_expression): # quote the ref in case it has special chars quoted_field = Apply( - Reference('join'), Literal(''), Literal('"'), field_expression, + Reference('join'), + Literal(''), + Literal('"'), + field_expression, Literal('"') ) # produce the mapping reference i.e. 
'mapping."X"' @@ -222,7 +227,7 @@ def _get_alternate_source_fields_from_columns(worksheet, num_fields): alt_source_cols = [ extended_to_len( num_fields, [cell.value if cell else cell for cell in alt_col] - ) for col_name, alt_col in matching_columns + ) for (col_name, alt_col) in matching_columns ] # transpose columns to rows alt_srouce_fields = map(list, zip(*alt_source_cols)) @@ -272,7 +277,8 @@ def compile_fields(worksheet, mappings=None): format_via=format_via.value if format_via else None, mappings=mappings ) - for field, source_field, alt_source_fields, map_via, format_via in args + for (field, source_field, alt_source_fields, map_via, format_via) + in args ] @@ -466,8 +472,12 @@ def parse_sheet( data_types = [Literal(data_type.value) for data_type in output_types] if column_enforcer is not None: (headings, body) = require_column_in_sheet( - worksheet.title, data_source, output_table_name, - output_headings, output_fields, column_enforcer + worksheet.title, + data_source, + output_table_name, + output_headings, + output_fields, + column_enforcer ) source = source_expr else: @@ -508,7 +518,13 @@ def __new__( ): data_types = data_types or [] return super(SheetParts, cls).__new__( - cls, name, headings, source, body, root_expr, data_types, + cls, + name, + headings, + source, + body, + root_expr, + data_types, data_source ) @@ -600,14 +616,8 @@ def get_multi_emit_query(source, sheets, missing_value): # the filter here is to prevent accumulating a `[None]` value for # each doc multi_query = Filter( - predicate=Apply( - Reference("filter_empty"), - Reference("$") - ), - source=Map( - source=source, - body=List(emits) - ) + predicate=Apply(Reference("filter_empty"), Reference("$")), + source=Map(source=source, body=List(emits)) ) for sheet in sheets: @@ -630,9 +640,11 @@ def get_multi_emit_query(source, sheets, missing_value): return Bind( 'checkpoint_manager', Apply( - Reference('get_checkpoint_manager'), Literal(data_source), + Reference('get_checkpoint_manager'), + Literal(data_source), Literal(table_names) - ), multi_query + ), + multi_query ) @@ -660,9 +672,11 @@ def _get_source(source, root_expr): return Bind( 'checkpoint_manager', Apply( - Reference('get_checkpoint_manager'), Literal(sheet.data_source), + Reference('get_checkpoint_manager'), + Literal(sheet.data_source), Literal([sheet.name]) - ), emit + ), + emit ) diff --git a/commcare_export/exceptions.py b/commcare_export/exceptions.py index cfa775f5..f4e7dbd2 100644 --- a/commcare_export/exceptions.py +++ b/commcare_export/exceptions.py @@ -36,7 +36,7 @@ def message(self): lines = [ 'Sheet "{}" is missing definitions for required fields: "{}"' .format(sheet, '", "'.join(missing_cols)) - for sheet, missing_cols in self.errors_by_sheet.items() + for (sheet, missing_cols) in self.errors_by_sheet.items() ] return '\n'.join(lines) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 45ae9b94..1fc75b70 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -211,10 +211,8 @@ def iterate( def __eq__(self, other): return ( - isinstance(other, Filter) - and self.source == other.source - and self.name == other.name - and self.predicate == other.predicate + isinstance(other, Filter) and self.source == other.source + and self.name == other.name and self.predicate == other.predicate ) @classmethod @@ -302,10 +300,8 @@ def iterate( def __eq__(self, other): return ( - isinstance(other, Map) - and self.name == other.name - and self.source == other.source - and self.body == other.body + 
isinstance(other, Map) and self.name == other.name + and self.source == other.source and self.body == other.body ) @classmethod @@ -350,8 +346,7 @@ def eval(self, env): source_result = self.source.eval(env) def iterate( - env=env, - source_result=source_result + env=env, source_result=source_result ): # Python closure workaround if self.name: for item in source_result: @@ -368,10 +363,8 @@ def iterate( def __eq__(self, other): return ( - isinstance(other, FlatMap) - and self.name == other.name - and self.source == other.source - and self.body == other.body + isinstance(other, FlatMap) and self.name == other.name + and self.source == other.source and self.body == other.body ) @classmethod @@ -430,8 +423,7 @@ def eval(self, env): def __eq__(self, other): return ( - isinstance(other, Apply) - and self.fn == other.fn + isinstance(other, Apply) and self.fn == other.fn and self.args == other.args ) @@ -553,7 +545,10 @@ def __eq__(self, other): def __repr__(self): return '%s(table=%r, headings=%r, source=%r, missing_value=%r)' % ( - self.__class__.__name__, self.table, self.headings, self.source, + self.__class__.__name__, + self.table, + self.headings, + self.source, self.missing_value ) diff --git a/commcare_export/utils.py b/commcare_export/utils.py index 7154e86f..d01209c5 100644 --- a/commcare_export/utils.py +++ b/commcare_export/utils.py @@ -14,7 +14,11 @@ def get_checkpoint_manager(args, require_query=True): raise return CheckpointManager( - args.output, args.query, md5, args.project, args.commcare_hq, + args.output, + args.query, + md5, + args.project, + args.commcare_hq, args.checkpoint_key ) @@ -31,9 +35,15 @@ def print_runs(runs): rows = [] for run in runs: rows.append([ - run.time_of_run, run.since_param, "True" if run.final else "False", - run.project, run.query_file_name, run.query_file_md5, run.key, - run.table_name, run.commcare + run.time_of_run, + run.since_param, + "True" if run.final else "False", + run.project, + run.query_file_name, + run.query_file_md5, + run.key, + run.table_name, + run.commcare ]) rows = [[val if val is not None else '' for val in row] for row in rows] @@ -42,8 +52,15 @@ def print_runs(runs): sys.stdout, compute_widths=True ).write_table({ 'headings': [ - "Checkpoint Time", "Batch end date", "Export Complete", "Project", - "Query Filename", "Query MD5", "Key", "Table", "CommCare HQ" + "Checkpoint Time", + "Batch end date", + "Export Complete", + "Project", + "Query Filename", + "Query MD5", + "Key", + "Table", + "CommCare HQ" ], 'rows': rows }) diff --git a/commcare_export/version.py b/commcare_export/version.py index bfa32bb9..3f083bf3 100644 --- a/commcare_export/version.py +++ b/commcare_export/version.py @@ -17,7 +17,8 @@ def stored_version(): def git_version(): described_version_bytes = subprocess.Popen( - ['git', 'describe'], stdout=subprocess.PIPE + ['git', 'describe'], + stdout=subprocess.PIPE ).communicate()[0].strip() return described_version_bytes.decode('ascii') diff --git a/commcare_export/writers.py b/commcare_export/writers.py index f176f354..3a4e0a79 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -321,8 +321,7 @@ def max_column_length(self): @property def metadata(self): if ( - not hasattr(self, '_metadata') - or self._metadata.bind.closed + not hasattr(self, '_metadata') or self._metadata.bind.closed or self._metadata.bind.invalidated ): if self.connection.closed: @@ -510,7 +509,8 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): if self.strict_types: create_sql = 
sqlalchemy.schema.CreateTable(
                 sqlalchemy.Table(
-                    table_name, sqlalchemy.MetaData(),
+                    table_name,
+                    sqlalchemy.MetaData(),
                     *self._get_columns_for_data(row_dict, data_type_dict)
                 )
             ).compile(self.connection.engine)
@@ -519,7 +519,7 @@
                 f"with:\n{create_sql}"
             )
             empty_cols = [
-                name for name, val in row_dict.items()
+                name for (name, val) in row_dict.items()
                 if val is None and name not in data_type_dict
             ]
             if empty_cols:
@@ -571,7 +571,11 @@ def get_current_table_columns():
         if new_type:
             logger.warning(
                 'Altering column %s from %s to %s for value: "%s:%s"',
-                columns[column], current_ty, new_type, type(val), val
+                columns[column],
+                current_ty,
+                new_type,
+                type(val),
+                val
             )
             op.alter_column(table_name, column, type_=new_type)
             self.metadata.clear()
@@ -615,7 +619,7 @@ def _get_columns_for_data(self, row_dict, data_type_dict):
                 self.get_data_type(data_type_dict[column_name], val),
                 nullable=True
             )
-            for column_name, val in row_dict.items()
-            if (val is not None or data_type_dict[column_name])
-            and column_name != 'id'
+            for (column_name, val) in row_dict.items()
+            if ((val is not None or data_type_dict[column_name])
+                and column_name != 'id')
         ]

From fad65e9b99b80bffa809ac6b8b606e6f6efc3fa0 Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Sun, 24 Apr 2022 00:23:27 +0100
Subject: [PATCH 137/257] Fix f-string

---
 commcare_export/commcare_hq_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py
index 78cea343..b8bf94bd 100644
--- a/commcare_export/commcare_hq_client.py
+++ b/commcare_export/commcare_hq_client.py
@@ -127,7 +127,7 @@ def get(self, resource, params=None):
         want this to work like (or via) slumber.
         """
         logger.debug("Fetching '%s' batch: %s", resource, params)
-        resource_url = '{self.api_url}/{resource}/'
+        resource_url = f'{self.api_url}/{resource}/'
         response = self.session.get(
             resource_url, params=params, auth=self.__auth, timeout=60
         )

From b13556bd724ca3dd8208778cdaf421045e2b581f Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Fri, 22 Apr 2022 17:33:52 +0100
Subject: [PATCH 138/257] Move types from docstrings into type hints

Ken Knowles used typing back in 2013, and added type hints to
docstrings. Pull those docs out, and use real type hints instead, so
that they can be parsed and are more useful.

---
 commcare_export/env.py      | 29 ++++++++++++++++-------------
 commcare_export/minilinq.py | 33 +++++++++++++++++++++++----------
 2 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/commcare_export/env.py b/commcare_export/env.py
index d1c8e3f9..12a99330 100644
--- a/commcare_export/env.py
+++ b/commcare_export/env.py
@@ -2,6 +2,7 @@
 import json
 import operator
 import uuid
+from typing import Any, Dict, Union, overload

 import pytz

@@ -42,20 +43,16 @@ class Env(object):
     #
     # Interface
     #
-    def bind(self, name, value):
+    def bind(self, name: str, value: Any) -> 'Env':
         """
-        (key, ??) -> Env
-
         Returns a new environment that is equivalent to the current
         except the provided key is bound to the value passed in. If the
         environment does not support such a binding, raises CannotBind
         """
         raise NotImplementedError()

-    def lookup(self, key):
+    def lookup(self, key: str) -> Any:
         """
-        key -> ??
-
         Note that the return value may be ``None`` which may mean the
         value was unbound or may mean it was found and was None.
         This may need revisiting.
This may also raise NotFound if it is the @@ -63,10 +60,8 @@ def lookup(self, key): """ raise NotImplementedError() - def replace(self, data): + def replace(self, data: dict) -> 'Env': """ - data -> Env - Completely replace the environment with new data (somewhat like "this"-based Map functions a la jQuery). Could be the same as creating a new empty env and binding "@" in JsonPath. @@ -212,8 +207,10 @@ def parse(self, jsonpath_string): JSONPATH_CACHE[jsonpath_string] = parse_jsonpath(jsonpath_string) return JSONPATH_CACHE[jsonpath_string] - def lookup(self, name): - "str|JsonPath -> ??" + def lookup( + self, + name: Union[str, jsonpath.JSONPath] + ) -> RepeatableIterator: if isinstance(name, str): jsonpath_expr = self.parse(name) elif isinstance(name, jsonpath.JSONPath): @@ -238,9 +235,15 @@ def iterator(jsonpath_expr=jsonpath_expr): # Capture closure return RepeatableIterator(iterator) - def bind(self, *args): - "(str, ??) -> Env | ({str: ??}) -> Env" + @overload + def bind(self, key: str, value: Any, *args) -> Env: + ... + @overload + def bind(self, bindings: Dict[str, Any], *args) -> Env: + ... + + def bind(self, *args): new_bindings = dict(self.__bindings) if isinstance(args[0], dict): new_bindings.update(args[0]) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 1fc75b70..77e69923 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,6 +1,9 @@ import logging +from typing import Any from typing import List as ListType +from typing import Optional +from commcare_export.env import Env from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.specs import TableSpec @@ -17,8 +20,7 @@ class MiniLinq(object): def __init__(self, *args, **kwargs): raise NotImplementedError() - def eval(self, env): - "( env: object(bindings: {str: ??}, writer: Writer) )-> ??" + def eval(self, env: Env) -> Any: raise NotImplementedError() #### Factory methods #### @@ -138,8 +140,7 @@ class Bind(MiniLinq): too large to store, so it'll be re-run on each access. """ - def __init__(self, name, value, body): - "(str, MiniLinq, MiniLinq) -> MiniLinq" + def __init__(self, name: str, value: MiniLinq, body: MiniLinq) -> None: self.name = name self.value = value self.body = body @@ -186,8 +187,12 @@ class Filter(MiniLinq): Just what it sounds like """ - def __init__(self, source, predicate, name=None): - "(MiniLinq, MiniLinq, var?) -> MiniLinq" + def __init__( + self, + source: MiniLinq, + predicate: MiniLinq, + name: Optional[str] = None + ) -> None: self.source = source self.name = name self.predicate = predicate @@ -277,8 +282,12 @@ class Map(MiniLinq): enabling references to the rest of the env. """ - def __init__(self, source, body, name=None): - "(MiniLinq, MiniLinq, var?) -> MiniLinq" + def __init__( + self, + source: MiniLinq, + body: MiniLinq, + name: Optional[str] = None + ) -> None: self.source = source self.name = name self.body = body @@ -336,8 +345,12 @@ class FlatMap(MiniLinq): enabling references to the rest of the env. """ - def __init__(self, source, body, name=None): - "(MiniLinq, MiniLinq, var?) 
-> MiniLinq" + def __init__( + self, + source: MiniLinq, + body: MiniLinq, + name: Optional[str] = None + ) -> None: self.source = source self.name = name self.body = body From 9aba7f1f118c6f36d6c5a0c24b2a4105503fd946 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sun, 24 Apr 2022 00:51:57 +0100 Subject: [PATCH 139/257] Add mypy config --- .travis.yml | 5 ++++- mypy.ini | 35 +++++++++++++++++++++++++++++++++++ mypy_typed_modules.txt | 2 ++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 mypy.ini create mode 100644 mypy_typed_modules.txt diff --git a/.travis.yml b/.travis.yml index 34a9b784..d99cbd42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,12 +28,15 @@ install: - pip install pymysql psycopg2 pyodbc - pip install coverage coveralls - sudo ACCEPT_EULA=Y apt-get install msodbcsql17 + - pip install mypy before_script: - mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'travis'@'%';"; - docker ps -a - odbcinst -q -d - .travis/wait.sh -script: coverage run setup.py test +script: + - coverage run setup.py test + - mypy --install-types --non-interactive @mypy_typed_modules.txt after_success: - coveralls services: diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..c0997e92 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,35 @@ +# Global options: + +[mypy] +python_version = 3.9 +follow_imports = silent + +# TODO: Get or create stubs for libraries. Until then: +ignore_missing_imports = True + +# Typing strictness +check_untyped_defs = True +disallow_subclassing_any = True +disallow_any_generics = True +warn_return_any = True +strict_equality = True +# Set "disallow_untyped_defs = True" for completely typed modules in +# per-module options below + +# Check for drift +warn_redundant_casts = True +warn_unused_ignores = True +warn_unused_configs = True + +# Non-typing checks +implicit_reexport = False +warn_unreachable = True + +# Reporting +show_error_codes = True + + +# Per-module options: + +# [mypy-commcare_export.minilinq] +# disallow_untyped_defs = True diff --git a/mypy_typed_modules.txt b/mypy_typed_modules.txt new file mode 100644 index 00000000..d2155693 --- /dev/null +++ b/mypy_typed_modules.txt @@ -0,0 +1,2 @@ +commcare_export/env.py +commcare_export/minilinq.py From bcd3def626b3f695858dda03806a15bd1940e1b5 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sat, 23 Apr 2022 11:08:36 +0100 Subject: [PATCH 140/257] Fix types --- commcare_export/env.py | 10 +++++----- commcare_export/minilinq.py | 16 +++++++--------- commcare_export/utils.py | 10 ++++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/commcare_export/env.py b/commcare_export/env.py index 12a99330..14df587a 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -60,7 +60,7 @@ def lookup(self, key: str) -> Any: """ raise NotImplementedError() - def replace(self, data: dict) -> 'Env': + def replace(self, data: Dict[str, Any]) -> 'Env': """ Completely replace the environment with new data (somewhat like "this"-based Map functions a la jQuery). 
Could be the same as @@ -412,9 +412,9 @@ def format_uuid(val): def join(*args): - args = [unwrap_val(arg) for arg in args] + args_ = [unwrap_val(arg) for arg in args] try: - return args[0].join(args[1:]) + return args_[0].join(args_[1:]) except TypeError: return '""' @@ -463,8 +463,8 @@ def _doc_url(url_path): def template(format_template, *args): - args = [unwrap_val(arg) for arg in args] - return format_template.format(*args) + args_ = [unwrap_val(arg) for arg in args] + return format_template.format(*args_) def _or(*args): diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 77e69923..95e0f009 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,5 +1,5 @@ import logging -from typing import Any +from typing import Any, Dict from typing import List as ListType from typing import Optional @@ -25,7 +25,7 @@ def eval(self, env: Env) -> Any: #### Factory methods #### - _node_classes = {} + _node_classes: Dict[str, 'MiniLinq'] = {} @classmethod def register(cls, clazz, slug=None): @@ -72,6 +72,9 @@ def from_jvalue(cls, jvalue): return cls._node_classes[slug].from_jvalue(jvalue) + def to_jvalue(self): + raise NotImplementedError() + class Reference(MiniLinq): """ @@ -176,11 +179,6 @@ def to_jvalue(self): } } - def __repr__(self): - return '%s(name=%r, value=%r, body=%r)' % ( - self.__class__.__name__, self.name, self.value, self.body - ) - class Filter(MiniLinq): """ @@ -476,8 +474,8 @@ class Emit(MiniLinq): def __init__( self, table: str, - headings: ListType[str], - source: ListType[MiniLinq], + headings: ListType[MiniLinq], + source: MiniLinq, missing_value=None, data_types=None, ): diff --git a/commcare_export/utils.py b/commcare_export/utils.py index d01209c5..7a7d7e4b 100644 --- a/commcare_export/utils.py +++ b/commcare_export/utils.py @@ -2,6 +2,7 @@ from commcare_export import misc from commcare_export.checkpoint import CheckpointManager +from commcare_export.specs import TableSpec from commcare_export.writers import StreamingMarkdownTableWriter @@ -50,8 +51,9 @@ def print_runs(runs): StreamingMarkdownTableWriter( sys.stdout, compute_widths=True - ).write_table({ - 'headings': [ + ).write_table(TableSpec( + name='', + headings=[ "Checkpoint Time", "Batch end date", "Export Complete", @@ -62,5 +64,5 @@ def print_runs(runs): "Table", "CommCare HQ" ], - 'rows': rows - }) + rows=rows, + )) From 23a83b2ee63a50ac2ed86fec4fd9ba534d49b44e Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sun, 24 Apr 2022 00:14:23 +0100 Subject: [PATCH 141/257] parse_args() doesn't raise UnicodeDecodeError --- commcare_export/cli.py | 15 +-------------- commcare_export/utils_cli.py | 14 +------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index d685a06a..8fba0904 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -178,20 +178,7 @@ def main(argv): for arg in CLI_ARGS: arg.add_to_parser(parser) - try: - args = parser.parse_args(argv) - except UnicodeDecodeError: - for arg in argv: - try: - arg.encode('utf-8') - except UnicodeDecodeError: - print( - u"ERROR: Argument '%s' contains unicode characters. 
" - u"Only ASCII characters are supported.\n" - % unicode(arg, 'utf-8'), - file=sys.stderr - ) - sys.exit(1) + args = parser.parse_args(argv) if args.verbose: logging.basicConfig( diff --git a/commcare_export/utils_cli.py b/commcare_export/utils_cli.py index 3322409d..b5ef1da0 100644 --- a/commcare_export/utils_cli.py +++ b/commcare_export/utils_cli.py @@ -144,19 +144,7 @@ def main(argv): ) command_type.add_arguments(sub) - try: - args = parser.parse_args(argv) - except UnicodeDecodeError: - for arg in argv: - try: - arg.encode('utf-8') - except UnicodeDecodeError: - sys.stderr.write( - u"ERROR: Argument '%s' contains unicode characters. " - u"Only ASCII characters are supported.\n" - % unicode(arg, 'utf-8') - ) - sys.exit(1) + args = parser.parse_args(argv) logging.basicConfig( level=logging.WARN, From 389298bf21effd398dd103353d70a0ef0445cf6c Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sat, 23 Apr 2022 12:10:09 +0100 Subject: [PATCH 142/257] Drop unnecessary `__init__()` --- commcare_export/minilinq.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 1fc75b70..9f1c01a9 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -14,9 +14,6 @@ class MiniLinq(object): for dispatching parsing, etc. """ - def __init__(self, *args, **kwargs): - raise NotImplementedError() - def eval(self, env): "( env: object(bindings: {str: ??}, writer: Writer) )-> ??" raise NotImplementedError() From db4797bcc2f77c405444bb80ccee2b93297094fc Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sun, 24 Apr 2022 00:58:50 +0100 Subject: [PATCH 143/257] Drop duplicate `__repr__()` --- commcare_export/minilinq.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 9f1c01a9..6d9765be 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -172,11 +172,6 @@ def to_jvalue(self): } } - def __repr__(self): - return '%s(name=%r, value=%r, body=%r)' % ( - self.__class__.__name__, self.name, self.value, self.body - ) - class Filter(MiniLinq): """ From e3a410b4f663d36bd679c23c8839125b54757ad5 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Sat, 23 Apr 2022 12:10:32 +0100 Subject: [PATCH 144/257] Don't shadow outer variables --- commcare_export/minilinq.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 6d9765be..9841f53b 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -187,16 +187,18 @@ def __init__(self, source, predicate, name=None): def eval(self, env): source_result = self.source.eval(env) + # Python closure workaround def iterate( - env=env, source_result=source_result - ): # Python closure workaround + env_=env, + source_result_=source_result, + ): if self.name: - for item in source_result: - if self.predicate.eval(env.bind(self.name, item)): + for item in source_result_: + if self.predicate.eval(env_.bind(self.name, item)): yield item else: - for item in source_result: - if self.predicate.eval(env.replace(item)): + for item in source_result_: + if self.predicate.eval(env_.replace(item)): yield item return RepeatableIterator(iterate) @@ -337,18 +339,20 @@ def __init__(self, source, body, name=None): def eval(self, env): source_result = self.source.eval(env) + # Python closure workaround def iterate( - env=env, source_result=source_result - ): # Python closure workaround + env_=env, + 
source_result_=source_result, + ): if self.name: - for item in source_result: + for item in source_result_: for result_item in self.body.eval( - env.bind(self.name, item) + env_.bind(self.name, item) ): yield result_item else: - for item in source_result: - for result_item in self.body.eval(env.replace(item)): + for item in source_result_: + for result_item in self.body.eval(env_.replace(item)): yield result_item return RepeatableIterator(iterate) From 066db8b2c3c1998283b02162ac807d8c150d3fa7 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 11:34:38 +0100 Subject: [PATCH 145/257] Initialize self._metadata in __init__() --- commcare_export/writers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 3a4e0a79..371280c1 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -279,6 +279,7 @@ def __init__(self, db_url, poolclass=None, engine=None): self.engine = engine or sqlalchemy.create_engine( db_url, poolclass=poolclass ) + self._metadata = None def __enter__(self): self.connection = self.engine.connect() @@ -321,7 +322,8 @@ def max_column_length(self): @property def metadata(self): if ( - not hasattr(self, '_metadata') or self._metadata.bind.closed + self._metadata is None + or self._metadata.bind.closed or self._metadata.bind.invalidated ): if self.connection.closed: From d236d66f7e9675b0fa78a259ea264a3e896b1694 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 17:28:00 +0100 Subject: [PATCH 146/257] Drop unnecessary comments --- mypy.ini | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mypy.ini b/mypy.ini index c0997e92..94ebe3c5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,3 @@ -# Global options: - [mypy] python_version = 3.9 follow_imports = silent @@ -27,9 +25,3 @@ warn_unreachable = True # Reporting show_error_codes = True - - -# Per-module options: - -# [mypy-commcare_export.minilinq] -# disallow_untyped_defs = True From be3faf111c3d1e5d2539760ff3f5e408d71f52d4 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 17:48:54 +0100 Subject: [PATCH 147/257] We don't need to reflect all the tables. We can check the table instead of the connection metadata. `sqlalchemy.Table(..., autoload_with=self.connection)` gives us the metadata we need. 
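
For illustration, a rough sketch of the two approaches (names as in this
module; `self.connection` is assumed to be an open connection):

    # Before: reflect the entire schema up front
    self._metadata = sqlalchemy.MetaData()
    self._metadata.bind = self.connection
    self._metadata.reflect()

    # After: bind only, and reflect one table on demand. A missing
    # table now surfaces as sqlalchemy.exc.NoSuchTableError instead of
    # a failed lookup in `self.metadata.tables`.
    self._metadata = sqlalchemy.MetaData(bind=self.connection)
    table = sqlalchemy.Table(
        table_name, self._metadata, autoload_with=self.connection
    )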
--- commcare_export/writers.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 371280c1..1454fa8f 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -6,6 +6,7 @@ from itertools import zip_longest import sqlalchemy +from sqlalchemy.exc import NoSuchTableError from alembic.migration import MigrationContext from alembic.operations import Operations @@ -327,14 +328,10 @@ def metadata(self): or self._metadata.bind.invalidated ): if self.connection.closed: - raise Exception('Tried to reflect via a closed connection') + raise Exception('Tried to bind to a closed connection') if self.connection.invalidated: - raise Exception( - 'Tried to reflect via an invalidated connection' - ) - self._metadata = sqlalchemy.MetaData() - self._metadata.bind = self.connection - self._metadata.reflect() + raise Exception('Tried to bind to an invalidated connection') + self._metadata = sqlalchemy.MetaData(bind=self.connection) return self._metadata def table(self, table_name): @@ -507,7 +504,9 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): ctx = MigrationContext.configure(self.connection) op = Operations(ctx) - if not table_name in self.metadata.tables: + try: + self.table(table_name) + except NoSuchTableError: if self.strict_types: create_sql = sqlalchemy.schema.CreateTable( sqlalchemy.Table( @@ -535,7 +534,6 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): *self._get_columns_for_data(row_dict, data_type_dict) ) self.metadata.clear() - self.metadata.reflect() return def get_current_table_columns(): @@ -556,7 +554,6 @@ def get_current_table_columns(): table_name, sqlalchemy.Column(column, ty, nullable=True) ) self.metadata.clear() - self.metadata.reflect() columns = get_current_table_columns() elif not columns[column].primary_key: current_ty = columns[column].type @@ -581,7 +578,6 @@ def get_current_table_columns(): ) op.alter_column(table_name, column, type_=new_type) self.metadata.clear() - self.metadata.reflect() columns = get_current_table_columns() def upsert(self, table, row_dict): From 3cfef23b4858e378df86ff2f29e27d28a5bf5b09 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 19:59:41 +0100 Subject: [PATCH 148/257] Refactor: Pull out `_create_table()` method --- commcare_export/writers.py | 59 +++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 1454fa8f..eb072f4e 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -507,33 +507,7 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): try: self.table(table_name) except NoSuchTableError: - if self.strict_types: - create_sql = sqlalchemy.schema.CreateTable( - sqlalchemy.Table( - table_name, - sqlalchemy.MetaData(), - *self._get_columns_for_data(row_dict, data_type_dict) - ) - ).compile(self.connection.engine) - logger.warning( - f"Table '{table_name}' does not exist. 
Creating table " - f"with:\n{create_sql}" - ) - empty_cols = [ - name for (name, val) in row_dict.items() - if val is None and name not in data_type_dict - ] - if empty_cols: - logger.warning( - "This schema does not include the following columns " - "since we are unable to determine the column type at " - f"this stage: {empty_cols}" - ) - op.create_table( - table_name, - *self._get_columns_for_data(row_dict, data_type_dict) - ) - self.metadata.clear() + self._create_table(table_name, row_dict, data_type_dict) return def get_current_table_columns(): @@ -580,6 +554,37 @@ def get_current_table_columns(): self.metadata.clear() columns = get_current_table_columns() + def _create_table(self, table_name, row_dict, data_type_dict): + ctx = MigrationContext.configure(self.connection) + op = Operations(ctx) + if self.strict_types: + create_sql = sqlalchemy.schema.CreateTable( + sqlalchemy.Table( + table_name, + sqlalchemy.MetaData(), + *self._get_columns_for_data(row_dict, data_type_dict) + ) + ).compile(self.connection.engine) + logger.warning( + f"Table '{table_name}' does not exist. Creating table " + f"with:\n{create_sql}" + ) + empty_cols = [ + name for (name, val) in row_dict.items() + if val is None and name not in data_type_dict + ] + if empty_cols: + logger.warning( + "This schema does not include the following columns " + "since we are unable to determine the column type at " + f"this stage: {empty_cols}" + ) + op.create_table( + table_name, + *self._get_columns_for_data(row_dict, data_type_dict) + ) + self.metadata.clear() + def upsert(self, table, row_dict): # For atomicity "insert, catch, update" is slightly better than # "select, insert or update". The latter may crash, while the From 5be52c394a05802332c0733795d5dae8572e3ffb Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 20:02:53 +0100 Subject: [PATCH 149/257] We don't need to refresh `columns` We only ever use `columns[column]`. We are iterating `row_dict` where `column` is a key. We cannot encounter the same key twice. So we don't need to store the new column state in `columns[column]`; we'll never use it. 
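
To illustrate with a hypothetical row (the values are made up, purely to
show the shape of the loop):

    row_dict = {'id': 'a1', 'name': 'Ada', 'age': '36'}
    columns = {c.name: c for c in self.table(table_name).columns}
    for column, val in row_dict.items():
        # Each key of row_dict appears exactly once, so once this
        # iteration has added or altered a column, `columns[column]`
        # is never read again for that key; a refreshed mapping would
        # go unused.
        ...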
--- commcare_export/writers.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index eb072f4e..cf2ba915 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -510,10 +510,7 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): self._create_table(table_name, row_dict, data_type_dict) return - def get_current_table_columns(): - return {c.name: c for c in self.table(table_name).columns} - - columns = get_current_table_columns() + columns = {c.name: c for c in self.table(table_name).columns} for column, val in row_dict.items(): if val is None: @@ -528,7 +525,6 @@ def get_current_table_columns(): table_name, sqlalchemy.Column(column, ty, nullable=True) ) self.metadata.clear() - columns = get_current_table_columns() elif not columns[column].primary_key: current_ty = columns[column].type new_type = None @@ -552,7 +548,6 @@ def get_current_table_columns(): ) op.alter_column(table_name, column, type_=new_type) self.metadata.clear() - columns = get_current_table_columns() def _create_table(self, table_name, row_dict, data_type_dict): ctx = MigrationContext.configure(self.connection) From 93db21b25d3149bf55e8afb3b76a30a5938b96b6 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 20:14:47 +0100 Subject: [PATCH 150/257] Clean up a little --- commcare_export/writers.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index cf2ba915..1e5552e6 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -501,16 +501,15 @@ def least_upper_bound(self, source_type, dest_type): return sqlalchemy.UnicodeText(collation=self.collation) def make_table_compatible(self, table_name, row_dict, data_type_dict): - ctx = MigrationContext.configure(self.connection) - op = Operations(ctx) - try: - self.table(table_name) + table = self.table(table_name) except NoSuchTableError: self._create_table(table_name, row_dict, data_type_dict) return - columns = {c.name: c for c in self.table(table_name).columns} + ctx = MigrationContext.configure(self.connection) + op = Operations(ctx) + columns = {c.name: c for c in table.columns} for column, val in row_dict.items(): if val is None: From 401d76b78a1cc25879666e1f99acb29264853c81 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Mon, 25 Apr 2022 19:55:29 +0100 Subject: [PATCH 151/257] Nits --- commcare_export/writers.py | 46 +++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 1e5552e6..b158d042 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -396,9 +396,7 @@ def best_type_for(self, val): # MySQL cannot build an index on TEXT due to the lack of # a field length, so we try to use VARCHAR when # possible. - if len( - val - ) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? 
+ if len(val) < self.MAX_VARCHAR_LEN: return sqlalchemy.Unicode( max(len(val), self.MIN_VARCHAR_LEN), collation=self.collation @@ -418,12 +416,10 @@ def best_type_for(self, val): return sqlalchemy.NVARCHAR( length=column_length_in_bytes, collation=self.collation ) - if self.is_oracle: + elif self.is_oracle: return sqlalchemy.Unicode(4000, collation=self.collation) else: - raise Exception( - "Unknown database dialect: {}".format(self.db_url) - ) + raise Exception(f"Unknown database dialect: {self.db_url}") else: # We do not have a name for "bottom" in SQL aka the type # whose least upper bound with any other type is the other @@ -515,37 +511,34 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): if val is None: continue - ty = self.get_data_type(data_type_dict[column], val) + val_type = self.get_data_type(data_type_dict[column], val) if column not in columns: logger.warning( - "Adding column '{}.{} {}'".format(table_name, column, ty) + f"Adding column '{table_name}.{column} {val_type}'" ) op.add_column( - table_name, sqlalchemy.Column(column, ty, nullable=True) + table_name, + sqlalchemy.Column(column, val_type, nullable=True) ) self.metadata.clear() elif not columns[column].primary_key: - current_ty = columns[column].type - new_type = None + col_type = columns[column].type + new_col_type = None if self.strict_types: # don't bother checking compatibility since we're # not going to change anything - new_type = self.strict_types_compatibility_check( - ty, current_ty, val + new_col_type = self.strict_types_compatibility_check( + val_type, col_type, val ) - elif not self.compatible(ty, current_ty): - new_type = self.least_upper_bound(ty, current_ty) + elif not self.compatible(val_type, col_type): + new_col_type = self.least_upper_bound(val_type, col_type) - if new_type: + if new_col_type: logger.warning( - 'Altering column %s from %s to %s for value: "%s:%s"', - columns[column], - current_ty, - new_type, - type(val), - val + f'Altering column {columns[column]} from {col_type} ' + f'to {new_col_type} for value: "{type(val)}:{val}"', ) - op.alter_column(table_name, column, type_=new_type) + op.alter_column(table_name, column, type_=new_col_type) self.metadata.clear() def _create_table(self, table_name, row_dict, data_type_dict): @@ -595,8 +588,9 @@ def upsert(self, table, row_dict): insert = table.insert().values(**row_dict) self.connection.execute(insert) except sqlalchemy.exc.IntegrityError: - update = table.update().where(table.c.id == row_dict['id'] - ).values(**row_dict) + update = (table.update() + .where(table.c.id == row_dict['id']) + .values(**row_dict)) self.connection.execute(update) def write_table(self, table: TableSpec) -> None: From bd93a9f021c0f1f2c6672d8cbc46eee73710e4f1 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 26 Apr 2022 14:43:03 +0100 Subject: [PATCH 152/257] Add missing types --- commcare_export/minilinq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index f816048f..bb8a768f 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -477,9 +477,9 @@ def __init__( table: str, headings: ListType[MiniLinq], source: MiniLinq, - missing_value=None, - data_types=None, - ): + missing_value: Optional[str] = None, # Denotes "no value". e.g. 
"---" + data_types: Optional[ListType[Literal]] = None, + ) -> None: self.table = table self.headings = headings self.source = source From 91a0dcbf1e264a0c61715bd5892bba930061fed9 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 26 Apr 2022 15:17:42 +0100 Subject: [PATCH 153/257] How to check types in `env` and `minilinq` modules --- README.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index aae74301..4f44342c 100644 --- a/README.md +++ b/README.md @@ -484,15 +484,21 @@ tests/test_writers.py ... ============ 17 passed in 2.09 seconds ============ ``` -5\. Push the feature branch up +5\. Type hints are used in the `env` and `minilinq` modules. Check that any changes in those modules adhere to those types: + +``` +$ mypy --install-types @mypy_typed_modules.txt +``` + +6\. Push the feature branch up ``` $ git push -u origin my-super-duper-feature ``` -6\. Visit https://github.com/dimagi/commcare-export and submit a pull request. +7\. Visit https://github.com/dimagi/commcare-export and submit a pull request. -7\. Accept our gratitude for contributing: Thanks! +8\. Accept our gratitude for contributing: Thanks! Release process --------------- From 8a722d8f8f82d2d8272db8bb0b6278d39ae9a49d Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 26 Apr 2022 15:18:31 +0100 Subject: [PATCH 154/257] Spelling, while we're here. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4f44342c..4b7354bf 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ mssql+pyodbc://scott:tiger@localhost/mydatabases?driver=ODBC+Driver+17+for+SQL+S Excel Queries ------------- -An excel query is any `.xlsx` workbook. Each sheet in the workbook represents one table you wish +An Excel query is any `.xlsx` workbook. Each sheet in the workbook represents one table you wish to create. There are two grouping of columns to configure the table: - **Data Source**: Set this to `form` to export form data, or `case` for case data. @@ -121,7 +121,7 @@ JSON Queries ------------ JSON queries are a described in the table below. You build a JSON object that represents the query you have in mind. -A good way to get started is to work from the examples, or you could make an excel query and run the tool +A good way to get started is to work from the examples, or you could make an Excel query and run the tool with `--dump-query` to see the resulting JSON query. @@ -132,7 +132,7 @@ The --users and --locations options export data from a CommCare project that can be joined with form and case data. The --with-organization option does all of that and adds a field to Excel query specifications to be joined on. -Specifiying the --users option or --with-organization option will export an +Specifying the --users option or --with-organization option will export an additional table named 'commcare_users' containing the following columns: Column | Type | Note @@ -200,7 +200,7 @@ a9ca40 | Supervisor | NULL | a9ca40 | c4cbef | 939fa8 In order to join form or case data to 'commcare_users' and 'commcare_locations' the exported forms and cases need to contain a field identifying which user submitted them. The --with-organization option automatically adds a field -called 'commcare_userid' to each query in an Excel specifiction for this +called 'commcare_userid' to each query in an Excel specification for this purpose. Using that field, you can use a SQL query with a join to report data about any level of you organization. 
For example, to count the number of forms submitted by all workers in each clinic: @@ -239,7 +239,7 @@ As a library, the various `commcare_export` modules make it easy to - Load and save JSON representations of Minilinq queries - Compile Excel configurations to Minilinq queries -To directly access the CommCareHq REST API: +To directly access the CommCare HQ REST API: ```python >>> import getpass @@ -451,7 +451,7 @@ $ pip install "commcare-export[base_sql]" Contributing ------------ -0\. Sign up for github, if you have not already, at https://github.com. +0\. Sign up for GitHub, if you have not already, at https://github.com. 1\. Fork the repository at https://github.com/dimagi/commcare-export. From 1da8522405a0198879f267bff99f401bcc8afbbf Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 26 Apr 2022 15:38:10 +0100 Subject: [PATCH 155/257] Move comment to docstring --- commcare_export/minilinq.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index bb8a768f..d78c6478 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -477,9 +477,19 @@ def __init__( table: str, headings: ListType[MiniLinq], source: MiniLinq, - missing_value: Optional[str] = None, # Denotes "no value". e.g. "---" + missing_value: Optional[str] = None, data_types: Optional[ListType[Literal]] = None, ) -> None: + """ + Initializes an ``Emit`` instance. + + :param table: The name/title of the table to be written. + :param headings: Evaluated to determine column headings. + :param source: Evaluated to determine the table rows. + :param missing_value: Denotes "no value". e.g. ``"---"`` + :param data_types: The data types of the columns. e.g. + ``[Literal('text'), Literal('date'), ...]`` + """ self.table = table self.headings = headings self.source = source From 69c6f9f543b5415e664352ea8d4c0d364cf20db8 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 27 Apr 2022 12:02:48 +0100 Subject: [PATCH 156/257] Rename `table` -> `table_spec` --- commcare_export/writers.py | 10 +++++----- tests/utils.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index b158d042..8f9115da 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -593,12 +593,12 @@ def upsert(self, table, row_dict): .values(**row_dict)) self.connection.execute(update) - def write_table(self, table: TableSpec) -> None: - table_name = table.name - headings = table.headings - data_type_dict = dict(zip_longest(headings, table.data_types)) + def write_table(self, table_spec: TableSpec) -> None: + table_name = table_spec.name + headings = table_spec.headings + data_type_dict = dict(zip_longest(headings, table_spec.data_types)) # Rather inefficient for now... 
- for row in table.rows: + for row in table_spec.rows: row_dict = dict(zip(headings, row)) self.make_table_compatible(table_name, row_dict, data_type_dict) self.upsert(self.table(table_name), row_dict) diff --git a/tests/utils.py b/tests/utils.py index 70295d3c..2397dc46 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -7,10 +7,10 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.tables = set() - def write_table(self, table): - super().write_table(table) - if table.rows: - self.tables.add(table.name) + def write_table(self, table_spec): + super().write_table(table_spec) + if table_spec.rows: + self.tables.add(table_spec.name) def tear_down(self): for table in self.tables: From 376d5cf605c824b99b65c3812a3fc54c1d18666a Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 27 Apr 2022 12:26:28 +0100 Subject: [PATCH 157/257] Just get the table once --- commcare_export/writers.py | 51 ++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 8f9115da..141a8f3f 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -334,13 +334,15 @@ def metadata(self): self._metadata = sqlalchemy.MetaData(bind=self.connection) return self._metadata - def table(self, table_name): - return sqlalchemy.Table( - table_name, - self.metadata, - autoload=True, - autoload_with=self.connection - ) + def get_table(self, table_name): + try: + return sqlalchemy.Table( + table_name, + self.metadata, + autoload_with=self.connection, + ) + except NoSuchTableError: + return None def get_id_column(self): return sqlalchemy.Column( @@ -496,13 +498,7 @@ def least_upper_bound(self, source_type, dest_type): # FIXME: Don't be so silly return sqlalchemy.UnicodeText(collation=self.collation) - def make_table_compatible(self, table_name, row_dict, data_type_dict): - try: - table = self.table(table_name) - except NoSuchTableError: - self._create_table(table_name, row_dict, data_type_dict) - return - + def make_table_compatible(self, table, row_dict, data_type_dict): ctx = MigrationContext.configure(self.connection) op = Operations(ctx) columns = {c.name: c for c in table.columns} @@ -514,10 +510,10 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): val_type = self.get_data_type(data_type_dict[column], val) if column not in columns: logger.warning( - f"Adding column '{table_name}.{column} {val_type}'" + f"Adding column '{table.name}.{column} {val_type}'" ) op.add_column( - table_name, + table.name, sqlalchemy.Column(column, val_type, nullable=True) ) self.metadata.clear() @@ -538,10 +534,10 @@ def make_table_compatible(self, table_name, row_dict, data_type_dict): f'Altering column {columns[column]} from {col_type} ' f'to {new_col_type} for value: "{type(val)}:{val}"', ) - op.alter_column(table_name, column, type_=new_col_type) + op.alter_column(table.name, column, type_=new_col_type) self.metadata.clear() - def _create_table(self, table_name, row_dict, data_type_dict): + def create_table(self, table_name, row_dict, data_type_dict): ctx = MigrationContext.configure(self.connection) op = Operations(ctx) if self.strict_types: @@ -571,6 +567,7 @@ def _create_table(self, table_name, row_dict, data_type_dict): *self._get_columns_for_data(row_dict, data_type_dict) ) self.metadata.clear() + return self.get_table(table_name) def upsert(self, table, row_dict): # For atomicity "insert, catch, update" is slightly better than @@ -597,11 +594,21 @@ def write_table(self, 
table_spec: TableSpec) -> None: table_name = table_spec.name headings = table_spec.headings data_type_dict = dict(zip_longest(headings, table_spec.data_types)) - # Rather inefficient for now... - for row in table_spec.rows: + for i, row in enumerate(table_spec.rows): row_dict = dict(zip(headings, row)) - self.make_table_compatible(table_name, row_dict, data_type_dict) - self.upsert(self.table(table_name), row_dict) + if i == 0: + table = self.get_table(table_name) + if table is None: + table = self.create_table( + table_name, + row_dict, + data_type_dict, + ) + # Checks the data type for every cell in every row. Maybe we + # can use a future version of the data dictionary to avoid + # this? + self.make_table_compatible(table, row_dict, data_type_dict) + self.upsert(table, row_dict) def _get_columns_for_data(self, row_dict, data_type_dict): return [self.get_id_column()] + [ From c693ff2476cc7061d62fe3a5a45542fd2af99643 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 27 Apr 2022 12:43:02 +0100 Subject: [PATCH 158/257] Refresh table when we change it --- commcare_export/writers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 141a8f3f..5cd31fdd 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -517,6 +517,7 @@ def make_table_compatible(self, table, row_dict, data_type_dict): sqlalchemy.Column(column, val_type, nullable=True) ) self.metadata.clear() + table = self.get_table(table.name) elif not columns[column].primary_key: col_type = columns[column].type new_col_type = None @@ -536,6 +537,8 @@ def make_table_compatible(self, table, row_dict, data_type_dict): ) op.alter_column(table.name, column, type_=new_col_type) self.metadata.clear() + table = self.get_table(table.name) + return table def create_table(self, table_name, row_dict, data_type_dict): ctx = MigrationContext.configure(self.connection) @@ -607,7 +610,7 @@ def write_table(self, table_spec: TableSpec) -> None: # Checks the data type for every cell in every row. Maybe we # can use a future version of the data dictionary to avoid # this? 
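The `upsert` context above names the "insert, catch, update" strategy. A self-contained sketch of that pattern against a throwaway SQLite table; the names are invented, and the SAVEPOINT via `begin_nested` is an assumption added here so a failed insert can be retried inside one transaction, not necessarily the writer's exact mechanics:

```python
import sqlalchemy
from sqlalchemy.exc import IntegrityError

engine = sqlalchemy.create_engine('sqlite://')
metadata = sqlalchemy.MetaData()
forms = sqlalchemy.Table(
    'forms', metadata,
    sqlalchemy.Column('id', sqlalchemy.Unicode(50), primary_key=True),
    sqlalchemy.Column('name', sqlalchemy.UnicodeText()),
)
metadata.create_all(engine)

def upsert(connection, table, row_dict):
    try:
        # Attempt the insert inside a SAVEPOINT so a duplicate key
        # does not abort the enclosing transaction.
        with connection.begin_nested():
            connection.execute(table.insert().values(**row_dict))
    except IntegrityError:
        # Row already exists: fall back to an update of the same key.
        connection.execute(
            table.update()
            .where(table.c.id == row_dict['id'])
            .values(**row_dict)
        )

with engine.begin() as conn:
    upsert(conn, forms, {'id': 'a1', 'name': 'first'})
    upsert(conn, forms, {'id': 'a1', 'name': 'second'})  # takes the update path
    assert conn.execute(
        sqlalchemy.select(sqlalchemy.func.count()).select_from(forms)
    ).scalar() == 1
```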
- self.make_table_compatible(table, row_dict, data_type_dict) + table = self.make_table_compatible(table, row_dict, data_type_dict) self.upsert(table, row_dict) def _get_columns_for_data(self, row_dict, data_type_dict): From ee3da161010b625764fee65be41471ec63ce6505 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 27 Apr 2022 12:44:40 +0100 Subject: [PATCH 159/257] Update tests --- tests/test_checkpointmanager.py | 3 ++- tests/test_cli.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index ab26469e..1d0cb0e1 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -48,7 +48,8 @@ class TestCheckpointManager(object): def test_create_checkpoint_table(self, manager, revision='head'): manager.create_checkpoint_table(revision) with manager: - assert 'commcare_export_runs' in manager.metadata.tables + table = manager.get_table('commcare_export_runs') + assert table is not None def test_checkpoint_table_exists(self, manager): # Test that the migrations don't fail for tables that existed before diff --git a/tests/test_cli.py b/tests/test_cli.py index a6b54430..e8ca69fd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -723,9 +723,12 @@ def test_cli_data_types_add_columns( ) metadata = sqlalchemy.schema.MetaData(bind=writer.engine) - metadata.reflect() - - cols = metadata.tables['forms'].c + table = sqlalchemy.Table( + 'forms', + metadata, + autoload_with=writer.engine, + ) + cols = table.c assert sorted([c.name for c in cols]) == sorted([ u'id', u'a_bool', u'an_int', u'a_date', u'a_datetime', u'a_text' ]) From ff07831484ed6d6673d98ef98b2eb729f2cd291d Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 27 Apr 2022 18:11:01 +0100 Subject: [PATCH 160/257] "CommCare HQ" --- README.md | 10 +++++----- setup.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index aae74301..b126dd8c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ https://github.com/dimagi/commcare-export [![Test coverage](https://coveralls.io/repos/dimagi/commcare-export/badge.png?branch=master)](https://coveralls.io/r/dimagi/commcare-export) [![PyPI version](https://badge.fury.io/py/commcare-export.svg)](https://badge.fury.io/py/commcare-export) -A command-line tool (and Python library) to generate customized exports from the [CommCareHQ](https://www.commcarehq.org) [REST API](https://wiki.commcarehq.org/display/commcarepublic/Data+APIs). +A command-line tool (and Python library) to generate customized exports from the [CommCare HQ](https://www.commcarehq.org) [REST API](https://wiki.commcarehq.org/display/commcarepublic/Data+APIs). * [User documentation](https://wiki.commcarehq.org/display/commcarepublic/CommCare+Data+Export+Tool) * [Changelog](https://github.com/dimagi/commcare-export/releases) @@ -17,7 +17,7 @@ Installation & Quick Start 0a\. Install [Python 3](https://www.python.org/downloads/). This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). -0b\. Sign up for [CommCareHQ](https://www.commcarehq.org/) if you have not already. +0b\. Sign up for [CommCare HQ](https://www.commcarehq.org/) if you have not already. 1\. Install CommCare Export via `pip` @@ -86,7 +86,7 @@ $ commcare-export --commcare-hq \ See `commcare-export --help` for the full list of options. 
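Patch 159 moves the tests to SQLAlchemy 1.4-style reflection. A standalone sketch of the `autoload_with` idiom it adopts, against an invented table:

```python
import sqlalchemy

engine = sqlalchemy.create_engine('sqlite://')
with engine.begin() as conn:
    conn.execute(sqlalchemy.text(
        'CREATE TABLE forms (id TEXT PRIMARY KEY, a_text TEXT)'
    ))

# Reflect the existing table; no bound MetaData or autoload=True needed.
metadata = sqlalchemy.MetaData()
forms = sqlalchemy.Table('forms', metadata, autoload_with=engine)
assert sorted(c.name for c in forms.c) == ['a_text', 'id']
```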
-There are example query files for the CommCare Demo App (available on the CommCareHq Exchange) in the `examples/` +There are example query files for the CommCare Demo App (available on the CommCare HQ Exchange) in the `examples/` directory. `--output` @@ -234,12 +234,12 @@ Python Library Usage As a library, the various `commcare_export` modules make it easy to - - Interact with the CommCareHQ REST API + - Interact with the CommCare HQ REST API - Execute "Minilinq" queries against the API (a very simple query language, described below) - Load and save JSON representations of Minilinq queries - Compile Excel configurations to Minilinq queries -To directly access the CommCareHq REST API: +To directly access the CommCare HQ REST API: ```python >>> import getpass diff --git a/setup.py b/setup.py index bc1b3586..e07a0b62 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ def run_tests(self): name="commcare-export", version=version, description='A command-line tool (and Python library) to extract data from ' - 'CommCareHQ into a SQL database or Excel workbook', + 'CommCare HQ into a SQL database or Excel workbook', long_description=io.open(readme, encoding='utf-8').read(), long_description_content_type='text/markdown', author='Dimagi', From ebfbf13d1629f3e162a7613d4d60ca95dd030574 Mon Sep 17 00:00:00 2001 From: Ethan Soergel Date: Mon, 25 Jul 2022 12:20:30 -0400 Subject: [PATCH 161/257] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c68c3bfc..4cddfb2e 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ $ commcare-export \ You'll see the tables printed out. Change to `--output-format sql --output URL_TO_YOUR_DB --since DATE` to sync all forms submitted since that date. -All examples are present in Excel and also equivalent JSON, however it is recommended +Example query files are provided in both Excel and JSON format. It is recommended to use the Excel format as the JSON format may change upon future library releases. Command-line Usage From 89914f44dc63a8cfb966392c9665a4f8dd37a167 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 1 Feb 2023 17:08:55 +0000 Subject: [PATCH 162/257] Preserve values of duplicate filter names --- commcare_export/excel_query.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index ea31ceb5..29cdf4d3 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -122,7 +122,12 @@ def compile_filters(worksheet, mappings=None): for cell in get_column_by_name(worksheet, 'filter value') or [] ] ) - return zip(filter_names, filter_values) + # Preserve values of duplicate filter names. Results in an OR filter. + # e.g. 
{'type': ['person'], 'owner_id': ['abc123', 'def456']} + filters = defaultdict(list) + for k, v in zip(filter_names, filter_values): + filters[k].append(v) + return filters def extended_to_len(desired_len, some_list, value=None): @@ -350,7 +355,7 @@ def compile_source(worksheet, value_or_root=False): # conditional api_query_args.append(Literal(None)) else: - api_query_args.append(Literal(dict(filters))) + api_query_args.append(Literal(filters)) if include_referenced_items: api_query_args.append(Literal(include_referenced_items)) From ff7fc441f0137ba622c143ba9f02d3d8eb7ecafd Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Fri, 10 Feb 2023 11:23:24 +0000 Subject: [PATCH 163/257] Fix test --- tests/test_excel_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_excel_query.py b/tests/test_excel_query.py index 319b958f..00176656 100644 --- a/tests/test_excel_query.py +++ b/tests/test_excel_query.py @@ -132,8 +132,8 @@ def test_parse_sheet(self): Reference("api_data"), Literal("form"), Reference("checkpoint_manager"), Literal({ - 'app_id': 'foobizzle', - 'type': 'intake', + 'app_id': ['foobizzle'], + 'type': ['intake'], }) ), body=None, From 46c5acaa56da7ec715237a45dacd1d43acd8bcaf Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Fri, 10 Feb 2023 11:23:48 +0000 Subject: [PATCH 164/257] OCD --- commcare_export/excel_query.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/commcare_export/excel_query.py b/commcare_export/excel_query.py index 29cdf4d3..12f6d980 100644 --- a/commcare_export/excel_query.py +++ b/commcare_export/excel_query.py @@ -504,12 +504,10 @@ def parse_sheet( ) -class SheetParts( - namedtuple( - 'SheetParts', - 'name headings source body root_expr data_types data_source' - ) -): +class SheetParts(namedtuple( + 'SheetParts', + 'name headings source body root_expr data_types data_source' +)): def __new__( cls, @@ -521,15 +519,14 @@ def __new__( data_types=None, data_source=None ): - data_types = data_types or [] - return super(SheetParts, cls).__new__( + return super().__new__( cls, name, headings, source, body, root_expr, - data_types, + data_types or [], data_source ) From 9a2cfb6d5f41de81f6ceca05aa37e484521f6b74 Mon Sep 17 00:00:00 2001 From: Martin Riese Date: Tue, 7 Mar 2023 13:13:10 -0600 Subject: [PATCH 165/257] USH-1425: Replace travis CI config with GitHub actions * Add .github/workflows/test.yml * Use python version 3.7 to 3.10 * python 3.6 is not supported by the docker anymore * Remove .travis.yml --- .github/workflows/test.yml | 66 ++++++++++++++++++++++++++++++++++++++ .travis.yml | 45 -------------------------- 2 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/test.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..0e3bcfe3 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,66 @@ +name: commcare-export tests +on: + pull_request: + branches: + - master +env: + DB_USER: db_user + DB_PASSWORD: Password123 +jobs: + test: + runs-on: ubuntu-22.04 + services: + mssql: + image: mcr.microsoft.com/mssql/server:2017-latest + env: + SA_PASSWORD: ${{ env.DB_PASSWORD }} + ACCEPT_EULA: 'Y' + ports: + - 1433:1433 + postgres: + image: postgres + env: + POSTGRES_PASSWORD: ${{ env.DB_PASSWORD }} + POSTGRES_USER: ${{ env.DB_USER }} + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + strategy: + 
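The duplicate-filter merge from PATCH 162, reduced to a runnable sketch with made-up filter rows; the resulting multi-valued key is what the API treats as an OR filter:

```python
from collections import defaultdict

filter_names = ['type', 'owner_id', 'owner_id']
filter_values = ['person', 'abc123', 'def456']

# Same shape as compile_filters: repeated names collapse into one key.
filters = defaultdict(list)
for name, value in zip(filter_names, filter_values):
    filters[name].append(value)

assert dict(filters) == {'type': ['person'], 'owner_id': ['abc123', 'def456']}
```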
matrix: + python-version: [3.7, 3.8, 3.9, '3.10'] # Version 3.6 with arch x64 not found + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 50 + - run: git fetch --tags origin # So we can use git describe. actions/checkout@v3 does not pull tags. + + # MySQL set up + - run: sudo service mysql start # Ubuntu already includes mysql no need to use service + - run: mysql -uroot -proot -e "CREATE USER '${{ env.DB_USER }}'@'%';" + - run: mysql -uroot -proot -e "GRANT ALL PRIVILEGES ON *.* TO '${{ env.DB_USER }}'@'%';" + + - uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - run: sudo apt-get install pandoc + - run: python setup.py sdist + - run: pip install dist/* + - run: pip install pymysql psycopg2 pyodbc + - run: pip install coverage coveralls + - run: pip install mypy + - run: pip install pytest + - run: pip install -e ".[test]" + - run: coverage run setup.py test + env: + POSTGRES_URL: postgresql://${{ env.DB_USER }}:${{ env.DB_PASSWORD }}@localhost/ + MYSQL_URL: mysql+pymysql://${{ env.DB_USER }}:@localhost/ + MSSQL_URL: mssql+pyodbc://sa:${{ env.DB_PASSWORD }}@localhost/ + HQ_USERNAME: ${{ secrets.HQ_USERNAME }} + HQ_API_KEY: ${{ secrets.HQ_API_KEY }} + - run: mypy --install-types --non-interactive @mypy_typed_modules.txt + - run: coverage lcov -o coverage/lcov.info + - uses: coverallsapp/github-action@v1 \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d99cbd42..00000000 --- a/.travis.yml +++ /dev/null @@ -1,45 +0,0 @@ -language: python -sudo: required -dist: "bionic" -python: - - "3.6" - - "3.7" - - "3.8" -addons: - apt: - packages: - - unixodbc-dev -env: - global: - - MSSQL_SA_PASSWORD=Password-123 - # HQ_USERNAME and HQ_API_KEY - - secure: "AhNARIXHCKJhDpDHNT97h4WThW/eyofBpp4rI80i+DA8whlfKucHrfDaJ190tRblY8viBiC8FhucFxYVX6oSS2MaPN3X1bF8WzdMYiBzMJ05ODIRQ3pPjRsGD3e14MhpRriaHfa1w90/WdnU2QWXY6J8zitXlwXV5PRBdsk7raQ=" - - secure: "hJqnvpMEgiiU8AT21T4dPRLqA+n4BSsWk9yVYV7DrfsZIIFN0Ioao7bg8MMh/XZ6mGMEjxtgFZ307ApuoQdfGpct9Yg2uxj/vx6n7VHdEzvoWfzhDEPn9mTG7QBxPkZJlz444m7hVW/jW8Pqr2LEdR+ORjGfe8FlwCU2MauOmIc=" -before_install: - - docker pull mcr.microsoft.com/mssql/server:2017-latest - - docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PASSWORD" -p 1433:1433 --name mssql1 -d mcr.microsoft.com/mssql/server:2017-latest - - curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - - - curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list - - sudo apt-get update -qq -install: - - sudo apt-get install pandoc - - python setup.py sdist - - pip install dist/* - - pip install pymysql psycopg2 pyodbc - - pip install coverage coveralls - - sudo ACCEPT_EULA=Y apt-get install msodbcsql17 - - pip install mypy -before_script: - - mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'travis'@'%';"; - - docker ps -a - - odbcinst -q -d - - .travis/wait.sh -script: - - coverage run setup.py test - - mypy --install-types --non-interactive @mypy_typed_modules.txt -after_success: - - coveralls -services: - - postgresql - - mysql - - docker From 0f8825e01ca600f2169d2f2f3f3bc10095b2fa45 Mon Sep 17 00:00:00 2001 From: Martin Riese Date: Fri, 10 Mar 2023 09:38:29 -0600 Subject: [PATCH 166/257] Fix sqlalchemy version * Python 3.7 and up are supported by sqlalchemy version 2. 
But our code is not compatible with that version
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e07a0b62..a12f4261 100644
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,7 @@ def run_tests(self):
         'pytz',
         'requests',
         'simplejson',
-        'sqlalchemy',
+        'sqlalchemy~=1.4',
         'sqlalchemy-migrate'
     ],
     extras_require={

From a1ed95233ef11ecb334ece365690f503b8b875a4 Mon Sep 17 00:00:00 2001
From: Martin Riese
Date: Mon, 13 Mar 2023 13:09:33 -0500
Subject: [PATCH 167/257] Remove .travis/wait.sh. Not needed with GitHub actions

---
 .travis/wait.sh | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100755 .travis/wait.sh

diff --git a/.travis/wait.sh b/.travis/wait.sh
deleted file mode 100755
index 67497e45..00000000
--- a/.travis/wait.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-echo "Waiting MSSQL docker to launch on 1433..."
-
-while ! nc -z localhost 1433; do
-    sleep 0.1
-done
-
-echo "MSSQL launched"

From c3815d9087f6c04b46b6e79d37077e0b324f12fd Mon Sep 17 00:00:00 2001
From: MartinRiese
Date: Fri, 17 Mar 2023 08:48:28 -0500
Subject: [PATCH 168/257] Use setup-python action v4

Co-authored-by: Simon Kelly
---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0e3bcfe3..cc8df9b3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -43,7 +43,7 @@ jobs:
   - run: mysql -uroot -proot -e "GRANT ALL PRIVILEGES ON *.* TO '${{ env.DB_USER }}'@'%';"

-      - uses: actions/setup-python@v3
+      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - run: sudo apt-get install pandoc

From 7139f94911b5d049d61bfb36f76195b635acdb7f Mon Sep 17 00:00:00 2001
From: Martin Riese
Date: Fri, 17 Mar 2023 08:49:24 -0500
Subject: [PATCH 169/257] Add python 3.11 to version matrix

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index cc8df9b3..70377d9e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,7 +31,7 @@ jobs:
    strategy:
      matrix:
-        python-version: [3.7, 3.8, 3.9, '3.10'] # Version 3.6 with arch x64 not found
+        python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
    steps:
      - uses: actions/checkout@v3
        with:

From 382c95e8e435b4b6492c0685a0e1cae2712de6e4 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Wed, 23 Aug 2023 09:37:15 +0200
Subject: [PATCH 170/257] Add UCR support

---
 commcare_export/commcare_minilinq.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py
index 5d193209..f9d273e0 100644
--- a/commcare_export/commcare_minilinq.py
+++ b/commcare_export/commcare_minilinq.py
@@ -20,7 +20,8 @@
     'location',
     'application',
     'web-user',
-    'messaging-event'
+    'messaging-event',
+    'ucr',
 }

@@ -109,6 +110,7 @@ def get_paginator(
         'form': DatePaginator('indexed_on', page_size),
         'case': DatePaginator('indexed_on', page_size),
         'messaging-event': DatePaginator('date_last_activity', page_size),
+        'ucr': UCRPaginator(page_size),
     },
     PaginationMode.date_modified: {
         'form':
@@ -121,6 +123,7 @@ def get_paginator(
             DatePaginator('server_date_modified', page_size),
         'messaging-event':
             DatePaginator('date_last_activity', page_size),
+        'ucr': UCRPaginator(page_size),
     }
 }[pagination_mode].get(resource, SimplePaginator(page_size))

@@ -262,3
+265,11 @@ def get_since_date(self, batch): ) except ParserError: return None + + +class UCRPaginator(SimplePaginator): + + def next_page_params_from_batch(self, batch): + params = super(UCRPaginator, self).next_page_params_from_batch(batch) + if params: + return params | self.payload From 7424d90996ad0cbd6f55fb2a2f960ea7e6b4a9e3 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Wed, 30 Aug 2023 10:22:07 +0200 Subject: [PATCH 171/257] Add checkpoint support for datasources --- commcare_export/checkpoint.py | 23 +++++++++++----- commcare_export/commcare_hq_client.py | 10 ++++++- ...3b37b3b06104_added_cursor_to_checkpoint.py | 27 +++++++++++++++++++ ...36489c5a628_create_commcare_export_runs.py | 1 + 4 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 9f5a11d7..fa1e97f5 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -36,6 +36,7 @@ class Checkpoint(Base): data_source = Column(String) last_doc_id = Column(String) pagination_mode = Column(String) + cursor = Column(String) def get_pagination_mode(self): """ @@ -63,7 +64,8 @@ def __repr__(self): "final={r.final}), " "data_source={r.data_source}, " "last_doc_id={r.last_doc_id}, " - "pagination_mode={r.pagination_mode}>" + "pagination_mode={r.pagination_mode}," + "cursor={r.cursor}>" ).format(r=self) @@ -131,12 +133,14 @@ def set_checkpoint( pagination_mode, is_final=False, doc_id=None, + cursor=None, ): self._set_checkpoint( checkpoint_time, pagination_mode, is_final, doc_id=doc_id, + cursor=cursor, ) if is_final: self._cleanup() @@ -147,7 +151,8 @@ def _set_checkpoint( pagination_mode, final, time_of_run=None, - doc_id=None + doc_id=None, + cursor=None, ): logger.info( 'Setting %s checkpoint: data_source: %s, tables: %s, ' @@ -188,7 +193,8 @@ def _set_checkpoint( final=final, data_source=self.data_source, last_doc_id=doc_id, - pagination_mode=pagination_mode.name + pagination_mode=pagination_mode.name, + cursor=cursor, ) session.add(checkpoint) created.append(checkpoint) @@ -423,10 +429,10 @@ def __init__(self, manager, since_param, pagination_mode): self.since_param = since_param self.pagination_mode = pagination_mode - def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None): + def set_checkpoint(self, checkpoint_time, is_final=False, doc_id=None, cursor=None): if self.manager: self.manager.set_checkpoint( - checkpoint_time, self.pagination_mode, is_final, doc_id=doc_id + checkpoint_time, self.pagination_mode, is_final, doc_id=doc_id, cursor=cursor ) @@ -442,7 +448,7 @@ def __init__( self.since = since self.base_checkpoint_manager = base_checkpoint_manager - def get_since(self, checkpoint_manager): + def get_since(self, checkpoint_manager, data_source=None): if self.start_over: return None @@ -450,6 +456,9 @@ def get_since(self, checkpoint_manager): return self.since if checkpoint_manager: + if data_source and data_source == 'ucr': + return checkpoint_manager.get_last_checkpoint().cursor + since = checkpoint_manager.get_time_of_last_checkpoint() return dateutil.parser.parse(since) if since else None @@ -485,7 +494,7 @@ def get_checkpoint_manager(self, data_source, table_names): data_source, table_names ) - since = self.get_since(manager) + since = self.get_since(manager, data_source) pagination_mode = self.get_pagination_mode(manager) logger.info( diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py 
index b8bf94bd..5c2e9bc9 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -18,6 +18,7 @@ import backoff import commcare_export from commcare_export.repeatable_iterator import RepeatableIterator +from datetime import datetime AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' @@ -217,7 +218,7 @@ def iterate_resource(resource=resource, params=params): return RepeatableIterator(iterate_resource) def checkpoint(self, checkpoint_manager, paginator, batch, is_final): - from commcare_export.commcare_minilinq import DatePaginator + from commcare_export.commcare_minilinq import DatePaginator, UCRPaginator if isinstance(paginator, DatePaginator): since_date = paginator.get_since_date(batch) if since_date: @@ -232,6 +233,13 @@ def checkpoint(self, checkpoint_manager, paginator, batch, is_final): logger.warning( 'Failed to get a checkpoint date from a batch of data.' ) + if isinstance(paginator, UCRPaginator): + cursor = paginator.next_page_params_from_batch(batch)['cursor'][0] + checkpoint_manager.set_checkpoint( + datetime.utcnow(), + is_final, + cursor=cursor, + ) class MockCommCareHqClient(object): diff --git a/migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py b/migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py new file mode 100644 index 00000000..2e7c05d9 --- /dev/null +++ b/migrations/versions/3b37b3b06104_added_cursor_to_checkpoint.py @@ -0,0 +1,27 @@ +"""Added cursor to checkpoint + +Revision ID: 3b37b3b06104 +Revises: 6f158d161ab6 +Create Date: 2023-08-25 11:10:38.713189 + +""" +from alembic import op +import sqlalchemy as sa + + +revision = '3b37b3b06104' +down_revision = '6f158d161ab6' +branch_labels = None +depends_on = None + + +def upgrade(): + url = op.get_bind().engine.url + collation = 'utf8_bin' if 'mysql' in url.drivername else None + op.add_column( + 'commcare_export_runs', + sa.Column('cursor', sa.Unicode(255, collation=collation)) + ) + +def downgrade(): + op.drop_column('commcare_export_runs', 'cursor') diff --git a/migrations/versions/c36489c5a628_create_commcare_export_runs.py b/migrations/versions/c36489c5a628_create_commcare_export_runs.py index 302b1b13..15660170 100644 --- a/migrations/versions/c36489c5a628_create_commcare_export_runs.py +++ b/migrations/versions/c36489c5a628_create_commcare_export_runs.py @@ -18,6 +18,7 @@ def upgrade(): meta = sa.MetaData(bind=op.get_bind()) meta.reflect() + if 'commcare_export_runs' not in meta.tables: url = op.get_bind().engine.url collation = 'utf8_bin' if 'mysql' in url.drivername else None From 4d381044a2ab36cf0554d630090671484bb5ae21 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Mon, 4 Sep 2023 10:02:44 +0200 Subject: [PATCH 172/257] Upgrade pip in github test workflow --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 70377d9e..e9cd96da 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,6 +48,7 @@ jobs: python-version: ${{ matrix.python-version }} - run: sudo apt-get install pandoc - run: python setup.py sdist + - run: pip install --upgrade pip - run: pip install dist/* - run: pip install pymysql psycopg2 pyodbc - run: pip install coverage coveralls From 96b90fdf45a6afaa026b229f471b5c0cb9220c05 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 5 Sep 2023 09:49:28 +0200 Subject: [PATCH 173/257] convert git version to a PEP440 compatible version --- commcare_export/version.py | 18 +++++++++++++++++- tests/test_version.py | 19 
+++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tests/test_version.py diff --git a/commcare_export/version.py b/commcare_export/version.py index 3f083bf3..b4f2a631 100644 --- a/commcare_export/version.py +++ b/commcare_export/version.py @@ -1,4 +1,5 @@ import io +import re import os.path import subprocess @@ -20,7 +21,22 @@ def git_version(): ['git', 'describe'], stdout=subprocess.PIPE ).communicate()[0].strip() - return described_version_bytes.decode('ascii') + version_raw = described_version_bytes.decode('ascii') + return parse_version(version_raw) + + +def parse_version(version_raw): + """Attempt to convert a git version to a version + compatible with PEP440: https://peps.python.org/pep-0440/ + """ + match = re.match('(\d+\.\d+\.\d+)(?:-(\d+).*)?', version_raw) + if match: + tag_version, lead_count = match.groups() + if lead_count: + tag_version += ".dev{}".format(lead_count) + return tag_version + + return version_raw def version(): diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 00000000..c6559fe2 --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,19 @@ +import pytest + +from commcare_export.version import parse_version + + +@pytest.mark.parametrize( + "input,output", + [ + ("1.2.3", "1.2.3"), + ("1.2", "1.2"), + ("0.1.5-3", "0.1.5.dev3"), + ("0.1.5-3-g1234567", "0.1.5.dev3"), + ("0.1.5-4-g1234567-dirty", "0.1.5.dev4"), + ("0.1.5-15-g1234567-dirty-123", "0.1.5.dev15"), + ("a.b.c", "a.b.c"), + ] +) +def test_parse_version(input, output): + assert parse_version(input) == output From 0c22b4cf9a796b28dd2f0e56468f4a11a061879c Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 26 Sep 2023 10:33:03 +0200 Subject: [PATCH 174/257] Ensure initial params makes use of last checkpoint --- commcare_export/checkpoint.py | 2 ++ commcare_export/commcare_minilinq.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index fa1e97f5..290bd4ab 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -457,6 +457,8 @@ def get_since(self, checkpoint_manager, data_source=None): if checkpoint_manager: if data_source and data_source == 'ucr': + if not checkpoint_manager.get_last_checkpoint(): + return None return checkpoint_manager.get_last_checkpoint().cursor since = checkpoint_manager.get_time_of_last_checkpoint() diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index f9d273e0..a03c2819 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -273,3 +273,7 @@ def next_page_params_from_batch(self, batch): params = super(UCRPaginator, self).next_page_params_from_batch(batch) if params: return params | self.payload + + def next_page_params_since(self, since=None): + params = self.payload | {'cursor': since} + return params From 4f8e5e6a7b98a17dc3a8d5270a34f1be22faf67b Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 26 Sep 2023 21:05:44 +0200 Subject: [PATCH 175/257] Add cursor PaginationMode --- commcare_export/checkpoint.py | 17 +++++++++-------- commcare_export/commcare_minilinq.py | 12 +++++++++++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 290bd4ab..5a5624c5 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -448,7 +448,7 @@ def __init__( self.since = since self.base_checkpoint_manager = base_checkpoint_manager - def get_since(self, 
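As a sanity check on the `.devN` strings `parse_version` emits, they are canonical PEP 440 development releases; this illustration assumes the third-party `packaging` library is available:

```python
from packaging.version import Version

v = Version('0.1.5.dev3')
assert v.is_prerelease        # dev releases are pre-releases...
assert v < Version('0.1.5')   # ...and order before the tagged release
```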
checkpoint_manager, data_source=None): + def get_since(self, checkpoint_manager): if self.start_over: return None @@ -456,10 +456,9 @@ def get_since(self, checkpoint_manager, data_source=None): return self.since if checkpoint_manager: - if data_source and data_source == 'ucr': - if not checkpoint_manager.get_last_checkpoint(): - return None - return checkpoint_manager.get_last_checkpoint().cursor + if checkpoint_manager.data_source == 'ucr': + last_checkpoint = checkpoint_manager.get_last_checkpoint() + return last_checkpoint.cursor if last_checkpoint else None since = checkpoint_manager.get_time_of_last_checkpoint() return dateutil.parser.parse(since) if since else None @@ -473,6 +472,9 @@ def get_pagination_mode(self, checkpoint_manager): if self.start_over or self.since or not checkpoint_manager: return PaginationMode.date_indexed + if checkpoint_manager.data_source == 'ucr': + return PaginationMode.cursor + last_checkpoint = checkpoint_manager.get_last_checkpoint() if not last_checkpoint: return PaginationMode.date_indexed @@ -496,9 +498,8 @@ def get_checkpoint_manager(self, data_source, table_names): data_source, table_names ) - since = self.get_since(manager, data_source) + since = self.get_since(manager) pagination_mode = self.get_pagination_mode(manager) - logger.info( "Creating checkpoint manager for tables: %s, since: %s, " "pagination_mode: %s", @@ -507,7 +508,7 @@ def get_checkpoint_manager(self, data_source, table_names): since, pagination_mode.name, ) - if pagination_mode != PaginationMode.date_indexed: + if pagination_mode not in PaginationMode.supported_modes(): logger.warning( "\n====================================\n" "This export is using a deprecated pagination mode which will " diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index a03c2819..1c7b4dd2 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -28,6 +28,14 @@ class PaginationMode(Enum): date_indexed = "date_indexed" date_modified = "date_modified" + cursor = "cursor" + + @classmethod + def supported_modes(cls): + return [ + cls.date_indexed, + cls.cursor, + ] class SimpleSinceParams(object): @@ -123,8 +131,10 @@ def get_paginator( DatePaginator('server_date_modified', page_size), 'messaging-event': DatePaginator('date_last_activity', page_size), + }, + PaginationMode.cursor: { 'ucr': UCRPaginator(page_size), - } + }, }[pagination_mode].get(resource, SimplePaginator(page_size)) From 90ba91789191a71ecb92bd1871933b41c94c01f9 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Wed, 27 Sep 2023 08:23:43 +0200 Subject: [PATCH 176/257] Move set_checkpoint to paginator class --- commcare_export/commcare_hq_client.py | 27 +----------------------- commcare_export/commcare_minilinq.py | 30 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 5c2e9bc9..e785dec6 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -208,39 +208,14 @@ def iterate_resource(resource=resource, params=params): ) more_to_fetch = not repeated_last_page_of_non_counting_resource - self.checkpoint( + paginator.set_checkpoint( checkpoint_manager, - paginator, batch, not more_to_fetch ) return RepeatableIterator(iterate_resource) - def checkpoint(self, checkpoint_manager, paginator, batch, is_final): - from commcare_export.commcare_minilinq import DatePaginator, UCRPaginator - if isinstance(paginator, 
DatePaginator): - since_date = paginator.get_since_date(batch) - if since_date: - try: - last_obj = batch['objects'][-1] - except IndexError: - last_obj = {} - checkpoint_manager.set_checkpoint( - since_date, is_final, doc_id=last_obj.get("id", None) - ) - else: - logger.warning( - 'Failed to get a checkpoint date from a batch of data.' - ) - if isinstance(paginator, UCRPaginator): - cursor = paginator.next_page_params_from_batch(batch)['cursor'][0] - checkpoint_manager.set_checkpoint( - datetime.utcnow(), - is_final, - cursor=cursor, - ) - class MockCommCareHqClient(object): """ diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 1c7b4dd2..bccddcbc 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -5,14 +5,18 @@ API directly. """ import json +import logging from enum import Enum from urllib.parse import parse_qs, urlparse +from datetime import datetime from dateutil.parser import ParserError, parse from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap +logger = logging.getLogger(__name__) + SUPPORTED_RESOURCES = { 'form', 'case', @@ -215,6 +219,9 @@ def next_page_params_from_batch(self, batch): if batch['meta']['next']: return parse_qs(urlparse(batch['meta']['next']).query) + def set_checkpoint(self, *args, **kwargs): + pass + class DatePaginator(SimplePaginator): """ @@ -276,6 +283,21 @@ def get_since_date(self, batch): except ParserError: return None + def set_checkpoint(self, checkpoint_manager, batch, is_final): + since_date = self.get_since_date(batch) + if since_date: + try: + last_obj = batch['objects'][-1] + except IndexError: + last_obj = {} + checkpoint_manager.set_checkpoint( + since_date, is_final, doc_id=last_obj.get("id", None) + ) + else: + logger.warning( + 'Failed to get a checkpoint date from a batch of data.' + ) + class UCRPaginator(SimplePaginator): @@ -287,3 +309,11 @@ def next_page_params_from_batch(self, batch): def next_page_params_since(self, since=None): params = self.payload | {'cursor': since} return params + + def set_checkpoint(self, checkpoint_manager, batch, is_final): + cursor = self.next_page_params_from_batch(batch)['cursor'][0] + checkpoint_manager.set_checkpoint( + datetime.utcnow(), + is_final, + cursor=cursor, + ) From df8083f5d843f0c46e574c47f0f86dc24b8601ea Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Mon, 9 Oct 2023 16:32:12 +0200 Subject: [PATCH 177/257] Sleep after failed GET request --- commcare_export/commcare_hq_client.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index b8bf94bd..cbb56ab8 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -9,13 +9,15 @@ import copy import logging +import time from collections import OrderedDict +from math import ceil from urllib.parse import urlencode +import backoff import requests from requests.auth import AuthBase, HTTPDigestAuth -import backoff import commcare_export from commcare_export.repeatable_iterator import RepeatableIterator @@ -29,6 +31,8 @@ def on_backoff(details): + breakpoint() + print() _log_backoff(details, 'Waiting for retry.') @@ -120,7 +124,9 @@ def api_url(self): ) def get(self, resource, params=None): """ - Gets the named resource. + Gets the named resource. 
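The UCR paginator resumes from an opaque cursor rather than a date. A dependency-free sketch of that cursor-following loop; `fetch_page` and its canned pages are invented stand-ins for the API client:

```python
from urllib.parse import parse_qs, urlparse

PAGES = {
    None: {'value': [1, 2], 'meta': {'next': '/ucr?cursor=abc'}},
    'abc': {'value': [3], 'meta': {'next': None}},
}

def fetch_page(cursor):
    return PAGES[cursor]

def iterate(cursor=None):
    while True:
        batch = fetch_page(cursor)
        yield from batch['value']
        next_url = batch['meta']['next']
        if not next_url:
            break
        # Same extraction as SimplePaginator.next_page_params_from_batch.
        cursor = parse_qs(urlparse(next_url).query)['cursor'][0]

assert list(iterate()) == [1, 2, 3]
assert list(iterate('abc')) == [3]  # resuming from a checkpointed cursor
```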
When the server returns a 429 (too many requests), the process will sleep for + the amount of seconds specified in the Retry-After header from the response, after which it will raise + an exception to trigger the retry action. Currently a bit of a vulnerable stub that works for this particular use case in the hands of a trusted user; would likely @@ -131,6 +137,10 @@ def get(self, resource, params=None): response = self.session.get( resource_url, params=params, auth=self.__auth, timeout=60 ) + retry_after = response.headers.get("Retry-After", None) + if response.status_code == 429 and retry_after: + retry_after = ceil(float(retry_after)) + time.sleep(retry_after) response.raise_for_status() return response.json() From 869c2ccea47ab1b9f3f14032d34971fd83854129 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 10 Oct 2023 09:17:59 +0200 Subject: [PATCH 178/257] Move up check for ucr in get_pagination_mode --- commcare_export/checkpoint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 5a5624c5..ed99b1f9 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -469,12 +469,12 @@ def get_pagination_mode(self, checkpoint_manager): from a previous checkpoint in which case use the same pagination mode as before. """ - if self.start_over or self.since or not checkpoint_manager: - return PaginationMode.date_indexed - if checkpoint_manager.data_source == 'ucr': return PaginationMode.cursor + if self.start_over or self.since or not checkpoint_manager: + return PaginationMode.date_indexed + last_checkpoint = checkpoint_manager.get_last_checkpoint() if not last_checkpoint: return PaginationMode.date_indexed From 5f00383882b41001b9a560ff2f525e35ae45495b Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 10 Oct 2023 09:35:59 +0200 Subject: [PATCH 179/257] Check for truthiness of checkpoint_manager first thing --- commcare_export/checkpoint.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index ed99b1f9..3f67a799 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -469,10 +469,13 @@ def get_pagination_mode(self, checkpoint_manager): from a previous checkpoint in which case use the same pagination mode as before. 
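PATCH 177's inline rate-limit handling boils down to a couple of lines; restated here as a standalone helper (the function name is invented):

```python
import time
from math import ceil

def sleep_if_rate_limited(status_code, headers):
    if status_code == 429:
        # Round the server's hint up to whole seconds before sleeping.
        time.sleep(ceil(float(headers.get('Retry-After', 0.0))))

sleep_if_rate_limited(429, {'Retry-After': '1.2'})  # ceil(1.2) -> sleeps 2s
sleep_if_rate_limited(200, {})                      # not rate-limited: no-op
```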
""" + if not checkpoint_manager: + return PaginationMode.date_indexed + if checkpoint_manager.data_source == 'ucr': return PaginationMode.cursor - if self.start_over or self.since or not checkpoint_manager: + if self.start_over or self.since: return PaginationMode.date_indexed last_checkpoint = checkpoint_manager.get_last_checkpoint() From a717e1695650d7e800093b3f03a27a35b8496c69 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Oct 2023 10:08:01 +0200 Subject: [PATCH 180/257] upgrade jsonpath-ng version spec --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a12f4261..6c77a214 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ def run_tests(self): 'alembic', 'argparse', 'backoff', - 'jsonpath-ng~=1.5', + 'jsonpath-ng~=1.6.0', 'ndg-httpsclient', 'openpyxl==2.5.12', 'python-dateutil', From 8e85a880f498d9d2efa000a9f61e887e2d0c6f85 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Oct 2023 10:14:44 +0200 Subject: [PATCH 181/257] update tests with changes in jsonpath-ng See https://github.com/h2non/jsonpath-ng/commit/4c1effd23d22162d7081566accd85d755815a089 --- tests/test_minilinq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_minilinq.py b/tests/test_minilinq.py index 9677b509..b814d145 100644 --- a/tests/test_minilinq.py +++ b/tests/test_minilinq.py @@ -116,7 +116,7 @@ def test_eval_auto_id_reference_nested(self): ]) ) self.check_case( - mmap.eval(env), [['1.bar.1.bar.[0]', 'a1', '1', '1.bid', 'bob'], + mmap.eval(env), [["1.bar.'1.bar.[0]'", 'a1', '1', '1.bid', 'bob'], ['1.bar.bazzer', 'a2', '1', '1.bid', 'bob']] ) @@ -142,8 +142,8 @@ def test_value_or_root(self): data = {"id": 1, "bar": [{'baz': 'a1'}, {'baz': 'a2'}]} self._test_value_or_root([Reference('id'), Reference('baz')], data, [ - ['1.bar.1.bar.[0]', 'a1'], - ['1.bar.1.bar.[1]', 'a2'], + ["1.bar.'1.bar.[0]'", 'a1'], + ["1.bar.'1.bar.[1]'", 'a2'], ]) def test_value_or_root_empty_list(self): From c147a948ba1c08c35b4510f2703389e4cabda6b1 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Fri, 13 Oct 2023 11:31:53 +0200 Subject: [PATCH 182/257] Check datasource for paginator --- commcare_export/checkpoint.py | 22 +++++++++++----------- commcare_export/commcare_minilinq.py | 1 - 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 3f67a799..824eaa41 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -463,27 +463,27 @@ def get_since(self, checkpoint_manager): since = checkpoint_manager.get_time_of_last_checkpoint() return dateutil.parser.parse(since) if since else None - def get_pagination_mode(self, checkpoint_manager): + def get_pagination_mode(self, data_source, checkpoint_manager=None): """ Always use the default pagination mode unless we are continuing from a previous checkpoint in which case use the same pagination mode as before. 
""" - if not checkpoint_manager: - return PaginationMode.date_indexed - - if checkpoint_manager.data_source == 'ucr': - return PaginationMode.cursor - - if self.start_over or self.since: - return PaginationMode.date_indexed + if self.start_over or self.since or not checkpoint_manager: + return self.get_paginator_for_datasource(data_source) last_checkpoint = checkpoint_manager.get_last_checkpoint() if not last_checkpoint: - return PaginationMode.date_indexed + return self.get_paginator_for_datasource(data_source) return last_checkpoint.get_pagination_mode() + @staticmethod + def get_paginator_for_datasource(datasource): + if datasource == 'ucr': + return PaginationMode.cursor + return PaginationMode.date_indexed + def get_checkpoint_manager(self, data_source, table_names): """ This get's called before each table is exported and set in the @@ -502,7 +502,7 @@ def get_checkpoint_manager(self, data_source, table_names): ) since = self.get_since(manager) - pagination_mode = self.get_pagination_mode(manager) + pagination_mode = self.get_pagination_mode(data_source, checkpoint_manager=manager) logger.info( "Creating checkpoint manager for tables: %s, since: %s, " "pagination_mode: %s", diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index bccddcbc..124afd7d 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -122,7 +122,6 @@ def get_paginator( 'form': DatePaginator('indexed_on', page_size), 'case': DatePaginator('indexed_on', page_size), 'messaging-event': DatePaginator('date_last_activity', page_size), - 'ucr': UCRPaginator(page_size), }, PaginationMode.date_modified: { 'form': From 34b49d0a7e97690da321431757acdf9f1ba0bc09 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Fri, 13 Oct 2023 11:42:02 +0200 Subject: [PATCH 183/257] Fix test --- tests/test_checkpointmanager.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_checkpointmanager.py b/tests/test_checkpointmanager.py index 1d0cb0e1..19c54583 100644 --- a/tests/test_checkpointmanager.py +++ b/tests/test_checkpointmanager.py @@ -232,7 +232,7 @@ def test_checkpoint_details_static( ): cmp = CheckpointManagerProvider(None, since, start_over) assert expected_since == cmp.get_since(None) - assert expected_paginator == cmp.get_pagination_mode(None) + assert expected_paginator == cmp.get_pagination_mode('', None) @pytest.mark.dbtest @@ -242,19 +242,20 @@ def test_checkpoint_details_no_checkpoint(self, configured_manager): manager = configured_manager.for_dataset('form', ['t1']) assert None is CheckpointManagerProvider().get_since(manager) assert PaginationMode.date_indexed == CheckpointManagerProvider( - ).get_pagination_mode(manager) + ).get_pagination_mode('form', manager) def test_checkpoint_details_latest_from_db(self, configured_manager): - manager = configured_manager.for_dataset('form', ['t1']) + data_source = 'form' + manager = configured_manager.for_dataset(data_source, ['t1']) self._test_checkpoint_details( - manager, datetime.datetime.utcnow(), PaginationMode.date_modified + manager, datetime.datetime.utcnow(), PaginationMode.date_modified, data_source ) self._test_checkpoint_details( - manager, datetime.datetime.utcnow(), PaginationMode.date_indexed + manager, datetime.datetime.utcnow(), PaginationMode.date_indexed, data_source ) self._test_checkpoint_details( - manager, datetime.datetime.utcnow(), PaginationMode.date_modified + manager, datetime.datetime.utcnow(), PaginationMode.date_modified, data_source ) 
def _test_checkpoint_details( @@ -262,9 +263,10 @@ def _test_checkpoint_details( manager, checkpoint_date, pagination_mode, + data_source, ): manager.set_checkpoint(checkpoint_date, pagination_mode) cmp = CheckpointManagerProvider() - assert pagination_mode == cmp.get_pagination_mode(manager) + assert pagination_mode == cmp.get_pagination_mode(data_source, manager) assert checkpoint_date == cmp.get_since(manager) From b498fdac4152d58a43350330b3d746d43170a32f Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Fri, 20 Oct 2023 09:33:42 +0200 Subject: [PATCH 184/257] Small refactor --- commcare_export/commcare_hq_client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index cbb56ab8..0a50e545 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -31,8 +31,6 @@ def on_backoff(details): - breakpoint() - print() _log_backoff(details, 'Waiting for retry.') @@ -137,8 +135,8 @@ def get(self, resource, params=None): response = self.session.get( resource_url, params=params, auth=self.__auth, timeout=60 ) - retry_after = response.headers.get("Retry-After", None) - if response.status_code == 429 and retry_after: + if response.status_code == 429: + retry_after = response.headers.get("Retry-After", 0.0) retry_after = ceil(float(retry_after)) time.sleep(retry_after) response.raise_for_status() From a814d71afae1e4d79e66de895599e4cf0039cf4b Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Fri, 20 Oct 2023 09:44:42 +0200 Subject: [PATCH 185/257] Increase default page size for UCRs and change default value setting --- commcare_export/commcare_minilinq.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index f9d273e0..61f5e54d 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -102,7 +102,7 @@ def __call__(self, since, until): def get_paginator( resource, - page_size=1000, + page_size=None, pagination_mode=PaginationMode.date_indexed, ): return { @@ -134,7 +134,7 @@ class CommCareHqEnv(DictEnv): CommCareHq API. """ - def __init__(self, commcare_hq_client, until=None, page_size=1000): + def __init__(self, commcare_hq_client, page_size=None, until=None): self.commcare_hq_client = commcare_hq_client self.until = until self.page_size = page_size @@ -177,7 +177,8 @@ class SimplePaginator(object): Paginate based on the 'next' URL provided in the API response. 
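The cursor paginators earlier in the series merge their parameter dicts with the `|` operator, which needs Python 3.9 or newer; its semantics in brief:

```python
payload = {'limit': 10000, 'cursor': 'old'}
params = payload | {'cursor': 'new'}   # right-hand operand wins on conflicts
assert params == {'limit': 10000, 'cursor': 'new'}
assert payload == {'limit': 10000, 'cursor': 'old'}  # left operand unchanged
```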
""" - def __init__(self, page_size=1000, params=None): + def __init__(self, page_size=None, params=None): + page_size = page_size if page_size else 1000 self.page_size = page_size self.params = params @@ -224,7 +225,8 @@ class DatePaginator(SimplePaginator): DEFAULT_PARAMS = object() - def __init__(self, since_field, page_size=1000, params=DEFAULT_PARAMS): + def __init__(self, since_field, page_size=None, params=DEFAULT_PARAMS): + page_size = page_size if page_size else 1000 params = DATE_PARAMS[ since_field] if params is DatePaginator.DEFAULT_PARAMS else params super(DatePaginator, self).__init__(page_size, params) @@ -268,6 +270,9 @@ def get_since_date(self, batch): class UCRPaginator(SimplePaginator): + def __init__(self, page_size=None, *args, **kwargs): + page_size = page_size if page_size else 10000 + super().__init__(page_size, *args, **kwargs) def next_page_params_from_batch(self, batch): params = super(UCRPaginator, self).next_page_params_from_batch(batch) From 0963b252c809badc2c02cf84ad5be40fa4392976 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Fri, 20 Oct 2023 13:34:18 +0200 Subject: [PATCH 186/257] Handle sleeping in the backoff handler --- commcare_export/commcare_hq_client.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 0a50e545..b7604421 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -32,6 +32,12 @@ def on_backoff(details): _log_backoff(details, 'Waiting for retry.') + response = details["exception"].response + if response.status_code == 429: + retry_after = response.headers.get("Retry-After", 0.0) + retry_after = ceil(float(retry_after)) + logger.warning(f"Sleeping for {retry_after} seconds") + time.sleep(retry_after) def on_giveup(details): @@ -135,10 +141,6 @@ def get(self, resource, params=None): response = self.session.get( resource_url, params=params, auth=self.__auth, timeout=60 ) - if response.status_code == 429: - retry_after = response.headers.get("Retry-After", 0.0) - retry_after = ceil(float(retry_after)) - time.sleep(retry_after) response.raise_for_status() return response.json() From a32d003bcf74eda938b8aa869801870b0b2685b5 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Fri, 20 Oct 2023 17:35:43 +0200 Subject: [PATCH 187/257] Use constants for default page sizes --- commcare_export/commcare_minilinq.py | 7 +++++-- reports.zip | Bin 0 -> 22 bytes 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 reports.zip diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 61f5e54d..50d9a894 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -24,6 +24,9 @@ 'ucr', } +DEFAULT_PAGE_SIZE = 1000 +DEFAULT_UCR_PAGE_SIZE = 10000 + class PaginationMode(Enum): date_indexed = "date_indexed" @@ -226,7 +229,7 @@ class DatePaginator(SimplePaginator): DEFAULT_PARAMS = object() def __init__(self, since_field, page_size=None, params=DEFAULT_PARAMS): - page_size = page_size if page_size else 1000 + page_size = page_size if page_size else DEFAULT_PAGE_SIZE params = DATE_PARAMS[ since_field] if params is DatePaginator.DEFAULT_PARAMS else params super(DatePaginator, self).__init__(page_size, params) @@ -271,7 +274,7 @@ def get_since_date(self, batch): class UCRPaginator(SimplePaginator): def __init__(self, page_size=None, *args, **kwargs): - page_size = page_size if page_size else 10000 + page_size = page_size if page_size 
else DEFAULT_UCR_PAGE_SIZE super().__init__(page_size, *args, **kwargs) def next_page_params_from_batch(self, batch): diff --git a/reports.zip b/reports.zip new file mode 100644 index 0000000000000000000000000000000000000000..15cb0ecb3e219d1701294bfdf0fe3f5cb5d208e7 GIT binary patch literal 22 NcmWIWW@Tf*000g10H*)| literal 0 HcmV?d00001 From 388a5077df9bada11af0ebf29f6ff28080d1ebe1 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Tue, 24 Oct 2023 10:16:29 +0200 Subject: [PATCH 188/257] Let backoff handle the retry --- commcare_export/commcare_hq_client.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index b7604421..cf407ec0 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -29,15 +29,12 @@ LATEST_KNOWN_VERSION = '0.5' RESOURCE_REPEAT_LIMIT = 10 +def on_wait(details): + time_to_wait = details["wait"] + logger.warning(f"Rate limit reached. Waiting for {time_to_wait} seconds.") def on_backoff(details): _log_backoff(details, 'Waiting for retry.') - response = details["exception"].response - if response.status_code == 429: - retry_after = response.headers.get("Retry-After", 0.0) - retry_after = ceil(float(retry_after)) - logger.warning(f"Sleeping for {retry_after} seconds") - time.sleep(retry_after) def on_giveup(details): @@ -118,6 +115,13 @@ def session(self, session): def api_url(self): return '%s/a/%s/api/v%s' % (self.url, self.project, self.version) + @backoff.on_predicate( + backoff.runtime, + predicate=lambda r: r.status_code == 429, + value=lambda r: ceil(float(r.headers.get("Retry-After", 0.0))), + jitter=None, + on_backoff=on_wait, + ) @backoff.on_exception( backoff.expo, requests.exceptions.RequestException, @@ -141,8 +145,9 @@ def get(self, resource, params=None): response = self.session.get( resource_url, params=params, auth=self.__auth, timeout=60 ) - response.raise_for_status() - return response.json() + if response.status_code != 429: + response.raise_for_status() + return response def iterate( self, @@ -177,7 +182,8 @@ def iterate_resource(resource=resource, params=params): "times with same parameters" ) - batch = self.get(resource, params) + response = self.get(resource, params) + batch = response.json() last_params = copy.copy(params) batch_meta = batch['meta'] if total_count == UNKNOWN_COUNT or fetched >= total_count: From ea25a868263582514aaa37b9dc05577402fd2a81 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Tue, 24 Oct 2023 10:57:24 +0200 Subject: [PATCH 189/257] Check something --- commcare_export/commcare_hq_client.py | 46 ++++++++++++++------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index cf407ec0..343252fc 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -115,21 +115,6 @@ def session(self, session): def api_url(self): return '%s/a/%s/api/v%s' % (self.url, self.project, self.version) - @backoff.on_predicate( - backoff.runtime, - predicate=lambda r: r.status_code == 429, - value=lambda r: ceil(float(r.headers.get("Retry-After", 0.0))), - jitter=None, - on_backoff=on_wait, - ) - @backoff.on_exception( - backoff.expo, - requests.exceptions.RequestException, - max_time=300, - giveup=is_client_error, - on_backoff=on_backoff, - on_giveup=on_giveup - ) def get(self, resource, params=None): """ Gets the named resource. 
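The `backoff.runtime` decorator adopted here waits however long `Retry-After` requests before retrying. A runnable miniature of the same pattern; `FakeResponse` is invented for the demo, and the final assert really does pause about a second:

```python
from math import ceil
import backoff

class FakeResponse:
    def __init__(self, status_code, retry_after=None):
        self.status_code = status_code
        self.headers = {'Retry-After': retry_after} if retry_after else {}

calls = []

@backoff.on_predicate(
    backoff.runtime,
    predicate=lambda r: r.status_code == 429,   # retry on rate limiting
    value=lambda r: ceil(float(r.headers.get('Retry-After', 1.0))),
    jitter=None,
)
def fetch():
    calls.append(1)
    # First call is rate-limited; the retry succeeds.
    return FakeResponse(429, '1') if len(calls) == 1 else FakeResponse(200)

assert fetch().status_code == 200
assert len(calls) == 2
```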
When the server returns a 429 (too many requests), the process will sleep for @@ -140,14 +125,31 @@ def get(self, resource, params=None): particular use case in the hands of a trusted user; would likely want this to work like (or via) slumber. """ - logger.debug("Fetching '%s' batch: %s", resource, params) - resource_url = f'{self.api_url}/{resource}/' - response = self.session.get( - resource_url, params=params, auth=self.__auth, timeout=60 + @backoff.on_predicate( + backoff.runtime, + predicate=lambda r: r.status_code == 429, + value=lambda r: ceil(float(r.headers.get("Retry-After", 0.0))), + jitter=None, + on_backoff=on_wait, + ) + @backoff.on_exception( + backoff.expo, + requests.exceptions.RequestException, + max_time=300, + giveup=is_client_error, + on_backoff=on_backoff, + on_giveup=on_giveup ) - if response.status_code != 429: - response.raise_for_status() - return response + def _get(resource, params=None): + logger.debug("Fetching '%s' batch: %s", resource, params) + resource_url = f'{self.api_url}/{resource}/' + response = self.session.get( + resource_url, params=params, auth=self.__auth, timeout=60 + ) + if response.status_code != 429: + response.raise_for_status() + response = _get(resource, params) + return response.json() def iterate( self, From 8645d6302d986e39a7ae9dee5fbbb8d32d0cc46d Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Tue, 24 Oct 2023 11:02:48 +0200 Subject: [PATCH 190/257] Add return value --- commcare_export/commcare_hq_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 343252fc..21fe4cfa 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -148,6 +148,8 @@ def _get(resource, params=None): ) if response.status_code != 429: response.raise_for_status() + return response + response = _get(resource, params) return response.json() From 4e754227f2c194f29f2209c24b7217e3042fe7a7 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Tue, 24 Oct 2023 11:14:37 +0200 Subject: [PATCH 191/257] Fix: Silly mistake. I got to take a break.. 
--- commcare_export/commcare_hq_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 21fe4cfa..2a1a0633 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -186,8 +186,7 @@ def iterate_resource(resource=resource, params=params): "times with same parameters" ) - response = self.get(resource, params) - batch = response.json() + batch = self.get(resource, params) last_params = copy.copy(params) batch_meta = batch['meta'] if total_count == UNKNOWN_COUNT or fetched >= total_count: From 21108bfa37bebd5bd69defb4cd9448913d3fba45 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Tue, 24 Oct 2023 13:08:22 +0200 Subject: [PATCH 192/257] Raise for status if retry-after is not in the headers --- commcare_export/commcare_hq_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 2a1a0633..e45b0085 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -128,7 +128,7 @@ def get(self, resource, params=None): @backoff.on_predicate( backoff.runtime, predicate=lambda r: r.status_code == 429, - value=lambda r: ceil(float(r.headers.get("Retry-After", 0.0))), + value=lambda r: ceil(float(r.headers.get("Retry-After", 1.0))), jitter=None, on_backoff=on_wait, ) @@ -146,7 +146,7 @@ def _get(resource, params=None): response = self.session.get( resource_url, params=params, auth=self.__auth, timeout=60 ) - if response.status_code != 429: + if "Retry-After" not in response.headers: response.raise_for_status() return response From 6dd1ae7d9e1b3e436e752d8679194c9592d92c1e Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Tue, 24 Oct 2023 17:36:27 +0200 Subject: [PATCH 193/257] Test the check to determine if raise_for_status should be called --- commcare_export/commcare_hq_client.py | 5 ++++- tests/__init__.py | 0 tests/test_commcare_hq_client.py | 22 +++++++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/__init__.py diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index e45b0085..930847c9 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -115,6 +115,9 @@ def session(self, session): def api_url(self): return '%s/a/%s/api/v%s' % (self.url, self.project, self.version) + def _should_raise_for_status(self, response): + return "Retry-After" not in response.headers + def get(self, resource, params=None): """ Gets the named resource. 
When the server returns a 429 (too many requests), the process will sleep for @@ -146,7 +149,7 @@ def _get(resource, params=None): response = self.session.get( resource_url, params=params, auth=self.__auth, timeout=60 ) - if "Retry-After" not in response.headers: + if self._should_raise_for_status(response): response.raise_for_status() return response diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index a9273a97..89d3771a 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -17,7 +17,7 @@ SimplePaginator, get_paginator, ) - +from mock import Mock, patch class FakeSession(object): @@ -291,6 +291,26 @@ def test_message_log(self): [1, 2, 3] ) + @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") + def test_dont_raise_on_too_many_requests(self, session_mock): + response = requests.Response() + response.headers = {'Retry-After': "0.0"} + client = CommCareHqClient( + '/fake/commcare-hq/url', 'fake-project', None, None + ) + + self.assertFalse(client._should_raise_for_status(response)) + + @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") + def test_raise_on_too_many_requests(self, session_mock): + response = requests.Response() + response.headers = {} + + client = CommCareHqClient( + '/fake/commcare-hq/url', 'fake-project', None, None + ) + + self.assertTrue(client._should_raise_for_status(response)) class TestDatePaginator(unittest.TestCase): From 74b6431d4cdb1dc015784db8b379fc9069dd3641 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Thu, 26 Oct 2023 11:17:26 +0200 Subject: [PATCH 194/257] Fix default page size for datasources --- commcare_export/commcare_minilinq.py | 3 ++- tests/test_paginator.py | 33 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 tests/test_paginator.py diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 32eb072c..206b9c15 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -305,8 +305,8 @@ def set_checkpoint(self, checkpoint_manager, batch, is_final): class UCRPaginator(SimplePaginator): def __init__(self, page_size=None, *args, **kwargs): - page_size = page_size if page_size else DEFAULT_UCR_PAGE_SIZE super().__init__(page_size, *args, **kwargs) + self.page_size = page_size if page_size else DEFAULT_UCR_PAGE_SIZE def next_page_params_from_batch(self, batch): params = super(UCRPaginator, self).next_page_params_from_batch(batch) @@ -315,6 +315,7 @@ def next_page_params_from_batch(self, batch): def next_page_params_since(self, since=None): params = self.payload | {'cursor': since} + params["limit"] = self.page_size return params def set_checkpoint(self, checkpoint_manager, batch, is_final): diff --git a/tests/test_paginator.py b/tests/test_paginator.py new file mode 100644 index 00000000..e3b3e7f9 --- /dev/null +++ b/tests/test_paginator.py @@ -0,0 +1,33 @@ +import unittest + +from commcare_export.checkpoint import CheckpointManagerWithDetails +from commcare_export.commcare_minilinq import ( + DEFAULT_UCR_PAGE_SIZE, + PaginationMode, + get_paginator, +) + + +class PaginatorTest(unittest.TestCase): + def test_ucr_paginator_page_size(self): + checkpoint_manager = CheckpointManagerWithDetails( + None, None, PaginationMode.cursor + ) + paginator = get_paginator( + resource="ucr", + pagination_mode=checkpoint_manager.pagination_mode) + 
paginator.init() + initial_params = paginator.next_page_params_since( + checkpoint_manager.since_param + ) + self.assertEqual(initial_params["limit"], DEFAULT_UCR_PAGE_SIZE) + + paginator = get_paginator( + resource="ucr", + page_size=1, + pagination_mode=checkpoint_manager.pagination_mode) + paginator.init() + initial_params = paginator.next_page_params_since( + checkpoint_manager.since_param + ) + self.assertEqual(initial_params["limit"], 1) \ No newline at end of file From b7d1620c07fb086f4532648e6e41abb8458338e0 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Thu, 26 Oct 2023 12:19:00 +0200 Subject: [PATCH 195/257] Support lower python versions --- commcare_export/commcare_minilinq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 206b9c15..89677bd4 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -314,7 +314,8 @@ def next_page_params_from_batch(self, batch): return params | self.payload def next_page_params_since(self, since=None): - params = self.payload | {'cursor': since} + params = self.payload + params['cursor'] = since params["limit"] = self.page_size return params From 6bd5ddb684652b0418d31db5f319ea82a67e6656 Mon Sep 17 00:00:00 2001 From: Chris Smit Date: Mon, 30 Oct 2023 15:29:43 +0200 Subject: [PATCH 196/257] Update payload with params rather than the other way around --- commcare_export/commcare_minilinq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 89677bd4..ccae28f2 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -311,7 +311,7 @@ def __init__(self, page_size=None, *args, **kwargs): def next_page_params_from_batch(self, batch): params = super(UCRPaginator, self).next_page_params_from_batch(batch) if params: - return params | self.payload + return self.payload | params def next_page_params_since(self, since=None): params = self.payload From 3efba7a440fc5562f2f57855ff248665a6185470 Mon Sep 17 00:00:00 2001 From: Amit Phulera Date: Tue, 21 Nov 2023 17:06:07 +0530 Subject: [PATCH 197/257] update readme with working example of using commcare API --- README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4cddfb2e..271acf6f 100644 --- a/README.md +++ b/README.md @@ -242,11 +242,25 @@ As a library, the various `commcare_export` modules make it easy to To directly access the CommCare HQ REST API: ```python ->>> import getpass ->>> from commcare_export.commcare_hq_client import CommCareHqClient ->>> api_client = CommCareHqClient('http://commcarehq.org', 'your_project', 'your_username', getpass.getpass()) ->>> forms = api_client.iterate('form', {'app_id': "whatever"}) ->>> [ (form['received_on'], form['form.gender']) for form in forms ] +from commcare_export.checkpoint import CheckpointManagerWithDetails +from commcare_export.commcare_hq_client import CommCareHqClient, AUTH_MODE_APIKEY +from commcare_export.commcare_minilinq import get_paginator, PaginationMode + +username = 'some@username.com' +domain = 'your-awesome-domain' +hq_host = 'https://commcarehq.org' +API_KEY= 'your_secret_api_key' + +api_client = CommCareHqClient(hq_host, domain, username, API_KEY, AUTH_MODE_APIKEY) +case_paginator=get_paginator(resource='case', pagination_mode=PaginationMode.date_modified) +case_paginator.init() 
+checkpoint_manager=CheckpointManagerWithDetails(None, None, PaginationMode.date_modified) + +cases = api_client.iterate('case', case_paginator, checkpoint_manager=checkpoint_manager) + +for case in cases: + print(case['case_id']) + ``` To issue a `minilinq` query against it, and then print out that query in a JSON serialization: From 75338fe3c0758ab6e260ee474cb3573316fbc5ee Mon Sep 17 00:00:00 2001 From: Jing Cheng Date: Thu, 7 Dec 2023 13:39:27 -0500 Subject: [PATCH 198/257] specify backoff version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6c77a214..34539e2e 100644 --- a/setup.py +++ b/setup.py @@ -78,7 +78,7 @@ def run_tests(self): install_requires=[ 'alembic', 'argparse', - 'backoff', + 'backoff>=2.0', 'jsonpath-ng~=1.6.0', 'ndg-httpsclient', 'openpyxl==2.5.12', From 7cd1fd9a3efa3033f951bc905dc44e0b3c5014df Mon Sep 17 00:00:00 2001 From: mkangia Date: Fri, 2 Feb 2024 14:31:35 +0530 Subject: [PATCH 199/257] shows detailed exception only when needed --- commcare_export/commcare_hq_client.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index f9b1d1b1..d78943cf 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -9,6 +9,7 @@ import copy import logging +import sys import time from collections import OrderedDict from math import ceil @@ -151,7 +152,15 @@ def _get(resource, params=None): resource_url, params=params, auth=self.__auth, timeout=60 ) if self._should_raise_for_status(response): - response.raise_for_status() + try: + response.raise_for_status() + except Exception as e: + # for non-verbose output, skip the stacktrace + if not logger.isEnabledFor(logging.DEBUG): + logger.error(str(e)) + sys.exit() + raise e + return response response = _get(resource, params) From 0e34ba5fd2c8a340f011804143e0b5bebf895b08 Mon Sep 17 00:00:00 2001 From: mkangia Date: Fri, 2 Feb 2024 15:20:21 +0530 Subject: [PATCH 200/257] adds more info for 401 error --- commcare_export/commcare_hq_client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index d78943cf..c6c92ecb 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -157,7 +157,14 @@ def _get(resource, params=None): except Exception as e: # for non-verbose output, skip the stacktrace if not logger.isEnabledFor(logging.DEBUG): - logger.error(str(e)) + if isinstance(e, requests.exceptions.HTTPError) and response.status_code == 401: + logger.error( + f"#{e}. Please ensure that your CommCare HQ credentials are correct & valid for " + f"the auth-mode passed. Also, Verify that your account has the necessary " + f"permissions to access the DET tool." 
+ ) + else: + logger.error(str(e)) sys.exit() raise e From 224334ac90a565edc54aa674d3bae918338fb436 Mon Sep 17 00:00:00 2001 From: Manish Kangia Date: Fri, 2 Feb 2024 15:27:26 +0530 Subject: [PATCH 201/257] lowercase text Co-authored-by: Zandre Engelbrecht <122617251+zandre-eng@users.noreply.github.com> --- commcare_export/commcare_hq_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index c6c92ecb..c0ed6d6c 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -160,7 +160,7 @@ def _get(resource, params=None): if isinstance(e, requests.exceptions.HTTPError) and response.status_code == 401: logger.error( f"#{e}. Please ensure that your CommCare HQ credentials are correct & valid for " - f"the auth-mode passed. Also, Verify that your account has the necessary " + f"the auth-mode passed. Also, verify that your account has the necessary " f"permissions to access the DET tool." ) else: From a4ceeb26298510baf3b9bf163b0da558b21de5e6 Mon Sep 17 00:00:00 2001 From: mkangia Date: Fri, 2 Feb 2024 17:14:26 +0530 Subject: [PATCH 202/257] updates message to be explicit about authmode for apikey --- commcare_export/commcare_hq_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index c0ed6d6c..ea4509ad 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -159,9 +159,9 @@ def _get(resource, params=None): if not logger.isEnabledFor(logging.DEBUG): if isinstance(e, requests.exceptions.HTTPError) and response.status_code == 401: logger.error( - f"#{e}. Please ensure that your CommCare HQ credentials are correct & valid for " - f"the auth-mode passed. Also, verify that your account has the necessary " - f"permissions to access the DET tool." + f"#{e}. Please ensure that your CommCare HQ credentials are correct and auth-mode" + f"is passed as 'apikey' if using API Key to authenticate. Also, verify that your " + f"account has the necessary permissions to access the DET tool." ) else: logger.error(str(e)) From d78c81df5c227bba2d49e354d59504e13af4c96b Mon Sep 17 00:00:00 2001 From: mkangia Date: Fri, 2 Feb 2024 17:58:38 +0530 Subject: [PATCH 203/257] expands on python & virtualenv installation steps --- README.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 271acf6f..8116a777 100644 --- a/README.md +++ b/README.md @@ -15,28 +15,79 @@ A command-line tool (and Python library) to generate customized exports from the Installation & Quick Start -------------------------- -0a\. Install [Python 3](https://www.python.org/downloads/). This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). +Following commands are to be run on a terminal or a command line. -0b\. Sign up for [CommCare HQ](https://www.commcarehq.org/) if you have not already. +Once on a terminal window or command line, for simplicity, run commands from the home directory. -1\. Install CommCare Export via `pip` +### Python + +Check for python installed. +This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). + + +```shell +$ python --version +$ python3 --version +``` +If python is installed, all of its available versions would be listed. 
+ +If python isn't installed, Install Python 3.8 from [this link](https://www.python.org/downloads/). + +## Virtualenv (Optional) + +Setup a virtual environment using: + +```shell +$ python3.8.x -m venv .venv # update version for the one installed +``` + +Activate virtual environment by running: + +```shell +source .venv/bin/activate +``` + +**Note**: virtualenv needs to be activated each time you start a new terminal session or command line prompt. + +For convenience, to avoid doing that, you can create an alias to activate virtual environments in +".venv" directory by adding the following to your +`.bashrc` or `.zshrc` file: + +```shell +alias venv='if [[ -d .venv ]] ; then source .venv/bin/activate ; fi' +``` + +Then you can activate virtual environments with +```shell +$ venv +``` + +## Install CommCare Export + +Install CommCare Export via `pip` ``` $ python3 -m pip install wheel $ python3 -m pip install commcare-export ``` -2\. Create a project space and application. +## CommCareHQ + +1. Sign up for [CommCare HQ](https://www.commcarehq.org/) if you have not already. -3\. Visit the Release Manager, make a build, click the star to release it. +2. Create a project space and application. -4\. Use Web Apps and fill out some forms. +3. Visit the Release Manager, make a build, click the star to release it. -5\. Modify one of example queries in the `examples/` directory, modifying the "Filter Value" column +4. Use Web Apps and fill out some forms. + +5. Modify one of example queries in the `examples/` directory, modifying the "Filter Value" column to match your form XMLNS / case type. See [this page](https://confluence.dimagi.com/display/commcarepublic/Finding+a+Form%27s+XMLNS) to determine the XMLNS for your form. +Now you can run the following examples: + ``` $ commcare-export \ --query examples/demo-registration.xlsx \ From 6f82386ec14a3142846129f8058b23fd2de99382 Mon Sep 17 00:00:00 2001 From: mkangia Date: Mon, 5 Feb 2024 12:55:43 +0530 Subject: [PATCH 204/257] updates to a specific directory name --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8116a777..762fb08a 100644 --- a/README.md +++ b/README.md @@ -38,28 +38,28 @@ If python isn't installed, Install Python 3.8 from [this link](https://www.pytho Setup a virtual environment using: ```shell -$ python3.8.x -m venv .venv # update version for the one installed +$ python3.8.x -m venv .commcare-export-venv # update python version for the one installed ``` Activate virtual environment by running: ```shell -source .venv/bin/activate +source .commcare-export-venv/bin/activate ``` **Note**: virtualenv needs to be activated each time you start a new terminal session or command line prompt. 
For convenience, to avoid doing that, you can create an alias to activate virtual environments in -".venv" directory by adding the following to your +".commcare-export-venv" directory by adding the following to your `.bashrc` or `.zshrc` file: ```shell -alias venv='if [[ -d .venv ]] ; then source .venv/bin/activate ; fi' +alias commcare-export-venv='if [[ -d .commcare-export-venv ]] ; then source .commcare-export-venv/bin/activate ; fi' ``` -Then you can activate virtual environments with +Then you can activate virtual environments with simply typing ```shell -$ venv +$ commcare-export-venv ``` ## Install CommCare Export From 4c07b2497c2882af0b6677474041ad6d0b2bac36 Mon Sep 17 00:00:00 2001 From: mkangia Date: Mon, 5 Feb 2024 13:15:49 +0530 Subject: [PATCH 205/257] adds link for info on virtual environment --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 762fb08a..f739fcfe 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,11 @@ If python isn't installed, Install Python 3.8 from [this link](https://www.pytho ## Virtualenv (Optional) +It is recommended to set up a virtual environment for CommCare Export +to avoid conflicts with other python applications. + +More about virtualenvs on https://docs.python.org/3/tutorial/venv.html + Setup a virtual environment using: ```shell From fbd9a4b8aeea64cc680eb51bfec6a1c9c2cd7d64 Mon Sep 17 00:00:00 2001 From: mkangia Date: Mon, 5 Feb 2024 13:37:34 +0530 Subject: [PATCH 206/257] fixes venv command --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f739fcfe..1853a379 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ More about virtualenvs on https://docs.python.org/3/tutorial/venv.html Setup a virtual environment using: ```shell -$ python3.8.x -m venv .commcare-export-venv # update python version for the one installed +$ python3.8 -m venv .commcare-export-venv # update python version for the one installed ``` Activate virtual environment by running: From 0fc2bec459461049b9590732aa68c593928286f2 Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 6 Feb 2024 16:29:06 +0530 Subject: [PATCH 207/257] fixes spacing --- commcare_export/commcare_hq_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index ea4509ad..7e39f545 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -159,7 +159,7 @@ def _get(resource, params=None): if not logger.isEnabledFor(logging.DEBUG): if isinstance(e, requests.exceptions.HTTPError) and response.status_code == 401: logger.error( - f"#{e}. Please ensure that your CommCare HQ credentials are correct and auth-mode" + f"#{e}. Please ensure that your CommCare HQ credentials are correct and auth-mode " f"is passed as 'apikey' if using API Key to authenticate. Also, verify that your " f"account has the necessary permissions to access the DET tool." 
) From b8954cbd921935101f0780b1c55f56094a3706b7 Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 6 Feb 2024 16:41:04 +0530 Subject: [PATCH 208/257] adds tests --- tests/test_commcare_hq_client.py | 54 ++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 89d3771a..52837738 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -312,6 +312,60 @@ def test_raise_on_too_many_requests(self, session_mock): self.assertTrue(client._should_raise_for_status(response)) + @patch('commcare_export.commcare_hq_client.logger') + @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") + def test_get_with_forbidden_response_in_non_debug_mode(self, session_mock, logger_mock): + response = requests.Response() + response.status_code = 401 + session_mock.get.return_value = response + + logger_mock.isEnabledFor.return_value = False + + with self.assertRaises(SystemExit): + CommCareHqClient( + '/fake/commcare-hq/url', 'fake-project', None, None + ).get("location") + + logger_mock.error.assert_called_once_with( + "#401 Client Error: None for url: None. " + "Please ensure that your CommCare HQ credentials are correct and auth-mode is passed as 'apikey' " + "if using API Key to authenticate. Also, verify that your account has the necessary permissions " + "to access the DET tool.") + + @patch('commcare_export.commcare_hq_client.logger') + @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") + def test_get_with_other_http_failure_response_in_non_debug_mode(self, session_mock, logger_mock): + response = requests.Response() + response.status_code = 404 + session_mock.get.return_value = response + + logger_mock.isEnabledFor.return_value = False + + with self.assertRaises(SystemExit): + CommCareHqClient( + '/fake/commcare-hq/url', 'fake-project', None, None + ).get("location") + + logger_mock.error.assert_called_once_with( + "404 Client Error: None for url: None") + + @patch('commcare_export.commcare_hq_client.logger') + @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") + def test_get_with_http_failure_response_in_debug_mode(self, session_mock, logger_mock): + response = requests.Response() + response.status_code = 404 + session_mock.get.return_value = response + + logger_mock.isEnabledFor.return_value = True + + try: + CommCareHqClient( + '/fake/commcare-hq/url', 'fake-project', None, None + ).get("location") + except Exception as e: + self.assertEqual(str(e), "404 Client Error: None for url: None") + + class TestDatePaginator(unittest.TestCase): @classmethod From 03ee10f50455b08d0ef8b902b534122f8448e532 Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 6 Feb 2024 16:45:20 +0530 Subject: [PATCH 209/257] refactor: spacing & unused imports --- commcare_export/commcare_hq_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 7e39f545..827d4bda 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -10,7 +10,6 @@ import copy import logging import sys -import time from collections import OrderedDict from math import ceil from urllib.parse import urlencode @@ -21,7 +20,6 @@ import commcare_export from commcare_export.repeatable_iterator import RepeatableIterator -from datetime import datetime AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' @@ -31,10 +29,12 @@ LATEST_KNOWN_VERSION = 
'0.5' RESOURCE_REPEAT_LIMIT = 10 + def on_wait(details): time_to_wait = details["wait"] logger.warning(f"Rate limit reached. Waiting for {time_to_wait} seconds.") + def on_backoff(details): _log_backoff(details, 'Waiting for retry.') From 4bd6eddc2185e8ba8e3fdf824bad5fb2aed49d7a Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 6 Feb 2024 16:45:55 +0530 Subject: [PATCH 210/257] refactor: redundant argument & staticmethods --- commcare_export/commcare_hq_client.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 827d4bda..2fcb79c9 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -83,7 +83,6 @@ def __init__( password, auth_mode=AUTH_MODE_PASSWORD, version=LATEST_KNOWN_VERSION, - checkpoint_manager=None ): self.version = version self.url = url @@ -91,7 +90,8 @@ def __init__( self.__auth = self._get_auth(username, password, auth_mode) self.__session = None - def _get_auth(self, username, password, mode): + @staticmethod + def _get_auth(username, password, mode): if mode == AUTH_MODE_PASSWORD: return HTTPDigestAuth(username, password) elif mode == AUTH_MODE_APIKEY: @@ -117,7 +117,8 @@ def session(self, session): def api_url(self): return '%s/a/%s/api/v%s' % (self.url, self.project, self.version) - def _should_raise_for_status(self, response): + @staticmethod + def _should_raise_for_status(response): return "Retry-After" not in response.headers def get(self, resource, params=None): From a2183203b0aae0d61b08389a0ca987e4397baeb4 Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 6 Feb 2024 16:46:33 +0530 Subject: [PATCH 211/257] refactor: naming --- commcare_export/commcare_hq_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 2fcb79c9..e5ef16de 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -185,13 +185,13 @@ def iterate( Assumes the endpoint is a list endpoint, and iterates over it making a lot of assumptions that it is like a tastypie endpoint. 
""" - UNKNOWN_COUNT = 'unknown' + unknown_count = 'unknown' params = dict(params or {}) def iterate_resource(resource=resource, params=params): more_to_fetch = True last_batch_ids = set() - total_count = UNKNOWN_COUNT + total_count = unknown_count fetched = 0 repeat_counter = 0 last_params = None @@ -210,11 +210,11 @@ def iterate_resource(resource=resource, params=params): batch = self.get(resource, params) last_params = copy.copy(params) batch_meta = batch['meta'] - if total_count == UNKNOWN_COUNT or fetched >= total_count: + if total_count == unknown_count or fetched >= total_count: if batch_meta.get('total_count'): total_count = int(batch_meta['total_count']) else: - total_count = UNKNOWN_COUNT + total_count = unknown_count fetched = 0 batch_objects = batch['objects'] @@ -242,7 +242,7 @@ def iterate_resource(resource=resource, params=params): # Handle the case where API is 'non-counting' # and repeats the last batch repeated_last_page_of_non_counting_resource = ( - not got_new_data and total_count == UNKNOWN_COUNT + not got_new_data and total_count == unknown_count and (limit and len(batch_objects) < limit) ) more_to_fetch = not repeated_last_page_of_non_counting_resource From dd9fec9d440a30cc7c6f2409aa1df79bd5df2615 Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 6 Feb 2024 16:46:48 +0530 Subject: [PATCH 212/257] refactor: minor doc change --- commcare_export/commcare_hq_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index e5ef16de..400ff2bd 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -127,7 +127,7 @@ def get(self, resource, params=None): the amount of seconds specified in the Retry-After header from the response, after which it will raise an exception to trigger the retry action. - Currently a bit of a vulnerable stub that works for this + Currently, a bit of a vulnerable stub that works for this particular use case in the hands of a trusted user; would likely want this to work like (or via) slumber. """ From 7c1d36ed21d3158d9e38a4e6fc68beea1bfb033c Mon Sep 17 00:00:00 2001 From: mkangia Date: Fri, 9 Feb 2024 01:36:30 +0530 Subject: [PATCH 213/257] upgrades coverallsapp --- .github/workflows/test.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e9cd96da..77697f53 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -64,4 +64,8 @@ jobs: HQ_API_KEY: ${{ secrets.HQ_API_KEY }} - run: mypy --install-types --non-interactive @mypy_typed_modules.txt - run: coverage lcov -o coverage/lcov.info - - uses: coverallsapp/github-action@v1 \ No newline at end of file + - name: Coveralls + uses: coverallsapp/github-action@v2 + with: + github-token: + ${{ secrets.GITHUB_TOKEN }} From 5d99505143702c1bc320adec8635a134e01eeebb Mon Sep 17 00:00:00 2001 From: mkangia Date: Fri, 9 Feb 2024 17:36:26 +0530 Subject: [PATCH 214/257] avoids using internal naming --- commcare_export/commcare_hq_client.py | 2 +- tests/test_commcare_hq_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 400ff2bd..b236a4b9 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -162,7 +162,7 @@ def _get(resource, params=None): logger.error( f"#{e}. 
Please ensure that your CommCare HQ credentials are correct and auth-mode " f"is passed as 'apikey' if using API Key to authenticate. Also, verify that your " - f"account has the necessary permissions to access the DET tool." + f"account has the necessary permissions to use commcare-export." ) else: logger.error(str(e)) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index 52837738..d98a591b 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -330,7 +330,7 @@ def test_get_with_forbidden_response_in_non_debug_mode(self, session_mock, logge "#401 Client Error: None for url: None. " "Please ensure that your CommCare HQ credentials are correct and auth-mode is passed as 'apikey' " "if using API Key to authenticate. Also, verify that your account has the necessary permissions " - "to access the DET tool.") + "to use commcare-export.") @patch('commcare_export.commcare_hq_client.logger') @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") From b09d372d667c757332774b9e502ab62a4ed4f839 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 6 Feb 2024 17:42:54 +0200 Subject: [PATCH 215/257] Push sys.stderr output and normal logs to log file --- .gitignore | 1 + commcare_export/__init__.py | 24 +++++++++++++++++++++++ commcare_export/checkpoint.py | 3 +-- commcare_export/cli.py | 19 ++++++++++++------ commcare_export/commcare_hq_client.py | 7 +++---- commcare_export/commcare_minilinq.py | 4 +--- commcare_export/location_info_provider.py | 4 +--- commcare_export/minilinq.py | 4 +--- commcare_export/utils_cli.py | 2 -- commcare_export/writers.py | 4 +--- 10 files changed, 46 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index fb47a4b9..69607db9 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ nosetests.xml # Excel ~*.xlsx +commcare_export.log \ No newline at end of file diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index 58f3ace6..8450b3e5 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -1 +1,25 @@ +import sys +import logging from .version import __version__ + +logging.basicConfig( + filename="commcare_export.log", + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + filemode='w', +) + + +class Logger: + def __init__(self, logger, level): + self.logger = logger + self.level = level + self.linebuf = '' + + def write(self, buf): + for line in buf.rstrip().splitlines(): + self.logger.log(self.level, line.rstrip()) + + +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) +sys.stderr = Logger(logging.getLogger(), logging.ERROR) diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index 824eaa41..fa640986 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -1,5 +1,4 @@ import datetime -import logging import os import uuid from contextlib import contextmanager @@ -13,8 +12,8 @@ from commcare_export.commcare_minilinq import PaginationMode from commcare_export.exceptions import DataExportException from commcare_export.writers import SqlMixin +from commcare_export import logger -logger = logging.getLogger(__name__) repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) Base = declarative_base() diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 8fba0904..62169e6b 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -2,10 +2,9 @@ import getpass import io import json -import logging import os.path import sys - +import 
logging import dateutil.parser import requests import sqlalchemy @@ -29,11 +28,10 @@ from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ +from commcare_export import logger EXIT_STATUS_ERROR = 1 -logger = logging.getLogger(__name__) - commcare_hq_aliases = { 'local': 'http://localhost:8000', 'prod': 'https://www.commcarehq.org' @@ -200,10 +198,14 @@ def main(argv): exit(0) if not args.project: + error_msg = "commcare-export: error: argument --project is required" + # output to log file through sys.stderr print( - 'commcare-export: error: argument --project is required', + error_msg, file=sys.stderr ) + # Output to console for debugging + print(error_msg) exit(1) if args.profile: @@ -214,7 +216,12 @@ def main(argv): profile.start() try: - exit(main_with_args(args)) + print("Running...") + try: + exit(main_with_args(args)) + except Exception: + print("Error occurred! See log file for error.") + raise finally: if args.profile: profile.close() diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index f9b1d1b1..bb890086 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -8,8 +8,6 @@ ) import copy -import logging -import time from collections import OrderedDict from math import ceil from urllib.parse import urlencode @@ -20,20 +18,21 @@ import commcare_export from commcare_export.repeatable_iterator import RepeatableIterator -from datetime import datetime +from commcare_export import logger AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' -logger = logging.getLogger(__name__) LATEST_KNOWN_VERSION = '0.5' RESOURCE_REPEAT_LIMIT = 10 + def on_wait(details): time_to_wait = details["wait"] logger.warning(f"Rate limit reached. Waiting for {time_to_wait} seconds.") + def on_backoff(details): _log_backoff(details, 'Waiting for retry.') diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index ccae28f2..59a75922 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -5,7 +5,6 @@ API directly. 
""" import json -import logging from enum import Enum from urllib.parse import parse_qs, urlparse from datetime import datetime @@ -14,8 +13,7 @@ from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap - -logger = logging.getLogger(__name__) +from commcare_export import logger SUPPORTED_RESOURCES = { 'form', diff --git a/commcare_export/location_info_provider.py b/commcare_export/location_info_provider.py index c9cebac1..badedbff 100644 --- a/commcare_export/location_info_provider.py +++ b/commcare_export/location_info_provider.py @@ -1,9 +1,7 @@ -import logging from commcare_export.commcare_minilinq import SimplePaginator from commcare_export.misc import unwrap_val - -logger = logging.getLogger(__name__) +from commcare_export import logger # LocationInfoProvider uses the /location_type/ endpoint of the API to # retrieve location type data, stores that information in a dictionary diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index d78c6478..1206aa4e 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -1,4 +1,3 @@ -import logging from typing import Any, Dict from typing import List as ListType from typing import Optional @@ -7,8 +6,7 @@ from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.specs import TableSpec - -logger = logging.getLogger(__name__) +from commcare_export import logger class MiniLinq(object): diff --git a/commcare_export/utils_cli.py b/commcare_export/utils_cli.py index b5ef1da0..8911bd55 100644 --- a/commcare_export/utils_cli.py +++ b/commcare_export/utils_cli.py @@ -8,8 +8,6 @@ EXIT_STATUS_ERROR = 1 -logger = logging.getLogger(__name__) - class BaseCommand(object): slug = None diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 43de0d74..6fa056e7 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -1,7 +1,6 @@ import csv import datetime import io -import logging import zipfile from itertools import zip_longest @@ -12,8 +11,7 @@ from alembic.operations import Operations from commcare_export.data_types import UnknownDataType, get_sqlalchemy_type from commcare_export.specs import TableSpec - -logger = logging.getLogger(__name__) +from commcare_export import logger MAX_COLUMN_SIZE = 2000 From 2e02e91271131ef8c43cbad87398cdde84f93a7d Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 6 Feb 2024 18:11:47 +0200 Subject: [PATCH 216/257] Add more logs and print statements for reporting --- commcare_export/cli.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 62169e6b..c7f709fc 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -216,13 +216,17 @@ def main(argv): profile.start() try: - print("Running...") + print("Running export...") try: - exit(main_with_args(args)) + exit_code = main_with_args(args) + if exit_code > 0: + print("Error occurred! See log file for error.") + exit(exit_code) except Exception: print("Error occurred! 
See log file for error.") raise finally: + print("Export finished!") if args.profile: profile.close() stats = hotshot.stats.load(args.profile) @@ -415,9 +419,11 @@ def main_with_args(args): return EXIT_STATUS_ERROR if not args.username: + logger.warn("Username not provided") args.username = input('Please provide a username: ') if not args.password: + logger.warn("Password not provided") # Windows getpass does not accept unicode args.password = getpass.getpass() From aafececb92a362c0246de180181293dba78a6659 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 6 Feb 2024 18:24:07 +0200 Subject: [PATCH 217/257] Add option to output all info to console --- commcare_export/__init__.py | 8 -------- commcare_export/cli.py | 22 ++++++++++++++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index 8450b3e5..c75e7b1f 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -1,13 +1,6 @@ -import sys import logging from .version import __version__ -logging.basicConfig( - filename="commcare_export.log", - format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', - filemode='w', -) - class Logger: def __init__(self, logger, level): @@ -22,4 +15,3 @@ def write(self, buf): logger = logging.getLogger() logger.setLevel(logging.DEBUG) -sys.stderr = Logger(logging.getLogger(), logging.ERROR) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index c7f709fc..3059fa4e 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -28,7 +28,7 @@ from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ -from commcare_export import logger +from commcare_export import logger, Logger EXIT_STATUS_ERROR = 1 @@ -165,7 +165,14 @@ def add_to_parser(self, parser, **additional_kwargs): "form.form..case, messaging-event.messages.[*] And you want to " "have a record exported even if the nested document does not " "exist or is empty.", - ) + ), + Argument( + 'no-logfile', + default=False, + help="Specify in order to prevent information being logged to the log file and" + " show all output in the console.", + action='store_true', + ), ] @@ -178,6 +185,17 @@ def main(argv): args = parser.parse_args(argv) + if not args.no_logfile: + exe_dir = os.path.dirname(sys.executable) + log_file = os.path.join(exe_dir, "commcare_export.log") + print(f"Printing logs to {log_file}") + logging.basicConfig( + filename=log_file, + format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + filemode='w', + ) + sys.stderr = Logger(logging.getLogger(), logging.ERROR) + if args.verbose: logging.basicConfig( level=logging.DEBUG, From cb3d97594d805d4c7b3ae73e3ca9fabcb480bd06 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Mon, 12 Feb 2024 16:40:09 +0200 Subject: [PATCH 218/257] Remove Logger class from cli.py --- commcare_export/__init__.py | 4 ++++ commcare_export/cli.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index c75e7b1f..7b4e5293 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -13,5 +13,9 @@ def write(self, buf): self.logger.log(self.level, line.rstrip()) +def get_error_logger(): + return Logger(logging.getLogger(), logging.ERROR) + + logger = logging.getLogger() logger.setLevel(logging.DEBUG) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 3059fa4e..53ee2552 100644 --- 
a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -28,7 +28,7 @@ from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ -from commcare_export import logger, Logger +from commcare_export import logger, get_error_logger EXIT_STATUS_ERROR = 1 @@ -194,7 +194,7 @@ def main(argv): format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', filemode='w', ) - sys.stderr = Logger(logging.getLogger(), logging.ERROR) + sys.stderr = get_error_logger() if args.verbose: logging.basicConfig( From b0f1e816dad11b4cd6739bab21b365e7b785214c Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 13 Feb 2024 17:03:16 +0000 Subject: [PATCH 219/257] Test on current Python releases --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 77697f53..1ff137a2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,7 +31,7 @@ jobs: - 5432:5432 strategy: matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] + python-version: [3.8, 3.9, '3.10', 3.11, 3.12] steps: - uses: actions/checkout@v3 with: From 0b134d1ce064d4dde6e53e9dfeeee74f3fdfa340 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 13 Feb 2024 17:05:20 +0000 Subject: [PATCH 220/257] Neaten up README.md a little --- README.md | 78 +++++++++++++++++++++++++++---------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 1853a379..4a2e836f 100644 --- a/README.md +++ b/README.md @@ -21,59 +21,58 @@ Once on a terminal window or command line, for simplicity, run commands from the ### Python -Check for python installed. -This tool is [tested with Python 3.6, 3.7, and 3.8](https://app.travis-ci.com/dimagi/commcare-export). +Check which Python version is installed. +This tool is tested with Python versions from 3.8 to 3.12. ```shell $ python --version $ python3 --version ``` -If python is installed, all of its available versions would be listed. +If Python is installed, its version will be listed. -If python isn't installed, Install Python 3.8 from [this link](https://www.python.org/downloads/). +If Python isn't installed, [download and install](https://www.python.org/downloads/) the latest release. ## Virtualenv (Optional) It is recommended to set up a virtual environment for CommCare Export -to avoid conflicts with other python applications. +to avoid conflicts with other Python applications. More about virtualenvs on https://docs.python.org/3/tutorial/venv.html Setup a virtual environment using: ```shell -$ python3.8 -m venv .commcare-export-venv # update python version for the one installed +$ python3 -m venv .venv ``` Activate virtual environment by running: ```shell -source .commcare-export-venv/bin/activate +source .venv/bin/activate ``` **Note**: virtualenv needs to be activated each time you start a new terminal session or command line prompt. 
For convenience, to avoid doing that, you can create an alias to activate virtual environments in -".commcare-export-venv" directory by adding the following to your +".venv" directory by adding the following to your `.bashrc` or `.zshrc` file: ```shell -alias commcare-export-venv='if [[ -d .commcare-export-venv ]] ; then source .commcare-export-venv/bin/activate ; fi' +alias venv='if [[ -d .venv ]] ; then source .venv/bin/activate ; fi' ``` Then you can activate virtual environments with simply typing ```shell -$ commcare-export-venv +$ venv ``` ## Install CommCare Export Install CommCare Export via `pip` -``` -$ python3 -m pip install wheel -$ python3 -m pip install commcare-export +```shell +$ pip install commcare-export ``` ## CommCareHQ @@ -93,7 +92,7 @@ $ python3 -m pip install commcare-export Now you can run the following examples: -``` +```shell $ commcare-export \ --query examples/demo-registration.xlsx \ --project YOUR_PROJECT \ @@ -126,7 +125,7 @@ Command-line Usage The basic usage of the command-line tool is with a saved Excel or JSON query (see how to write these, below) -``` +```shell $ commcare-export --commcare-hq \ --username \ --project \ @@ -497,7 +496,7 @@ Required dependencies will be automatically installed via pip. But since you may not care about all export formats, the various dependencies there are optional. Here is how you might install them: -``` +```shell # To export "xlsx" $ pip install "commcare-export[xlsx]" @@ -515,7 +514,7 @@ $ pip install "commcare-export[odbc]" # To sync with another SQL database supported by SQLAlchemy $ pip install "commcare-export[base_sql]" -# Then install the python package for your database +# Then install the Python package for your database ``` Contributing @@ -525,12 +524,13 @@ Contributing 1\. Fork the repository at https://github.com/dimagi/commcare-export. -2\. Clone your fork, install into a `virtualenv`, and start a feature branch +2\. Clone your fork, install into a virtualenv, and start a feature branch -``` -$ mkvirtualenv commcare-export +```shell $ git clone git@github.com:dimagi/commcare-export.git $ cd commcare-export +$ python3 -m venv .venv +$ source .venv/bin/activate $ pip install -e ".[test]" $ git checkout -b my-super-duper-feature ``` @@ -539,7 +539,7 @@ $ git checkout -b my-super-duper-feature 4\. Make sure the tests pass. The best way to test for all versions is to sign up for https://travis-ci.org and turn on automatic continuous testing for your fork. -``` +```shell $ py.test =============== test session starts =============== platform darwin -- Python 2.7.3 -- pytest-2.3.4 @@ -556,13 +556,13 @@ tests/test_writers.py ... 5\. Type hints are used in the `env` and `minilinq` modules. Check that any changes in those modules adhere to those types: -``` +```shell $ mypy --install-types @mypy_typed_modules.txt ``` 6\. Push the feature branch up -``` +```shell $ git push -u origin my-super-duper-feature ``` @@ -575,21 +575,21 @@ Release process 1\. Create a tag for the release -``` +```shell $ git tag -a "X.YY.0" -m "Release X.YY.0" $ git push --tags ``` 2\. Create the source distribution -``` +```shell $ python setup.py sdist ``` Ensure that the archive (`dist/commcare-export-X.YY.0.tar.gz`) has the correct version number (matching the tag name). 3\. 
Upload to pypi -``` +```shell $ pip install twine $ twine upload -u dimagi dist/commcare-export-X.YY.0.tar.gz ``` @@ -608,28 +608,28 @@ Testing and Test Databases The following command will run the entire test suite (requires DB environment variables to be set as per below): -``` +```shell $ py.test ``` To run an individual test class or method you can run, e.g.: -``` +```shell $ py.test -k "TestExcelQuery" $ py.test -k "test_get_queries_from_excel" ``` To exclude the database tests you can run: -``` +```shell $ py.test -m "not dbtest" ``` When running database tests, supported databases are PostgreSQL, MySQL, MSSQL. To run tests against selected databases can be done using test marks as follows: -``` -py.test -m [postgres,mysql,mssql] +```shell +$ py.test -m [postgres,mysql,mssql] ``` Database URLs can be overridden via environment variables: @@ -641,7 +641,7 @@ MSSQL_URL=mssql+pyodbc://user:password@host/ Postgresql ========== -``` +```shell $ docker pull postgres:9.6 $ docker run --name ccexport-postgres -p 5432:5432 -e POSTGRES_PASSWORD=postgres -d postgres:9.6 $ export POSTGRES_URL=postgresql://postgres:postgres@localhost/ @@ -651,7 +651,7 @@ $ export POSTGRES_URL=postgresql://postgres:postgres@localhost/ MySQL ===== -``` +```shell $ docker pull mysql $ docker run --name ccexport-mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=pw -e MYSQL_USER=travis -e MYSQL_PASSWORD='' -d mysql @@ -663,7 +663,7 @@ mysql> GRANT ALL PRIVILEGES ON *.* TO 'travis'@'%'; MSSQL ===== -``` +```shell $ docker pull mcr.microsoft.com/mssql/server:2017-latest $ docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=Password@123" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest @@ -678,7 +678,7 @@ $ odbcinst -q -d MSSQL for Mac OS ========== -``` +```shell $ docker pull mcr.microsoft.com/mssql/server:2017-latest $ docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=Password@123" -p 1433:1433 --name mssql1 -d microsoft/mssql-server-linux:2017-latest @@ -693,7 +693,7 @@ Setup=/usr/local/lib/libtdsodbc.so UsageCount=1 # Create a soft link from /etc/odbcinst.ini to actual file -sudo ln -s /usr/local/etc/odbcinst.ini /etc/odbcinst.ini +$ sudo ln -s /usr/local/etc/odbcinst.ini /etc/odbcinst.ini ``` @@ -705,9 +705,9 @@ access to the corpora domain. These need to be set as environment variables as follows: -``` -export HQ_USERNAME= -export HQ_API_KEY= +```shell +$ export HQ_USERNAME= +$ export HQ_API_KEY= ``` For Travis builds these are included as encrypted vars in the travis From 4ff0e40a82c5526a336b35c8cbb37b40fa566939 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 13 Feb 2024 17:12:34 +0000 Subject: [PATCH 221/257] `python` might not work but `python3` always will --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 4a2e836f..12cafc43 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,6 @@ Check which Python version is installed. This tool is tested with Python versions from 3.8 to 3.12. ```shell -$ python --version $ python3 --version ``` If Python is installed, its version will be listed. 
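For reference, a minimal sketch of building an API client from the
`HQ_USERNAME` / `HQ_API_KEY` variables the integration tests read, reusing
the constructor shown in the README's library example earlier in this series
(the "corpora" project space is the one those credentials are granted; the
real test wiring lives under `tests/` and may differ):

```python
import os

from commcare_export.commcare_hq_client import (
    AUTH_MODE_APIKEY,
    CommCareHqClient,
)

# Illustrative sketch only: reads the same environment variables the
# integration tests expect to find.
client = CommCareHqClient(
    'https://www.commcarehq.org',   # url
    'corpora',                      # project space used by the tests
    os.environ['HQ_USERNAME'],
    os.environ['HQ_API_KEY'],
    auth_mode=AUTH_MODE_APIKEY,
)
```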
From ec54ac3434ca8d63b7f2b01afe08696272a87f86 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 13 Feb 2024 17:27:57 +0000 Subject: [PATCH 222/257] Fix nearly all Python and JSON errors --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 12cafc43..22233739 100644 --- a/README.md +++ b/README.md @@ -320,12 +320,13 @@ for case in cases: To issue a `minilinq` query against it, and then print out that query in a JSON serialization: ```python -import getpass import json +import sys from commcare_export.minilinq import * from commcare_export.commcare_hq_client import CommCareHqClient from commcare_export.commcare_minilinq import CommCareHqEnv -from commcare_export.env import BuiltInEnv +from commcare_export.env import BuiltInEnv, JsonPathEnv +from commcare_export.writers import StreamingMarkdownTableWriter api_client = CommCareHqClient( url="http://www.commcarehq.org", @@ -356,20 +357,20 @@ query = Emit( source ) -print json.dumps(query.to_jvalue(), indent=2) +print(json.dumps(query.to_jvalue(), indent=2)) results = query.eval(BuiltInEnv() | CommCareHqEnv(api_client) | JsonPathEnv()) if len(list(env.emitted_tables())) > 0: - # with writers.Excel2007TableWriter("excel-output.xlsx") as writer: - with writers.StreamingMarkdownTableWriter(sys.stdout) as writer: + # with Excel2007TableWriter("excel-output.xlsx") as writer: + with StreamingMarkdownTableWriter(sys.stdout) as writer: for table in env.emitted_tables(): writer.write_table(table) ``` Which will output JSON equivalent to this: -```javascript +```json { "Emit": { "headings": [ @@ -392,7 +393,7 @@ Which will output JSON equivalent to this: } ] }, - "name": None, + "name": null, "source": { "Apply": { "args": [ From b105c0f1c11f421681e84bca12b87f325565aafe Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 13 Feb 2024 17:36:14 +0000 Subject: [PATCH 223/257] Fix table formatting --- README.md | 94 +++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 22233739..8cb47a9b 100644 --- a/README.md +++ b/README.md @@ -189,21 +189,21 @@ of that and adds a field to Excel query specifications to be joined on. Specifying the --users option or --with-organization option will export an additional table named 'commcare_users' containing the following columns: -Column | Type | Note ------- | ---- | ---- -id | Text | Primary key -default_phone_number | Text | -email | Text | -first_name | Text | -groups | Text | -last_name | Text | -phone_numbers | Text | -resource_uri | Text | -commcare_location_id | Text | Foreign key into the commcare_locations table -commcare_location_ids | Text | -commcare_primary_case_sharing_id | Text | -commcare_project | Text | -username | Text | +| Column | Type | Note | +|----------------------------------|------|-------------------------------------| +| id | Text | Primary key | +| default_phone_number | Text | | +| email | Text | | +| first_name | Text | | +| groups | Text | | +| last_name | Text | | +| phone_numbers | Text | | +| resource_uri | Text | | +| commcare_location_id | Text | Foreign key to `commcare_locations` | +| commcare_location_ids | Text | | +| commcare_primary_case_sharing_id | Text | | +| commcare_project | Text | | +| username | Text | | The data in the 'commcare_users' table comes from the [List Mobile Workers API endpoint](https://confluence.dimagi.com/display/commcarepublic/List+Mobile+Workers). 
@@ -211,28 +211,28 @@ API endpoint](https://confluence.dimagi.com/display/commcarepublic/List+Mobile+W Specifying the --locations option or --with-organization options will export an additional table named 'commcare_locations' containing the following columns: -Column | Type | Note ------- | ---- | ---- -id | Text | -created_at | Date | -domain | Text | -external_id | Text | -last_modified | Date | -latitude | Text | -location_data | Text | -location_id | Text | Primary key -location_type | Text | -longitude | Text | -name | Text | -parent | Text | Resource URI of parent location -resource_uri | Text | -site_code | Text | -location_type_administrative | Text | -location_type_code | Text | -location_type_name | Text | -location_type_parent | Text | -*location level code* | Text | Column name depends on project's organization -*location level code* | Text | Column name depends on project's organization +| Column | Type | Note | +|------------------------------|------|-----------------------------------------------| +| id | Text | | +| created_at | Date | | +| domain | Text | | +| external_id | Text | | +| last_modified | Date | | +| latitude | Text | | +| location_data | Text | | +| location_id | Text | Primary key | +| location_type | Text | | +| longitude | Text | | +| name | Text | | +| parent | Text | Resource URI of parent location | +| resource_uri | Text | | +| site_code | Text | | +| location_type_administrative | Text | | +| location_type_code | Text | | +| location_type_name | Text | | +| location_type_parent | Text | | +| *location level code* | Text | Column name depends on project's organization | +| *location level code* | Text | Column name depends on project's organization | The data in the 'commcare_locations' table comes from the Location API endpoint along with some additional columns from the Location Type API @@ -244,12 +244,12 @@ location at that level of your organization. Consider the example organization from the [CommCare help page](https://confluence.dimagi.com/display/commcarepublic/Setting+up+Organization+Levels+and+Structure). A piece of the 'commcare_locations' table could look like this: -location_id | location_type_name | chw | supervisor | clinic | district ------------ | ------------------ | ------ | ---------- | ------ | -------- -939fa8 | District | NULL | NULL | NULL | 939fa8 -c4cbef | Clinic | NULL | NULL | c4cbef | 939fa8 -a9ca40 | Supervisor | NULL | a9ca40 | c4cbef | 939fa8 -4545b9 | CHW | 4545b9 | a9ca40 | c4cbef | 939fa8 +| location_id | location_type_name | chw | supervisor | clinic | district | +|-------------|--------------------|--------|------------|--------|----------| +| 939fa8 | District | NULL | NULL | NULL | 939fa8 | +| c4cbef | Clinic | NULL | NULL | c4cbef | 939fa8 | +| a9ca40 | Supervisor | NULL | a9ca40 | c4cbef | 939fa8 | +| 4545b9 | CHW | 4545b9 | a9ca40 | c4cbef | 939fa8 | In order to join form or case data to 'commcare_users' and 'commcare_locations' the exported forms and cases need to contain a field identifying which user @@ -448,9 +448,9 @@ referred to be name using `Ref`, and utilized via `Apply`. 
List of builtin functions: -| Function | Description | Example Usage | -|--------------------------------|--------------------------------------------------------------------------------|----------------------------------| -| `+, -, *, //, /, >, <, >=, <=` | Standard Math | | +| Function | Description | Example Usage | +|--------------------------------|------------------------------------------------------------------------------|----------------------------------| +| `+, -, *, //, /, >, <, >=, <=` | Standard Math | | | len | Length | | | bool | Bool | | | str2bool | Convert string to boolean. True values are 'true', 't', '1' (case insensitive) | | @@ -462,7 +462,7 @@ List of builtin functions: | selected-at | Returns the Nth word in a string. N is zero-indexed. | selected-at(3) - return 4th word | | selected | Returns True if the given word is in the value. | selected(fever) | | count-selected | Count the number of words | | -| json2str | Convert a JSON object to a string | +| json2str | Convert a JSON object to a string | | | template | Render a string template (not robust) | template({} on {}, state, date) | | attachment_url | Convert an attachment name into it's download URL | | | form_url | Output the URL to the form view on CommCare HQ | | From 68880da35db6b365f694b141ab9b8f3d3d549b22 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Tue, 13 Feb 2024 23:59:12 +0000 Subject: [PATCH 224/257] Provide helpful message on row size too large --- commcare_export/env.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/commcare_export/env.py b/commcare_export/env.py index 14df587a..0a8be4e2 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -1,6 +1,8 @@ import hashlib import json +import logging import operator +import sys import uuid from typing import Any, Dict, Union, overload @@ -12,6 +14,8 @@ from jsonpath_ng import jsonpath from jsonpath_ng.parser import parse as parse_jsonpath +logger = logging.getLogger(__name__) + JSONPATH_CACHE = {} @@ -590,7 +594,20 @@ def lookup(self, key): def emit_table(self, table_spec): self.emitted = True table_spec.rows = self._unwrap_row_vals(table_spec.rows) - self.writer.write_table(table_spec) + try: + self.writer.write_table(table_spec) + except Exception as err: + if ( + not logger.isEnabledFor(logging.DEBUG) # not --verbose + and 'Row size too large' in str(err) + ): + logging.error( + 'Row size too large. You may be trying to export too many ' + 'columns. A maximum of 200 columns is suggested.' + ) + sys.exit(1) + else: + raise def has_emitted_tables(self): return self.emitted From e337ade4245fd1f925219fdf7881d6d200c597bc Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 00:18:15 +0000 Subject: [PATCH 225/257] "CommCare HQ" --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8cb47a9b..d51bb1d4 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Install CommCare Export via `pip` $ pip install commcare-export ``` -## CommCareHQ +## CommCare HQ 1. Sign up for [CommCare HQ](https://www.commcarehq.org/) if you have not already. 
From 18fec7573fdf6df17b1b4894237c3e707f6da8d6 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 00:30:27 +0000 Subject: [PATCH 226/257] Fix raw string --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 34539e2e..810aaa86 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ # Crash if the VERSION is not a simple version and it is going to register or upload if 'register' in sys.argv or 'upload' in sys.argv: version = commcare_export.version.stored_version() - if not version or not re.match('\d+\.\d+\.\d+', version): + if not version or not re.match(r'\d+\.\d+\.\d+', version): print('Version %s is not an appropriate version for publicizing!' % version) sys.exit(1) From c4b59bc6f43e53d5a4699f4ee4d81c37a1a78688 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 00:30:50 +0000 Subject: [PATCH 227/257] Install setuptools for Python 3.12 --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1ff137a2..0f15a541 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,8 +47,9 @@ jobs: with: python-version: ${{ matrix.python-version }} - run: sudo apt-get install pandoc - - run: python setup.py sdist - run: pip install --upgrade pip + - run: pip install setuptools + - run: python setup.py sdist - run: pip install dist/* - run: pip install pymysql psycopg2 pyodbc - run: pip install coverage coveralls From 9fdfbaa7c4923e202b9a5f2707c6e620d17f50b8 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 00:49:47 +0000 Subject: [PATCH 228/257] Update README.md for Python 3.12 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d51bb1d4..dfc1d090 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,7 @@ $ venv Install CommCare Export via `pip` ```shell +$ pip install setuptools $ pip install commcare-export ``` From 4f4bb301e686abcb2a2c29b07f44434862fe9b14 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 00:50:10 +0000 Subject: [PATCH 229/257] Update versions in setup.py --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 810aaa86..ae379a87 100644 --- a/setup.py +++ b/setup.py @@ -109,9 +109,11 @@ def run_tests(self): 'Intended Audience :: End Users/Desktop', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Topic :: Database', 'Topic :: Software Development :: Interpreters', 'Topic :: System :: Archiving', From 84fe33b0d467154716a954e9824f159bc9bd1b6b Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 12:37:14 +0000 Subject: [PATCH 230/257] Use "venv" directory for virtualenv --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index dfc1d090..7b6d10d5 100644 --- a/README.md +++ b/README.md @@ -42,23 +42,23 @@ More about virtualenvs on https://docs.python.org/3/tutorial/venv.html Setup a virtual environment using: ```shell -$ python3 -m venv .venv +$ python3 -m venv venv ``` Activate virtual environment by running: ```shell -source .venv/bin/activate +$ 
source venv/bin/activate ``` **Note**: virtualenv needs to be activated each time you start a new terminal session or command line prompt. For convenience, to avoid doing that, you can create an alias to activate virtual environments in -".venv" directory by adding the following to your +"venv" directory by adding the following to your `.bashrc` or `.zshrc` file: ```shell -alias venv='if [[ -d .venv ]] ; then source .venv/bin/activate ; fi' +$ alias venv='if [[ -d venv ]] ; then source venv/bin/activate ; fi' ``` Then you can activate virtual environments with simply typing @@ -530,8 +530,8 @@ Contributing ```shell $ git clone git@github.com:dimagi/commcare-export.git $ cd commcare-export -$ python3 -m venv .venv -$ source .venv/bin/activate +$ python3 -m venv venv +$ source venv/bin/activate $ pip install -e ".[test]" $ git checkout -b my-super-duper-feature ``` From 3974ae65d35437e250d5ec8141fb5d43db11ad83 Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 12:37:41 +0000 Subject: [PATCH 231/257] setuptools not required --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 7b6d10d5..1759e192 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,6 @@ $ venv Install CommCare Export via `pip` ```shell -$ pip install setuptools $ pip install commcare-export ``` From e78dddb531b16593434bb8c83a762892528ea9ef Mon Sep 17 00:00:00 2001 From: Norman Hooper Date: Wed, 14 Feb 2024 12:42:27 +0000 Subject: [PATCH 232/257] Fix column width --- README.md | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 1759e192..9000c93f 100644 --- a/README.md +++ b/README.md @@ -448,26 +448,26 @@ referred to be name using `Ref`, and utilized via `Apply`. List of builtin functions: -| Function | Description | Example Usage | -|--------------------------------|------------------------------------------------------------------------------|----------------------------------| -| `+, -, *, //, /, >, <, >=, <=` | Standard Math | | -| len | Length | | -| bool | Bool | | -| str2bool | Convert string to boolean. True values are 'true', 't', '1' (case insensitive) | | -| str2date | Convert string to date | | -| bool2int | Convert boolean to integer (0, 1) | | -| str2num | Parse string as a number | | -| format-uuid | Parse a hex UUID, and format it into hyphen-separated groups | | -| substr | Returns substring indexed by [first arg, second arg), zero-indexed. | substr(2, 5) of 'abcdef' = 'cde' | -| selected-at | Returns the Nth word in a string. N is zero-indexed. | selected-at(3) - return 4th word | -| selected | Returns True if the given word is in the value. | selected(fever) | -| count-selected | Count the number of words | | -| json2str | Convert a JSON object to a string | | -| template | Render a string template (not robust) | template({} on {}, state, date) | -| attachment_url | Convert an attachment name into it's download URL | | -| form_url | Output the URL to the form view on CommCare HQ | | -| case_url | Output the URL to the case view on CommCare HQ | | -| unique | Ouptut only unique values in a list | | +| Function | Description | Example Usage | +|--------------------------------|--------------------------------------------------------------------------------|----------------------------------| +| `+, -, *, //, /, >, <, >=, <=` | Standard Math | | +| len | Length | | +| bool | Bool | | +| str2bool | Convert string to boolean. 
True values are 'true', 't', '1' (case insensitive) | |
+| str2date | Convert string to date | |
+| bool2int | Convert boolean to integer (0, 1) | |
+| str2num | Parse string as a number | |
+| format-uuid | Parse a hex UUID, and format it into hyphen-separated groups | |
+| substr | Returns substring indexed by [first arg, second arg), zero-indexed. | substr(2, 5) of 'abcdef' = 'cde' |
+| selected-at | Returns the Nth word in a string. N is zero-indexed. | selected-at(3) - return 4th word |
+| selected | Returns True if the given word is in the value. | selected(fever) |
+| count-selected | Count the number of words | |
+| json2str | Convert a JSON object to a string | |
+| template | Render a string template (not robust) | template({} on {}, state, date) |
+| attachment_url | Convert an attachment name into its download URL | |
+| form_url | Output the URL to the form view on CommCare HQ | |
+| case_url | Output the URL to the case view on CommCare HQ | |
+| unique | Output only unique values in a list | |
 
 Output Formats
 --------------

From dff06513280b4e18f94d70ec939801b7e0b1012f Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Wed, 14 Feb 2024 16:24:39 +0000
Subject: [PATCH 233/257] Specify versions that are tested

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9000c93f..23a2f958 100644
--- a/README.md
+++ b/README.md
@@ -28,9 +28,10 @@ This tool is tested with Python versions from 3.8 to 3.12.
 ```shell
 $ python3 --version
 ```
-If Python is installed, its version will be listed.
+If Python is installed, its version will be shown.
 
-If Python isn't installed, [download and install](https://www.python.org/downloads/) the latest release.
+If Python isn't installed, [download and install](https://www.python.org/downloads/)
+a version of Python from 3.8 to 3.12.

From f7bc3f5369b74ec36c94fb162557cc959089bb4a Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Wed, 14 Feb 2024 16:25:29 +0000
Subject: [PATCH 234/257] Quote version numbers

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0f15a541..31f5aa40 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,7 +31,7 @@ jobs:
         - 5432:5432
     strategy:
       matrix:
-        python-version: [3.8, 3.9, '3.10', 3.11, 3.12]
+        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
     steps:
     - uses: actions/checkout@v3
       with:

From a2f0bf60e87297a5b20eab4b28351d4aafb1baf3 Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Wed, 14 Feb 2024 16:26:57 +0000
Subject: [PATCH 235/257] Drop comment

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 23a2f958..639110a8 100644
--- a/README.md
+++ b/README.md
@@ -363,7 +363,6 @@ print(json.dumps(query.to_jvalue(), indent=2))
 results = query.eval(BuiltInEnv() | CommCareHqEnv(api_client) | JsonPathEnv())
 
 if len(list(env.emitted_tables())) > 0:
-    # with Excel2007TableWriter("excel-output.xlsx") as writer:
     with StreamingMarkdownTableWriter(sys.stdout) as writer:
         for table in env.emitted_tables():
             writer.write_table(table)

From 531df4b78651321edaed0de243467a21fa2a8cb2 Mon Sep 17 00:00:00 2001
From: Norman Hooper
Date: Wed, 14 Feb 2024 16:37:59 +0000
Subject: [PATCH 236/257] Explain the error better, offer a guideline.
--- commcare_export/env.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/commcare_export/env.py b/commcare_export/env.py index 0a8be4e2..2b2a5223 100644 --- a/commcare_export/env.py +++ b/commcare_export/env.py @@ -602,8 +602,11 @@ def emit_table(self, table_spec): and 'Row size too large' in str(err) ): logging.error( - 'Row size too large. You may be trying to export too many ' - 'columns. A maximum of 200 columns is suggested.' + 'Row size too large. The amount of data required by rows ' + 'is more than this type of database table allows. One ' + 'way to resolve this error is to reduce the number of ' + 'columns that you are exporting. A general guideline is ' + 'not to exceed 200 columns.' ) sys.exit(1) else: From 1d63cf7f68852a14b1b79c30d37aafcb2beb6003 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 7 Mar 2024 13:28:07 +0200 Subject: [PATCH 237/257] Add pyinstaller .spec file --- commcare-export.spec | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 commcare-export.spec diff --git a/commcare-export.spec b/commcare-export.spec new file mode 100644 index 00000000..7aed69c9 --- /dev/null +++ b/commcare-export.spec @@ -0,0 +1,40 @@ +# -*- mode: python ; coding: utf-8 -*- + + +a = Analysis( + ['commcare_export/cli.py'], + pathex=[], + binaries=[], + datas=[ + ('./commcare_export', './commcare_export'), + ('./migrations', './migrations'), + ], + hiddenimports=[ + 'sqlalchemy.sql.default_comparator', + ], + hookspath=[], + runtime_hooks=[], + excludes=[], +) +pyz = PYZ(a.pure) + +exe = EXE( + pyz, + a.scripts, + a.binaries, + a.datas, + [], + name='commcare-export', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + upx_exclude=[], + runtime_tmpdir=None, + console=True, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, +) From eb641e4047fc7e88fa32ada675aeb4f123e02540 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 7 Mar 2024 13:28:59 +0200 Subject: [PATCH 238/257] Add build folder for compiling exe --- build_exe/README.md | 54 +++++++++++++++++++++++++ build_exe/linux/Dockerfile-py3-amd64 | 59 ++++++++++++++++++++++++++++ build_exe/linux/entrypoint-linux.sh | 14 +++++++ build_exe/requirements.txt | 8 ++++ 4 files changed, 135 insertions(+) create mode 100644 build_exe/README.md create mode 100644 build_exe/linux/Dockerfile-py3-amd64 create mode 100644 build_exe/linux/entrypoint-linux.sh create mode 100644 build_exe/requirements.txt diff --git a/build_exe/README.md b/build_exe/README.md new file mode 100644 index 00000000..afebee00 --- /dev/null +++ b/build_exe/README.md @@ -0,0 +1,54 @@ +# Compiling DET to running executable +This folder contains relevant files needed (dockerfiles and scripts) for compiling the DET into an executable file. +The file structure is segmented into the different operating systems the resultant executable will +be compatible on. + +(Currently only Linux is supported; Windows coming soon) + + +## How it works +In order to compile the DET script into a working executable we use [pyinstaller](https://github.com/pyinstaller/pyinstaller) in a containerized +environment. The dockerfile is an edited version from [cdrx/docker-pyinstaller](https://github.com/cdrx/docker-pyinstaller) +which is slightly modified to suit our use-case. 
+
+When a new release of the DET is published, a workflow is triggered which automatically compiles an executable from the latest
+code using the custom-built docker image, `dimagi/commcare-export-pyinstaller-linux`, then uploads it to the release as an asset.
+
+If you ever have to compile the executable yourself, the section below, *Compiling executable files locally*, explains how.
+
+
+Compiling executable files locally
+-----------------------------------
+The DET executable files are compiled using a tool called [pyinstaller](https://pyinstaller.org/en/stable/).
+Pyinstaller is very easy to use, but only works out-of-the-box for Linux as support for cross-compilation was
+dropped in earlier releases. Another tool, [wine](https://www.winehq.org/), can be used in conjunction with
+pyinstaller to compile the Windows exe files (not yet supported).
+
+Luckily, we live in a world where containerization is a thing. We use a docker container, `dimagi/commcare-export-pyinstaller-linux`
+(based on [docker-pyinstaller](https://github.com/cdrx/docker-pyinstaller)), which allows you to seamlessly compile the Linux binary, so we don't ever have to worry about installing any additional packages ourselves.
+
+To compile a new Linux binary, first make sure you have the docker image used to generate the executable:
+> docker pull dimagi/commcare-export-pyinstaller-linux:latest
+
+Now it's really as simple as running
+> docker run -v "$(pwd):/src/" dimagi/commcare-export-pyinstaller-linux
+
+Once you're done, the compiled file can be located at `./dist/linux/commcare-export`.
+
+The tool needs two files to make the process work:
+1. `commcare-export.spec`: this file is used by `pyinstaller` and is already defined and sits at the top of this project.
+It shouldn't be necessary for you to change any parameters in the file.
+2. `requirements.txt`: this file lists all the necessary packages needed for running commcare-export.
+
+
+## Updating the docker image
+Are you sure you need to update the image?
+
+Just checking...
+
+
+If you need to make any changes (for whatever reason) to the docker image, you can rebuild the image as follows:
+> docker build -f ./build_exe/linux/Dockerfile-py3-amd64 -t dimagi/commcare-export-pyinstaller-linux:latest .
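+
+Before pushing, you can confirm the rebuilt image is available locally (a quick sanity check; `docker image ls` accepts a repository name as a filter):
+> docker image ls dimagi/commcare-export-pyinstaller-linux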
+ +Now upload the new image to dockerhub (remember to log in to the account first!): +> docker image push dimagi/commcare-export-pyinstaller-linux:latest diff --git a/build_exe/linux/Dockerfile-py3-amd64 b/build_exe/linux/Dockerfile-py3-amd64 new file mode 100644 index 00000000..1c33b421 --- /dev/null +++ b/build_exe/linux/Dockerfile-py3-amd64 @@ -0,0 +1,59 @@ +FROM ubuntu:20.04 +SHELL ["/bin/bash", "-i", "-c"] + +ARG PYTHON_VERSION=3.9.18 +ARG PYINSTALLER_VERSION=6.4 + +ENV PYPI_URL=https://pypi.python.org/ +ENV PYPI_INDEX_URL=https://pypi.python.org/simple +ENV PYENV_VERSION=${PYTHON_VERSION} + +COPY ./build_exe/linux/entrypoint-linux.sh /entrypoint.sh + +RUN \ + set -x \ + # update system + && apt-get update \ + # install requirements + && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + curl \ + wget \ + git \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + libssl-dev \ + zlib1g-dev \ + libffi-dev \ + # required because openSSL on Ubuntu 12.04 and 14.04 run out of support versions of OpenSSL + && mkdir openssl \ + && cd openssl \ + # latest version, there won't be anything newer for this + && wget https://www.openssl.org/source/openssl-1.0.2u.tar.gz \ + && tar -xzvf openssl-1.0.2u.tar.gz \ + && cd openssl-1.0.2u \ + && ./config --prefix=$HOME/openssl --openssldir=$HOME/openssl shared zlib \ + && make \ + && make install \ + # install pyenv + && echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc \ + && echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc \ + && source ~/.bashrc \ + && curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash \ + && echo 'eval "$(pyenv init -)"' >> ~/.bashrc \ + && source ~/.bashrc \ + # install python + && PATH="$HOME/openssl:$PATH" CPPFLAGS="-O2 -I$HOME/openssl/include" CFLAGS="-I$HOME/openssl/include/" LDFLAGS="-L$HOME/openssl/lib -Wl,-rpath,$HOME/openssl/lib" LD_LIBRARY_PATH=$HOME/openssl/lib:$LD_LIBRARY_PATH LD_RUN_PATH="$HOME/openssl/lib" CONFIGURE_OPTS="--with-openssl=$HOME/openssl" PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install $PYTHON_VERSION \ + && pyenv global $PYTHON_VERSION \ + && pip install --upgrade pip \ + # install pyinstaller + && pip install pyinstaller==$PYINSTALLER_VERSION \ + && mkdir /src/ \ + && chmod +x /entrypoint.sh + +VOLUME /src/ +WORKDIR /src/ + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/build_exe/linux/entrypoint-linux.sh b/build_exe/linux/entrypoint-linux.sh new file mode 100644 index 00000000..ba02df50 --- /dev/null +++ b/build_exe/linux/entrypoint-linux.sh @@ -0,0 +1,14 @@ +#!/bin/bash -i + +# Fail on errors. +set -e + +# Make sure .bashrc is sourced +. /root/.bashrc + +cd /src + +pip install . 
+pip install -r build_exe/requirements.txt
+
+pyinstaller --clean -y --dist ./dist/linux --workpath /tmp *.spec
diff --git a/build_exe/requirements.txt b/build_exe/requirements.txt
new file mode 100644
index 00000000..5231b26e
--- /dev/null
+++ b/build_exe/requirements.txt
@@ -0,0 +1,8 @@
+# This file is only used by pyinstaller to create the executable DET instance
+chardet
+psycopg2-binary
+pymysql
+pyodbc
+urllib3==1.26.7
+xlwt
+openpyxl

From da90bef971d32d8dda0bda62ef091ad2c69fbd5c Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Thu, 7 Mar 2024 13:30:33 +0200
Subject: [PATCH 239/257] Add github workflow action

---
 .github/workflows/release_actions.yml | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 .github/workflows/release_actions.yml

diff --git a/.github/workflows/release_actions.yml b/.github/workflows/release_actions.yml
new file mode 100644
index 00000000..cf7f7f8a
--- /dev/null
+++ b/.github/workflows/release_actions.yml
@@ -0,0 +1,26 @@
+name: commcare-export release actions
+on:
+  release:
+    types: [published]
+
+jobs:
+  generate_release_assets:
+    name: Generate release assets
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Pull pyinstaller docker image
+        run: |
+          docker pull dimagi/commcare-export-pyinstaller-linux
+
+      - name: Compile linux binary
+        run: |
+          docker run -v "$(pwd):/src/" dimagi/commcare-export-pyinstaller-linux
+
+      - name: Upload release assets
+        uses: AButler/upload-release-assets@v3.0
+        with:
+          files: "./dist/linux/*"
+          repo-token: ${{ secrets.GITHUB_TOKEN }}

From 860b048892e16b48b284cdd02b83b03d553bfc92 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Thu, 7 Mar 2024 13:30:42 +0200
Subject: [PATCH 240/257] Update readme

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 639110a8..f12418fa 100644
--- a/README.md
+++ b/README.md
@@ -602,6 +602,12 @@ https://pypi.python.org/pypi/commcare-export
 
 https://github.com/dimagi/commcare-export/releases
 
+Once the release is published a GitHub workflow is kicked off that compiles an executable of the DET compatible with
+running on a Linux machine (Windows coming soon), adding it as a release asset.
+
+If you decide to download and use the executable file, please make sure the file has the executable permission enabled,
+after which it can be invoked like any other executable through the command line.
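+
+For example, on Linux (assuming the downloaded asset is named `commcare-export` and sits in the current directory):
+```shell
+$ chmod +x commcare-export
+$ ./commcare-export --version
+```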
+ Testing and Test Databases -------------------------- From 95d3e923ac399e92193406624da58c47b1d36a20 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 7 Mar 2024 13:33:20 +0200 Subject: [PATCH 241/257] Use sys.exit instead of exit --- commcare_export/cli.py | 6 +++--- commcare_export/utils_cli.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 8fba0904..fc8214a1 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -197,14 +197,14 @@ def main(argv): if args.version: print('commcare-export version {}'.format(__version__)) - exit(0) + sys.exit(0) if not args.project: print( 'commcare-export: error: argument --project is required', file=sys.stderr ) - exit(1) + sys.exit(1) if args.profile: # hotshot is gone in Python 3 @@ -214,7 +214,7 @@ def main(argv): profile.start() try: - exit(main_with_args(args)) + sys.exit(main_with_args(args)) finally: if args.profile: profile.close() diff --git a/commcare_export/utils_cli.py b/commcare_export/utils_cli.py index b5ef1da0..fc58e278 100644 --- a/commcare_export/utils_cli.py +++ b/commcare_export/utils_cli.py @@ -151,7 +151,7 @@ def main(argv): format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s' ) - exit(main_with_args(args)) + sys.exit(main_with_args(args)) def main_with_args(args): From 65b2bcb2d35ce92732deca649982ab72c3e1ba6d Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 7 Mar 2024 15:08:36 +0200 Subject: [PATCH 242/257] Add runtime hook to set bundled env variable --- build_exe/linux/entrypoint-linux.sh | 2 +- build_exe/runtime_hook.py | 4 ++++ commcare-export.spec | 2 +- commcare_export/version.py | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 build_exe/runtime_hook.py diff --git a/build_exe/linux/entrypoint-linux.sh b/build_exe/linux/entrypoint-linux.sh index ba02df50..5d781fb4 100644 --- a/build_exe/linux/entrypoint-linux.sh +++ b/build_exe/linux/entrypoint-linux.sh @@ -8,7 +8,7 @@ set -e cd /src -pip install . 
+pip install commcare-export pip install -r build_exe/requirements.txt pyinstaller --clean -y --dist ./dist/linux --workpath /tmp *.spec diff --git a/build_exe/runtime_hook.py b/build_exe/runtime_hook.py new file mode 100644 index 00000000..d226247a --- /dev/null +++ b/build_exe/runtime_hook.py @@ -0,0 +1,4 @@ +import os + +# This env variable is used to alter bundled behaviour +os.environ['DET_EXECUTABLE'] = '1' diff --git a/commcare-export.spec b/commcare-export.spec index 7aed69c9..428f7f40 100644 --- a/commcare-export.spec +++ b/commcare-export.spec @@ -13,7 +13,7 @@ a = Analysis( 'sqlalchemy.sql.default_comparator', ], hookspath=[], - runtime_hooks=[], + runtime_hooks=['build_exe/runtime_hook.py'], excludes=[], ) pyz = PYZ(a.pure) diff --git a/commcare_export/version.py b/commcare_export/version.py index b4f2a631..5f3c3362 100644 --- a/commcare_export/version.py +++ b/commcare_export/version.py @@ -17,6 +17,9 @@ def stored_version(): def git_version(): + if os.environ.get('DET_EXECUTABLE'): + return None + described_version_bytes = subprocess.Popen( ['git', 'describe'], stdout=subprocess.PIPE From 263c69e72a1eeae81346e156e68e50437c582495 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Fri, 8 Mar 2024 09:05:20 +0200 Subject: [PATCH 243/257] Add argument to test --- tests/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index e8ca69fd..cec979cf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -351,6 +351,7 @@ def _pull_data(writer, checkpoint_manager, query, since, until, batch_size=10): batch_size=batch_size, since=since, until=until, + no_logfile=True, ) # set this so that it gets written to the checkpoints From a75ec285f11c853c32fbc9b577733372fe8d0b30 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Tue, 12 Mar 2024 17:02:19 +0200 Subject: [PATCH 244/257] Add logger names --- commcare_export/__init__.py | 24 +++++++++++++++++++++-- commcare_export/checkpoint.py | 5 ++--- commcare_export/cli.py | 3 ++- commcare_export/commcare_hq_client.py | 4 +++- commcare_export/commcare_minilinq.py | 4 +++- commcare_export/location_info_provider.py | 4 +++- commcare_export/minilinq.py | 4 +++- commcare_export/writers.py | 3 ++- 8 files changed, 40 insertions(+), 11 deletions(-) diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index 7b4e5293..34a32c54 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -1,6 +1,9 @@ import logging +import os from .version import __version__ +repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) + class Logger: def __init__(self, logger, level): @@ -13,9 +16,26 @@ def write(self, buf): self.logger.log(self.level, line.rstrip()) +def logger_name_from_filepath(filepath): + relative_path = os.path.relpath(filepath, start=repo_root) + return ( + relative_path. + replace('/', '.'). 
+ replace('.py', '') + ) + + def get_error_logger(): return Logger(logging.getLogger(), logging.ERROR) -logger = logging.getLogger() -logger.setLevel(logging.DEBUG) +def get_logger(filepath=None): + if filepath: + logger = logging.getLogger( + logger_name_from_filepath(filepath) + ) + else: + logger = logging.getLogger() + + logger.setLevel(logging.DEBUG) + return logger diff --git a/commcare_export/checkpoint.py b/commcare_export/checkpoint.py index fa640986..f28e6266 100644 --- a/commcare_export/checkpoint.py +++ b/commcare_export/checkpoint.py @@ -12,10 +12,9 @@ from commcare_export.commcare_minilinq import PaginationMode from commcare_export.exceptions import DataExportException from commcare_export.writers import SqlMixin -from commcare_export import logger - -repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) +from commcare_export import get_logger, repo_root +logger = get_logger(__file__) Base = declarative_base() diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 53ee2552..1f5d6453 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -28,9 +28,10 @@ from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.utils import get_checkpoint_manager from commcare_export.version import __version__ -from commcare_export import logger, get_error_logger +from commcare_export import get_logger, get_error_logger EXIT_STATUS_ERROR = 1 +logger = get_logger(__file__) commcare_hq_aliases = { 'local': 'http://localhost:8000', diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 1b99f4c3..1bcdc0ab 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -20,7 +20,7 @@ import commcare_export from commcare_export.repeatable_iterator import RepeatableIterator -from commcare_export import logger +from commcare_export import get_logger AUTH_MODE_PASSWORD = 'password' AUTH_MODE_APIKEY = 'apikey' @@ -29,6 +29,8 @@ LATEST_KNOWN_VERSION = '0.5' RESOURCE_REPEAT_LIMIT = 10 +logger = get_logger(__file__) + def on_wait(details): time_to_wait = details["wait"] diff --git a/commcare_export/commcare_minilinq.py b/commcare_export/commcare_minilinq.py index 59a75922..b575e7f7 100644 --- a/commcare_export/commcare_minilinq.py +++ b/commcare_export/commcare_minilinq.py @@ -13,7 +13,9 @@ from commcare_export.env import CannotBind, CannotReplace, DictEnv from commcare_export.misc import unwrap -from commcare_export import logger +from commcare_export import get_logger + +logger = get_logger(__file__) SUPPORTED_RESOURCES = { 'form', diff --git a/commcare_export/location_info_provider.py b/commcare_export/location_info_provider.py index badedbff..e71f2b71 100644 --- a/commcare_export/location_info_provider.py +++ b/commcare_export/location_info_provider.py @@ -1,7 +1,9 @@ from commcare_export.commcare_minilinq import SimplePaginator from commcare_export.misc import unwrap_val -from commcare_export import logger +from commcare_export import get_logger + +logger = get_logger(__file__) # LocationInfoProvider uses the /location_type/ endpoint of the API to # retrieve location type data, stores that information in a dictionary diff --git a/commcare_export/minilinq.py b/commcare_export/minilinq.py index 1206aa4e..b0d476f3 100644 --- a/commcare_export/minilinq.py +++ b/commcare_export/minilinq.py @@ -6,7 +6,9 @@ from commcare_export.misc import unwrap, unwrap_val from commcare_export.repeatable_iterator import RepeatableIterator from commcare_export.specs import 
TableSpec -from commcare_export import logger +from commcare_export import get_logger + +logger = get_logger(__file__) class MiniLinq(object): diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 6fa056e7..bfeef62b 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -11,8 +11,9 @@ from alembic.operations import Operations from commcare_export.data_types import UnknownDataType, get_sqlalchemy_type from commcare_export.specs import TableSpec -from commcare_export import logger +from commcare_export import get_logger +logger = get_logger(__file__) MAX_COLUMN_SIZE = 2000 From af99b494b5bea41d595612ca3f9a98c25b51d119 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 21 Mar 2024 10:57:58 +0200 Subject: [PATCH 245/257] Add tests for logger_name_from_filepath --- commcare_export/__init__.py | 2 +- tests/test_commcare_export.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 tests/test_commcare_export.py diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index 34a32c54..b271a1a0 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -21,7 +21,7 @@ def logger_name_from_filepath(filepath): return ( relative_path. replace('/', '.'). - replace('.py', '') + strip('.py') ) diff --git a/tests/test_commcare_export.py b/tests/test_commcare_export.py new file mode 100644 index 00000000..fa05ca77 --- /dev/null +++ b/tests/test_commcare_export.py @@ -0,0 +1,29 @@ +import os +from commcare_export import logger_name_from_filepath, repo_root + + +class TestLoggerNameFromFilePath: + + @staticmethod + def _file_path(rel_path): + return os.path.join(repo_root, rel_path) + + def test_file_in_root(self): + path = self._file_path("file.py") + assert logger_name_from_filepath(path) == 'file' + + def test_file_in_subdirectory(self): + path = self._file_path("subdir/file.py") + assert logger_name_from_filepath(path) == 'subdir.file' + + def test_file_in_deeper_subdirectory(self): + path = self._file_path("subdir/another_sub/file.py") + assert logger_name_from_filepath(path) == 'subdir.another_sub.file' + + def test_file_contains_py(self): + path = self._file_path("subdir/pytest.py") + assert logger_name_from_filepath(path) == 'subdir.pytest' + + def test_file_dir_contains_periods(self): + path = self._file_path("sub.dir/pytest.py") + assert logger_name_from_filepath(path) == 'sub.dir.pytest' From 6af14f6e19848a9de8da6db36852df184e054062 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Wed, 27 Mar 2024 08:44:13 +0200 Subject: [PATCH 246/257] Use regex substitution and add one more test --- commcare_export/__init__.py | 8 +++----- tests/test_commcare_export.py | 4 ++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/commcare_export/__init__.py b/commcare_export/__init__.py index b271a1a0..cd015d74 100644 --- a/commcare_export/__init__.py +++ b/commcare_export/__init__.py @@ -1,5 +1,6 @@ import logging import os +import re from .version import __version__ repo_root = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir)) @@ -18,11 +19,8 @@ def write(self, buf): def logger_name_from_filepath(filepath): relative_path = os.path.relpath(filepath, start=repo_root) - return ( - relative_path. - replace('/', '.'). 
- strip('.py') - ) + cleaned_path = relative_path.replace('/', '.') + return re.sub(r'\.py$', '', cleaned_path) def get_error_logger(): diff --git a/tests/test_commcare_export.py b/tests/test_commcare_export.py index fa05ca77..cd367c82 100644 --- a/tests/test_commcare_export.py +++ b/tests/test_commcare_export.py @@ -27,3 +27,7 @@ def test_file_contains_py(self): def test_file_dir_contains_periods(self): path = self._file_path("sub.dir/pytest.py") assert logger_name_from_filepath(path) == 'sub.dir.pytest' + + def test_random_file_name(self): + path = self._file_path("pyppy.excel_query.py") + assert logger_name_from_filepath(path) == 'pyppy.excel_query' From 4cd33ebca05311868b8b27088424d894815687e0 Mon Sep 17 00:00:00 2001 From: mkangia Date: Tue, 2 Apr 2024 17:02:17 +0530 Subject: [PATCH 247/257] ask the user to ensure project access --- commcare_export/commcare_hq_client.py | 3 ++- tests/test_commcare_hq_client.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/commcare_export/commcare_hq_client.py b/commcare_export/commcare_hq_client.py index 1bcdc0ab..84cb83d8 100644 --- a/commcare_export/commcare_hq_client.py +++ b/commcare_export/commcare_hq_client.py @@ -164,7 +164,8 @@ def _get(resource, params=None): logger.error( f"#{e}. Please ensure that your CommCare HQ credentials are correct and auth-mode " f"is passed as 'apikey' if using API Key to authenticate. Also, verify that your " - f"account has the necessary permissions to use commcare-export." + f"account has access to the project and the necessary permissions to use " + f"commcare-export." ) else: logger.error(str(e)) diff --git a/tests/test_commcare_hq_client.py b/tests/test_commcare_hq_client.py index d98a591b..5e0de962 100644 --- a/tests/test_commcare_hq_client.py +++ b/tests/test_commcare_hq_client.py @@ -329,8 +329,8 @@ def test_get_with_forbidden_response_in_non_debug_mode(self, session_mock, logge logger_mock.error.assert_called_once_with( "#401 Client Error: None for url: None. " "Please ensure that your CommCare HQ credentials are correct and auth-mode is passed as 'apikey' " - "if using API Key to authenticate. Also, verify that your account has the necessary permissions " - "to use commcare-export.") + "if using API Key to authenticate. 
Also, verify that your account has access to the project " + "and the necessary permissions to use commcare-export.") @patch('commcare_export.commcare_hq_client.logger') @patch("commcare_export.commcare_hq_client.CommCareHqClient.session") From 551eaee2ff4986e925da4917925e3a8bd0b00c0f Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 4 Apr 2024 16:02:38 +0200 Subject: [PATCH 248/257] Create sample Linux script --- examples/scheduled_run_linux.sh | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 examples/scheduled_run_linux.sh diff --git a/examples/scheduled_run_linux.sh b/examples/scheduled_run_linux.sh new file mode 100644 index 00000000..6da6cc06 --- /dev/null +++ b/examples/scheduled_run_linux.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +commcare-export --output-format \ + --output \ + --query \ + --project \ + --commcare-hq https://commcarehq.org \ + --auth-mode apikey \ + --password \ From 8f15f0111c8b30c89cdf994c1503cfa24b26a9eb Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 4 Apr 2024 16:16:54 +0200 Subject: [PATCH 249/257] Add example bat file --- examples/scheduled_run_windows.bat | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 examples/scheduled_run_windows.bat diff --git a/examples/scheduled_run_windows.bat b/examples/scheduled_run_windows.bat new file mode 100644 index 00000000..4a35f580 --- /dev/null +++ b/examples/scheduled_run_windows.bat @@ -0,0 +1,7 @@ +commcare-export --output-format ^ + --output ^ + --query ^ + --project ^ + --commcare-hq https://commcarehq.org ^ + --auth-mode apikey ^ + --password ^ From 259f511fefc41a6dd7d87379c8158bf02e88faaa Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 4 Apr 2024 16:17:59 +0200 Subject: [PATCH 250/257] Remove multi-line symbol from last line --- examples/scheduled_run_linux.sh | 2 +- examples/scheduled_run_windows.bat | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/scheduled_run_linux.sh b/examples/scheduled_run_linux.sh index 6da6cc06..b485f0fc 100644 --- a/examples/scheduled_run_linux.sh +++ b/examples/scheduled_run_linux.sh @@ -6,4 +6,4 @@ commcare-export --output-format \ --project \ --commcare-hq https://commcarehq.org \ --auth-mode apikey \ - --password \ + --password diff --git a/examples/scheduled_run_windows.bat b/examples/scheduled_run_windows.bat index 4a35f580..2ed1c2b5 100644 --- a/examples/scheduled_run_windows.bat +++ b/examples/scheduled_run_windows.bat @@ -4,4 +4,4 @@ commcare-export --output-format ^ --project ^ --commcare-hq https://commcarehq.org ^ --auth-mode apikey ^ - --password ^ + --password From a4fcf3fb19021c0a4cb33d58dd44a9df346c4d7a Mon Sep 17 00:00:00 2001 From: mkangia Date: Thu, 4 Apr 2024 00:19:48 +0530 Subject: [PATCH 251/257] validate output file extension for output-format --- commcare_export/cli.py | 23 ++++++++++++++ tests/test_cli.py | 70 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/commcare_export/cli.py b/commcare_export/cli.py index 954bec79..e3a7b1af 100644 --- a/commcare_export/cli.py +++ b/commcare_export/cli.py @@ -186,6 +186,12 @@ def main(argv): args = parser.parse_args(argv) + if args.output_format and args.output: + errors = [] + errors.extend(validate_output_filename(args.output_format, args.output)) + if errors: + raise Exception(f"Could not proceed. 
Following issues were found: {', '.join(errors)}.") + if not args.no_logfile: exe_dir = os.path.dirname(sys.executable) log_file = os.path.join(exe_dir, "commcare_export.log") @@ -254,6 +260,23 @@ def main(argv): stats.print_stats(100) +def validate_output_filename(output_format, output_filename): + """ + Validate file extensions for csv, xls and xlsx output formats. + Ensure extension unless using sql output_format. + """ + errors = [] + if output_format == 'csv' and not output_filename.endswith('.zip'): + errors.append("For output format as csv, output file name should have extension zip") + elif output_format == 'xls' and not output_filename.endswith('.xls'): + errors.append("For output format as xls, output file name should have extension xls") + elif output_format == 'xlsx' and not output_filename.endswith('.xlsx'): + errors.append("For output format as xlsx, output file name should have extension xlsx") + elif output_format != 'sql' and "." not in output_filename: + errors.append("Missing extension in output file name") + return errors + + def _get_query(args, writer, column_enforcer=None): return _get_query_from_file( args.query, diff --git a/tests/test_cli.py b/tests/test_cli.py index cec979cf..01be64bf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -16,7 +16,7 @@ CheckpointManager, session_scope, ) -from commcare_export.cli import CLI_ARGS, main_with_args +from commcare_export.cli import CLI_ARGS, main_with_args, validate_output_filename from commcare_export.commcare_hq_client import ( CommCareHqClient, MockCommCareHqClient, @@ -911,3 +911,71 @@ def _check_checkpoint( assert checkpoint.pagination_mode == pagination_mode assert checkpoint.since_param == since_param assert checkpoint.last_doc_id == doc_id + + +class TestValidateOutputFilename(unittest.TestCase): + def _test_file_extension(self, output_format, expected_extension): + error_message = (f"For output format as {output_format}, " + f"output file name should have extension {expected_extension}") + + errors = validate_output_filename( + output_format=output_format, + output_filename=f'correct_file_extension.{expected_extension}' + ) + self.assertEqual(len(errors), 0) + + errors = validate_output_filename( + output_format=output_format, + output_filename=f'incorrect_file_extension.abc' + ) + self.assertListEqual( + errors, + [error_message] + ) + + # incorrectly using sql output with non sql formats + errors = validate_output_filename( + output_format=output_format, + output_filename='postgresql+psycopg2://scott:tiger@localhost/mydatabase' + ) + self.assertListEqual( + errors, + [error_message] + ) + + def test_for_csv_output(self): + self._test_file_extension(output_format='csv', expected_extension='zip') + + def test_for_xls_output(self): + self._test_file_extension(output_format='xls', expected_extension='xls') + + def test_for_xlsx_output(self): + self._test_file_extension(output_format='xlsx', expected_extension='xlsx') + + def test_for_other_non_sql_output(self): + error_message = "Missing extension in output file name" + + errors = validate_output_filename( + output_format='non_sql', + output_filename='correct_file.abc' + ) + self.assertEqual(len(errors), 0) + + errors = validate_output_filename( + output_format='non_sql', + output_filename='filename_without_extensionxls' + ) + self.assertListEqual( + errors, + [error_message] + ) + + # incorrectly using sql output with non sql output formats + errors = validate_output_filename( + output_format='non_sql', + 
            output_filename='postgresql+psycopg2://scott:tiger@localhost/mydatabase'
        )
        self.assertListEqual(
            errors,
            [error_message]
        )

From 4a6f2fa900f2438682049c9e3f6b6b20633365db Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Fri, 5 Apr 2024 09:16:05 +0200
Subject: [PATCH 252/257] Update README with scheduled runs

---
 README.md | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/README.md b/README.md
index f12418fa..19a0fd89 100644
--- a/README.md
+++ b/README.md
@@ -284,6 +284,49 @@ you will change the columns of the 'commcare_locations' table and it is very
 likely you will want to drop the table before exporting with the new
 organization.
 
+Scheduling the DET
+------------------
+Scheduling the DET to run at regular intervals is a useful tactic to keep your
+database up to date with CommCare HQ.
+
+A common approach to scheduling DET runs is making use of the operating system's scheduling
+tools to invoke a script that executes the `commcare-export` command. Sample scripts can be
+found in the `examples/` directory for both Windows and Linux.
+
+### Windows
+On Windows systems you can make use of the [task scheduler](https://sqlbackupandftp.com/blog/how-to-schedule-a-script-via-windows-task-scheduler/)
+to run scheduled scripts for you.
+
+The `examples/` directory contains a sample script file, `scheduled_run_windows.bat`, which can be used by the
+task scheduler to invoke the `commcare-export` command.
+
+To set up the scheduled task you can follow the steps below.
+1. Copy the file `scheduled_run_windows.bat` to any desired location on your system (e.g. `Documents`)
+2. Edit the copied `.bat` file and populate your own details
+3. Follow the steps outlined [here](https://sqlbackupandftp.com/blog/how-to-schedule-a-script-via-windows-task-scheduler/),
+using the .bat file when prompted for the `Program/script`.
+
+
+### Linux
+On a Linux system you can make use of the [crontab](https://www.techtarget.com/searchdatacenter/definition/crontab)
+command to create scheduled actions (cron jobs) in the system.
+
+The `examples/` directory contains a sample script file, `scheduled_run_linux.sh`, which can be used by the cron job.
+To set up the cron job you can follow the steps below.
+1. Copy the example file to the home directory
+> cp ./examples/scheduled_run_linux.sh ~/scheduled_run_linux.sh
+2. Edit the file to populate your own details
+> nano ~/scheduled_run_linux.sh
+3. Create a cron job by appending to the crontab file
+> crontab -e
+
+Make an entry below any existing cron jobs. The example below executes the script file at 12:00 (noon)
+every day:
+> 0 12 * * * bash ~/scheduled_run_linux.sh
+
+You can consult the [crontab.guru](https://crontab.guru/) tool, which is very useful for generating and interpreting
+any custom cron schedules.
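+
+To also keep a record of each scheduled run, you can redirect the script's output to a log file (the log path below is just an example):
+> 0 12 * * * bash ~/scheduled_run_linux.sh >> ~/det_run.log 2>&1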
+ Python Library Usage -------------------- From 1803e9536fb2f53112472604737d6d8ff51b26a5 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 11 Apr 2024 12:03:10 +0200 Subject: [PATCH 253/257] Generate windows exe --- .github/workflows/release_actions.yml | 28 +++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release_actions.yml b/.github/workflows/release_actions.yml index cf7f7f8a..b1951970 100644 --- a/.github/workflows/release_actions.yml +++ b/.github/workflows/release_actions.yml @@ -4,8 +4,8 @@ on: types: [published] jobs: - generate_release_assets: - name: Generate release assets + generate_linux_bin: + name: Generate Linux binary as release asset runs-on: ubuntu-22.04 steps: - name: Checkout repository @@ -24,3 +24,27 @@ jobs: with: files: "./dist/linux/*" repo-token: ${{ secrets.GITHUB_TOKEN }} + + generate_windows_exe: + name: Generate Windows exe as release asset + runs-on: windows-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Install pyinstaller + shell: pwsh + run: python -m pip install pyinstaller + + - name: Generate exe + shell: pwsh + run: | + pip install commcare-export + pip install -r build_exe/requirements.txt + pyinstaller --dist ./dist/windows commcare-export.spec + + - name: Upload release assets + uses: AButler/upload-release-assets@v3.0 + with: + files: "./dist/windows/*" + repo-token: ${{ secrets.GITHUB_TOKEN }} From fc2a7d19792150488aa4a1c5d371d5eade2aba84 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 11 Apr 2024 12:05:57 +0200 Subject: [PATCH 254/257] Don't use docker image for pyinstaller --- .github/workflows/release_actions.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release_actions.yml b/.github/workflows/release_actions.yml index b1951970..d4fee0bd 100644 --- a/.github/workflows/release_actions.yml +++ b/.github/workflows/release_actions.yml @@ -11,13 +11,16 @@ jobs: - name: Checkout repository uses: actions/checkout@v2 - - name: Pull pyinstaller docker image - run: | - docker pull dimagi/commcare-export-pyinstaller-linux + - name: Install pyinstaller + shell: bash + run: python -m pip install pyinstaller - - name: Compile linux binary + - name: Generate exe + shell: bash run: | - docker run -v "$(pwd):/src/" dimagi/commcare-export-pyinstaller-linux + pip install commcare-export + pip install -r build_exe/requirements.txt + pyinstaller --dist ./dist/linux commcare-export.spec - name: Upload release assets uses: AButler/upload-release-assets@v3.0 From bb49f901c14da97a3159d4abaabcf608266c3662 Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 11 Apr 2024 12:13:17 +0200 Subject: [PATCH 255/257] Remove docker-related files and update README --- build_exe/README.md | 69 +++++++--------------------- build_exe/linux/Dockerfile-py3-amd64 | 59 ------------------------ build_exe/linux/entrypoint-linux.sh | 14 ------ 3 files changed, 16 insertions(+), 126 deletions(-) delete mode 100644 build_exe/linux/Dockerfile-py3-amd64 delete mode 100644 build_exe/linux/entrypoint-linux.sh diff --git a/build_exe/README.md b/build_exe/README.md index afebee00..3cbbcbb8 100644 --- a/build_exe/README.md +++ b/build_exe/README.md @@ -1,54 +1,17 @@ # Compiling DET to running executable -This folder contains relevant files needed (dockerfiles and scripts) for compiling the DET into an executable file. -The file structure is segmented into the different operating systems the resultant executable will -be compatible on. 
- -(Currently only Linux is supported; Windows coming soon) - - -## How it works -In order to compile the DET script into a working executable we use [pyinstaller](https://github.com/pyinstaller/pyinstaller) in a containerized -environment. The dockerfile is an edited version from [cdrx/docker-pyinstaller](https://github.com/cdrx/docker-pyinstaller) -which is slightly modified to suit our use-case. - -When a new release of the DET is published, a workflow is triggered which automatically compiles an executable from the latest -code using the custom built docker image, `dimagi/commcare-export-pyinstaller-linux`, then uploads it to the release as an asset. - -If you ever have to compile the executable yourself you can follow the section below, *Compiling executable files locally*, on how to compile an executable locally. - - -Compiling executable files locally ------------------------------------ -The DET executable files are compiled using a tool called [pyinstaller](https://pyinstaller.org/en/stable/). -Pyinstaller is very easy to use, but only works out-of-the-box for Linux as support for cross-compilation was -dropped in earlier releases. Another tool, [wine](https://www.winehq.org/), can be used in conjuction with -pyinstaller to compile the Windows exe files (not yet supported). - -Luckily in the world we live containerization is a thing. We use a docker container, `dimagi/commcare-export-pyinstaller-linux` -(based on [docker-pyinstaller](https://github.com/cdrx/docker-pyinstaller)), which allows you to seamlessly compile the Linux binary, so we don't ever have to worry about installing any additional packages ourselves. - -To compile a new linux binary, first make sure you have the docker image used to generate the executable: -> docker pull dimagi/commcare-export-pyinstaller-linux:latest - -Now it's really as simple as running -> docker run -v "$(pwd):/src/" dimagi/commcare-export-pyinstaller-linux - -Once you're done, the compiled file can be located at `./dist/linux/commcare-export`. - -The tool needs two files to make the process work: -1. `commcare-export.spec`: this file is used by `pyinstaller` and is already defined and sits at the top of this project. -It shouldn't be necessary for you to change any parameters in the file. -2. `requirements.txt`: this file lists all the necessary packages needed for running commcare-export. - - -## Updating the docker image -Are you sure you need to update the image? - -Just checking... - - -If it's needed to make any changes (for whatever reason) to the docker image you can rebuild the image as follows: -> docker build -f ./build_exe/linux/Dockerfile-py3-amd64 -t dimagi/commcare-export-pyinstaller-linux:latest . - -Now upload the new image to dockerhub (remember to log in to the account first!): -> docker image push dimagi/commcare-export-pyinstaller-linux:latest +This folder contains relevant files needed for compiling the DET into an executable file. +The executable is generated on after every release of the DET and the resultant files are uploaded +to the release as assets. + +## Testing locally +In the event that you want to test the exe compilation locally you can simply run +> pip install -r build_exe/requirements.txt + +Now create the executable (if you're running this on a Linux machine you can only compile the binary for this type of +OS) +> pyinstaller --dist ./dist/linux commcare-export.spec + +The resultant executable file can be located under `./dist/linux/`. + +Note that the argument `commcare-export.spec`. 
This is a simple configuration file used by +pyinstaller which you ideally shouldn't have to ever change. \ No newline at end of file diff --git a/build_exe/linux/Dockerfile-py3-amd64 b/build_exe/linux/Dockerfile-py3-amd64 deleted file mode 100644 index 1c33b421..00000000 --- a/build_exe/linux/Dockerfile-py3-amd64 +++ /dev/null @@ -1,59 +0,0 @@ -FROM ubuntu:20.04 -SHELL ["/bin/bash", "-i", "-c"] - -ARG PYTHON_VERSION=3.9.18 -ARG PYINSTALLER_VERSION=6.4 - -ENV PYPI_URL=https://pypi.python.org/ -ENV PYPI_INDEX_URL=https://pypi.python.org/simple -ENV PYENV_VERSION=${PYTHON_VERSION} - -COPY ./build_exe/linux/entrypoint-linux.sh /entrypoint.sh - -RUN \ - set -x \ - # update system - && apt-get update \ - # install requirements - && apt-get install -y --no-install-recommends \ - build-essential \ - ca-certificates \ - curl \ - wget \ - git \ - libbz2-dev \ - libreadline-dev \ - libsqlite3-dev \ - libssl-dev \ - zlib1g-dev \ - libffi-dev \ - # required because openSSL on Ubuntu 12.04 and 14.04 run out of support versions of OpenSSL - && mkdir openssl \ - && cd openssl \ - # latest version, there won't be anything newer for this - && wget https://www.openssl.org/source/openssl-1.0.2u.tar.gz \ - && tar -xzvf openssl-1.0.2u.tar.gz \ - && cd openssl-1.0.2u \ - && ./config --prefix=$HOME/openssl --openssldir=$HOME/openssl shared zlib \ - && make \ - && make install \ - # install pyenv - && echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc \ - && echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc \ - && source ~/.bashrc \ - && curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash \ - && echo 'eval "$(pyenv init -)"' >> ~/.bashrc \ - && source ~/.bashrc \ - # install python - && PATH="$HOME/openssl:$PATH" CPPFLAGS="-O2 -I$HOME/openssl/include" CFLAGS="-I$HOME/openssl/include/" LDFLAGS="-L$HOME/openssl/lib -Wl,-rpath,$HOME/openssl/lib" LD_LIBRARY_PATH=$HOME/openssl/lib:$LD_LIBRARY_PATH LD_RUN_PATH="$HOME/openssl/lib" CONFIGURE_OPTS="--with-openssl=$HOME/openssl" PYTHON_CONFIGURE_OPTS="--enable-shared" pyenv install $PYTHON_VERSION \ - && pyenv global $PYTHON_VERSION \ - && pip install --upgrade pip \ - # install pyinstaller - && pip install pyinstaller==$PYINSTALLER_VERSION \ - && mkdir /src/ \ - && chmod +x /entrypoint.sh - -VOLUME /src/ -WORKDIR /src/ - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/build_exe/linux/entrypoint-linux.sh b/build_exe/linux/entrypoint-linux.sh deleted file mode 100644 index 5d781fb4..00000000 --- a/build_exe/linux/entrypoint-linux.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -i - -# Fail on errors. -set -e - -# Make sure .bashrc is sourced -. /root/.bashrc - -cd /src - -pip install commcare-export -pip install -r build_exe/requirements.txt - -pyinstaller --clean -y --dist ./dist/linux --workpath /tmp *.spec From 2c936d449ff61c4ee4b2bc68674ae55ba1819eeb Mon Sep 17 00:00:00 2001 From: Charl Smit Date: Thu, 11 Apr 2024 13:36:47 +0200 Subject: [PATCH 256/257] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 19a0fd89..b9ace252 100644 --- a/README.md +++ b/README.md @@ -645,10 +645,10 @@ https://pypi.python.org/pypi/commcare-export https://github.com/dimagi/commcare-export/releases -Once the release is published a GitHub workflow is kicked off that compiles an executable of the DET compatible with -running on a Linux machine (Windows coming soon), adding it as a release asset. 
+Once the release is published a GitHub workflow is kicked off that compiles executables of the DET compatible with
+Linux and Windows machines, adding them to the release as assets.
 
-If you decide to download and use the executable file, please make sure the file has the executable permission enabled,
+[For Linux-based users] If you decide to download and use the executable file, please make sure the file has the executable permission enabled,
 after which it can be invoked like any other executable through the command line.

From 6fcbfddd1a61272ae5cb8f27fdd5b4e041e92e36 Mon Sep 17 00:00:00 2001
From: Charl Smit
Date: Thu, 11 Apr 2024 13:43:08 +0200
Subject: [PATCH 257/257] Fix README

---
 build_exe/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/build_exe/README.md b/build_exe/README.md
index 3cbbcbb8..28997f34 100644
--- a/build_exe/README.md
+++ b/build_exe/README.md
@@ -4,14 +4,15 @@
 to the release as assets.
 
 ## Testing locally
-In the event that you want to test the exe compilation locally you can simply run
-> pip install -r build_exe/requirements.txt
+In the event that you want to test the exe compilation locally you can follow the steps below:
+
+Install `pyinstaller`:
+> python -m pip install pyinstaller
+
+Now create the executable (assuming you're running this on a Linux machine):
 > pyinstaller --dist ./dist/linux commcare-export.spec
 
 The resultant executable file can be located under `./dist/linux/`.
 
-Note that the argument `commcare-export.spec`. This is a simple configuration file used by
+The argument, `commcare-export.spec`, is a simple configuration file used by
 pyinstaller which you ideally shouldn't have to ever change.
\ No newline at end of file