Skip to content

Commit

Permalink
Merge pull request #180 from dimagi/cz/sms-error
Browse files Browse the repository at this point in the history
workaround for incorrect "export is stuck" message when no total count available
  • Loading branch information
czue authored Mar 15, 2021
2 parents a3fbb7e + c6ee572 commit 888f144
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
17 changes: 14 additions & 3 deletions commcare_export/commcare_hq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def iterate(self, resource, paginator, params=None, checkpoint_manager=None):
Assumes the endpoint is a list endpoint, and iterates over it
making a lot of assumptions that it is like a tastypie endpoint.
"""
UNKNOWN_COUNT = 'unknown'
params = dict(params or {})
def iterate_resource(resource=resource, params=params):
more_to_fetch = True
Expand All @@ -140,19 +141,20 @@ def iterate_resource(resource=resource, params=params):

batch = self.get(resource, params)
last_params = copy.copy(params)
if not total_count or total_count == 'unknown' or fetched >= total_count:
total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else 'unknown'
if not total_count or total_count == UNKNOWN_COUNT or fetched >= total_count:
total_count = int(batch['meta']['total_count']) if batch['meta']['total_count'] else UNKNOWN_COUNT
fetched = 0

fetched += len(batch['objects'])
logger.debug('Received %s of %s', fetched, total_count)

if not batch['objects']:
more_to_fetch = False
else:
got_new_data = False
for obj in batch['objects']:
if obj['id'] not in last_batch_ids:
yield obj
got_new_data = True

if batch['meta']['next']:
last_batch_ids = {obj['id'] for obj in batch['objects']}
Expand All @@ -162,6 +164,15 @@ def iterate_resource(resource=resource, params=params):
else:
more_to_fetch = False

limit = batch['meta'].get('limit')
if more_to_fetch:
repeated_last_page_of_non_counting_resource = (
not got_new_data
and total_count == UNKNOWN_COUNT
and (limit and len(batch['objects']) < limit)
)
more_to_fetch = not repeated_last_page_of_non_counting_resource

self.checkpoint(checkpoint_manager, paginator, batch, not more_to_fetch)

return RepeatableIterator(iterate_resource)
Expand Down
27 changes: 27 additions & 0 deletions tests/test_commcare_hq_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,30 @@ def _get_results(self, params):
}


class FakMessageLogSession(FakeSession):
    # Fake session imitating the message-log endpoint. That endpoint is served
    # by a non-counting tastypie paginator which cannot tell when it has
    # "finished", so the final batch comes back with the same results in a loop.
    # The client is expected to treat that as a successful end of data when:
    #   - total_count is absent,
    #   - fewer rows than the limit were returned, and
    #   - the batch contents are the same as before.
    def _get_results(self, params):
        first_row = {'id': 1, 'foo': 1, 'date': '2017-01-01T15:36:22Z'}
        second_row = {'id': 2, 'foo': 2, 'date': '2017-01-01T15:37:22Z'}

        if params:
            # Any follow-up request must resume from the date of the last row
            # handed out by the initial (parameterless) request.
            start_key = DATE_PARAMS['date'].start_param
            assert params[start_key] == '2017-01-01T15:37:22'
            # Short page (1 row < limit of 2) that still advertises a next page.
            next_link, rows = '?offset=1', [second_row]
        else:
            # Initial request: a full page of two rows.
            next_link, rows = '?offset=2', [first_row, second_row]

        return {
            'meta': {'next': next_link, 'offset': 0, 'limit': 2, 'total_count': None},
            'objects': rows,
        }


class FakeDateFormSession(FakeSession):
def _get_results(self, params):
since1 = '2017-01-01T15:36:22'
Expand Down Expand Up @@ -129,6 +153,9 @@ def test_repeat_limit(self):
match="Requested resource '/fake/uri' 10 times with same parameters"):
self._test_iterate(FakeRepeatedDateCaseSession(), get_paginator('case', 2), 2, [1, 2])

def test_message_log(self):
    # The message-log fake repeats its short last page with no total_count;
    # iteration is expected to stop and produce ids 1 and 2.
    session = FakMessageLogSession()
    paginator = get_paginator('messaging-event', 2)
    self._test_iterate(session, paginator, 2, [1, 2])


class TestDatePaginator(unittest.TestCase):

Expand Down

0 comments on commit 888f144

Please sign in to comment.