Skip to content

Commit

Permalink
HDX-10398 export data completeness and rename "completness"
Browse files Browse the repository at this point in the history
  • Loading branch information
danmihaila committed Dec 4, 2024
1 parent bee7f57 commit 3068b36
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 44 deletions.
62 changes: 36 additions & 26 deletions ckanext-hdx_org_group/ckanext/hdx_org_group/actions/get.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
'''
"""
Created on April 24, 2015
@author: alexandru-m-g
'''
"""
import json
import logging

import ckan.lib.dictization as d
import ckan.lib.helpers as helpers
import ckan.lib.navl.dictization_functions
import ckan.logic as logic
import ckan.model as model
import ckan.plugins.toolkit as tk

import ckanext.hdx_org_group.dao.indicator_access as indicator_access
import ckanext.hdx_org_group.dao.widget_data_service as widget_data_service
import ckanext.hdx_org_group.helpers.country_helper as country_helper
import ckanext.hdx_org_group.helpers.organization_helper as org_helper
from ckan.common import c
from ckanext.hdx_theme.helpers.caching import cached_make_rest_api_request as cached_make_rest_api_request
Expand All @@ -36,7 +35,7 @@

@side_effect_free
def hdx_datasets_for_group(context, data_dict):
'''
"""
Returns a paginated list of datasets for a group with 25 items per page.
Options for sorting are: metadata_modified desc, title_case_insensitive desc, title_case_insensitive asc,
views_recent desc, score desc ( only useful if query string is specified, should be combined
Expand All @@ -52,11 +51,11 @@ def hdx_datasets_for_group(context, data_dict):
:param type: 'all', 'indicators', 'datasets'. Defaults to 'all'
:type q: string
:return:
'''
"""

skipped_keys = ['q', 'id', 'sort', 'type', 'page']

id = _get_or_bust(data_dict, "id")
id = _get_or_bust(data_dict, 'id')

limit = 25

Expand All @@ -74,31 +73,31 @@ def hdx_datasets_for_group(context, data_dict):
new_data_dict['ext_indicator'] = u'0'

search_param_list = [
key + ":" + value for key, value in data_dict.iteritems() if key not in skipped_keys]
key + ':' + value for key, value in data_dict.iteritems() if key not in skipped_keys]
search_param_list.append(u'groups:{}'.format(id))

if search_param_list != None:
new_data_dict['fq'] = " ".join(
new_data_dict['fq'] = ' '.join(
search_param_list) + ' +dataset_type:dataset'

if data_dict.get('q', None):
new_data_dict['q'] = data_dict['q']

query = get_action("package_search")(context, new_data_dict)
query = get_action('package_search')(context, new_data_dict)

return query


@side_effect_free
def hdx_topline_num_for_group(context, data_dict):
'''
"""
:param id: the id of the group for which top line numbers are requested
:type id: string
:return: a dict of top line numbers. Please note that depending on the selected group the source
of the data ( either the datastore or CPS/indicators ) might be different. The data will have some fields
that are specific to the source.
'''
id = _get_or_bust(data_dict, "id")
"""
id = _get_or_bust(data_dict, 'id')
grp_result = get_group(id)
group_info = grp_result.get('group_info')
# custom_dict = grp_result.get('custom_dict')
Expand All @@ -124,14 +123,14 @@ def hdx_topline_num_for_group(context, data_dict):


def __get_toplines_for_active_country(group_info, common_format):
'''
"""
:param group_info:
:type group_info: dict
:param common_format:
:type common_format: bool
:return:
:rtype: list
'''
"""

# source is rw
top_line_data_list = widget_data_service.build_widget_data_access(group_info).get_dataset_results()
Expand Down Expand Up @@ -162,14 +161,14 @@ def _parse_integer_value(item):


def __get_toplines_for_standard_country(group_info, common_format):
'''
"""
:param group_info:
:type group_info: dict
:param common_format:
:type common_format: bool
:return:
:rtype: list
'''
"""
# source is configured in 'hdx.locations.toplines_url'
# ckan_site_url = config.get('ckan.site_url')
raw_top_line_items = widget_data_service.build_widget_data_access(group_info).get_dataset_results()
Expand Down Expand Up @@ -197,13 +196,13 @@ def __get_toplines_for_standard_country(group_info, common_format):

@side_effect_free
def hdx_light_group_show(context, data_dict):
'''
"""
Return a lightweight ( less resource intensive,faster but without datasets ) version of the group details
:param id: the id of the group for which top line numbers are requested
:type id: string
'''
"""

id = _get_or_bust(data_dict, "id")
id = _get_or_bust(data_dict, 'id')
group_dict = {}
group = model.Group.get(id)
if not group:
Expand All @@ -226,13 +225,13 @@ def hdx_light_group_show(context, data_dict):
dictized = d.table_dictize(extra, context)
if not extra.state == 'active':
continue
value = dictized["value"]
value = dictized['value']
result_list.append(dictized)

# Keeping the above for backwards compatibility
group_dict[name] = dictized["value"]
group_dict[name] = dictized['value']

group_dict['extras'] = sorted(result_list, key=lambda x: x["key"])
group_dict['extras'] = sorted(result_list, key=lambda x: x['key'])
return group_dict


Expand Down Expand Up @@ -283,20 +282,20 @@ def hdx_get_locations_info_from_rw(context, data_dict):
return cached_make_rest_api_request(url)
return None
except:
log.error("RW file was not found or can not be accessed")
log.error('RW file was not found or can not be accessed')
return None


@side_effect_free
def hdx_organization_follower_list(context, data_dict):
'''Return the list of users that are following the given organization.
"""Return the list of users that are following the given organization.
:param id: the id or name of the organization
:type id: string
:rtype: list of dictionaries
'''
"""
_check_access('hdx_organization_follower_list', context, data_dict)
context['keep_email'] = True
return _follower_list(
Expand Down Expand Up @@ -336,3 +335,14 @@ def _user_list_dictize(obj_list, context,
# user_dict.pop('email', None)
result_list.append(user_dict)
return sorted(result_list, key=sort_key, reverse=reverse)

@side_effect_free
def hdx_datagrid_show(context, data_dict):
    """
    Return the data-completeness ("datagrid") information for a group,
    with legacy "good/not good" labels replaced by the public
    "available/outdated" vocabulary.

    :param id: the id or name of the group
    :type id: string
    :return: the transformed data-completeness dict
    :rtype: dict
    :raises NotFound: if no group id could be resolved
    """
    group_id = _get_or_bust(data_dict, 'id')  # avoid shadowing builtin `id`
    if not group_id:
        raise NotFound('Group was not found.')
    grp_dict = get_action('hdx_light_group_show')(context, {'id': group_id})
    # NOTE(review): reaches into a private helper of country_helper;
    # consider exposing a public accessor there instead.
    data_completeness = country_helper._get_data_completeness(grp_dict.get('name'))
    return country_helper.hdx_replace_datagrid_labels(data_completeness)
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dogpile.cache import make_region

import ckan.plugins.toolkit as tk
import ckanext.hdx_org_group.helpers.data_completness as data_completness
import ckanext.hdx_org_group.helpers.data_completeness as data_completness
from ckanext.hdx_theme.helpers.caching import dogpile_standard_config, dogpile_config_filter, \
HDXRedisInvalidationStrategy

Expand All @@ -29,7 +29,7 @@ def cached_data_completeness(location_code):
branch = 'master' if for_prod else location_code
url = url_pattern.format(branch=branch, iso=location_code)

return data_completness.DataCompletness(location_code, url).get_config()
return data_completness.DataCompleteness(location_code, url).get_config()


@dogpile_country_region.cache_on_arguments()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@


def _sort_datasets_by_is_good(data_completeness):
categories = data_completeness.get("categories")
categories = data_completeness.get('categories')
for cat in categories:
if cat.get("data_series"):
for ds in cat.get("data_series"):
datasets_list = ds.get("datasets")
if cat.get('data_series'):
for ds in cat.get('data_series'):
datasets_list = ds.get('datasets')
if datasets_list:
datasets_sorted_list = sorted(datasets_list, key=lambda item: item['is_good'] == False)
ds['datasets'] = datasets_sorted_list
Expand All @@ -52,7 +52,7 @@ def _sort_datasets_by_is_good(data_completeness):


def country_topline(id):
log.info("The id of the page is: " + id)
log.info('The id of the page is: ' + id)

country_dict = get_country(id)
top_line_data_list = caching.cached_topline_numbers(id)
Expand Down Expand Up @@ -248,8 +248,8 @@ def get_country(id):


def _db_to_form_schema(group_type=None):
    """This is an interface to manipulate data from the database
    into a format suitable for the form (optional)."""
    group_plugin = lookup_group_plugin(group_type)
    return group_plugin.db_to_form_schema()


Expand Down Expand Up @@ -290,3 +290,50 @@ def hdx_datagrid_org_get_display_text(dataseries_dict):
if flag.get('key', '') == 'not_applicable' and flag.get('display_text'):
return flag.get('display_text')
return 'Not applicable'

# Mapping of legacy "good/not good" stat keys to the public
# "available/outdated" vocabulary used in exported data-completeness dicts.
DATA_COMPLETENESS_LABELS_DICT = {
    'good_datasets_num': 'available_datasets_num',
    'dataset_goodness_percentage': 'dataset_availability_percentage',
    'good_dataseries_num': 'available_dataseries_num',
    'not_good_dataseries_num': 'outdated_dataseries_num',
    'good_dataseries_text': 'available_dataseries_text',
    'not_good_dataseries_text': 'outdated_dataseries_text',
    'dataseries_good_percentage': 'dataseries_available_percentage',
    'dataseries_not_good_percentage': 'dataseries_outdated_percentage',
}

# Mapping of legacy 'state' values to their public equivalents.
# 'empty' maps to itself so every known state has an explicit entry.
DATA_COMPLETENESS_STATE_DICT = {
    'good': 'available',
    'not_good': 'outdated',
    'empty': 'empty',
}


def transform_dict(data):
    """
    Recursively rename legacy data-completeness keys and state values.

    Keys found in DATA_COMPLETENESS_LABELS_DICT are replaced by their
    mapped names, and the string value of any 'state' key is replaced
    via DATA_COMPLETENESS_STATE_DICT. Dicts and lists are walked
    recursively; all other values are returned unchanged.

    :param data: any value (dict, list or scalar)
    :return: a transformed copy; the input structure is not mutated
    """
    if isinstance(data, dict):
        new_dict = {}
        for key, value in data.items():
            # Replace keys if present in DATA_COMPLETENESS_LABELS_DICT
            new_key = DATA_COMPLETENESS_LABELS_DICT.get(key, key)

            # Replace state values if applicable (checked against the
            # ORIGINAL key name, which is not itself renamed)
            if key == 'state' and isinstance(value, str):
                value = DATA_COMPLETENESS_STATE_DICT.get(value, value)

            # Recursively transform values
            new_dict[new_key] = transform_dict(value)
        return new_dict
    if isinstance(data, list):
        return [transform_dict(item) for item in data]
    return data


def hdx_replace_datagrid_labels(data_completeness):
    """
    Replace legacy "good/not good" keys and state values in a
    data-completeness dict with the "available/outdated" vocabulary.

    :param data_completeness: the data-completeness structure
    :type data_completeness: dict
    :return: a transformed copy of the input
    :rtype: dict
    """
    return transform_dict(data_completeness)
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
FLAG_NOT_APPLICABLE = 'not_applicable'


class DataCompletness(object):
class DataCompleteness(object):

basic_query_params = {
'start': 0,
Expand Down
3 changes: 2 additions & 1 deletion ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def get_actions(self):
'invalidate_data_completeness_for_location': update_actions.invalidate_data_completeness_for_location,
'hdx_organization_follower_list': get_actions.hdx_organization_follower_list,
'hdx_user_invite': create_actions.hdx_user_invite,
'member_create': create_actions.hdx_member_create
'member_create': create_actions.hdx_member_create,
'hdx_datagrid_show': get_actions.hdx_datagrid_show,

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import ckanext.hdx_org_group.helpers.country_helper as grp_h

from ckanext.hdx_org_group.helpers.data_completness import DataCompletness, FLAG_NOT_APPLICABLE
from ckanext.hdx_org_group.helpers.data_completeness import DataCompleteness, FLAG_NOT_APPLICABLE
from ckanext.hdx_org_group.helpers.static_lists import ORGANIZATION_TYPE_LIST

_get_action = tk.get_action
Expand Down Expand Up @@ -112,7 +112,7 @@ def keep_db_tables_on_clean():
model.repo.tables_created_and_initialised = True


class MockedDataCompleteness(DataCompletness):
class MockedDataCompleteness(DataCompleteness):

def __init__(self, yaml_dict):
self.yaml_dict = yaml_dict
Expand All @@ -125,7 +125,7 @@ def _fetch_yaml(self):
@pytest.mark.usefixtures("keep_db_tables_on_clean", "clean_db", "clean_index", "setup_data")
class TestDataCompleteness(object):

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness(self, patched_DataCompleteness):
data = self.__compute_data_completeness(_generate_test_yaml_dict(), patched_DataCompleteness)

Expand All @@ -148,7 +148,7 @@ def test_data_completeness(self, patched_DataCompleteness):
assert subcategory2_stats['good_datasets_num'] == 0
assert subcategory2_stats['total_datasets_num'] == 1

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_force_incomplete(self, patched_DataCompleteness):
yaml_dict = _generate_test_yaml_dict()
incomplete_dataset = 'dataset2-category1'
Expand All @@ -169,7 +169,7 @@ def test_data_completeness_force_incomplete(self, patched_DataCompleteness):
dataset = next(d for d in subcategory1['datasets'] if d['name'] == incomplete_dataset)
assert dataset['general_comment'] == incomplete_comment

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_force_complete(self, patched_DataCompleteness):
yaml_dict = _generate_test_yaml_dict()
complete_dataset = 'dataset1-category1'
Expand All @@ -190,7 +190,7 @@ def test_data_completeness_force_complete(self, patched_DataCompleteness):
dataset = next(d for d in subcategory1['datasets'] if d['name'] == complete_dataset)
assert dataset['general_comment'] == complete_comment

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_not_available(self, patched_DataCompleteness):
yaml_dict = _generate_test_yaml_dict()
not_applicable_comment = 'not applicable comment'
Expand All @@ -217,7 +217,7 @@ def test_data_completeness_not_available(self, patched_DataCompleteness):
assert subcategory1_stats['good_datasets_num'] == 0
assert subcategory1_stats['total_datasets_num'] == 0

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_dataset_up_to_date(self, patched_DataCompleteness):
review_date = datetime.datetime.utcnow() - datetime.timedelta(days=31)
_generate_dataset_dict('dataset3-category1', ORG, LOCATION, review_date)
Expand Down

0 comments on commit 3068b36

Please sign in to comment.