Skip to content

Commit

Permalink
HDX-10398 export data completeness and rename "completness"
Browse files Browse the repository at this point in the history
  • Loading branch information
danmihaila committed Dec 4, 2024
1 parent bee7f57 commit 3068b36
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 44 deletions.
62 changes: 36 additions & 26 deletions ckanext-hdx_org_group/ckanext/hdx_org_group/actions/get.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
'''
"""
Created on April 24, 2015
@author: alexandru-m-g
'''
"""
import json
import logging

import ckan.lib.dictization as d
import ckan.lib.helpers as helpers
import ckan.lib.navl.dictization_functions
import ckan.logic as logic
import ckan.model as model
import ckan.plugins.toolkit as tk

import ckanext.hdx_org_group.dao.indicator_access as indicator_access
import ckanext.hdx_org_group.dao.widget_data_service as widget_data_service
import ckanext.hdx_org_group.helpers.country_helper as country_helper
import ckanext.hdx_org_group.helpers.organization_helper as org_helper
from ckan.common import c
from ckanext.hdx_theme.helpers.caching import cached_make_rest_api_request as cached_make_rest_api_request
Expand All @@ -36,7 +35,7 @@

@side_effect_free
def hdx_datasets_for_group(context, data_dict):
'''
"""
Returns a paginated list of datasets for a group with 25 items per page.
Options for sorting are: metadata_modified desc, title_case_insensitive desc, title_case_insensitive asc,
views_recent desc, score desc ( only useful if query string is specified, should be combined
Expand All @@ -52,11 +51,11 @@ def hdx_datasets_for_group(context, data_dict):
:param type: 'all', 'indicators', 'datasets'. Defaults to 'all'
:type q: string
:return:
'''
"""

skipped_keys = ['q', 'id', 'sort', 'type', 'page']

id = _get_or_bust(data_dict, "id")
id = _get_or_bust(data_dict, 'id')

limit = 25

Expand All @@ -74,31 +73,31 @@ def hdx_datasets_for_group(context, data_dict):
new_data_dict['ext_indicator'] = u'0'

search_param_list = [
key + ":" + value for key, value in data_dict.iteritems() if key not in skipped_keys]
key + ':' + value for key, value in data_dict.iteritems() if key not in skipped_keys]
search_param_list.append(u'groups:{}'.format(id))

if search_param_list != None:
new_data_dict['fq'] = " ".join(
new_data_dict['fq'] = ' '.join(
search_param_list) + ' +dataset_type:dataset'

if data_dict.get('q', None):
new_data_dict['q'] = data_dict['q']

query = get_action("package_search")(context, new_data_dict)
query = get_action('package_search')(context, new_data_dict)

return query


@side_effect_free
def hdx_topline_num_for_group(context, data_dict):
'''
"""
:param id: the id of the group for which top line numbers are requested
:type id: string
:return: a dict of top line numbers. Please note that depending on the selected group the source
of the data ( either the datastore or CPS/indicators ) might be different. The data will have some fields
that are specific to the source.
'''
id = _get_or_bust(data_dict, "id")
"""
id = _get_or_bust(data_dict, 'id')
grp_result = get_group(id)
group_info = grp_result.get('group_info')
# custom_dict = grp_result.get('custom_dict')
Expand All @@ -124,14 +123,14 @@ def hdx_topline_num_for_group(context, data_dict):


def __get_toplines_for_active_country(group_info, common_format):
'''
"""
:param group_info:
:type group_info: dict
:param common_format:
:type common_format: bool
:return:
:rtype: list
'''
"""

# source is rw
top_line_data_list = widget_data_service.build_widget_data_access(group_info).get_dataset_results()
Expand Down Expand Up @@ -162,14 +161,14 @@ def _parse_integer_value(item):


def __get_toplines_for_standard_country(group_info, common_format):
'''
"""
:param group_info:
:type group_info: dict
:param common_format:
:type common_format: bool
:return:
:rtype: list
'''
"""
# source is configured in 'hdx.locations.toplines_url'
# ckan_site_url = config.get('ckan.site_url')
raw_top_line_items = widget_data_service.build_widget_data_access(group_info).get_dataset_results()
Expand Down Expand Up @@ -197,13 +196,13 @@ def __get_toplines_for_standard_country(group_info, common_format):

@side_effect_free
def hdx_light_group_show(context, data_dict):
'''
"""
Return a lightweight ( less resource intensive,faster but without datasets ) version of the group details
:param id: the id of the group for which top line numbers are requested
:type id: string
'''
"""

id = _get_or_bust(data_dict, "id")
id = _get_or_bust(data_dict, 'id')
group_dict = {}
group = model.Group.get(id)
if not group:
Expand All @@ -226,13 +225,13 @@ def hdx_light_group_show(context, data_dict):
dictized = d.table_dictize(extra, context)
if not extra.state == 'active':
continue
value = dictized["value"]
value = dictized['value']
result_list.append(dictized)

# Keeping the above for backwards compatibility
group_dict[name] = dictized["value"]
group_dict[name] = dictized['value']

group_dict['extras'] = sorted(result_list, key=lambda x: x["key"])
group_dict['extras'] = sorted(result_list, key=lambda x: x['key'])
return group_dict


Expand Down Expand Up @@ -283,20 +282,20 @@ def hdx_get_locations_info_from_rw(context, data_dict):
return cached_make_rest_api_request(url)
return None
except:
log.error("RW file was not found or can not be accessed")
log.error('RW file was not found or can not be accessed')
return None


@side_effect_free
def hdx_organization_follower_list(context, data_dict):
'''Return the list of users that are following the given organization.
"""Return the list of users that are following the given organization.
:param id: the id or name of the organization
:type id: string
:rtype: list of dictionaries
'''
"""
_check_access('hdx_organization_follower_list', context, data_dict)
context['keep_email'] = True
return _follower_list(
Expand Down Expand Up @@ -336,3 +335,14 @@ def _user_list_dictize(obj_list, context,
# user_dict.pop('email', None)
result_list.append(user_dict)
return sorted(result_list, key=sort_key, reverse=reverse)

@side_effect_free
def hdx_datagrid_show(context, data_dict):
    """
    Return the data-completeness ("datagrid") information for a group,
    with legacy "good/not good" labels replaced by the public
    "available/outdated" vocabulary.

    :param id: the id or name of the group
    :type id: string
    :return: the transformed data-completeness dict
    :rtype: dict
    :raises NotFound: if no group id could be resolved
    """
    group_id = _get_or_bust(data_dict, 'id')  # avoid shadowing builtin `id`
    if not group_id:
        raise NotFound('Group was not found.')
    grp_dict = get_action('hdx_light_group_show')(context, {'id': group_id})
    # NOTE(review): reaches into a private helper of country_helper;
    # consider exposing a public accessor there instead.
    data_completeness = country_helper._get_data_completeness(grp_dict.get('name'))
    return country_helper.hdx_replace_datagrid_labels(data_completeness)
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from dogpile.cache import make_region

import ckan.plugins.toolkit as tk
import ckanext.hdx_org_group.helpers.data_completness as data_completness
import ckanext.hdx_org_group.helpers.data_completeness as data_completness
from ckanext.hdx_theme.helpers.caching import dogpile_standard_config, dogpile_config_filter, \
HDXRedisInvalidationStrategy

Expand All @@ -29,7 +29,7 @@ def cached_data_completeness(location_code):
branch = 'master' if for_prod else location_code
url = url_pattern.format(branch=branch, iso=location_code)

return data_completness.DataCompletness(location_code, url).get_config()
return data_completness.DataCompleteness(location_code, url).get_config()


@dogpile_country_region.cache_on_arguments()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@


def _sort_datasets_by_is_good(data_completeness):
categories = data_completeness.get("categories")
categories = data_completeness.get('categories')
for cat in categories:
if cat.get("data_series"):
for ds in cat.get("data_series"):
datasets_list = ds.get("datasets")
if cat.get('data_series'):
for ds in cat.get('data_series'):
datasets_list = ds.get('datasets')
if datasets_list:
datasets_sorted_list = sorted(datasets_list, key=lambda item: item['is_good'] == False)
ds['datasets'] = datasets_sorted_list
Expand All @@ -52,7 +52,7 @@ def _sort_datasets_by_is_good(data_completeness):


def country_topline(id):
log.info("The id of the page is: " + id)
log.info('The id of the page is: ' + id)

country_dict = get_country(id)
top_line_data_list = caching.cached_topline_numbers(id)
Expand Down Expand Up @@ -248,8 +248,8 @@ def get_country(id):


def _db_to_form_schema(group_type=None):
    """This is an interface to manipulate data from the database
    into a format suitable for the form (optional)."""
    group_plugin = lookup_group_plugin(group_type)
    return group_plugin.db_to_form_schema()


Expand Down Expand Up @@ -290,3 +290,50 @@ def hdx_datagrid_org_get_display_text(dataseries_dict):
if flag.get('key', '') == 'not_applicable' and flag.get('display_text'):
return flag.get('display_text')
return 'Not applicable'

# Mapping of legacy "good/not good" stat keys to the public
# "available/outdated" vocabulary used in exported data-completeness dicts.
DATA_COMPLETENESS_LABELS_DICT = {
    'good_datasets_num': 'available_datasets_num',
    'dataset_goodness_percentage': 'dataset_availability_percentage',
    'good_dataseries_num': 'available_dataseries_num',
    'not_good_dataseries_num': 'outdated_dataseries_num',
    'good_dataseries_text': 'available_dataseries_text',
    'not_good_dataseries_text': 'outdated_dataseries_text',
    'dataseries_good_percentage': 'dataseries_available_percentage',
    'dataseries_not_good_percentage': 'dataseries_outdated_percentage',
}

# Mapping of legacy 'state' values to their public equivalents.
# 'empty' maps to itself so every known state has an explicit entry.
DATA_COMPLETENESS_STATE_DICT = {
    'good': 'available',
    'not_good': 'outdated',
    'empty': 'empty',
}


def transform_dict(data):
    """
    Recursively rename legacy data-completeness keys and state values.

    Keys found in DATA_COMPLETENESS_LABELS_DICT are replaced by their
    mapped names, and the string value of any 'state' key is replaced
    via DATA_COMPLETENESS_STATE_DICT. Dicts and lists are walked
    recursively; all other values are returned unchanged.

    :param data: any value (dict, list or scalar)
    :return: a transformed copy; the input structure is not mutated
    """
    if isinstance(data, dict):
        new_dict = {}
        for key, value in data.items():
            # Replace keys if present in DATA_COMPLETENESS_LABELS_DICT
            new_key = DATA_COMPLETENESS_LABELS_DICT.get(key, key)

            # Replace state values if applicable (checked against the
            # ORIGINAL key name, which is not itself renamed)
            if key == 'state' and isinstance(value, str):
                value = DATA_COMPLETENESS_STATE_DICT.get(value, value)

            # Recursively transform values
            new_dict[new_key] = transform_dict(value)
        return new_dict
    if isinstance(data, list):
        return [transform_dict(item) for item in data]
    return data


def hdx_replace_datagrid_labels(data_completeness):
    """
    Replace legacy "good/not good" keys and state values in a
    data-completeness dict with the "available/outdated" vocabulary.

    :param data_completeness: the data-completeness structure
    :type data_completeness: dict
    :return: a transformed copy of the input
    :rtype: dict
    """
    return transform_dict(data_completeness)
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
FLAG_NOT_APPLICABLE = 'not_applicable'


class DataCompletness(object):
class DataCompleteness(object):

basic_query_params = {
'start': 0,
Expand Down
3 changes: 2 additions & 1 deletion ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def get_actions(self):
'invalidate_data_completeness_for_location': update_actions.invalidate_data_completeness_for_location,
'hdx_organization_follower_list': get_actions.hdx_organization_follower_list,
'hdx_user_invite': create_actions.hdx_user_invite,
'member_create': create_actions.hdx_member_create
'member_create': create_actions.hdx_member_create,
'hdx_datagrid_show': get_actions.hdx_datagrid_show,

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import ckanext.hdx_org_group.helpers.country_helper as grp_h

from ckanext.hdx_org_group.helpers.data_completness import DataCompletness, FLAG_NOT_APPLICABLE
from ckanext.hdx_org_group.helpers.data_completeness import DataCompleteness, FLAG_NOT_APPLICABLE
from ckanext.hdx_org_group.helpers.static_lists import ORGANIZATION_TYPE_LIST

_get_action = tk.get_action
Expand Down Expand Up @@ -112,7 +112,7 @@ def keep_db_tables_on_clean():
model.repo.tables_created_and_initialised = True


class MockedDataCompleteness(DataCompletness):
class MockedDataCompleteness(DataCompleteness):

def __init__(self, yaml_dict):
self.yaml_dict = yaml_dict
Expand All @@ -125,7 +125,7 @@ def _fetch_yaml(self):
@pytest.mark.usefixtures("keep_db_tables_on_clean", "clean_db", "clean_index", "setup_data")
class TestDataCompleteness(object):

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness(self, patched_DataCompleteness):
data = self.__compute_data_completeness(_generate_test_yaml_dict(), patched_DataCompleteness)

Expand All @@ -148,7 +148,7 @@ def test_data_completeness(self, patched_DataCompleteness):
assert subcategory2_stats['good_datasets_num'] == 0
assert subcategory2_stats['total_datasets_num'] == 1

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_force_incomplete(self, patched_DataCompleteness):
yaml_dict = _generate_test_yaml_dict()
incomplete_dataset = 'dataset2-category1'
Expand All @@ -169,7 +169,7 @@ def test_data_completeness_force_incomplete(self, patched_DataCompleteness):
dataset = next(d for d in subcategory1['datasets'] if d['name'] == incomplete_dataset)
assert dataset['general_comment'] == incomplete_comment

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_force_complete(self, patched_DataCompleteness):
yaml_dict = _generate_test_yaml_dict()
complete_dataset = 'dataset1-category1'
Expand All @@ -190,7 +190,7 @@ def test_data_completeness_force_complete(self, patched_DataCompleteness):
dataset = next(d for d in subcategory1['datasets'] if d['name'] == complete_dataset)
assert dataset['general_comment'] == complete_comment

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_not_available(self, patched_DataCompleteness):
yaml_dict = _generate_test_yaml_dict()
not_applicable_comment = 'not applicable comment'
Expand All @@ -217,7 +217,7 @@ def test_data_completeness_not_available(self, patched_DataCompleteness):
assert subcategory1_stats['good_datasets_num'] == 0
assert subcategory1_stats['total_datasets_num'] == 0

@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompletness')
@mock.patch('ckanext.hdx_org_group.helpers.data_completness.DataCompleteness')
def test_data_completeness_dataset_up_to_date(self, patched_DataCompleteness):
review_date = datetime.datetime.utcnow() - datetime.timedelta(days=31)
_generate_dataset_dict('dataset3-category1', ORG, LOCATION, review_date)
Expand Down

0 comments on commit 3068b36

Please sign in to comment.