diff --git a/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py b/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py index b1378920dd..dfb988e229 100644 --- a/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py +++ b/ckanext-hdx_package/ckanext/hdx_package/controllers/dataset_controller.py @@ -31,6 +31,8 @@ import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.search as search +import ckanext.hdx_package.helpers.analytics as analytics + from ckan.common import _, json, request, c, g, response from ckan.controllers.home import CACHE_PARAMETERS @@ -704,10 +706,9 @@ def read(self, id, format='html'): template = template[:template.index('.') + 1] + format # set dataset type for google analytics - modified by HDX - # c.ga_dataset_type = self._google_analytics_dataset_type(c.pkg_dict) - c.analytics_is_cod = self._analytics_is_cod(c.pkg_dict) - c.analytics_is_indicator = self._analytics_is_indicator(c.pkg_dict) - c.analytics_group_names, c.analytics_group_ids = self._analytics_location(c.pkg_dict) + c.analytics_is_cod = analytics.is_cod(c.pkg_dict) + c.analytics_is_indicator = analytics.is_indicator(c.pkg_dict) + c.analytics_group_names, c.analytics_group_ids = analytics.extract_locations_in_json(c.pkg_dict) # changes done for indicator act_data_dict = {'id': c.pkg_dict['id'], 'limit': 7} @@ -772,27 +773,6 @@ def read(self, id, format='html'): assert False, "We should never get here" - def _analytics_is_indicator(self, pkg_dict): - if int(pkg_dict.get('indicator', 0)) == 1: - return 'true' - return 'false' - - def _analytics_is_cod(self, pkg_dict): - tags = [tag.get('name', '') for tag in pkg_dict.get('tags', [])] - if 'cod' in tags: - return 'true' - return 'false' - - def _analytics_location(self, pkg_dict): - locations = pkg_dict.get('groups', []) - location_names = [] - location_ids = [] - for l in sorted(locations, key=lambda item: item.get('name', '')): - 
location_names.append(l.get('name', ''))
-            location_ids.append(l.get('id', ''))
-
-        return json.dumps(location_names), json.dumps(location_ids)
-
     def _get_org_extras(self, org_id):
         """
         Get the extras for our orgs
diff --git a/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py b/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py
new file mode 100644
index 0000000000..99027a8c21
--- /dev/null
+++ b/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py
@@ -0,0 +1,156 @@
+import functools
+import logging
+import json
+import urlparse
+import mixpanel
+import pylons.config as config
+
+import ckan.model as model
+import ckan.lib.base as base
+import ckan.logic as logic
+import ckan.controllers.package as package_controller
+
+from ckan.common import _, c, request
+
+log = logging.getLogger(__name__)
+
+
+def is_indicator(pkg_dict):
+    """
+    Tell whether the dataset is flagged as an indicator.
+
+    :param pkg_dict: dataset dict; only its 'indicator' key is read
+    :returns: the string 'true' if 'indicator' equals 1, else 'false'.
+        A missing, empty or non-numeric value gives 'false'.
+    """
+    try:
+        indicator = int(pkg_dict.get('indicator') or 0)
+    except (TypeError, ValueError):
+        indicator = 0
+    return 'true' if indicator == 1 else 'false'
+
+
+def is_cod(pkg_dict):
+    """
+    Tell whether one of the dataset's tags is named 'cod'.
+
+    :param pkg_dict: dataset dict; only its 'tags' key is read
+    :returns: the string 'true' or 'false'
+    """
+    tags = pkg_dict.get('tags') or []
+    if any(tag.get('name', '') == 'cod' for tag in tags):
+        return 'true'
+    return 'false'
+
+
+def extract_locations(pkg_dict):
+    """
+    Collect the names and ids of the dataset's groups, sorted by group name.
+
+    :param pkg_dict: dataset dict; only its 'groups' key is read
+    :returns: (location_names, location_ids), two lists in the same order
+    """
+    locations = sorted(pkg_dict.get('groups') or [],
+                       key=lambda item: item.get('name', ''))
+    location_names = [l.get('name', '') for l in locations]
+    location_ids = [l.get('id', '') for l in locations]
+    return location_names, location_ids
+
+
+def extract_locations_in_json(pkg_dict):
+    """
+    Same as extract_locations() but each list is dumped to a JSON string.
+
+    :returns: (location_names_json, location_ids_json)
+    """
+    location_names, location_ids = extract_locations(pkg_dict)
+    return json.dumps(location_names), json.dumps(location_ids)
+
+
+def _is_internal_referer(referer_url):
+    """
+    Tell whether referer_url has the same hostname as 'ckan.site_url'.
+    """
+    if not referer_url:
+        return False
+    ckan_url = config.get('ckan.site_url', '//localhost:5000')
+    ckan_hostname = urlparse.urlparse(ckan_url).hostname
+    return ckan_hostname == urlparse.urlparse(referer_url).hostname
+
+
+def _build_download_event(resource_dict, dataset_dict, referer_url):
+    """
+    Build the properties sent with a 'resource download' event.
+    """
+    # 'organization' can be present but None, so .get(..., {}) is not enough
+    organization = dataset_dict.get('organization') or {}
+    location_names, location_ids = extract_locations(dataset_dict)
+    return {
+        "resource name": resource_dict.get('name'),
+        "dataset name": dataset_dict.get('title'),
+        "dataset id": dataset_dict.get('id'),
+        "org name": organization.get('name'),
+        "org id": organization.get('id'),
+        "group names": location_names,
+        "group ids": location_ids,
+        "is cod": is_cod(dataset_dict),
+        "is indicator": is_indicator(dataset_dict),
+        "event source": "direct",
+        "referer url": referer_url,
+    }
+
+
+def _track_direct_download(dataset_id, resource_id, referer_url):
+    """
+    Send a 'resource download' event to mixpanel on a best-effort basis.
+
+    :raises logic.NotFound: propagated from resource_show / package_show
+    :raises logic.NotAuthorized: propagated from resource_show / package_show
+    """
+    context = {'model': model, 'session': model.Session,
+               'user': c.user or c.author, 'auth_user_obj': c.userobj}
+    resource_dict = logic.get_action('resource_show')(context, {'id': resource_id})
+    dataset_dict = logic.get_action('package_show')(context, {'id': dataset_id})
+
+    token = config.get('hdx.mixpanel.token')
+    if not token:
+        log.debug('hdx.mixpanel.token is not set, download event not sent')
+        return
+
+    event_dict = _build_download_event(resource_dict, dataset_dict, referer_url)
+    try:
+        # NOTE(review): presumably a blocking HTTP call made during the
+        # request - confirm, and consider a buffered consumer if it is slow
+        mixpanel.Mixpanel(token).track('anonymous', 'resource download', event_dict)
+    except Exception:
+        # analytics is secondary: never fail the download because of it
+        log.exception('Could not send the resource download event to mixpanel')
+
+
+def wrap_resource_download_function():
+    """
+    Replace PackageController.resource_download with a wrapper that sends a
+    mixpanel event before delegating to the original function.
+
+    The event is only sent when the request has no referer or a referer
+    from another host. Calling this function again is a no-op.
+    """
+    original_resource_download = package_controller.PackageController.resource_download
+    if getattr(original_resource_download, '_hdx_tracks_downloads', False):
+        # already wrapped; wrapping twice would send every event twice
+        return
+
+    @functools.wraps(original_resource_download)
+    def new_resource_download(self, id, resource_id, filename=None):
+        referer_url = request.referer
+        if not _is_internal_referer(referer_url):
+            try:
+                _track_direct_download(id, resource_id, referer_url)
+            except logic.NotFound:
+                base.abort(404, _('Resource not found'))
+            except logic.NotAuthorized:
+                base.abort(401, _('Unauthorized to read resource %s') % id)
+        return original_resource_download(self, id, resource_id, filename)
+
+    new_resource_download._hdx_tracks_downloads = True
+    package_controller.PackageController.resource_download = new_resource_download
diff --git a/ckanext-hdx_package/ckanext/hdx_package/plugin.py b/ckanext-hdx_package/ckanext/hdx_package/plugin.py
index 927441d136..4befc847fc 100644
--- a/ckanext-hdx_package/ckanext/hdx_package/plugin.py
+++ b/ckanext-hdx_package/ckanext/hdx_package/plugin.py
@@ -31,6 +31,7 @@ import ckanext.hdx_package.actions.delete as hdx_delete import ckanext.hdx_package.helpers.helpers as hdx_helpers import ckanext.hdx_package.helpers.tracking_changes as tracking_changes +import 
ckanext.hdx_package.helpers.analytics as analytics import ckanext.hdx_package.actions.get as hdx_get import ckanext.hdx_org_group.helpers.organization_helper as org_helper @@ -56,6 +57,9 @@ def run_on_startup(): # replace original get_proxified_resource_url, check hdx_get_proxified_resource_url for more info resourceproxy_plugin.get_proxified_resource_url = hdx_helpers.hdx_get_proxified_resource_url + # wrap resource download function so that we can track download events + analytics.wrap_resource_download_function() + def _generate_license_list(): package.Package._license_register = license.LicenseRegister() diff --git a/requirements.txt b/requirements.txt index c27927ddb8..f8453a07cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,7 +33,7 @@ repoze.who==2.0 repoze.who-friendlyform==1.0.8 requests==2.3.0 simplejson==3.3.1 -six==1.7.3 +six>=1.7.3 solrpy==0.9.5 sqlalchemy-migrate==0.9.1 sqlparse==0.1.11 @@ -44,6 +44,7 @@ zope.interface==4.1.1 validate_email==1.2 pyDNS==2.3.6 ijson==2.2 +mixpanel>=4.3.0 # Below lines are needed for ckanext-powerview ckantoolkit==0.0.2