diff --git a/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py b/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py
index e3cf349905..15e0f4ce75 100644
--- a/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py
+++ b/ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py
@@ -1,7 +1,8 @@
 import logging
 import json
 import urlparse
-import mixpanel
+import requests
+
 
 import pylons.config as config
 import ckan.model as model
@@ -49,12 +50,59 @@ def extract_locations_in_json(pkg_dict):
     return json.dumps(location_names), json.dumps(location_ids)
 
 
+def _ga_dataset_type(is_indicator, is_cod):
+    '''
+    Builds the dataset type label sent to Google Analytics as 'cd2'.
+
+    :param is_indicator: whether the dataset is an indicator
+    :type is_indicator: bool
+    :param is_cod: whether the dataset is a COD
+    :type is_cod: bool
+    :return: standard / indicator / cod / cod~indicator
+    :rtype: str
+    '''
+
+    dataset_type = 'standard'
+    if is_indicator:
+        dataset_type = 'indicator'
+    if is_cod:
+        dataset_type = 'cod~indicator' if is_indicator else 'cod'
+
+    return dataset_type
+
+
+def _ga_location(location_names):
+    '''
+    Builds the locations label sent to Google Analytics as 'cd3'.
+
+    :param location_names: names of the locations of the dataset
+    :type location_names: list[str]
+    :return: 'many' for 15 or more names, otherwise the names joined
+        by '~', or 'none' when that string is empty
+    :rtype: str
+    '''
+    limit = 15
+    if len(location_names) >= limit:
+        result = 'many'
+    else:
+        result = "~".join(location_names)
+
+    if not result:
+        result = 'none'
+
+    return result
+
+
 def wrap_resource_download_function():
     original_resource_download = package_controller.PackageController.resource_download
 
     def new_resource_download(self, id, resource_id, filename=None):
         send_event = True
+
         referer_url = request.referer
+        remote_addr = request.remote_addr
+        request_url = request.url
+
         if referer_url:
             ckan_url = config.get('ckan.site_url', '//localhost:5000')
             ckan_parsed_url = urlparse.urlparse(ckan_url)
@@ -70,27 +118,70 @@ def new_resource_download(self, id, resource_id, filename=None):
                 dataset_dict = logic.get_action('package_show')(context, {'id': id})
                 location_names, location_ids = extract_locations(dataset_dict)
 
-                mp = mixpanel.Mixpanel(config.get('hdx.mixpanel.token'))
-                event_dict = {
-                    "resource name": resource_dict.get('name'),
-                    "resource id": resource_dict.get('id'),
-                    "dataset name": dataset_dict.get('title'),
-                    "dataset id": dataset_dict.get('id'),
-                    "org name": dataset_dict.get('organization', {}).get('name'),
-                    "org id": dataset_dict.get('organization', {}).get('id'),
-                    "group names": location_names,
-                    "group ids": location_ids,
-                    "is cod": is_cod(dataset_dict),
-                    "is indicator": is_indicator(dataset_dict),
-                    "event source": "direct",
-                    "referer url": referer_url
-
+                dataset_title = dataset_dict.get('title', dataset_dict.get('name'))
+                # is_cod() / is_indicator() results are compared with the string 'true' to obtain booleans
+                dataset_is_cod = is_cod(dataset_dict) == 'true'
+                dataset_is_indicator = is_indicator(dataset_dict) == 'true'
+
+                analytics_enqueue_url = config.get('hdx.analytics.enqueue_url')
+                analytics_dict = {
+                    'event_name': 'resource download',
+                    'mixpanel_tracking_id': 'anonymous',
+                    'mixpanel_token': config.get('hdx.analytics.mixpanel.token'),
+                    'send_mixpanel': True,
+                    'send_ga': True,
+                    'mixpanel_meta': {
+                        "resource name": resource_dict.get('name'),
+                        "resource id": resource_dict.get('id'),
+                        "dataset name": dataset_dict.get('title'),
+                        "dataset id": dataset_dict.get('id'),
+                        "org name": dataset_dict.get('organization', {}).get('name'),
+                        "org id": dataset_dict.get('organization', {}).get('id'),
+                        "group names": location_names,
+                        "group ids": location_ids,
+                        "is cod": dataset_is_cod,
+                        "is indicator": dataset_is_indicator,
+                        "event source": "direct",
+                        "referer url": referer_url
+                    },
+                    'ga_meta': {
+                        'v': '1',
+                        't': 'event',
+                        'cid': 'anonymous',
+                        'tid': config.get('hdx.analytics.ga.token'),
+                        'ds': 'direct',
+                        'uip': remote_addr,
+                        'ec': 'resource',  # event category
+                        'ea': 'download',  # event action
+                        'dl': request_url,
+                        # unicode template: under Python 2 a byte-string template raises
+                        # UnicodeEncodeError when a unicode name contains non-ASCII characters
+                        'el': u'{} ({})'.format(resource_dict.get('name'), dataset_title),  # event label
+                        'cd1': dataset_dict.get('organization', {}).get('name'),
+                        'cd2': _ga_dataset_type(dataset_is_indicator, dataset_is_cod),  # type
+                        'cd3': _ga_location(location_names),  # locations
+                    }
                 }
-                mp.track('anonymous', 'resource download', event_dict)
+
+                # Short timeout: this request is made before the download itself is served
+                response = requests.post(analytics_enqueue_url, allow_redirects=True, timeout=2,
+                                         data=json.dumps(analytics_dict), headers={'Content-type': 'application/json'})
+                response.raise_for_status()
+                enq_result = response.json()
+                log.info('Enqueuing result was: {}'.format(enq_result.get('success')))
         except logic.NotFound:
             base.abort(404, _('Resource not found'))
         except logic.NotAuthorized:
             base.abort(401, _('Unauthorized to read resource %s') % id)
+        except requests.ConnectionError as e:
+            log.error("There was a connection error to the analytics enqueuing service: {}".format(str(e)))
+        except requests.HTTPError as e:
+            log.error("Bad HTTP response from analytics enqueuing service: {}".format(str(e)))
+        except requests.Timeout as e:
+            log.error("Request timed out: {}".format(str(e)))
+        except Exception as e:
+            log.error('Unexpected error {}'.format(e))
+
         return original_resource_download(self, id, resource_id, filename)
 
     package_controller.PackageController.resource_download = new_resource_download
diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/templates/base.html b/ckanext-hdx_theme/ckanext/hdx_theme/templates/base.html
index 7ff134df25..41fc2a3e42 100644
--- a/ckanext-hdx_theme/ckanext/hdx_theme/templates/base.html
+++ b/ckanext-hdx_theme/ckanext/hdx_theme/templates/base.html
@@ -24,7 +24,8 @@
       m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
       })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
 
-      ga('create', 'UA-48221887-3', 'auto');
+      var gaToken = '{{ h.hdx_get_ckan_config('hdx.analytics.ga.token') }}';
+      ga('create', gaToken, 'auto');
 
       //swap the two create calls in order to enable recording data from localhost instances
       //this is useful when testing to see realtime data on the Google Analytics site
diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/templates/snippets/mixpanel.html b/ckanext-hdx_theme/ckanext/hdx_theme/templates/snippets/mixpanel.html
index 375c946511..0db3f42007 100644
--- a/ckanext-hdx_theme/ckanext/hdx_theme/templates/snippets/mixpanel.html
+++ b/ckanext-hdx_theme/ckanext/hdx_theme/templates/snippets/mixpanel.html
@@ -39,6 +39,6 @@
             f.parentNode.insertBefore(a, f)
         }
     })(document, window.mixpanel || []);
-    mixpanel.init("{{ h.hdx_get_ckan_config('hdx.mixpanel.token') }}");
+    mixpanel.init("{{ h.hdx_get_ckan_config('hdx.analytics.mixpanel.token') }}");
\ No newline at end of file
diff --git a/common-config-ini.txt b/common-config-ini.txt
index d492f0b5ed..8fb6e29d33 100644
--- a/common-config-ini.txt
+++ b/common-config-ini.txt
@@ -204,6 +204,12 @@ hdx.google.dev_key =
 # hdx.gis.layer_import_url = http://localhost:1234/api/add-layer/dataset/{dataset_id}/resource/{resource_id}?resource_download_url={resource_download_url}&url_type={url_type}
 # hdx.gis.resource_pbf_url = http://localhost:7101/services/postgis/{resource_id}/wkb_geometry/vector-tiles/{z}/{x}/{y}.pbf
 
+# Analytics
+hdx.analytics.ga.token = UA-48221887-3
+# This should be overridden in your own prod.ini
+# hdx.analytics.enqueue_url = http://localhost:1234/api/send-analytics
+hdx.analytics.mixpanel.token = 875bfe50f9cb981f4e2817832c83c165
+
 hdx.captcha.url = https://www.google.com/recaptcha/api/siteverify
 
 hdx.onboarding.send_confirmation_email = true
@@ -214,6 +220,3 @@ hdx.checks.config_path = /srv/ckan/ckanext-hdx_service_checker/ckanext/hdx_servi
 hdx.explorer.url = /mpx/#/
 hdx.explorer.iframe.width = 100%
 hdx.explorer.iframe.height = 750px
-
-# Mixpanel
-hdx.mixpanel.token = 875bfe50f9cb981f4e2817832c83c165
diff --git a/requirements.txt b/requirements.txt
index f8453a07cc..c27927ddb8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,7 +33,7 @@ repoze.who==2.0
 repoze.who-friendlyform==1.0.8
 requests==2.3.0
 simplejson==3.3.1
-six>=1.7.3
+six==1.7.3
 solrpy==0.9.5
 sqlalchemy-migrate==0.9.1
 sqlparse==0.1.11
@@ -44,7 +44,6 @@ zope.interface==4.1.1
 validate_email==1.2
 pyDNS==2.3.6
 ijson==2.2
-mixpanel>=4.3.0
 
 # Below lines are needed for ckanext-powerview
 ckantoolkit==0.0.2