Skip to content

Commit

Permalink
#4228 Refactoring to send the analytics events async
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandru-m-g committed Jun 20, 2016
1 parent 5e2e93e commit 626667c
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 24 deletions.
120 changes: 103 additions & 17 deletions ckanext-hdx_package/ckanext/hdx_package/helpers/analytics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
import json
import urlparse
import mixpanel
import requests

import pylons.config as config

import ckan.model as model
Expand Down Expand Up @@ -49,12 +50,54 @@ def extract_locations_in_json(pkg_dict):
return json.dumps(location_names), json.dumps(location_ids)


def _ga_dataset_type(is_indicator, is_cod):
'''
:param is_indicator:
:type is_indicator: bool
:param is_cod:
:type is_cod: bool
:return: standard / indicator / cod / cod~indicator
:rtype: str
'''

type = 'standard'
if is_indicator:
type = 'indicator'
if is_cod:
type = 'cod~indicator' if type == 'indicator' else 'cod'

return type


def _ga_location(location_names):
'''
:param location_names:
:type location_names: list[str]
:return:
:rtype: str
'''
limit = 15
if len(location_names) >= limit:
result = 'many'
else:
result = "~".join(location_names)

if not result:
result = 'none'

return result


def wrap_resource_download_function():
original_resource_download = package_controller.PackageController.resource_download

def new_resource_download(self, id, resource_id, filename=None):
send_event = True

referer_url = request.referer
remote_addr = request.remote_addr
request_url = request.url

if referer_url:
ckan_url = config.get('ckan.site_url', '//localhost:5000')
ckan_parsed_url = urlparse.urlparse(ckan_url)
Expand All @@ -70,27 +113,70 @@ def new_resource_download(self, id, resource_id, filename=None):
dataset_dict = logic.get_action('package_show')(context, {'id': id})
location_names, location_ids = extract_locations(dataset_dict)

mp = mixpanel.Mixpanel(config.get('hdx.mixpanel.token'))
event_dict = {
"resource name": resource_dict.get('name'),
"resource id": resource_dict.get('id'),
"dataset name": dataset_dict.get('title'),
"dataset id": dataset_dict.get('id'),
"org name": dataset_dict.get('organization', {}).get('name'),
"org id": dataset_dict.get('organization', {}).get('id'),
"group names": location_names,
"group ids": location_ids,
"is cod": is_cod(dataset_dict),
"is indicator": is_indicator(dataset_dict),
"event source": "direct",
"referer url": referer_url

dataset_title = dataset_dict.get('title', dataset_dict.get('name'))
dataset_is_cod = is_cod(dataset_dict) == 'true'
dataset_is_indicator = is_indicator(dataset_dict) == 'true'

analytics_enqueue_url = config.get('hdx.analytics.enqueue_url')
analytics_dict = {
'event_name': 'resource download',
'mixpanel_tracking_id': 'anonymous',
'mixpanel_token': config.get('hdx.analytics.mixpanel.token'),
'send_mixpanel': True,
'send_ga': True,
'mixpanel_meta': {
"resource name": resource_dict.get('name'),
"resource id": resource_dict.get('id'),
"dataset name": dataset_dict.get('title'),
"dataset id": dataset_dict.get('id'),
"org name": dataset_dict.get('organization', {}).get('name'),
"org id": dataset_dict.get('organization', {}).get('id'),
"group names": location_names,
"group ids": location_ids,
"is cod": dataset_is_cod,
"is indicator": dataset_is_indicator,
"event source": "direct",
"referer url": referer_url
},
'ga_meta': {
'v': '1',
't': 'event',
'cid': 'anonymous',
'tid': config.get('hdx.analytics.ga.token'),
'ds': 'direct',
'uip': remote_addr,
'ec': 'resource', # event category
'ea': 'download', # event action
'dl': request_url,
'el': '{} ({})'.format(resource_dict.get('name'), dataset_title), # event label
'cd1': dataset_dict.get('organization', {}).get('name'),
'cd2': _ga_dataset_type(dataset_is_indicator, dataset_is_cod), # type
'cd3': _ga_location(location_names), # locations




}
}
mp.track('anonymous', 'resource download', event_dict)

response = requests.post(analytics_enqueue_url, allow_redirects=True, timeout=2,
data=json.dumps(analytics_dict), headers={'Content-type': 'application/json'})
response.raise_for_status()
enq_result = response.json()
log.info('Enqueuing result was: {}'.format(enq_result.get('success')))
except logic.NotFound:
base.abort(404, _('Resource not found'))
except logic.NotAuthorized:
base.abort(401, _('Unauthorized to read resource %s') % id)
except requests.ConnectionError, e:
log.error("There was a connection error to the analytics enqueuing service: {}".format(str(e)))
except requests.HTTPError, e:
log.error("Bad HTTP response from analytics enqueuing service: {}".format(str(e)))
except requests.Timeout, e:
log.error("Request timed out: {}".format(str(e)))
except Exception, e:
log.error('Unexpected error {}'.format(e))

return original_resource_download(self, id, resource_id, filename)

package_controller.PackageController.resource_download = new_resource_download
3 changes: 2 additions & 1 deletion ckanext-hdx_theme/ckanext/hdx_theme/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-48221887-3', 'auto');
var gaToken = '{{ h.hdx_get_ckan_config('hdx.analytics.ga.token') }}';
ga('create', gaToken, 'auto');

//swap the two create calls in order to enable recording data from localhost instances
//this is useful when testing to see realtime data on the Google Analytics site
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@
f.parentNode.insertBefore(a, f)
}
})(document, window.mixpanel || []);
mixpanel.init("{{ h.hdx_get_ckan_config('hdx.mixpanel.token') }}");
mixpanel.init("{{ h.hdx_get_ckan_config('hdx.analytics.mixpanel.token') }}");
</script>
<!-- end Mixpanel -->
9 changes: 6 additions & 3 deletions common-config-ini.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ hdx.google.dev_key =
# hdx.gis.layer_import_url = http://localhost:1234/api/add-layer/dataset/{dataset_id}/resource/{resource_id}?resource_download_url={resource_download_url}&url_type={url_type}
# hdx.gis.resource_pbf_url = http://localhost:7101/services/postgis/{resource_id}/wkb_geometry/vector-tiles/{z}/{x}/{y}.pbf

# Analytics
hdx.analytics.ga.token = UA-48221887-3
# This should be overridden in your own prod.ini
# hdx.analytics.enqueue_url = http://localhost:1234/api/send-analytics
hdx.analytics.mixpanel.token = 875bfe50f9cb981f4e2817832c83c165

hdx.captcha.url = https://www.google.com/recaptcha/api/siteverify
hdx.onboarding.send_confirmation_email = true

Expand All @@ -214,6 +220,3 @@ hdx.checks.config_path = /srv/ckan/ckanext-hdx_service_checker/ckanext/hdx_servi
hdx.explorer.url = /mpx/#/
hdx.explorer.iframe.width = 100%
hdx.explorer.iframe.height = 750px

# Mixpanel
hdx.mixpanel.token = 875bfe50f9cb981f4e2817832c83c165
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ repoze.who==2.0
repoze.who-friendlyform==1.0.8
requests==2.3.0
simplejson==3.3.1
six>=1.7.3
six==1.7.3
solrpy==0.9.5
sqlalchemy-migrate==0.9.1
sqlparse==0.1.11
Expand All @@ -44,7 +44,6 @@ zope.interface==4.1.1
validate_email==1.2
pyDNS==2.3.6
ijson==2.2
mixpanel>=4.3.0

# Below lines are needed for ckanext-powerview
ckantoolkit==0.0.2
Expand Down

0 comments on commit 626667c

Please sign in to comment.