Skip to content

Commit

Permalink
revamp metrics_summaries
Browse files Browse the repository at this point in the history
The list of metrics input to generate_summaries is now a dict of {metric name : [ list of fields to group by] }.
e-mission/e-mission-server#966 (comment)

Add a 'response_count' metric, which counts the occurrences of 'responded' and 'not_responded', as opposed to the existing metrics, which just sum up numeric values.

Make app_config and the trip labels map (labels_map) module-level globals to avoid argument/parameter drilling

Add comments and rename variables throughout to improve comprehension
  • Loading branch information
JGreenlee committed May 20, 2024
1 parent 1032dc5 commit 941069a
Showing 1 changed file with 98 additions and 48 deletions.
146 changes: 98 additions & 48 deletions src/emcommon/metrics/metrics_summaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,96 +2,146 @@
# from util import memoize
import emcommon.logger as Logger
import emcommon.util as util
import emcommon.bluetooth.ble_matching as emcble
import emcommon.survey.conditional_surveys as emcsc

app_config = None
labels_map = None

# @memoize
def label_for_trip(composite_trip: dict, label_key: str, trip_labels_map: dict[str, any] = None) -> str:
def label_for_trip(composite_trip: dict, label_key: str) -> str:
"""
:param composite_trip: composite trip
:param label_key: which type of label to get ('mode', 'purpose', or 'replaced_mode')
:param trip_labels_map: trip labels map
:return: the label for the trip, derived from the trip's user_input if available, or the trip_labels_map if available, or 'unlabeled' otherwise
:return: the label for the trip, derived from the trip's user_input if available, or the labels_map if available; None if neither provides a label ('unlabeled' is returned only when composite_trip itself is empty)
"""
global labels_map
label_key = label_key.upper()
label_key_confirm = label_key.lower() + '_confirm'
UNLABELED = 'unlabeled'
if not composite_trip:
return UNLABELED
Logger.log_debug('called label_for_trip with label_key %s for trip %s' % (label_key, composite_trip))
if 'user_input' in composite_trip and label_key_confirm in composite_trip['user_input']:
return composite_trip['user_input'][label_key_confirm]
if trip_labels_map and composite_trip['_id']['$oid'] in trip_labels_map:
if label_key_upper in trip_labels_map[composite_trip['_id']['$oid']]:
return trip_labels_map[composite_trip['_id']['$oid']][label_key_upper]['data']['label']
return UNLABELED
if labels_map and composite_trip['_id']['$oid'] in labels_map \
and label_key in labels_map[composite_trip['_id']['$oid']]:
return labels_map[composite_trip['_id']['$oid']][label_key]['data']['label']
return None


def labeled_purpose_for_trip(composite_trip: dict, trip_labels_map: dict[str, any] = None) -> str:
def survey_answered_for_trip(composite_trip: dict) -> str | None:
"""
:param composite_trip: composite trip
:param trip_labels_map: trip labels map
:return: labeled purpose for the trip, derived from the trip's user_input if available, or the trip_labels_map if available, or 'unlabeled' otherwise
:return: the name of the survey that was answered for the trip, or None if no survey was answered
"""
UNLABELED = 'unlabeled'
if not composite_trip:
return UNLABELED
if 'user_input' in composite_trip and 'purpose_confirm' in composite_trip['user_input']:
return composite_trip['user_input']['purpose_confirm']
if trip_labels_map and composite_trip['_id']['$oid'] in trip_labels_map:
if 'PURPOSE' in trip_labels_map[composite_trip['_id']['$oid']]:
return trip_labels_map[composite_trip['_id']['$oid']]['PURPOSE']['data']['label']
return UNLABELED
global labels_map
Logger.log_debug('called survey_answered_for_trip for trip %s' % composite_trip)
if 'user_input' in composite_trip and 'trip_user_input' in composite_trip['user_input']:
return composite_trip['user_input']['trip_user_input']['data']['name']
if labels_map \
and composite_trip['_id']['$oid'] in labels_map \
and 'SURVEY' in labels_map[composite_trip['_id']['$oid']] \
and 'data' in labels_map[composite_trip['_id']['$oid']]['SURVEY']:
return labels_map[composite_trip['_id']['$oid']]['SURVEY']['data']['name']
return None


# @memoize
def generate_summaries(metrics: list[str], composite_trips: list, trip_labels_map: dict[str, any] = None):
def generate_summaries(metric_list: dict[str, list[str]], composite_trips: list, _app_config, _labels_map: dict[str, any] = None):
global app_config, labels_map
app_config = _app_config
labels_map = _labels_map
composite_trips = [util.flatten_db_entry(trip) for trip in composite_trips if 'data' in trip]
return {metric: get_summary_for_metric(metric, composite_trips, trip_labels_map) for metric in metrics}
metric_list = dict(metric_list)
return {metric[0]: get_summary_for_metric(metric, composite_trips) for metric in metric_list.items()}


def value_of_metric_for_trip(metric: str, trip: dict):
if metric == 'distance':
def value_of_metric_for_trip(metric_name: str, grouping_field: str, trip: dict):
global app_config
if metric_name == 'distance':
return trip['distance']
elif metric == 'count':
elif metric_name == 'count':
return 1
elif metric == 'duration':
elif metric_name == 'duration':
return trip['duration']
elif metric_name == 'response_count':
if grouping_field.endswith('_confirm'):
return 'responded' if label_for_trip(trip, grouping_field[:-8]) else 'not_responded'
elif grouping_field == 'survey':
prompted_survey = emcsc.survey_prompted_for_trip(trip, app_config)
answered_survey = survey_answered_for_trip(trip)
return 'responded' if answered_survey == prompted_survey else 'not_responded'
return None


def get_summary_for_metric(metric: str, composite_trips: list, trip_labels_map: dict[str, any] = None):
def get_summary_for_metric(metric: tuple[str, list[str]], composite_trips: list):
"""
:param metric: tuple of metric name and list of grouping fields
:param composite_trips: list of composite trips
:return: a list of dicts, each representing a summary of the metric on one day
e.g. get_summary_for_metric(('distance', ['mode_confirm', 'purpose_confirm']), composite_trips)
-> [ { 'date': '2024-05-20', 'mode_confirm_bike': 1000, 'mode_confirm_walk': 500, 'purpose_confirm_home': 1500 } ]
"""
days_of_metrics_data = {}
for trip in composite_trips:
# for now, we're only grouping by day. First part of ISO date is YYYY-MM-DD
date = trip['start_fmt_time'].split('T')[0]
if date not in days_of_metrics_data:
days_of_metrics_data[date] = []
days_of_metrics_data[date].append(trip)

# days_summaries e.g. [ { 'date': '2024-05-20', 'mode_confirm_bike': 1000, 'purpose_confirm_home': 1500 } ]
days_summaries = []
for date, trips in days_of_metrics_data.items():
summary_for_day = {
'date': date,
}
summary_for_day.update(metric_summary_by_mode(
metric, trips, trip_labels_map))
summary_for_day.update(metric_summary_for_trips(metric, trips))
days_summaries.append(summary_for_day)
return days_summaries

def metric_summary_by_mode(metric: str, composite_trips: list, trip_labels_map = None):
grouping_field_fns = {
'mode_confirm': lambda trip: label_for_trip(trip, 'mode') or 'UNLABELED',
'purpose_confirm': lambda trip: label_for_trip(trip, 'purpose') or 'UNLABELED',
'replaced_mode_confirm': lambda trip: label_for_trip(trip, 'replaced_mode') or 'UNLABELED',
'survey': lambda trip: emcsc.survey_prompted_for_trip(trip, app_config),
# 'primary_inferred_mode', maybe add later
'primary_ble_sensed_mode': lambda trip: emcble.primary_ble_sensed_mode_for_trip(trip) or 'UNKNOWN',
}

def metric_summary_for_trips(metric: tuple[str, list[str]], composite_trips: list):
"""
:param metric: tuple of metric name and list of grouping fields
:param composite_trips: list of composite trips
:return: a dict of mode keys to the metric total for that mode
:return: a dict of { groupingfield_value : metric_total } for the given metric and trips
e.g. metric_summary_for_trips(('distance', ['mode_confirm', 'purpose_confirm']), composite_trips)
-> { 'mode_confirm_bike': 1000, 'mode_confirm_walk': 500, 'purpose_confirm_home': 1500 }
e.g. metric_summary_for_trips(('response_count', ['mode_confirm', 'purpose_confirm']), composite_trips)
-> { 'mode_confirm_bike': { 'responded': 10, 'not_responded': 5 }, 'mode_confirm_walk': { 'responded': 5, 'not_responded': 10 } }
"""
grouping_fields = {
'mode_confirm': lambda trip: label_for_trip(trip, 'mode', trip_labels_map),
'purpose_confirm': lambda trip: label_for_trip(trip, 'purpose', trip_labels_map),
'replaced_mode_confirm': lambda trip: label_for_trip(trip, 'replaced_mode', trip_labels_map),
}

mode_to_metric_map = {}
global app_config
groups = {}
if not composite_trips:
return mode_to_metric_map
return groups
for trip in composite_trips:
for grouping_field, field_for_trip_fn in grouping_fields.items():
grouping_key = grouping_field + '_' + field_for_trip_fn(trip)
if grouping_key not in mode_to_metric_map:
mode_to_metric_map[grouping_key] = 0
mode_to_metric_map[grouping_key] += value_of_metric_for_trip(metric, trip)
return mode_to_metric_map
if 'primary_ble_sensed_mode' not in trip:
trip['primary_ble_sensed_mode'] = emcble.primary_ble_sensed_mode_for_trip(trip) or 'UNKNOWN'
for grouping_field in metric[1]:
if grouping_field not in grouping_field_fns:
continue
field_value_for_trip = grouping_field_fns[grouping_field](trip)
if field_value_for_trip is None:
continue
# grouping_key e.g. 'mode_confirm_bike'
grouping_key = grouping_field + '_' + field_value_for_trip
val = value_of_metric_for_trip(metric[0], grouping_field, trip)
# if it's a number, we're summing and adding to the total (used for distance, duration, count)
if type(val) == int or type(val) == float:
if grouping_key not in groups:
groups[grouping_key] = 0
groups[grouping_key] += val
# if it's a string, we're counting the number of times it appears (used for response_count)
elif type(val) == str:
if grouping_key not in groups:
groups[grouping_key] = {}
if val not in groups[grouping_key]:
groups[grouping_key][val] = 0
groups[grouping_key][val] += 1
return groups

0 comments on commit 941069a

Please sign in to comment.