-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
182 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import random | ||
|
||
""" | ||
The RouterTable class. | ||
In A/B-testing jargon, "control" is our baseline model, i.e. the original model in this comparison. | ||
Treatment is the new model that we want to test against the old model. | ||
""" | ||
class RouterTable:
    """Route users to either the control model or the treatment model.

    "Control" is the baseline model and "treatment" is the new model under
    test. Each user is assigned to one group at random the first time they are
    seen and stays in that group until the assignments are cleared.

    Attributes:
        control_port: Port of the control model.
        treat_port: Port of the treatment model, or -1 when none is configured.
        user_to_group: Mapping from user_id to ``CONTROL`` or ``TREAT``.
        treat_percentage: Fraction of traffic sent to the treatment model. It
            is compared against ``random.random()``, so it is a value between
            0 and 1 rather than a number out of 100.
    """

    # Group labels stored in ``user_to_group``.
    CONTROL = "CONTROL"
    TREAT = "TREAT"

    # Placeholder port meaning "no treatment model has been configured".
    _UNSET_PORT = -1

    def __init__(self, port):
        """Create a table that sends all traffic to the control model at ``port``."""
        # The port of our control model
        self.control_port = port
        # The port of our treatment model
        self.treat_port = self._UNSET_PORT
        # The mapping from user_id to self.CONTROL or self.TREAT
        self.user_to_group = {}
        # How much traffic we are directing into the treatment model
        self.treat_percentage = 0

    def flush(self):
        """Forget all user assignments and stop sending traffic to treatment."""
        self.user_to_group = {}
        self.treat_percentage = 0

    def get_port_by_user_id(self, user_id):
        """Return the port of the model that should serve ``user_id``.

        Outside of a test every user is served by the control model and no
        assignment is recorded.
        """
        if not self.is_in_test():
            return self.control_port

        # Unseen users are assigned to a group according to
        # self.treat_percentage by get_group_by_user_id.
        if self.get_group_by_user_id(user_id) == self.CONTROL:
            return self.control_port
        return self.treat_port

    def is_user_in_treatment(self, user_id):
        """Return True if ``user_id`` is (or has just been) assigned to treatment."""
        return self.get_group_by_user_id(user_id) == self.TREAT

    def get_group_by_user_id(self, user_id):
        """Return the group of ``user_id``, assigning one first if the user is new."""
        if user_id not in self.user_to_group:
            self.assign_new_user(user_id)
        return self.user_to_group[user_id]

    def set_treat_percentage(self, percentage):
        """Change the fraction of traffic sent to the treatment model.

        This is used in canary tests to incrementally increase the traffic.
        All existing assignments are cleared, so be careful when logging data:
        a particular user_id may be served by a different model than the last
        time.
        """
        self.user_to_group = {}
        self.treat_percentage = percentage

    def assign_new_user(self, user_id):
        """Randomly place ``user_id`` into the treatment or the control group."""
        # random.random() is in [0.0, 1.0), so a strict comparison gives
        # exactly treat_percentage probability and never picks treatment
        # when the percentage is 0.
        if random.random() < self.treat_percentage:
            self.user_to_group[user_id] = self.TREAT
        else:
            self.user_to_group[user_id] = self.CONTROL

    def set_new_treatment(self, port, percentage):
        """Set up a new treatment model at ``port`` receiving ``percentage`` of traffic."""
        self.treat_port = port
        self.treat_percentage = percentage
        self.user_to_group = {}

    def test_success(self):
        """End a successful test: the treatment becomes the new control."""
        # Only promote a real treatment port; promoting the placeholder would
        # leave the table without a usable control model.
        if self.treat_port != self._UNSET_PORT:
            self.control_port = self.treat_port
        self.flush()

    def test_fail(self):
        """End a failed test: the control keeps being the control."""
        self.flush()

    def is_in_test(self):
        """Return True while a configured treatment model is receiving traffic."""
        # A non-zero percentage without a treatment port must not count as a
        # test, otherwise users would be routed to the placeholder port.
        return self.treat_percentage != 0 and self.treat_port != self._UNSET_PORT
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import pandas as pd | ||
|
||
""" | ||
This function takes the data that a specific model produces and tells whether it is a good model or not. | ||
Parameters: | ||
df: The data that a particular model produces. It should contain the following columns. | ||
user_id: The id of the user. | ||
response: The recommendation that we give back. | ||
timestamp: The timestamp of the request (or response), in seconds (time_window is converted to seconds before comparison). | ||
req_percentage: The threshold; at or above it, the model is considered successful. | ||
time_window: The time window, in minutes, within which a user must come back. | ||
Return: boolean. Whether at least req_percentage of the requests were followed by another request from the same user within the time window. | ||
""" | ||
def eval_model(df, req_percentage=0.2, time_window=20):
    """Decide whether the data produced by a model marks it as successful.

    Parameters:
        df: Requests served by one model, with at least the columns
            ``user_id`` and ``timestamp``.
        req_percentage: Minimum share of requests that must be follow-up
            requests for the model to count as successful.
        time_window: Maximum gap, in minutes, between two requests of the
            same user for the later one to count as a follow-up.

    Return:
        True if the number of follow-up requests divided by the total number
        of requests is at least ``req_percentage``. An empty ``df`` carries no
        evidence and yields False.
    """
    total_requests = df.shape[0]
    if total_requests == 0:
        # Avoid dividing by zero when the model has served no requests.
        return False
    follow_ups = num_following_req(df, time_window)
    return (follow_ups / total_requests) >= req_percentage
|
||
""" | ||
Return the number of requests made within time_window minutes of the same user's previous request. This can be included in our report. | ||
""" | ||
def num_following_req(df, time_window=20):
    """Count requests that follow the same user's previous request closely.

    Parameters:
        df: Requests with at least the columns ``user_id`` and ``timestamp``.
            The timestamp differences are compared against
            ``time_window * 60``, i.e. they are treated as seconds.
        time_window: Maximum gap, in minutes, between two consecutive
            requests of one user for the later one to be counted.

    Return:
        The number of requests made at most ``time_window`` minutes after the
        same user's previous request.
    """
    window_seconds = time_window * 60
    count_successful = 0
    for _, group in df.groupby('user_id'):
        # Sort so that rows arriving out of order cannot produce negative
        # gaps, which would always pass the window check.
        timestamps = sorted(group['timestamp'].tolist())
        count_successful += sum(
            1
            for previous, current in zip(timestamps, timestamps[1:])
            if current - previous <= window_seconds
        )
    return count_successful
|
||
# if __name__ == "__main__": | ||
# data = {'user_id': [1, 2, 3, 4, 1, 1, 3, 5, 2], 'timestamp': [1, 8, 10, 20, 23, 26, 29, 50, 88]} | ||
# df = pd.DataFrame.from_dict(data) | ||
# print(eval_model(df, 0.2, 20)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters