Skip to content

Commit

Permalink
feat(postgres/analytics): Add task to clean-up old data
Browse files Browse the repository at this point in the history
  • Loading branch information
gagantrivedi committed Dec 15, 2023
1 parent 79e67ee commit 76d9ff3
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 0 deletions.
4 changes: 4 additions & 0 deletions api/app/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,10 @@
AWS_SSE_LOGS_BUCKET_NAME = env.str("AWS_SSE_LOGS_BUCKET_NAME", None)
SSE_INFLUXDB_BUCKET = env.str("SSE_INFLUXDB_BUCKET", None)

# Retention window, in days, for raw analytics rows. Read from the
# RAW_ANALYTICS_DATA_RETENTION_DAYS environment variable; defaults to 30.
# The analytics clean-up task deletes raw rows whose `created_at` is older
# than this many days.
RAW_ANALYTICS_DATA_RETENTION_DAYS = env.int("RAW_ANALYTICS_DATA_RETENTION_DAYS", 30)
# Retention window, in days, for bucketed (aggregated) analytics rows. Read
# from the BUCKETED_ANALYTICS_DATA_RETENTION_DAYS environment variable;
# defaults to 90.
BUCKETED_ANALYTICS_DATA_RETENTION_DAYS = env.int(
    "BUCKETED_ANALYTICS_DATA_RETENTION_DAYS", 90
)

DISABLE_INVITE_LINKS = env.bool("DISABLE_INVITE_LINKS", False)

Expand Down
26 changes: 26 additions & 0 deletions api/app_analytics/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,32 @@ def populate_bucket(
populate_feature_evaluation_bucket(bucket_size, run_every, source_bucket_size)


@register_recurring_task(
    run_every=timedelta(days=1),
)
def clean_up_old_analytics_data():
    """Delete analytics rows that have outlived their retention window.

    Registered as a recurring task that runs once a day. Raw rows
    (``APIUsageRaw``, ``FeatureEvaluationRaw``) are deleted when their
    ``created_at`` is older than ``settings.RAW_ANALYTICS_DATA_RETENTION_DAYS``
    days; bucketed rows (``APIUsageBucket``, ``FeatureEvaluationBucket``) are
    deleted when older than
    ``settings.BUCKETED_ANALYTICS_DATA_RETENTION_DAYS`` days.
    """
    # Read the clock once so that every table is pruned against cut-offs
    # derived from the same instant, rather than four slightly different ones.
    now = timezone.now()
    raw_cutoff = now - timedelta(days=settings.RAW_ANALYTICS_DATA_RETENTION_DAYS)
    bucketed_cutoff = now - timedelta(
        days=settings.BUCKETED_ANALYTICS_DATA_RETENTION_DAYS
    )

    # delete raw analytics data older than `RAW_ANALYTICS_DATA_RETENTION_DAYS`
    for raw_model in (APIUsageRaw, FeatureEvaluationRaw):
        raw_model.objects.filter(created_at__lt=raw_cutoff).delete()

    # delete bucketed analytics data older than
    # `BUCKETED_ANALYTICS_DATA_RETENTION_DAYS`
    for bucket_model in (APIUsageBucket, FeatureEvaluationBucket):
        bucket_model.objects.filter(created_at__lt=bucketed_cutoff).delete()


@register_task_handler()
def track_feature_evaluation(environment_id, feature_evaluations):
feature_evaluation_objects = []
Expand Down
99 changes: 99 additions & 0 deletions api/tests/unit/app_analytics/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
Resource,
)
from app_analytics.tasks import (
clean_up_old_analytics_data,
populate_api_usage_bucket,
populate_feature_evaluation_bucket,
track_feature_evaluation,
track_request,
)
from django.conf import settings
from django.utils import timezone
from pytest_django.fixtures import SettingsWrapper

if "analytics" not in settings.DATABASES:
pytest.skip(
Expand Down Expand Up @@ -315,3 +317,100 @@ def _create_feature_evaluation_event(environment_id, feature_name, count, when):
event.save()

return event


@pytest.mark.django_db(databases=["analytics"])
def test_clean_up_old_analytics_data_does_nothing_if_no_data() -> None:
    """Run the clean-up task when this test has created no analytics rows.

    There is nothing to assert beyond the call completing: the test passes
    as long as the task does not raise.
    """
    # Given
    # (no analytics rows are created by this test)

    # When
    clean_up_old_analytics_data()

    # Then
    # reaching this point means no exception was raised


@pytest.mark.django_db(databases=["analytics"])
def test_clean_up_old_analytics_data_removes_old_data(
    settings: SettingsWrapper,
) -> None:
    """Check that the clean-up task deletes only rows past their retention.

    Retention is overridden to 2 days for raw data and 4 days for bucketed
    data. For each of the four analytics models the test creates rows on both
    sides of the relevant cut-off, runs the task, and asserts that exactly
    the rows inside the retention window are still present.

    :param settings: pytest-django settings fixture, used to override the
        retention settings for the duration of this test.
    """
    # Imported locally so the test does not rely on `django.utils.timezone`
    # happening to expose `timedelta`, which is not part of its public API.
    from datetime import timedelta

    # Given
    now = timezone.now()
    settings.RAW_ANALYTICS_DATA_RETENTION_DAYS = 2
    settings.BUCKETED_ANALYTICS_DATA_RETENTION_DAYS = 4

    environment_id = 1

    # APIUsageRaw data that should not be removed
    new_api_usage_raw_data = [
        _create_api_usage_event(environment_id, now),
        _create_api_usage_event(environment_id, now - timedelta(days=1)),
    ]

    # APIUsageRaw data that should be removed
    # NOTE(review): the row dated exactly `now - 2 days` is expected to be
    # deleted because the task reads the clock after `now` was captured here,
    # which presumably puts the row strictly before the cut-off - confirm
    # against `_create_api_usage_event`.
    _create_api_usage_event(environment_id, now - timedelta(days=2))
    _create_api_usage_event(environment_id, now - timedelta(days=3))

    # APIUsageBucket data that should not be removed
    new_api_usage_bucket = APIUsageBucket.objects.create(
        environment_id=environment_id,
        resource=Resource.FLAGS,
        total_count=100,
        created_at=now,
        bucket_size=5,
    )
    # APIUsageBucket data that should be removed
    APIUsageBucket.objects.create(
        environment_id=environment_id,
        resource=Resource.FLAGS,
        total_count=100,
        created_at=now - timedelta(days=5),
        bucket_size=5,
    )

    # FeatureEvaluationRaw data that should not be removed
    new_feature_evaluation_raw_data = [
        _create_feature_evaluation_event(environment_id, "feature1", 1, now),
        _create_feature_evaluation_event(
            environment_id, "feature1", 1, now - timedelta(days=1)
        ),
    ]

    # FeatureEvaluationRaw data that should be removed
    _create_feature_evaluation_event(
        environment_id, "feature1", 1, now - timedelta(days=3)
    )
    _create_feature_evaluation_event(
        environment_id, "feature1", 1, now - timedelta(days=2)
    )

    # FeatureEvaluationBucket data that should not be removed
    new_feature_evaluation_bucket = FeatureEvaluationBucket.objects.create(
        environment_id=environment_id,
        feature_name="feature1",
        total_count=100,
        created_at=now,
        bucket_size=5,
    )

    # FeatureEvaluationBucket data that should be removed
    FeatureEvaluationBucket.objects.create(
        environment_id=environment_id,
        feature_name="feature1",
        total_count=100,
        created_at=now - timedelta(days=5),
        bucket_size=5,
    )

    # When
    clean_up_old_analytics_data()

    # Then
    # Compare as sets: the querysets carry no explicit ordering, so the order
    # in which the database returns the surviving rows must not affect the
    # outcome of the test.
    assert set(APIUsageRaw.objects.all()) == set(new_api_usage_raw_data)
    assert set(FeatureEvaluationRaw.objects.all()) == set(
        new_feature_evaluation_raw_data
    )
    assert set(FeatureEvaluationBucket.objects.all()) == {
        new_feature_evaluation_bucket
    }
    assert set(APIUsageBucket.objects.all()) == {new_api_usage_bucket}

0 comments on commit 76d9ff3

Please sign in to comment.