From 0c6535ed6cdea02399de29d1a3cec2ec2d868475 Mon Sep 17 00:00:00 2001 From: Anirudh Pillai Date: Thu, 12 Dec 2024 16:52:17 +0000 Subject: [PATCH] feat(alerts): check alerts earlier (#26843) --- posthog/tasks/alerts/checks.py | 39 +-- .../test/test_trends_absolute_alerts.py | 130 ++++++++-- .../test/test_trends_relative_alerts.py | 222 +++++++++++++++--- posthog/tasks/alerts/utils.py | 51 +++- 4 files changed, 345 insertions(+), 97 deletions(-) diff --git a/posthog/tasks/alerts/checks.py b/posthog/tasks/alerts/checks.py index 4738ca45d3ade..439305283b4cf 100644 --- a/posthog/tasks/alerts/checks.py +++ b/posthog/tasks/alerts/checks.py @@ -1,4 +1,3 @@ -import time import traceback from datetime import datetime, timedelta, UTC @@ -25,16 +24,15 @@ AlertState, ) from posthog.utils import get_from_dict_or_attr -from prometheus_client import Counter, Gauge from django.db.models import Q, F from collections import defaultdict from posthog.tasks.alerts.utils import ( AlertEvaluationResult, calculation_interval_to_order, + next_check_time, send_notifications_for_breaches, send_notifications_for_errors, WRAPPER_NODE_KINDS, - alert_calculation_interval_to_relativedelta, ) from posthog.tasks.alerts.trends import check_trends_alert from posthog.ph_client import ph_us_client @@ -54,26 +52,6 @@ def __init__(self, err: Exception): self.__traceback__ = err.__traceback__ -HOURLY_ALERTS_BACKLOG_GAUGE = Gauge( - "hourly_alerts_backlog", - "Number of hourly alerts that are not being checked in the last hour.", -) - -DAILY_ALERTS_BACKLOG_GAUGE = Gauge( - "daily_alerts_backlog", - "Number of daily alerts that are not being checked in the last 24 hours.", -) - -ALERT_CHECK_ERROR_COUNTER = Counter( - "alerts_check_failures", - "Number of alert check errors that don't notify the user", -) - -ALERT_COMPUTED_COUNTER = Counter( - "alerts_computed", - "Number of alerts we calculated", -) - ANIRUDH_DISTINCT_ID = "wcPbDRs08GtNzrNIXfzHvYAkwUaekW7UrAo4y3coznT" @@ -102,8 +80,6 @@ def alerts_backlog_task() -> None: ) ).count() - HOURLY_ALERTS_BACKLOG_GAUGE.set(hourly_alerts_breaching_sla) - now = datetime.now(UTC) daily_alerts_breaching_sla = AlertConfiguration.objects.filter( @@ -114,8 +90,6 @@ def alerts_backlog_task() -> None: ) ).count() - DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla) - with ph_us_client() as capture_ph_event: capture_ph_event( ANIRUDH_DISTINCT_ID, @@ -135,9 +109,6 @@ def alerts_backlog_task() -> None: }, ) - # sleeping 30s for prometheus to pick up the metrics sent during task - time.sleep(30) - @shared_task( ignore_result=True, @@ -266,7 +237,6 @@ def check_alert(alert_id: str, capture_ph_event: Callable = lambda *args, **kwar try: check_alert_and_notify_atomically(alert, capture_ph_event) except Exception as err: - ALERT_CHECK_ERROR_COUNTER.inc() user = cast(User, alert.created_by) capture_ph_event( @@ -309,9 +279,6 @@ def check_alert_and_notify_atomically(alert: AlertConfiguration, capture_ph_even so we can retry notification without re-computing insight. """ set_tag("alert_config_id", alert.id) - - ALERT_COMPUTED_COUNTER.inc() - user = cast(User, alert.created_by) # Event to count alert checks @@ -426,9 +393,7 @@ def add_alert_check( # IMPORTANT: update next_check_at according to interval # ensure we don't recheck alert until the next interval is due - alert.next_check_at = (alert.next_check_at or now) + alert_calculation_interval_to_relativedelta( - cast(AlertCalculationInterval, alert.calculation_interval) - ) + alert.next_check_at = next_check_time(alert) if notify: alert.last_notified_at = now diff --git a/posthog/tasks/alerts/test/test_trends_absolute_alerts.py b/posthog/tasks/alerts/test/test_trends_absolute_alerts.py index 6d2edac986579..9a65a4297f978 100644 --- a/posthog/tasks/alerts/test/test_trends_absolute_alerts.py +++ b/posthog/tasks/alerts/test/test_trends_absolute_alerts.py @@ -1,9 +1,11 @@ from typing import Optional, Any from unittest.mock import ANY, MagicMock, patch -import dateutil - from freezegun import freeze_time +import dateutil +import pytz +import datetime + from posthog.models.alert import AlertCheck from posthog.models.instance_setting import set_instance_setting from posthog.tasks.alerts.checks import check_alert @@ -41,7 +43,12 @@ def setUp(self) -> None: self.dashboard_api = DashboardAPI(self.client, self.team, self.assertEqual) def create_alert( - self, insight: dict, series_index: int, lower: Optional[int] = None, upper: Optional[int] = None + self, + insight: dict, + series_index: int, + lower: Optional[int] = None, + upper: Optional[int] = None, + calculation_interval: AlertCalculationInterval = AlertCalculationInterval.DAILY, ) -> dict: alert = self.client.post( f"/api/projects/{self.team.id}/alerts", @@ -54,7 +61,7 @@ def create_alert( "series_index": series_index, }, "condition": {"type": "absolute_value"}, - "calculation_interval": AlertCalculationInterval.DAILY, + "calculation_interval": calculation_interval, "threshold": {"configuration": {"type": "absolute", "bounds": {"lower": lower, "upper": upper}}}, }, ).json() @@ -134,7 +141,11 @@ def test_alert_lower_threshold_breached(self, mock_send_breaches: MagicMock, moc assert updated_alert.state == AlertState.FIRING assert updated_alert.last_checked_at == FROZEN_TIME assert updated_alert.last_notified_at == FROZEN_TIME - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 0 @@ -168,7 +179,11 @@ def test_trend_high_threshold_breached(self, mock_send_breaches: MagicMock, mock updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -181,9 +196,11 @@ def test_trend_high_threshold_breached(self, mock_send_breaches: MagicMock, mock def test_trend_no_threshold_breached(self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock) -> None: insight = self.create_time_series_trend_insight() - alert = self.create_alert(insight, series_index=0, lower=0, upper=2) + alert = self.create_alert( + insight, series_index=0, lower=0, upper=2, calculation_interval=AlertCalculationInterval.MONTHLY + ) - with freeze_time(FROZEN_TIME - dateutil.relativedelta.relativedelta(days=1)): + with freeze_time(FROZEN_TIME): _create_event( team=self.team, event="signed_up", @@ -196,10 +213,49 @@ def test_trend_no_threshold_breached(self, mock_send_breaches: MagicMock, mock_s updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = datetime.datetime(2024, 7, 1, 4, 0, tzinfo=pytz.UTC) + # first day of next month at around 4 AM + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") - assert alert_check.calculated_value == 1 + assert alert_check.calculated_value == 0 + assert alert_check.state == AlertState.NOT_FIRING + assert alert_check.error is None + + def test_trend_no_threshold_breached_weekly( + self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock + ) -> None: + insight = self.create_time_series_trend_insight() + alert = self.create_alert( + insight, series_index=0, lower=0, upper=2, calculation_interval=AlertCalculationInterval.WEEKLY + ) + + with freeze_time(FROZEN_TIME): + _create_event( + team=self.team, + event="signed_up", + distinct_id="1", + properties={"$browser": "Chrome"}, + ) + flush_persons_and_events() + + check_alert(alert["id"]) + + updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) + assert updated_alert.state == AlertState.NOT_FIRING + + next_check = ( + FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1, weekday=dateutil.relativedelta.MO(1)) + ).replace(hour=3, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() + + alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") + assert alert_check.calculated_value == 0 assert alert_check.state == AlertState.NOT_FIRING assert alert_check.error is None @@ -234,7 +290,11 @@ def test_trend_breakdown_high_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -276,7 +336,11 @@ def test_trend_breakdown_low_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 1 @@ -318,7 +382,11 @@ def test_trend_breakdown_no_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value is None @@ -358,7 +426,11 @@ def test_aggregate_trend_high_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 3 @@ -400,7 +472,11 @@ def test_aggregate_trend_with_breakdown_high_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -437,7 +513,11 @@ def test_trend_current_interval_high_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -484,7 +564,11 @@ def test_trend_current_interval_fallback_to_previous_high_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -524,7 +608,11 @@ def test_trend_current_interval_no_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 0 @@ -551,7 +639,11 @@ def test_trend_current_interval_low_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at is not None + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") # will be 0 even thought for current day it's 1 diff --git a/posthog/tasks/alerts/test/test_trends_relative_alerts.py b/posthog/tasks/alerts/test/test_trends_relative_alerts.py index 04d049217401e..b213ea2c47e54 100644 --- a/posthog/tasks/alerts/test/test_trends_relative_alerts.py +++ b/posthog/tasks/alerts/test/test_trends_relative_alerts.py @@ -1,10 +1,10 @@ from typing import Optional, Any from unittest.mock import ANY, call, MagicMock, patch -import dateutil - +from freezegun import freeze_time +import dateutil import dateutil.relativedelta -from freezegun import freeze_time +import pytz from posthog.models.alert import AlertCheck from posthog.models.instance_setting import set_instance_setting @@ -123,7 +123,11 @@ def test_alert_properties(self, mock_send_breaches: MagicMock, mock_send_errors: assert updated_alert.state == AlertState.FIRING assert updated_alert.last_checked_at == FROZEN_TIME assert updated_alert.last_notified_at == FROZEN_TIME - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") assert alert_check.calculated_value == 0 @@ -168,7 +172,11 @@ def test_relative_increase_absolute_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") @@ -247,7 +255,11 @@ def test_relative_increase_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -259,7 +271,11 @@ def test_relative_increase_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -329,7 +345,11 @@ def test_relative_increase_lower_threshold_breached_1( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -346,7 +366,11 @@ def test_relative_increase_lower_threshold_breached_1( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -421,7 +445,11 @@ def test_relative_increase_lower_threshold_breached_2( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -433,7 +461,11 @@ def test_relative_increase_lower_threshold_breached_2( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -508,7 +540,11 @@ def test_relative_decrease_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -524,7 +560,11 @@ def test_relative_decrease_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -598,7 +638,11 @@ def test_relative_decrease_lower_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -614,7 +658,11 @@ def test_relative_decrease_lower_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -694,7 +742,11 @@ def test_relative_increase_no_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -705,7 +757,11 @@ def test_relative_increase_no_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -779,7 +835,11 @@ def test_relative_decrease_no_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") assert alert_check.calculated_value == 2 @@ -790,7 +850,11 @@ def test_relative_decrease_no_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") assert alert_check.calculated_value == (2 / 3) @@ -878,7 +942,11 @@ def test_breakdown_relative_increase_upper_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -890,7 +958,11 @@ def test_breakdown_relative_increase_upper_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -996,7 +1068,11 @@ def test_breakdown_relative_increase_lower_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1008,7 +1084,11 @@ def test_breakdown_relative_increase_lower_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1114,7 +1194,11 @@ def test_breakdown_relative_decrease_lower_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1126,7 +1210,11 @@ def test_breakdown_relative_decrease_lower_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1233,7 +1321,11 @@ def test_breakdown_relative_decrease_upper_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1245,7 +1337,11 @@ def test_breakdown_relative_decrease_upper_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1340,7 +1436,11 @@ def test_breakdown_relative_decrease_no_breaches( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1352,7 +1452,11 @@ def test_breakdown_relative_decrease_no_breaches( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1432,7 +1536,11 @@ def test_breakdown_relative_increase_no_breaches( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1444,7 +1552,11 @@ def test_breakdown_relative_increase_no_breaches( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1532,7 +1644,11 @@ def test_current_interval_relative_increase_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1548,7 +1664,11 @@ def test_current_interval_relative_increase_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1639,7 +1759,11 @@ def test_current_interval_relative_increase_fallback_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1657,7 +1781,11 @@ def test_current_interval_relative_increase_fallback_upper_threshold_breached( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1721,7 +1849,11 @@ def test_relative_increase_when_previous_value_is_0( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1739,7 +1871,11 @@ def test_relative_increase_when_previous_value_is_0( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") @@ -1803,7 +1939,11 @@ def test_relative_decrease_when_previous_value_is_0( updated_alert = AlertConfiguration.objects.get(pk=absolute_alert["id"]) assert updated_alert.state == AlertState.NOT_FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=absolute_alert["id"]).latest("created_at") @@ -1815,7 +1955,11 @@ def test_relative_decrease_when_previous_value_is_0( updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) + + next_check = (FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1)).replace(hour=1, tzinfo=pytz.UTC) + assert updated_alert.next_check_at + assert updated_alert.next_check_at.hour == next_check.hour + assert updated_alert.next_check_at.date() == next_check.date() alert_check = AlertCheck.objects.filter(alert_configuration=percentage_alert["id"]).latest("created_at") diff --git a/posthog/tasks/alerts/utils.py b/posthog/tasks/alerts/utils.py index 686ec8a1355fe..28bebd0aa43e3 100644 --- a/posthog/tasks/alerts/utils.py +++ b/posthog/tasks/alerts/utils.py @@ -1,6 +1,8 @@ -from dateutil.relativedelta import relativedelta - +from dateutil.relativedelta import relativedelta, MO from django.utils import timezone +import pytz + +from datetime import datetime import structlog from posthog.email import EmailMessage @@ -56,6 +58,51 @@ def alert_calculation_interval_to_relativedelta(alert_calculation_interval: Aler raise ValueError(f"Invalid alert calculation interval: {alert_calculation_interval}") +def next_check_time(alert: AlertConfiguration) -> datetime: + """ + Rule by calculation interval + + hourly alerts -> want them to run at the same min every hour (same min comes from creation time so that they're spread out and don't all run at the start of the hour) + daily alerts -> want them to run at the start of the day (around 1am) by the timezone of the team + weekly alerts -> want them to run at the start of the week (Mon around 3am) by the timezone of the team + monthly alerts -> want them to run at the start of the month (first day of the month around 4am) by the timezone of the team + """ + now = datetime.now(pytz.UTC) + team_timezone = pytz.timezone(alert.team.timezone) + + match alert.calculation_interval: + case AlertCalculationInterval.HOURLY: + return (alert.next_check_at or now) + relativedelta(hours=1) + case AlertCalculationInterval.DAILY: + # Get the next date in the specified timezone + tomorrow_local = datetime.now(team_timezone) + relativedelta(days=1) + + # set hour to 1 AM + # only replacing hour and not minute/second... to distribute execution of all daily alerts + one_am_local = tomorrow_local.replace(hour=1) + + # Convert to UTC + return one_am_local.astimezone(pytz.utc) + case AlertCalculationInterval.WEEKLY: + next_monday_local = datetime.now(team_timezone) + relativedelta(days=1, weekday=MO(1)) + + # Set the hour to around 3 AM on next Monday + next_monday_1am_local = next_monday_local.replace(hour=3) + + # Convert to UTC + return next_monday_1am_local.astimezone(pytz.utc) + case AlertCalculationInterval.MONTHLY: + next_month_local = datetime.now(team_timezone) + relativedelta(months=1) + + # Set hour to 4 AM on first day of next month + next_month_1am_local = next_month_local.replace(day=1, hour=4) + + # Convert to UTC + return next_month_1am_local.astimezone(pytz.utc) + case _: + raise ValueError(f"Invalid alert calculation interval: {alert.calculation_interval}") + + def send_notifications_for_breaches(alert: AlertConfiguration, breaches: list[str]) -> None: subject = f"PostHog alert {alert.name} is firing" campaign_key = f"alert-firing-notification-{alert.id}-{timezone.now().timestamp()}"