Skip to content

Commit

Permalink
feat: add page views metadata (#557)
Browse files Browse the repository at this point in the history
  • Loading branch information
vncsna authored Feb 4, 2024
1 parent faa7225 commit bc627af
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
# Generated by Django 4.2.6 on 2024-02-04 16:08

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add a ``page_views`` big-integer column to the ``dataset`` and ``table`` models."""

    dependencies = [("v1", "0026_alter_table_source_bucket_name")]

    # Both models receive the same column definition; a fresh field instance
    # is built for each AddField operation.
    operations = [
        migrations.AddField(
            model_name=model_name,
            name="page_views",
            field=models.BigIntegerField(
                default=0,
                help_text="Number of page views by Google Analytics",
            ),
        )
        for model_name in ("dataset", "table")
    ]
10 changes: 9 additions & 1 deletion bd_api/apps/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,10 @@ class Dataset(BaseModel):
is_closed = models.BooleanField(
default=False, help_text="Dataset is for BD Pro subscribers only"
)
page_views = models.BigIntegerField(
default=0,
help_text="Number of page views by Google Analytics",
)

graphql_nested_filter_fields_whitelist = ["id", "slug"]

Expand Down Expand Up @@ -946,8 +950,12 @@ class Table(BaseModel, OrderedModel):
number_rows = models.BigIntegerField(blank=True, null=True)
number_columns = models.BigIntegerField(blank=True, null=True)
is_closed = models.BooleanField(default=False, help_text="Table is for BD Pro subscribers only")
order_with_respect_to = ("dataset",)
page_views = models.BigIntegerField(
default=0,
help_text="Number of page views by Google Analytics",
)

order_with_respect_to = ("dataset",)
graphql_nested_filter_fields_whitelist = ["id", "dataset"]

def __str__(self):
Expand Down
55 changes: 54 additions & 1 deletion bd_api/apps/api/v1/tasks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta

from django.core.management import call_command
from google.api_core.exceptions import BadRequest, NotFound
from google.cloud.bigquery import Table as GBQTable
Expand All @@ -7,7 +9,7 @@
from loguru import logger
from pandas import read_gbq

from bd_api.apps.api.v1.models import Table
from bd_api.apps.api.v1.models import Dataset, Table
from bd_api.custom.client import get_gbq_client, get_gcs_client, send_discord_message
from bd_api.utils import production_task

Expand Down Expand Up @@ -121,3 +123,54 @@ def format_msg(msg: list[str]) -> str:

if msg := format_msg(msg):
send_discord_message(msg)


def _build_page_views_query(event_table: str) -> str:
    """Return the BigQuery SQL counting page views per (table_id, dataset_id).

    ``event_table`` is the table name (or wildcard) inside the
    ``basedosdados.analytics_295884852`` dataset to read events from.
    """
    # The string is an f-string, so the regex quantifier must be written as
    # ``{{36}}``; a bare ``{36}`` would be evaluated as a replacement field and
    # rendered as the literal text ``36``, breaking the id extraction.
    # ``\\/`` emits ``\/`` in the SQL without relying on an invalid Python
    # escape sequence.
    return f"""
        select
            count(1) page_views
            , regexp_extract(param.value.string_value, r'table=([a-z0-9-]{{36}})') table_id
            , regexp_extract(param.value.string_value, r'dataset\\/([a-z0-9-]{{36}})') dataset_id
        from `basedosdados.analytics_295884852.{event_table}` event
        join unnest(event_params) param
        where
            true
            and event_name = 'page_view'
            and param.key = 'page_location'
            and param.value.string_value like '%/dataset/%'
        group by
            table_id,
            dataset_id
        having
            true
            and table_id is not null
            and dataset_id is not null
    """


def _reset_page_views(model, ids) -> None:
    """Set ``page_views`` to 0 on each existing ``model`` row whose id is in ``ids``."""
    for pk in ids:
        if instance := model.objects.filter(id=pk).first():
            instance.page_views = 0
            instance.save()


def _add_page_views(model, pk, amount: int) -> None:
    """Add ``amount`` to ``page_views`` of the ``model`` row with id ``pk``, if it exists."""
    if instance := model.objects.filter(id=pk).first():
        instance.page_views += amount
        instance.save()


@periodic_task(crontab(hour="6", minute="0"))
@production_task
def update_page_views_task(backfill: bool = False) -> None:
    """Update ``page_views`` on tables and datasets from Google Analytics events.

    Registered as a periodic task with ``crontab(hour="6", minute="0")``.
    NOTE(review): ``production_task`` presumably restricts execution to the
    production environment -- confirm against ``bd_api.utils``.

    Args:
        backfill: When False (default), only yesterday's ``events_YYYYMMDD``
            table is read and its counts are *added* to the stored totals, so
            running the task twice for the same day counts that day twice.
            When True, every ``events_*`` table is read and the counters of
            all tables/datasets present in the result are reset to zero
            before the totals are added back.
    """
    if backfill:
        event_table = "events_*"
    else:
        # "Yesterday" is derived from the server's local (naive) clock.
        yesterday = datetime.now() - timedelta(days=1)
        event_table = f"events_{yesterday:%Y%m%d}"

    metadata = read_gbq(_build_page_views_query(event_table))

    if backfill:
        _reset_page_views(Table, metadata["table_id"].unique())
        _reset_page_views(Dataset, metadata["dataset_id"].unique())

    # Each row is one (table, dataset) pair; a dataset therefore accumulates
    # the views of every one of its tables found in the result.
    for row in metadata.itertuples(index=False):
        # Coerce to a plain int so the ORM never receives a numpy scalar.
        page_views = int(row.page_views)
        _add_page_views(Table, row.table_id, page_views)
        _add_page_views(Dataset, row.dataset_id, page_views)

0 comments on commit bc627af

Please sign in to comment.