diff --git a/bd_api/apps/api/v1/graphql.py b/bd_api/apps/api/v1/graphql.py
new file mode 100644
index 00000000..0720c5ec
--- /dev/null
+++ b/bd_api/apps/api/v1/graphql.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+from graphene import UUID, Boolean, Float, List, ObjectType, String
+from graphene_django import DjangoObjectType
+
+from bd_api.apps.api.v1.models import TableNeighbor
+from bd_api.custom.graphql_base import PlainTextNode
+
+
+class TableNeighborNode(DjangoObjectType):
+    """Similar tables and columns with filters"""
+
+    # table_* / dataset_* fields describe table_b; score is read from the row
+    table_id = String()
+    table_name = String()
+    dataset_id = String()
+    dataset_name = String()
+    score = Float()
+
+    class Meta:
+        model = TableNeighbor
+        fields = ("id",)
+        filter_fields = ("id",)
+        interfaces = (PlainTextNode,)
+
+    # Graphene looks resolvers up as resolve_<field_name>, so each name
+    # must carry exactly one underscore after the "resolve" prefix
+    def resolve_table_id(root, info):
+        return root.table_b.pk
+
+    def resolve_table_name(root, info):
+        return root.table_b.name
+
+    def resolve_dataset_id(root, info):
+        return root.table_b.dataset.pk
+
+    def resolve_dataset_name(root, info):
+        return root.table_b.dataset.name
+
+    def resolve_score(root, info):
+        return root.score
+
+
+class APIQuery(ObjectType):
+    # Neighbors of one table, looked up by the primary key of table_a
+    get_table_neighbor = List(
+        TableNeighborNode,
+        table_id=UUID(required=True),
+        theme=String(),
+        share_theme=Boolean(),
+    )
+
+    def resolve_get_table_neighbor(root, info, table_id, **kwargs):
+        # NOTE(review): theme and share_theme land in kwargs and are unused here
+        return TableNeighbor.objects.filter(table_a__pk=table_id).all()
diff --git a/bd_api/apps/api/v1/migrations/0028_tableneighbor_and_more.py b/bd_api/apps/api/v1/migrations/0028_tableneighbor_and_more.py
index 8a05864e..998839ba 100644
--- a/bd_api/apps/api/v1/migrations/0028_tableneighbor_and_more.py
+++ b/bd_api/apps/api/v1/migrations/0028_tableneighbor_and_more.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Generated by Django 4.2.10 on 2024-03-15 18:55
+# Generated by Django 4.2.10 on 2024-03-20 11:53
 
 import django.db.models.deletion
 from django.db import migrations, models
@@ -27,6 +27,7 @@ class Migration(migrations.Migration):
("similarity_of_area", models.FloatField(default=0)), ("similarity_of_datetime", models.FloatField(default=0)), ("similarity_of_directory", models.FloatField(default=0)), + ("similarity_of_popularity", models.FloatField(default=0)), ( "table_a", models.ForeignKey( @@ -44,6 +45,9 @@ class Migration(migrations.Migration): ), ), ], + options={ + "db_table": "table_neighbor", + }, ), migrations.AddConstraint( model_name="tableneighbor", diff --git a/bd_api/apps/api/v1/models.py b/bd_api/apps/api/v1/models.py index cd1ae43a..f46a72b2 100644 --- a/bd_api/apps/api/v1/models.py +++ b/bd_api/apps/api/v1/models.py @@ -8,7 +8,6 @@ from django.core.exceptions import ValidationError from django.db import models -from django.db.models import Q from django.urls import reverse from ordered_model.models import OrderedModel @@ -1026,24 +1025,9 @@ def full_coverage(self) -> str: @property def neighbors(self) -> list[dict]: """Similiar tables and columns without filters""" - all_neighbors = [] - for neighbor in TableNeighbor.objects.filter(Q(table_a=self) | Q(table_b=self)).all(): - if neighbor.table_a == self: - table = neighbor.table_b - if neighbor.table_b == self: - table = neighbor.table_a - similarity_of_directory = neighbor.similarity_of_directory - similarity_of_popularity = table.dataset.popularity - all_neighbors.append( - { - "table_id": str(table.pk), - "table_name": table.name, - "dataset_id": str(table.dataset.id), - "dataset_name": table.dataset.name, - "score": round(similarity_of_directory, 2) + similarity_of_popularity, - } - ) - return sorted(all_neighbors, key=lambda item: item["score"])[::-1] + all_neighbors = [t.as_dict for t in TableNeighbor.objects.filter(table_a=self)] + all_neighbors = sorted(all_neighbors, key=lambda item: item["score"], reverse=True) + return all_neighbors @property def last_updated_at(self): @@ -1086,7 +1070,7 @@ def get_similarity_of_directory(self, other: "Table"): intersection = self_directories.intersection(other_directories) return 
len(intersection) / len(self_directories), intersection - def get_neighbors(self) -> list[dict]: + def gen_neighbors(self) -> list[dict]: self_columns = ( self.columns .filter(directory_primary_key__isnull=False) @@ -1185,6 +1169,7 @@ class TableNeighbor(BaseModel): similarity_of_area = models.FloatField(default=0) similarity_of_datetime = models.FloatField(default=0) similarity_of_directory = models.FloatField(default=0) + similarity_of_popularity = models.FloatField(default=0) class Meta: db_table = "table_neighbor" @@ -1195,11 +1180,22 @@ class Meta: ), ] + @property + def score(self): + return round(self.similarity_of_directory, 2) + round(self.similarity_of_popularity, 2) + + @property + def as_dict(self): + return { + "table_id": str(self.table_b.pk), + "table_name": self.table_b.name, + "dataset_id": str(self.table_b.dataset.pk), + "dataset_name": self.table_b.dataset.name, + "score": self.score, + } + def clean(self) -> None: errors = {} - if self.table_a.pk > self.table_b.pk: - errors["table_a"] = "Table primary keys should be ordered" - errors["table_b"] = "Table primary keys should be ordered" if self.table_a.pk == self.table_b.pk: errors["table_a"] = "Table neighbors A & B shouldn't be the same" errors["table_b"] = "Table neighbors A & B shouldn't be the same" diff --git a/bd_api/apps/api/v1/tasks.py b/bd_api/apps/api/v1/tasks.py index 54ad5253..e0fcfa53 100644 --- a/bd_api/apps/api/v1/tasks.py +++ b/bd_api/apps/api/v1/tasks.py @@ -121,7 +121,7 @@ def get_uncompressed_file_size(table: Table, bq_table: GBQTable) -> int | None: @production_task def update_table_neighbors_task(): for table in Table.objects.all(): - for neighbor in table.get_neighbors(): + for neighbor in table.gen_neighbors(): TableNeighbor.objects.update_or_create(**neighbor) diff --git a/bd_api/apps/schema.py b/bd_api/apps/schema.py index aaffd770..2785e128 100644 --- a/bd_api/apps/schema.py +++ b/bd_api/apps/schema.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from 
bd_api.apps.account.graphql import AccountMutation +from bd_api.apps.api.v1.graphql import APIQuery from bd_api.apps.payment.graphql import ( StripeCustomerMutation, StripePriceQuery, @@ -11,6 +12,7 @@ schema = build_schema( applications=["account", "v1"], extra_queries=[ + APIQuery, StripePriceQuery, ], extra_mutations=[