Skip to content

Commit

Permalink
feat: spatial coverage in search, admin
Browse files Browse the repository at this point in the history
  • Loading branch information
rdahis committed Nov 3, 2024
1 parent 8dad202 commit ba85bb2
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 66 deletions.
10 changes: 8 additions & 2 deletions backend/apps/api/v1/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ class DatasetAdmin(OrderedInlineModelAdminMixin, TabbedTranslationAdmin):
readonly_fields = [
"id",
"full_slug",
"coverage",
"spatial_coverage",
"temporal_coverage",
"contains_tables",
"contains_raw_data_sources",
"contains_information_requests",
Expand All @@ -508,7 +509,8 @@ class DatasetAdmin(OrderedInlineModelAdminMixin, TabbedTranslationAdmin):
list_display = [
"name",
"organization",
"coverage",
"spatial_coverage",
"temporal_coverage",
"related_objects",
"page_views",
"created_at",
Expand Down Expand Up @@ -548,6 +550,8 @@ class TableAdmin(OrderedInlineModelAdminMixin, TabbedTranslationAdmin):
"partitions",
"created_at",
"updated_at",
"spatial_coverage",
"full_temporal_coverage",
"coverage_datetime_units",
]
search_fields = [
Expand Down Expand Up @@ -637,6 +641,8 @@ class ColumnAdmin(TabbedTranslationAdmin):
readonly_fields = [
"id",
"order",
"spatial_coverage",
"temporal_coverage",
]
search_fields = ["name", "table__name"]
inlines = [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.16 on 2024-11-03 01:29

from django.db import migrations, models
import django.db.models.deletion


# Schema migration for the "v1" app: adds three optional (nullable, blank-able)
# fields -- entity, level and parent -- to the Area model.
class Migration(migrations.Migration):

# Applied after migration 0036 of the same app.
dependencies = [
('v1', '0036_datetimerange_units'),
]

operations = [
# Optional link from an area to an Entity; PROTECT refuses deletion of an
# entity that is still referenced. Reverse accessor on Entity: "areas".
migrations.AddField(
model_name='area',
name='entity',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.PROTECT, related_name='areas', to='v1.entity'),
),
# Optional integer level of the area.
migrations.AddField(
model_name='area',
name='level',
field=models.IntegerField(blank=True, null=True),
),
# Optional self-referential link to another Area; PROTECT refuses deletion
# of an area that still has children. Reverse accessor: "children".
migrations.AddField(
model_name='area',
name='parent',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.PROTECT, related_name='children', to='v1.area'),
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.16 on 2024-11-03 01:37

from django.db import migrations


# Schema migration for the "v1" app: renames Area.level to
# Area.administrative_level.
class Migration(migrations.Migration):

# Applied after migration 0037, which is named for the area entity/level/parent fields.
dependencies = [
('v1', '0037_area_entity_area_level_area_parent'),
]

operations = [
# Rename only; the operation specifies no change to the field definition.
migrations.RenameField(
model_name='area',
old_name='level',
new_name='administrative_level',
),
]
155 changes: 133 additions & 22 deletions backend/apps/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ class Area(BaseModel):
id = models.UUIDField(primary_key=True, default=uuid4)
slug = models.SlugField(unique=True)
name = models.CharField(max_length=255, blank=False, null=False)
administrative_level = models.IntegerField(
null=True,
blank=True,
choices=[
(0, '0'),
(1, '1'),
(2, '2'),
(3, '3'),
]
)
entity = models.ForeignKey(
"Entity",
on_delete=models.PROTECT,
related_name="areas",
null=True,
blank=True,
limit_choices_to={'category__slug': 'spatial'}
)
parent = models.ForeignKey(
"Area",
on_delete=models.PROTECT,
related_name="children",
null=True,
blank=True,
)

graphql_nested_filter_fields_whitelist = ["id"]

Expand All @@ -35,6 +60,27 @@ class Meta:
verbose_name_plural = "Areas"
ordering = ["name"]

def clean(self):
    """Check the area's level, entity and parent for consistency.

    Rules enforced:
    - a level that is set must be one of 0, 1, 2 or 3;
    - an entity that is set must belong to the "spatial" category;
    - when a parent other than the "world" area is set, this area needs a
      level, the parent needs a level, and the parent's level must be
      exactly one less than this area's level.

    Raises:
        ValidationError: with one message per offending field.
    """
    problems = {}
    level = self.administrative_level

    if level is not None and level not in [0, 1, 2, 3]:
        problems['administrative_level'] = 'Administrative level must be 0, 1, 2, or 3'

    if self.entity and self.entity.category.slug != 'spatial':
        problems['entity'] = 'Entity must have category "spatial"'

    parent = self.parent
    # The "world" area is exempt from the level hierarchy checks.
    if parent and parent.slug != 'world':
        if level is None:
            problems['administrative_level'] = 'Administrative level is required when parent is set'
        elif parent.administrative_level is None:
            problems['parent'] = 'Parent must have an administrative level'
        elif parent.administrative_level != level - 1:
            problems['parent'] = 'Parent must have administrative level exactly one level above'

    if problems:
        raise ValidationError(problems)
    return super().clean()


class Coverage(BaseModel):
"""
Expand Down Expand Up @@ -517,22 +563,32 @@ def popularity(self):
return log10(self.page_views)

@property
def coverage(self) -> dict:
def temporal_coverage(self) -> dict:
"""Temporal coverage of all related entities"""
resources = [
*self.tables.all(),
*self.raw_data_sources.all(),
*self.information_requests.all(),
]
coverage = get_coverage(resources)
if coverage["start"] and coverage["end"]:
return f"{coverage['start']} - {coverage['end']}"
if coverage["start"]:
return f"{coverage['start']}"
if coverage["end"]:
return f"{coverage['end']}"
temporal_coverage = get_temporal_coverage(resources)
if temporal_coverage["start"] and temporal_coverage["end"]:
return f"{temporal_coverage['start']} - {temporal_coverage['end']}"
if temporal_coverage["start"]:
return f"{temporal_coverage['start']}"
if temporal_coverage["end"]:
return f"{temporal_coverage['end']}"
return ""

@property
def spatial_coverage(self) -> list[str]:
    """Sorted spatial coverage merged over every resource of the dataset.

    The resources considered are the dataset's tables, raw data sources
    and information requests, in that order.
    """
    related = []
    for manager in (self.tables, self.raw_data_sources, self.information_requests):
        related.extend(manager.all())
    return sorted(get_spatial_coverage(related))

@property
def entities(self) -> list[dict]:
"""Entity of all related resources"""
Expand Down Expand Up @@ -931,14 +987,19 @@ def contains_closed_data(self):
return False

@property
def coverage(self) -> dict:
def temporal_coverage(self) -> dict:
"""Temporal coverage"""
return get_coverage([self])
return get_temporal_coverage([self])

@property
def full_coverage(self) -> dict:
def full_temporal_coverage(self) -> dict:
"""Temporal coverage steps"""
return get_full_coverage([self])
return get_full_temporal_coverage([self])

@property
def spatial_coverage(self) -> list[str]:
    """Sorted area slugs gathered from this table's coverages."""
    area_slugs = get_spatial_coverage([self])
    return sorted(area_slugs)

@property
def neighbors(self) -> list[dict]:
Expand Down Expand Up @@ -1241,17 +1302,25 @@ class Meta:
ordering = ["name"]

@property
def coverage(self) -> dict:
def temporal_coverage(self) -> dict:
"""Temporal coverage of column if exists, if not table coverage"""
coverage = get_coverage([self])
temporal_coverage = get_temporal_coverage([self])
fallback = defaultdict(lambda: None)
if not coverage["start"] or not coverage["end"]:
fallback = self.table.coverage
if not temporal_coverage["start"] or not temporal_coverage["end"]:
fallback = self.table.temporal_coverage
return {
"start": coverage["start"] or fallback["start"],
"end": coverage["end"] or fallback["end"],
"start": temporal_coverage["start"] or fallback["start"],
"end": temporal_coverage["end"] or fallback["end"],
}

@property
def spatial_coverage(self) -> list[str]:
    """Area slugs of this column's coverages, or the parent table's when the column has none."""
    own_areas = get_spatial_coverage([self])
    return own_areas if own_areas else get_spatial_coverage([self.table])

@property
def dir_column(self):
"""Column of directory table and column"""
Expand Down Expand Up @@ -1898,8 +1967,8 @@ def as_dict(self):
return {"date": self.str, "type": self.type}


def get_coverage(resources: list) -> dict:
"""Get maximum datetime coverage of resources
def get_temporal_coverage(resources: list) -> dict:
"""Get maximum temporal coverage of resources
Case:
- Table A has data with dates between [X, Y]
Expand All @@ -1918,8 +1987,8 @@ def get_coverage(resources: list) -> dict:
return {"start": since.str, "end": until.str}


def get_full_coverage(resources: list) -> dict:
"""Get datetime coverage steps of resources
def get_full_temporal_coverage(resources: list) -> dict:
"""Get temporal coverage steps of resources
Cases:
- Table A has data with dates between [X, Y], where [X, Y] is open
Expand Down Expand Up @@ -1957,3 +2026,45 @@ def get_full_coverage(resources: list) -> dict:
return [open_since.as_dict, open_until.as_dict]
if paid_since.str and paid_until.str:
return [paid_since.as_dict, paid_until.as_dict]

def get_spatial_coverage(resources: list) -> list:
    """Return the sorted, unique area slugs covered by ``resources``.

    Only the broadest area of each branch is kept: a slug is dropped when any
    of its underscore-delimited prefixes is itself present (``us_ny`` is
    dropped when ``us`` is present). The slug ``world`` subsumes everything.

    Examples (collected slugs -> result):
    - [br_mg_3100104, br_mg_3100104] -> [br_mg_3100104]
    - [br_mg_3100104, br_sp_3500105] -> [br_mg_3100104, br_sp_3500105]
    - [br_mg, us_ny, us] -> [br_mg, us]
    - [br_mg, world, us] -> [world]
    - no areas at all -> []

    Args:
        resources: objects exposing ``coverages.all()``; each coverage has an
            ``area`` attribute that is either falsy or carries a ``slug``.

    Returns:
        Alphabetically sorted list of slugs; empty when no coverage has an area.
    """
    # Unique slugs over every coverage of every resource; coverages without
    # an area are ignored.
    all_areas = {
        coverage.area.slug
        for resource in resources
        for coverage in resource.coverages.all()
        if coverage.area
    }

    if not all_areas:
        return []

    # 'world' encompasses every other area.
    if 'world' in all_areas:
        return ['world']

    def has_ancestor(slug):
        """Tell whether any proper prefix of ``slug`` is itself a collected area."""
        parts = slug.split('_')
        return any('_'.join(parts[:i]) in all_areas for i in range(1, len(parts)))

    return sorted(slug for slug in all_areas if not has_ancestor(slug))
14 changes: 11 additions & 3 deletions backend/apps/api/v1/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ class RawDataSource(BaseModel):
id: str


# Schema for one spatial-coverage entry; used as the element type of
# Dataset.spatial_coverage in this module.
class SpatialCoverage(BaseModel):
# Slug of the covered area.
slug: str
# Area names; the pt/en/es suffixes presumably denote Portuguese, English
# and Spanish -- confirm against the code that builds these entries.
name_pt: str
name_en: str
name_es: str


class TemporalCoverage(BaseModel):
start_date: str
end_date: str
Expand Down Expand Up @@ -67,11 +74,12 @@ class Dataset(BaseModel):
contains_open_data: bool
contains_closed_data: bool
#
tags: List[Tag]
themes: List[Theme]
entities: List[Entity]
temporal_coverage: List[str]
organization: List[Organization]
temporal_coverage: List[str]
spatial_coverage: List[SpatialCoverage]
tags: List[Tag]
entities: List[Entity]


class Facet(BaseModel):
Expand Down
31 changes: 25 additions & 6 deletions backend/apps/api/v1/search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable):
null=True,
indexed=False,
)

spatial_coverage = indexes.MultiValueField(
model_attr="spatial_coverage",
null=True,
faceted=True,
indexed=True,
)

temporal_coverage = indexes.MultiValueField(
model_attr="temporal_coverage",
null=True,
faceted=True,
indexed=True,
)


table_id = indexes.MultiValueField(
model_attr="tables__pk",
Expand Down Expand Up @@ -213,12 +228,7 @@ class DatasetIndex(indexes.SearchIndex, indexes.Indexable):
faceted=True,
indexed=False,
)
temporal_coverage = indexes.MultiValueField(
default="",
model_attr="coverage",
indexed=False,
)


contains_open_data = indexes.BooleanField(
model_attr="contains_open_data",
indexed=False,
Expand Down Expand Up @@ -294,3 +304,12 @@ def load_all_queryset(self, using=None):

def prepare_organization_picture(self, obj):
return getattr(obj.organization.picture, "name", None)

def get_field_mapping(self):
    """Return the parent class's field mapping with a keyword entry for spatial_coverage.

    The entry for ``spatial_coverage`` is set to a stored, indexed
    ``keyword`` field, replacing whatever the parent mapping held for it.
    """
    field_mapping = super().get_field_mapping()
    field_mapping['spatial_coverage'] = dict(type='keyword', store=True, index=True)
    return field_mapping
Loading

0 comments on commit ba85bb2

Please sign in to comment.