From 5c9f1a3c2616cecf456d8b2a6a3f13f8754d6077 Mon Sep 17 00:00:00 2001 From: Vinicius Date: Tue, 20 Feb 2024 12:15:39 -0300 Subject: [PATCH] fix: update temporal coverages --- bd_api/apps/api/v1/models.py | 389 ++++++++--------------------------- 1 file changed, 88 insertions(+), 301 deletions(-) diff --git a/bd_api/apps/api/v1/models.py b/bd_api/apps/api/v1/models.py index 733e04e4..ef1ca5ab 100644 --- a/bd_api/apps/api/v1/models.py +++ b/bd_api/apps/api/v1/models.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -import calendar -import json +from calendar import monthrange from collections import defaultdict +from dataclasses import dataclass from datetime import datetime from uuid import uuid4 @@ -16,43 +16,6 @@ from bd_api.custom.utils import check_kebab_case, check_snake_case -def to_str(value: str | None, zfill: int = 0): - """Parse and pad to string if not null""" - if value is None: - return None - return str(value).zfill(zfill) - - -def get_date_time(date_times): - """Returns a DateTimeRange object with the minimum start date and maximum end date""" - start_year, start_month, start_day = False, False, False - end_year, end_month, end_day = False, False, False - start_date, end_date = datetime(3000, 12, 31, 0, 0, 0), datetime(1, 1, 1, 0, 0, 0) - - for date_time in date_times: - if date_time.start_year and date_time.start_year < start_date.year: - start_year = date_time.start_year - if date_time.start_month and date_time.start_month < start_date.month: - start_month = date_time.start_month - if date_time.start_day and date_time.start_day < start_date.day: - start_day = date_time.start_day - if date_time.end_year and date_time.end_year > end_date.year: - end_year = date_time.end_year - if date_time.end_month and date_time.end_month > end_date.month: - end_month = date_time.end_month - if date_time.end_day and date_time.end_day > end_date.day: - end_day = date_time.end_day - - return DateTimeRange( - start_year=start_year, - start_month=start_month, - start_day=start_day, - end_year=end_year, - end_month=end_month, - end_day=end_day, - ) - - class Area(BaseModel): """Area model""" @@ -546,150 +509,19 @@ def get_success_url(self): @property def full_slug(self): - """Get the full slug or Dataset""" if self.organization.area.slug != "unknown": return f"{self.organization.area.slug}_{self.organization.slug}_{self.slug}" return f"{self.organization.slug}_{self.slug}" @property - def coverage(self): - """Get the temporal coverage of the dataset in the format YYYY-MM-DD - YYYY-MM-DD""" - tables = self.tables.all() - raw_data_sources = self.raw_data_sources.all() - information_requests = self.information_requests.all() - start_year, start_month, start_day = False, False, False - end_year, end_month, end_day = False, False, False - - start_date = datetime(3000, 12, 31, 0, 0, 0) - end_date = datetime(1, 1, 1, 0, 0, 0) - - # This must be refactored to avoid code duplication - for table in tables: - for coverage in table.coverages.all(): - date_times = DateTimeRange.objects.filter(coverage=coverage.pk) - if len(date_times) == 0: - continue - date_time = get_date_time(date_times) - - start_year = date_time.start_year if date_time.start_year else start_year - start_month = date_time.start_month if date_time.start_month else start_month - start_day = date_time.start_day if date_time.start_day else start_day - end_year = date_time.end_year if date_time.end_year else end_year - end_month = date_time.end_month if date_time.end_month else end_month - end_day = date_time.end_day if date_time.end_day else end_day - - new_start_date = datetime( - date_time.start_year or 3000, - date_time.start_month or 1, - date_time.start_day or 1, - ) - start_date = new_start_date if new_start_date < start_date else start_date - new_end_date = datetime( - date_time.end_year or 1, - date_time.end_month or 1, - date_time.end_day or 1, - ) - end_date = new_end_date if new_end_date > end_date else end_date - - for raw_data_source in raw_data_sources: - for coverage in raw_data_source.coverages.all(): - date_times = DateTimeRange.objects.filter(coverage=coverage.pk) - if len(date_times) == 0: - continue - date_time = get_date_time(date_times) - - start_year = date_time.start_year if date_time.start_year else start_year - start_month = date_time.start_month if date_time.start_month else start_month - start_day = date_time.start_day if date_time.start_day else start_day - end_year = date_time.end_year if date_time.end_year else end_year - end_month = date_time.end_month if date_time.end_month else end_month - end_day = date_time.end_day if date_time.end_day else end_day - - new_start_date = datetime( - date_time.start_year or 3000, - date_time.start_month or 1, - date_time.start_day or 1, - ) - start_date = new_start_date if new_start_date < start_date else start_date - new_end_date = datetime( - date_time.end_year or 1, - date_time.end_month or 1, - date_time.end_day or 1, - ) - end_date = new_end_date if new_end_date > end_date else end_date - - for information_request in information_requests: - for coverage in information_request.coverages.all(): - date_times = DateTimeRange.objects.filter(coverage=coverage.pk) - if len(date_times) == 0: - continue - date_time = get_date_time(date_times) - - start_year = date_time.start_year if date_time.start_year else start_year - start_month = date_time.start_month if date_time.start_month else start_month - start_day = date_time.start_day if date_time.start_day else start_day - end_year = date_time.end_year if date_time.end_year else end_year - end_month = date_time.end_month if date_time.end_month else end_month - end_day = date_time.end_day if date_time.end_day else end_day - - new_start_date = datetime( - date_time.start_year or 3000, - date_time.start_month or 1, - date_time.start_day or 1, - ) - start_date = new_start_date if new_start_date < start_date else start_date - new_end_date = datetime( - date_time.end_year or 1, - date_time.end_month or 1, - date_time.end_day or 1, - ) - end_date = new_end_date if new_end_date > end_date else end_date - - start = [] - end = [] - - if start_year and start_year < 3000 and start_date.year: - start.append(str(start_date.year)) - if start_month and start_date.month: - start.append(str(start_date.month).zfill(2)) - if start_day and start_date.day: - start.append(str(start_date.day).zfill(2)) - - if end_year and end_year > 1 and end_date.year: - end.append(str(end_date.year)) - if end_month and end_date.month: - end.append(str(end_date.month).zfill(2)) - if end_day and end_date.day: - end.append(str(end_date.day).zfill(2)) - - coverage_str = "" - if start: - coverage_str += "-".join(start) - if end: - coverage_str += " - " + "-".join(end) - - return coverage_str - - @property - def full_coverage(self) -> str: - """ - Returns the full temporal coverage of the dataset as a json string - representing an object with the 3 initial points of the coverage - The first point is the start of the open coverage, the second point is the - end of the open coverage and the third point is the end of closed coverage - When thera are only one type of coverage (open or closed) the second point - will represent the end of the entire coverage, with both the types being - the same - - Returns: - str: json string representing the full coverage - """ - full_coverage_dict = [ - # {"year": 2021, "month": 6, "type": "open"}, - # {"year": 2023, "month": 6, "type": "open"}, - # {"year": 2026, "month": 6, "type": "closed"}, + def coverage(self) -> dict: + """Temporal coverage of all related entities""" + entities = [ + *self.tables.all(), + *self.raw_data_sources.all(), + *self.information_requests.all(), ] - return json.dumps(full_coverage_dict) + return get_coverage(entities) @property def contains_tables(self): @@ -949,82 +781,14 @@ def contains_closed_data(self): return closed_data @property - def full_coverage(self) -> str: - """ - Returns the full temporal coverage of the table as a json string - representing an object with the 3 initial points of the coverage - The first point is the start of the open coverage, the second point is the - end of the open coverage and the third point is the end of closed coverage - When thera are only one type of coverage (open or closed) the second point - will represent the end of the entire coverage, with both the types being - the same - - Returns: - str: json string representing the full coverage - """ - # First area of all coverages - thus must be changed to get all areas - try: - first_area = self.coverages.first().area - except AttributeError: - return "" - # First open coverage of a table - it's an open coverage for now - try: - first_open_datetime_range = ( - self.coverages.filter(area=first_area, is_closed=False) - .first() - .datetime_ranges.order_by("start_year", "start_month", "start_day") - .first() - ) - except AttributeError: - first_open_datetime_range = None - # First closed coverage of a table - it's a closed coverage for now - try: - first_closed_datetime_range = ( - self.coverages.filter(area=first_area, is_closed=True) - .first() - .datetime_ranges.order_by("start_year", "start_month", "start_day") - .first() - ) - except AttributeError: - first_closed_datetime_range = None - full_coverage = [] - if first_open_datetime_range: - full_coverage.append( - { - "year": to_str(first_open_datetime_range.start_year), - "month": to_str(first_open_datetime_range.start_month, 2), - "day": to_str(first_open_datetime_range.start_day, 2), - "type": "open", - } - ) - full_coverage.append( - { - "year": to_str(first_open_datetime_range.end_year, 2), - "month": to_str(first_open_datetime_range.end_month, 2), - "day": to_str(first_open_datetime_range.end_day, 2), - "type": "open", - } - ) - if first_closed_datetime_range: - if not first_open_datetime_range: - full_coverage.append( - { - "year": to_str(first_closed_datetime_range.start_year), - "month": to_str(first_closed_datetime_range.start_month, 2), - "day": to_str(first_closed_datetime_range.start_day, 2), - "type": "closed", - } - ) - full_coverage.append( - { - "year": to_str(first_closed_datetime_range.end_year), - "month": to_str(first_closed_datetime_range.end_month, 2), - "day": to_str(first_closed_datetime_range.end_day, 2), - "type": "closed", - } - ) + def coverage(self) -> dict: + """Temporal coverage""" + return get_coverage([self]) - return json.dumps(full_coverage) + @property + def full_coverage(self) -> dict: + """Temporal coverage steps""" + return get_full_coverage([self]) @property def neighbors(self) -> list[dict]: @@ -1221,53 +985,12 @@ class Meta: ordering = ["name"] @property - def full_coverage(self) -> str: - """ - Returns the coverage of the column if it exists, - otherwise returns the coverage of the table - Currently returns the first coverage, but this - should be changed to return the - full coverage of the column, as in table coverage - - Returns: - str: coverage of the column - a dumped list of dicts [start_date, end_date] - """ - - coverages = self.coverages.all() - column_full_coverage = [] - - if ( - len(coverages) == 0 - or not coverages[0].datetime_ranges.exists() - or coverages[0].datetime_ranges.first().start_year is None - ): - """ - At the moment, only one coverage exists per column - No coverage for column, using table coverage - """ - table_full_coverage = json.loads(self.table.full_coverage) - temporal_coverage_start = table_full_coverage[0] - temporal_coverage_end = table_full_coverage[-1] - elif coverages[0].datetime_ranges.first().start_year is not None: - dt_range = coverages[0].datetime_ranges.first() - temporal_coverage_start = { - "year": to_str(dt_range.start_year), - "month": to_str(dt_range.start_month, 2), - "day": to_str(dt_range.start_day, 2), - } - temporal_coverage_end = { - "year": to_str(dt_range.end_year), - "month": to_str(dt_range.end_month, 2), - "day": to_str(dt_range.end_day, 2), - } - else: - temporal_coverage_start = {"year": "", "month": "", "day": ""} - temporal_coverage_end = {"year": "", "month": "", "day": ""} - - column_full_coverage.append(temporal_coverage_start) - column_full_coverage.append(temporal_coverage_end) - - return json.dumps(column_full_coverage) + def coverage(self) -> dict: + """Temporal coverage of column if exists, if not table coverage""" + coverage = get_coverage([self]) + if not coverage["since"] and not coverage["until"]: + return self.table.coverage + return coverage def clean(self) -> None: """Clean method for Column model""" @@ -1674,6 +1397,7 @@ class DateTimeRange(BaseModel): interval = models.IntegerField(blank=True, null=True) is_closed = models.BooleanField("Is Closed", default=False) + graphql_fields_blacklist = BaseModel.graphql_fields_blacklist + ["since", "until"] graphql_nested_filter_fields_whitelist = ["id"] def __str__(self): @@ -1783,14 +1507,14 @@ def clean(self) -> None: errors["start_year"] = ["Start year or end year are invalid"] if self.start_day: - max_day = calendar.monthrange(self.start_year, self.start_month)[1] + max_day = monthrange(self.start_year, self.start_month)[1] if self.start_day > max_day: errors["start_day"] = [ f"{self.start_month} does not have {self.start_day} days in {self.start_year}" ] if self.end_day: - max_day = calendar.monthrange(self.end_year, self.end_month)[1] + max_day = monthrange(self.end_year, self.end_month)[1] if self.end_day > max_day: errors["end_day"] = [ f"{self.end_month} does not have {self.end_day} days in {self.end_year}" @@ -1928,3 +1652,66 @@ def clean(self) -> None: "'column', 'key, 'raw_data_source', 'information_request' must be set." ) return super().clean() + + +@dataclass +class Date: + dt: datetime + str: str + + +def get_coverage(entities: list) -> dict: + """Get maximum datetime coverage of entities + + Case: + - Table A has data with dates between [X, Y] + """ + since = Date(datetime.max, None) + until = Date(datetime.min, None) + for entity in entities: + for cov in entity.coverages.all(): + for dt in cov.datetime_ranges.all(): + if dt.since and dt.since < since.dt: + since.dt = dt.since + since.str = dt.since_str + if dt.until and dt.until > until.dt: + until.dt = dt.until + until.str = dt.until_str + return {"since": since.str, "until": until.str} + + +def get_full_coverage(entities: list) -> dict: + """Get datetime coverage steps of entities + + Cases: + - Table A has data with dates between [X, Y], where [X, Y] is open + - Table A has data with dates between [X, Y], where [X, Y] is closed + - Table A has data with dates between [X, Y, Z], where [X, Y] is open and [Y, Z] is closed + """ + open_since = Date(datetime.max, None) + open_until = Date(datetime.min, None) + paid_since = Date(datetime.max, None) + paid_until = Date(datetime.min, None) + for entity in entities: + for cov in entity.coverages.all(): + for dt in cov.datetime_ranges.all(): + if not cov.is_closed: + if dt.since and dt.since < open_since.dt: + open_since.dt = dt.since + open_since.str = dt.since_str + if dt.until and dt.until > open_until.dt: + open_until.dt = dt.until + open_until.str = dt.until_str + else: + if dt.since and dt.since < paid_since.dt: + paid_since.dt = dt.since + paid_since.str = dt.since_str + if dt.until and dt.until > paid_until.dt: + paid_until.dt = dt.until + paid_until.str = dt.until_str + return { + "open_since": open_since.str, + "open_until": open_until.str, + "paid_since": paid_since.str, + "paid_until": paid_until.str, + }