From d9aecd0117b9f36921ce1b0384ce70a43f7b17a6 Mon Sep 17 00:00:00 2001 From: Vinicius Date: Tue, 20 Feb 2024 12:15:39 -0300 Subject: [PATCH] fix: update temporal coverages --- bd_api/apps/api/v1/models.py | 271 +++++++---------------------------- 1 file changed, 55 insertions(+), 216 deletions(-) diff --git a/bd_api/apps/api/v1/models.py b/bd_api/apps/api/v1/models.py index a6afeae9..2c14c962 100644 --- a/bd_api/apps/api/v1/models.py +++ b/bd_api/apps/api/v1/models.py @@ -23,36 +23,6 @@ def to_str(value: str | None, zfill: int = 0): return str(value).zfill(zfill) -def get_date_time(date_times): - """Returns a DateTimeRange object with the minimum start date and maximum end date""" - start_year, start_month, start_day = False, False, False - end_year, end_month, end_day = False, False, False - start_date, end_date = datetime(3000, 12, 31, 0, 0, 0), datetime(1, 1, 1, 0, 0, 0) - - for date_time in date_times: - if date_time.start_year and date_time.start_year < start_date.year: - start_year = date_time.start_year - if date_time.start_month and date_time.start_month < start_date.month: - start_month = date_time.start_month - if date_time.start_day and date_time.start_day < start_date.day: - start_day = date_time.start_day - if date_time.end_year and date_time.end_year > end_date.year: - end_year = date_time.end_year - if date_time.end_month and date_time.end_month > end_date.month: - end_month = date_time.end_month - if date_time.end_day and date_time.end_day > end_date.day: - end_day = date_time.end_day - - return DateTimeRange( - start_year=start_year, - start_month=start_month, - start_day=start_day, - end_year=end_year, - end_month=end_month, - end_day=end_day, - ) - - class Area(BaseModel): """Area model""" @@ -546,150 +516,19 @@ def get_success_url(self): @property def full_slug(self): - """Get the full slug or Dataset""" if self.organization.area.slug != "unknown": return f"{self.organization.area.slug}_{self.organization.slug}_{self.slug}" return f"{self.organization.slug}_{self.slug}" @property - def coverage(self): - """Get the temporal coverage of the dataset in the format YYYY-MM-DD - YYYY-MM-DD""" - tables = self.tables.all() - raw_data_sources = self.raw_data_sources.all() - information_requests = self.information_requests.all() - start_year, start_month, start_day = False, False, False - end_year, end_month, end_day = False, False, False - - start_date = datetime(3000, 12, 31, 0, 0, 0) - end_date = datetime(1, 1, 1, 0, 0, 0) - - # This must be refactored to avoid code duplication - for table in tables: - for coverage in table.coverages.all(): - date_times = DateTimeRange.objects.filter(coverage=coverage.pk) - if len(date_times) == 0: - continue - date_time = get_date_time(date_times) - - start_year = date_time.start_year if date_time.start_year else start_year - start_month = date_time.start_month if date_time.start_month else start_month - start_day = date_time.start_day if date_time.start_day else start_day - end_year = date_time.end_year if date_time.end_year else end_year - end_month = date_time.end_month if date_time.end_month else end_month - end_day = date_time.end_day if date_time.end_day else end_day - - new_start_date = datetime( - date_time.start_year or 3000, - date_time.start_month or 1, - date_time.start_day or 1, - ) - start_date = new_start_date if new_start_date < start_date else start_date - new_end_date = datetime( - date_time.end_year or 1, - date_time.end_month or 1, - date_time.end_day or 1, - ) - end_date = new_end_date if new_end_date > end_date else end_date - - for raw_data_source in raw_data_sources: - for coverage in raw_data_source.coverages.all(): - date_times = DateTimeRange.objects.filter(coverage=coverage.pk) - if len(date_times) == 0: - continue - date_time = get_date_time(date_times) - - start_year = date_time.start_year if date_time.start_year else start_year - start_month = date_time.start_month if date_time.start_month else start_month - start_day = date_time.start_day if date_time.start_day else start_day - end_year = date_time.end_year if date_time.end_year else end_year - end_month = date_time.end_month if date_time.end_month else end_month - end_day = date_time.end_day if date_time.end_day else end_day - - new_start_date = datetime( - date_time.start_year or 3000, - date_time.start_month or 1, - date_time.start_day or 1, - ) - start_date = new_start_date if new_start_date < start_date else start_date - new_end_date = datetime( - date_time.end_year or 1, - date_time.end_month or 1, - date_time.end_day or 1, - ) - end_date = new_end_date if new_end_date > end_date else end_date - - for information_request in information_requests: - for coverage in information_request.coverages.all(): - date_times = DateTimeRange.objects.filter(coverage=coverage.pk) - if len(date_times) == 0: - continue - date_time = get_date_time(date_times) - - start_year = date_time.start_year if date_time.start_year else start_year - start_month = date_time.start_month if date_time.start_month else start_month - start_day = date_time.start_day if date_time.start_day else start_day - end_year = date_time.end_year if date_time.end_year else end_year - end_month = date_time.end_month if date_time.end_month else end_month - end_day = date_time.end_day if date_time.end_day else end_day - - new_start_date = datetime( - date_time.start_year or 3000, - date_time.start_month or 1, - date_time.start_day or 1, - ) - start_date = new_start_date if new_start_date < start_date else start_date - new_end_date = datetime( - date_time.end_year or 1, - date_time.end_month or 1, - date_time.end_day or 1, - ) - end_date = new_end_date if new_end_date > end_date else end_date - - start = [] - end = [] - - if start_year and start_year < 3000 and start_date.year: - start.append(str(start_date.year)) - if start_month and start_date.month: - start.append(str(start_date.month).zfill(2)) - if start_day and start_date.day: - start.append(str(start_date.day).zfill(2)) - - if end_year and end_year > 1 and end_date.year: - end.append(str(end_date.year)) - if end_month and end_date.month: - end.append(str(end_date.month).zfill(2)) - if end_day and end_date.day: - end.append(str(end_date.day).zfill(2)) - - coverage_str = "" - if start: - coverage_str += "-".join(start) - if end: - coverage_str += " - " + "-".join(end) - - return coverage_str - - @property - def full_coverage(self) -> str: - """ - Returns the full temporal coverage of the dataset as a json string - representing an object with the 3 initial points of the coverage - The first point is the start of the open coverage, the second point is the - end of the open coverage and the third point is the end of closed coverage - When thera are only one type of coverage (open or closed) the second point - will represent the end of the entire coverage, with both the types being - the same - - Returns: - str: json string representing the full coverage - """ - full_coverage_dict = [ - # {"year": 2021, "month": 6, "type": "open"}, - # {"year": 2023, "month": 6, "type": "open"}, - # {"year": 2026, "month": 6, "type": "closed"}, + def coverage(self) -> dict: + """Coverage of all related entities""" + entities = [ + *self.tables.all(), + *self.raw_data_sources.all(), + *self.information_requests.all(), ] - return json.dumps(full_coverage_dict) + return get_coverage(entities) @property def contains_tables(self): @@ -949,7 +788,11 @@ def contains_closed_data(self): return closed_data @property - def full_coverage(self) -> str: + def coverage(self) -> dict: + return get_coverage([self]) + + @property + def full_coverage(self): """ Returns the full temporal coverage of the table as a json string representing an object with the 3 initial points of the coverage @@ -1221,53 +1064,12 @@ class Meta: ordering = ["name"] @property - def full_coverage(self) -> str: - """ - Returns the coverage of the column if it exists, - otherwise returns the coverage of the table - Currently returns the first coverage, but this - should be changed to return the - full coverage of the column, as in table coverage - - Returns: - str: coverage of the column - a dumped list of dicts [start_date, end_date] - """ - - coverages = self.coverages.all() - column_full_coverage = [] - - if ( - len(coverages) == 0 - or not coverages[0].datetime_ranges.exists() - or coverages[0].datetime_ranges.first().start_year is None - ): - """ - At the moment, only one coverage exists per column - No coverage for column, using table coverage - """ - table_full_coverage = json.loads(self.table.full_coverage) - temporal_coverage_start = table_full_coverage[0] - temporal_coverage_end = table_full_coverage[-1] - elif coverages[0].datetime_ranges.first().start_year is not None: - dt_range = coverages[0].datetime_ranges.first() - temporal_coverage_start = { - "year": to_str(dt_range.start_year), - "month": to_str(dt_range.start_month, 2), - "day": to_str(dt_range.start_day, 2), - } - temporal_coverage_end = { - "year": to_str(dt_range.end_year), - "month": to_str(dt_range.end_month, 2), - "day": to_str(dt_range.end_day, 2), - } - else: - temporal_coverage_start = {"year": "", "month": "", "day": ""} - temporal_coverage_end = {"year": "", "month": "", "day": ""} - - column_full_coverage.append(temporal_coverage_start) - column_full_coverage.append(temporal_coverage_end) - - return json.dumps(column_full_coverage) + def coverage(self) -> dict: + """Coverage of column if exists, if not table coverage""" + coverage = get_coverage([self]) + if not coverage["since"] and not coverage["until"]: + return self.table.coverage + return coverage def clean(self) -> None: """Clean method for Column model""" @@ -1715,6 +1517,15 @@ def since(self): self.start_second or 0, ) + @property + def since_str(self): + if self.start_year and self.start_month and self.start_day: + return self.since.strftime("%Y-%m-%d") + if self.start_year and self.start_month: + return self.since.strftime("%Y-%m") + if self.start_year: + return self.since.strftime("%Y") + @property def until(self): if self.end_year: @@ -1727,6 +1538,15 @@ def until(self): self.end_second or 0, ) + @property + def until_str(self): + if self.end_year and self.end_month and self.end_day: + return self.until.strftime("%Y-%m-%d") + if self.end_year and self.end_month: + return self.until.strftime("%Y-%m") + if self.end_year: + return self.until.strftime("%Y") + def get_similarity_of_datetime(self, other: "DateTimeRange"): if not self.since: return 0 @@ -1918,3 +1738,22 @@ def clean(self) -> None: "'column', 'key, 'raw_data_source', 'information_request' must be set." ) return super().clean() + + +def get_coverage(entities: list) -> dict: + """Get maximum datetime coverages of entities""" + + since_str = None + until_str = None + since = datetime.max + until = datetime.min + for entity in entities: + for cov in entity.coverages.all(): + for dt in cov.datetime_ranges.all(): + if dt.since and dt.since < since: + since = dt.since + since_str = dt.since_str + if dt.until and dt.until > until: + until = dt.until + until_str = dt.until_str + return {"since": since_str, "until": until_str}