Skip to content

Commit

Permalink
fix: update temporal coverages
Browse files Browse the repository at this point in the history
  • Loading branch information
vncsna committed Feb 20, 2024
1 parent 075e8a5 commit d9aecd0
Showing 1 changed file with 55 additions and 216 deletions.
271 changes: 55 additions & 216 deletions bd_api/apps/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,36 +23,6 @@ def to_str(value: str | None, zfill: int = 0):
return str(value).zfill(zfill)


def get_date_time(date_times):
    """Returns a DateTimeRange object with the minimum start date and maximum end date

    The start fields are copied from the range with the earliest start and the
    end fields from the range with the latest end. Ranges without a start year
    (or end year) are ignored for the respective bound, and a missing month or
    day is ordered as 1. Any field that cannot be determined is set to
    ``False``.

    Args:
        date_times: iterable of objects exposing ``start_year``,
            ``start_month``, ``start_day``, ``end_year``, ``end_month`` and
            ``end_day`` attributes.

    Returns:
        DateTimeRange: a new instance carrying the combined bounds.
    """

    def start_key(date_time):
        return (date_time.start_year, date_time.start_month or 1, date_time.start_day or 1)

    def end_key(date_time):
        return (date_time.end_year, date_time.end_month or 1, date_time.end_day or 1)

    # Materialise once so the input is iterated a single time.
    ranges = list(date_times)

    # The previous implementation compared every field against fixed sentinel
    # values that were never updated, so the last range seen won instead of
    # the true minimum/maximum, and some values could never be recorded.
    earliest = min((dt for dt in ranges if dt.start_year), key=start_key, default=None)
    latest = max((dt for dt in ranges if dt.end_year), key=end_key, default=None)

    return DateTimeRange(
        start_year=(earliest.start_year or False) if earliest else False,
        start_month=(earliest.start_month or False) if earliest else False,
        start_day=(earliest.start_day or False) if earliest else False,
        end_year=(latest.end_year or False) if latest else False,
        end_month=(latest.end_month or False) if latest else False,
        end_day=(latest.end_day or False) if latest else False,
    )


class Area(BaseModel):
"""Area model"""

Expand Down Expand Up @@ -546,150 +516,19 @@ def get_success_url(self):

@property
def full_slug(self):
    """Build the full slug: ``<area>_<organization>_<slug>``.

    The area prefix is left out when the organization's area slug is
    ``"unknown"``.
    """
    organization = self.organization
    base_slug = f"{organization.slug}_{self.slug}"
    area_slug = organization.area.slug
    if area_slug == "unknown":
        return base_slug
    return f"{area_slug}_{base_slug}"

@property
def coverage(self):
    """Get the temporal coverage of the dataset in the format YYYY-MM-DD - YYYY-MM-DD

    Walks the coverages of every table, raw data source and information
    request of the dataset, combines the datetime ranges of each coverage
    with ``get_date_time`` and keeps the earliest start and latest end found.

    Returns:
        str: ``"<start> - <end>"`` where each side contains only the
        components (year, month, day) that were found. The ``" - <end>"``
        part is omitted when no end is known, and the result is an empty
        string when neither side is known.
    """
    tables = self.tables.all()
    raw_data_sources = self.raw_data_sources.all()
    information_requests = self.information_requests.all()
    start_year, start_month, start_day = False, False, False
    end_year, end_month, end_day = False, False, False

    # Sentinels: any real start is earlier and any real end is later.
    start_date = datetime(3000, 12, 31, 0, 0, 0)
    end_date = datetime(1, 1, 1, 0, 0, 0)

    # One loop handles the three kinds of related entities, visited in the
    # same order as before: tables, raw data sources, information requests.
    for entities in (tables, raw_data_sources, information_requests):
        for entity in entities:
            for coverage in entity.coverages.all():
                date_times = DateTimeRange.objects.filter(coverage=coverage.pk)
                if not date_times:
                    continue
                date_time = get_date_time(date_times)

                # These act as "has this component been seen" markers; the
                # values that are printed come from start_date / end_date.
                start_year = date_time.start_year or start_year
                start_month = date_time.start_month or start_month
                start_day = date_time.start_day or start_day
                end_year = date_time.end_year or end_year
                end_month = date_time.end_month or end_month
                end_day = date_time.end_day or end_day

                new_start_date = datetime(
                    date_time.start_year or 3000,
                    date_time.start_month or 1,
                    date_time.start_day or 1,
                )
                start_date = min(start_date, new_start_date)
                new_end_date = datetime(
                    date_time.end_year or 1,
                    date_time.end_month or 1,
                    date_time.end_day or 1,
                )
                end_date = max(end_date, new_end_date)

    # Each component is only emitted when the coarser one before it was.
    start = []
    if start_year and start_year < 3000:
        start.append(str(start_date.year))
        if start_month:
            start.append(str(start_date.month).zfill(2))
            if start_day:
                start.append(str(start_date.day).zfill(2))

    end = []
    if end_year and end_year > 1:
        end.append(str(end_date.year))
        if end_month:
            end.append(str(end_date.month).zfill(2))
            if end_day:
                end.append(str(end_date.day).zfill(2))

    coverage_str = "-".join(start)
    if end:
        # NOTE(review): when only an end is known the result starts with
        # " - "; kept as-is because callers may rely on it - confirm.
        coverage_str += " - " + "-".join(end)

    return coverage_str

@property
def full_coverage(self) -> str:
"""
Return the full temporal coverage of the dataset as a JSON string
representing an object with the 3 initial points of the coverage.
The first point is the start of the open coverage, the second point is the
end of the open coverage and the third point is the end of the closed coverage.
When there is only one type of coverage (open or closed), the second point
represents the end of the entire coverage, with both points having
the same type.
Returns:
str: JSON string representing the full coverage
"""
full_coverage_dict = [
# {"year": 2021, "month": 6, "type": "open"},
# {"year": 2023, "month": 6, "type": "open"},
# {"year": 2026, "month": 6, "type": "closed"},
# NOTE(review): the lines below appear to belong to a separate `coverage`
# definition that the diff view interleaved with the list literal above;
# confirm against the real file before editing this span.
def coverage(self) -> dict:
"""Coverage of all related entities"""
entities = [
*self.tables.all(),
*self.raw_data_sources.all(),
*self.information_requests.all(),
]
return json.dumps(full_coverage_dict)
return get_coverage(entities)

@property
def contains_tables(self):
Expand Down Expand Up @@ -949,7 +788,11 @@ def contains_closed_data(self):
return closed_data

@property
def full_coverage(self) -> str:
def coverage(self) -> dict:
    """Coverage computed by ``get_coverage`` for this object alone."""
    entities = [self]
    return get_coverage(entities)

@property
def full_coverage(self):
"""
Returns the full temporal coverage of the table as a json string
representing an object with the 3 initial points of the coverage
Expand Down Expand Up @@ -1221,53 +1064,12 @@ class Meta:
ordering = ["name"]

@property
def full_coverage(self) -> str:
    """
    Returns the coverage of the column if it exists,
    otherwise returns the coverage of the table

    Only the first datetime range of the first coverage is considered.
    When the column has no coverage, the coverage has no datetime range,
    or that range has no start year, the first and last points of the
    table's ``full_coverage`` are used instead.

    Returns:
        str: coverage of the column - a dumped list of dicts [start_date, end_date]
    """
    coverages = self.coverages.all()

    # Fetch the range once instead of re-querying it for every check.
    dt_range = coverages[0].datetime_ranges.first() if coverages else None

    if dt_range is None or dt_range.start_year is None:
        # No usable coverage for the column, so fall back to the table's.
        table_full_coverage = json.loads(self.table.full_coverage)
        temporal_coverage_start = table_full_coverage[0]
        temporal_coverage_end = table_full_coverage[-1]
    else:
        temporal_coverage_start = {
            "year": to_str(dt_range.start_year),
            "month": to_str(dt_range.start_month, 2),
            "day": to_str(dt_range.start_day, 2),
        }
        temporal_coverage_end = {
            "year": to_str(dt_range.end_year),
            "month": to_str(dt_range.end_month, 2),
            "day": to_str(dt_range.end_day, 2),
        }

    # The former trailing ``else`` (empty year/month/day strings) could never
    # run: the two conditions before it covered every case.
    return json.dumps([temporal_coverage_start, temporal_coverage_end])
def coverage(self) -> dict:
    """Return the column's own coverage, falling back to the table's.

    The table coverage is used only when the column's coverage has neither
    a ``since`` nor an ``until`` value.
    """
    own_coverage = get_coverage([self])
    if own_coverage["since"] or own_coverage["until"]:
        return own_coverage
    return self.table.coverage

def clean(self) -> None:
"""Clean method for Column model"""
Expand Down Expand Up @@ -1715,6 +1517,15 @@ def since(self):
self.start_second or 0,
)

@property
def since_str(self):
    """Format ``since`` using only the start components that are set.

    Returns ``YYYY-MM-DD``, ``YYYY-MM`` or ``YYYY`` depending on whether the
    start day and month are present, and ``None`` when there is no start
    year.
    """
    if not self.start_year:
        return None
    if not self.start_month:
        pattern = "%Y"
    elif not self.start_day:
        pattern = "%Y-%m"
    else:
        pattern = "%Y-%m-%d"
    return self.since.strftime(pattern)

@property
def until(self):
if self.end_year:
Expand All @@ -1727,6 +1538,15 @@ def until(self):
self.end_second or 0,
)

@property
def until_str(self):
    """Format ``until`` using only the end components that are set.

    Returns ``YYYY-MM-DD``, ``YYYY-MM`` or ``YYYY`` depending on whether the
    end day and month are present, and ``None`` when there is no end year.
    """
    if not self.end_year:
        return None
    if not self.end_month:
        pattern = "%Y"
    elif not self.end_day:
        pattern = "%Y-%m"
    else:
        pattern = "%Y-%m-%d"
    return self.until.strftime(pattern)

def get_similarity_of_datetime(self, other: "DateTimeRange"):
if not self.since:
return 0
Expand Down Expand Up @@ -1918,3 +1738,22 @@ def clean(self) -> None:
"'column', 'key, 'raw_data_source', 'information_request' must be set."
)
return super().clean()


def get_coverage(entities: list) -> dict:
    """Get maximum datetime coverages of entities

    Visits every datetime range of every coverage of every entity and keeps
    the string form of the earliest ``since`` and of the latest ``until``.

    Args:
        entities: objects exposing ``coverages.all()``, whose items expose
            ``datetime_ranges.all()``.

    Returns:
        dict: ``{"since": ..., "until": ...}``; a value is ``None`` when no
        range provided that bound.
    """
    earliest, latest = datetime.max, datetime.min
    earliest_label = latest_label = None

    all_ranges = (
        dt_range
        for entity in entities
        for cov in entity.coverages.all()
        for dt_range in cov.datetime_ranges.all()
    )
    for dt_range in all_ranges:
        start = dt_range.since
        if start and start < earliest:
            earliest, earliest_label = start, dt_range.since_str
        stop = dt_range.until
        if stop and stop > latest:
            latest, latest_label = stop, dt_range.until_str

    return {"since": earliest_label, "until": latest_label}

0 comments on commit d9aecd0

Please sign in to comment.