diff --git a/README.md b/README.md index fc9fe75..449912c 100644 --- a/README.md +++ b/README.md @@ -138,9 +138,8 @@ Test coverage includes every example given in the spec table of features. * Partial uncertain/approximate: - >>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate. - # Note that the result text is normalized - PartialUncertainOrApproximate: '2011-(06-04)~' + >>> parse_edtf('2004-06~-11') # year and month approximate, day certain. + PartialUncertainOrApproximate: '2004-06~-11' * Partial unspecified: @@ -159,13 +158,42 @@ Test coverage includes every example given in the spec table of features. * Level 2 Extended intervals: - >>> parse_edtf('2004-06-(01)~/2004-06-(20)~') - Level2Interval: '2004-06-(01)~/2004-06-(20)~' + >>> parse_edtf('2004-06-~01/2004-06-~20') + Level2Interval: '2004-06-~01/2004-06-~20' * Year requiring more than 4 digits - exponential form: - >>> parse_edtf('Y-17e7') - ExponentialYear: 'Y-17e7' + >>> e = parse_edtf('Y-17E7') + ExponentialYear: 'Y-17E7' + >>> e.estimated() + -170000000 + +* Significant digits: + # '1950S2': some year between 1900 and 1999, estimated to be 1950 + >>> d = parse_edtf('1950S2') + Date: '1950S2' + >>> d.lower_fuzzy()[:3] + (1900, 1, 1) + >>> d.upper_fuzzy()[:3] + (1999, 12, 31) + # 'Y171010000S3': some year between 171000000 and 171999999, estimated to be 171010000, with 3 significant digits. 
+ >>> l = parse_edtf('Y171010000S3') + LongYear: 'Y171010000S3' + >>> l.estimated() + 171010000 + >>> l.lower_fuzzy()[:3] + (171000000, 1, 1) + >>> l.upper_fuzzy()[:3] + (171999999, 12, 31) + # 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800 + >>> e = parse_edtf('Y3388E2S3') + ExponentialYear: 'Y3388E2S3S3' + >>> e.estimated() + 338800 + >>> e.lower_fuzzy()[:3] + (338000, 1, 1) + >>> e.upper_fuzzy()[:3] + (338999, 12, 31) ### Natural language representation diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 09140b6..e12ecbd 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -272,7 +272,9 @@ def __init__( self.year = year # Year is required, but sometimes passed in as a 'date' dict. self.month = month self.day = day - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def __str__(self): r = self.year @@ -291,6 +293,36 @@ def isoformat(self, default=date.max): int(self.day or default.day), ) + def lower_fuzzy(self): + if not hasattr(self, "significant_digits") or not self.significant_digits: + return apply_delta( + sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST) + ) + else: + total_digits = len(self.year) + insignificant_digits = total_digits - self.significant_digits + lower_year = ( + int(self.year) + // (10**insignificant_digits) + * (10**insignificant_digits) + ) + return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + + def upper_fuzzy(self): + if not hasattr(self, "significant_digits") or not self.significant_digits: + return apply_delta( + add, self.upper_strict(), self._get_fuzzy_padding(LATEST) + ) + else: + total_digits = len(self.year) + insignificant_digits = total_digits - self.significant_digits + upper_year = (int(self.year) // (10**insignificant_digits) + 1) * ( + 10**insignificant_digits + ) - 1 + return struct_time( + 
[upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS + ) + def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: @@ -547,7 +579,9 @@ def _get_fuzzy_padding(self, lean): class LongYear(EDTFObject): def __init__(self, year, significant_digits=None): self.year = year - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def __str__(self): if self.significant_digits: @@ -568,6 +602,42 @@ def _strict_date(self, lean): def estimated(self): return self._precise_year() + def lower_fuzzy(self): + full_year = self._precise_year() + strict_val = self.lower_strict() + if not self.significant_digits: + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + else: + insignificant_digits = len(str(full_year)) - int(self.significant_digits) + if insignificant_digits <= 0: + return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST)) + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + lower_year = sig_digits * padding_value + return apply_delta( + sub, + struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(EARLIEST), + ) + + def upper_fuzzy(self): + full_year = self._precise_year() + strict_val = self.upper_strict() + if not self.significant_digits: + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + else: + insignificant_digits = len(str(full_year)) - self.significant_digits + if insignificant_digits <= 0: + return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST)) + padding_value = 10**insignificant_digits + sig_digits = full_year // padding_value + upper_year = (sig_digits + 1) * padding_value - 1 + return apply_delta( + add, + struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS), + self._get_fuzzy_padding(LATEST), + ) + class Season(Date): def __init__(self, year, 
season, **kwargs): @@ -845,7 +915,9 @@ class ExponentialYear(LongYear): def __init__(self, base, exponent, significant_digits=None): self.base = base self.exponent = exponent - self.significant_digits = significant_digits + self.significant_digits = ( + int(significant_digits) if significant_digits else None + ) def _precise_year(self): return int(self.base) * 10 ** int(self.exponent) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 6e0a8a1..1ec7452 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -14,8 +14,8 @@ # where the first value is a tuple, the second item is a tuple of the normalised parse result. # # The values in the second tuple indicate the iso versions of the derived Python `date`s. -# - If there's one other value, all the derived dates should be the same. -# - If there're two other values, then all the lower values should be the same +# - If there is one other value, all the derived dates should be the same. +# - If there are two other values, then all the lower values should be the same # and all the upper values should be the same. # - If there are three other values, then the upper and lower ``_strict`` values # should be the first value, and the upper and lower ``_fuzzy`` values should be @@ -194,13 +194,21 @@ ("Y-17E7", ("-170000000-01-01", "-170000000-12-31")), # L2 significant digits # Some year between 1900 and 1999, estimated to be 1950 - ("1950S2", ("1900-01-01", "1999-12-31")), + ("1950S2", ("1950-01-01", "1950-12-31", "1900-01-01", "1999-12-31")), + ("1953S2", ("1953-01-01", "1953-12-31", "1900-01-01", "1999-12-31")), + ("1953S3", ("1953-01-01", "1953-12-31", "1950-01-01", "1959-12-31")), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
- ("Y17101E4S3", ("171000000-01-01", "171999999-12-31")), + ( + "Y17101E4S3", + ("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"), + ), # Some year between 338000 and 338999, estimated to be 338800 - ("Y3388E2S3", ("338000-01-01", "338999-12-31")), + ("Y3388E2S3", ("338800-01-01", "338800-12-31", "338000-01-01", "338999-12-31")), # some year between 171000000 and 171999999 estimated to be 171010000 - ("Y171010000S3", ("171010000-01-01", "171999999-12-31")), + ( + "Y171010000S3", + ("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"), + ), # L2 Seasons # Spring southern hemisphere, 2001 ("2001-29", ("2001-09-01", "2001-11-30")),