Skip to content

Commit

Permalink
Stub in support for checking non-flattened files.
Browse files Browse the repository at this point in the history
  • Loading branch information
netsettler committed Oct 25, 2023
1 parent 60b5ef1 commit d9fb9f6
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 16 deletions.
104 changes: 91 additions & 13 deletions dcicutils/bundle_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import copy

from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Tuple, Union
from .common import AnyJsonData
from .env_utils import EnvUtils, public_env_name
from .ff_utils import get_metadata
from .lang_utils import there_are
from .misc_utils import AbstractVirtualApp, ignored, PRINT, to_camel_case
from .misc_utils import AbstractVirtualApp, ignored, ignorable, PRINT, to_camel_case
from .sheet_utils import (
LoadTableError, prefer_number, TabbedJsonSchemas,
Header, Headers, TabbedHeaders, ParsedHeader, ParsedHeaders, TabbedParsedHeaders, SheetCellValue, TabbedSheetData,
Expand Down Expand Up @@ -75,7 +75,9 @@ class NumHint(TypeHint):

PREFERENCE_MAP = {'number': 'num', 'integer': 'int', 'float': 'float'}

def __init__(self, declared_type):
def __init__(self, declared_type: Optional[str] = None):
    """
    Sets up a numeric hint from a declared type name.

    :param declared_type: either a key of PREFERENCE_MAP ('number', 'integer' or 'float') or one of
        the short kind names that are its values ('num', 'int' or 'float'). None means 'number'.

    Any other name leaves self.preferred_type as None.
    """
    if declared_type is None:
        # The default has to be a KEY of PREFERENCE_MAP. The short name 'num' is only a value of the map,
        # so looking it up yielded None and the default ended up selecting no preference at all.
        declared_type = 'number'
    if declared_type in self.PREFERENCE_MAP:
        self.preferred_type = self.PREFERENCE_MAP[declared_type]
    elif declared_type in self.PREFERENCE_MAP.values():
        # Already a short kind name (e.g., 'int'), so no translation is needed.
        self.preferred_type = declared_type
    else:
        self.preferred_type = None

def apply_hint(self, value):
Expand Down Expand Up @@ -128,7 +130,51 @@ def apply_hint(self, value):
return value


OptionalTypeHints = List[Optional[TypeHint]]
class OptionalTypeHints:
    """
    A collection of optional type hints that supports two kinds of lookup:

    * by position (a non-negative int), which consults the positional hints supplied at creation
    * by breadcrumbs (a tuple, such as a parsed header), which consults a lookup table

    Positional hints are fixed at creation. Breadcrumb hints may be added later, but never redefined.
    """

    def __init__(self, positional_hints: Optional[List[Optional["TypeHint"]]] = None,
                 positional_breadcrumbs: Optional[List[Union[List, Tuple]]] = None):
        """
        :param positional_hints: the hint (or None) for each position. Defaults to no positions.
        :param positional_breadcrumbs: if given and non-empty, the breadcrumbs for each position,
            in the same order as positional_hints. Each entry may be a list or a tuple.
            Each is converted to a tuple and registered as a lookup key for the corresponding hint.
        :raises ValueError: if positional_breadcrumbs is non-empty and its length differs from
            the number of positional hints.
        """
        self.other_hints: Dict[Any, Optional["TypeHint"]] = {}
        self.positional_hints: List[Optional["TypeHint"]] = [] if positional_hints is None else positional_hints
        if positional_breadcrumbs:
            # Compare against the stored hints, so that breadcrumbs given without any hints are
            # reported as a mismatch rather than being silently dropped.
            if len(positional_breadcrumbs) != len(self.positional_hints):
                raise ValueError("positional_hints and positional_breadcrumbs must have the same length.")
            for breadcrumbs, hint in zip(positional_breadcrumbs, self.positional_hints):
                # For convenience, we accept a list or tuple, but it must be a tuple to be a key.
                self.other_hints[tuple(breadcrumbs)] = hint

    def __getitem__(self, key: Any) -> Optional["TypeHint"]:
        """
        For enumerated positional information, we consult our initial type vector.
        For other situations, we do a general lookup of the hint in our lookup table.

        :param key: a non-negative int position, or a breadcrumbs tuple
        :return: the hint, or None if the position is past the end or the breadcrumbs are not registered
        :raises ValueError: if key is a negative int, or is neither an int nor a tuple
        """
        if isinstance(key, int):
            if key < 0:
                raise ValueError(f"Negative hint positions are not allowed: {key}")
            hints = self.positional_hints
            return hints[key] if key < len(hints) else None
        if isinstance(key, tuple):  # a parsed header (or schema breadcrumbs)
            return self.other_hints.get(key)
        raise ValueError(f"Key of unexpected type for OptionalTypeHints: {key}")

    def __setitem__(self, key: Any, value: "TypeHint"):
        """
        Registers a hint for a breadcrumbs tuple that has no entry yet.

        :param key: a breadcrumbs tuple
        :param value: the hint to register
        :raises ValueError: if key is an int, is not a tuple, or already has an entry
        """
        if isinstance(key, int):
            raise ValueError(f"Cannot assign OptionalTypeHints by position after initial creation: {key!r}")
        # The type is checked before the membership test because an unhashable key (e.g., a list)
        # would otherwise escape from the membership test as a TypeError.
        if not isinstance(key, tuple):
            raise ValueError(f"Attempt to set an OptionalTypeHints key to other than a breadcrumbs tuple: {key!r}")
        if key in self.other_hints:
            raise ValueError(f"Attempt to redefine OptionalTypeHint key {key!r}.")
        self.other_hints[key] = value


class AbstractStructureManager(AbstractTableSetManager):
Expand Down Expand Up @@ -384,12 +430,12 @@ def __init__(self, tabbed_sheet_data: TabbedSheetData, *, flattened: bool,
apply_heuristics: bool = False):

self.flattened = flattened
if not flattened:
# TODO: Need to implement something that depends on this flattened attribute.
# Also, it's possible that we can default this once we see if the new strategy is general-purpose,
# rather than it being a required argument. But for now let's require it be passed.
# -kmp 25-Oct-2023
raise ValueError("Only flattened=True is supported by TableChecker for now.")
# if not flattened:
# # TODO: Need to implement something that depends on this flattened attribute.
# # Also, it's possible that we can default this once we see if the new strategy is general-purpose,
# # rather than it being a required argument. But for now let's require it be passed.
# # -kmp 25-Oct-2023
# raise ValueError("Only flattened=True is supported by TableChecker for now.")

if portal_env is None and portal_vapp is None:
portal_env = public_env_name(EnvUtils.PRD_ENV_NAME)
Expand Down Expand Up @@ -494,6 +540,37 @@ def check_tab(self, tab_name: str):

def check_row(self, row: Dict, *, tab_name: str, row_number: int, prototype: Dict,
              parsed_headers: ParsedHeaders, type_hints: OptionalTypeHints):
    """
    Checks one row by delegating to check_flattened_row if self.flattened is true,
    and to check_inflated_row otherwise. All arguments are passed through unchanged.

    :return: whatever the selected checker returns
    """
    row_checker = self.check_flattened_row if self.flattened else self.check_inflated_row
    return row_checker(row=row, tab_name=tab_name, row_number=row_number, prototype=prototype,
                       parsed_headers=parsed_headers, type_hints=type_hints)

def check_inflated_row(self, row: Dict, *, tab_name: str, row_number: int, prototype: Dict,
                       parsed_headers: ParsedHeaders, type_hints: OptionalTypeHints):
    """
    Placeholder for checking a row of non-flattened data.

    No checking is implemented yet, so the row is returned exactly as it was given.
    """
    # None of these arguments is consulted yet. They are handed to ignorable(), presumably to mark them
    # as deliberately unused until the traversal sketched below is implemented.
    ignorable(self, tab_name, row_number, prototype, parsed_headers, type_hints)
    # TODO: Make this work. A sketch of the intended approach:
    #
    #   def traverse(item, *, subschema, breadcrumbs):
    #       if isinstance(item, list):
    #           # check schema here to make sure it's supposed to be a list before proceeding
    #           for i, elem in enumerate(item):
    #               traverse(elem, subschema=..., breadcrumbs=(*breadcrumbs, i))
    #       elif isinstance(item, dict):
    #           # check schema here to make sure it's supposed to be a dict before proceeding
    #           for k, v in item.items():
    #               traverse(v, subschema=..., breadcrumbs=(*breadcrumbs, k))
    #       else:
    #           # look up hint. if there's not a hint for these breadcrumbs, make one
    #           # apply the hint for side-effect, to get an error if we have a bad value
    #           pass
    #
    #   schema = self.schemas[tab_name]
    #   if schema:
    #       traverse(row, subschema=schema, breadcrumbs=())  # for side-effect
    return row

def check_flattened_row(self, row: Dict, *, tab_name: str, row_number: int, prototype: Dict,
parsed_headers: ParsedHeaders, type_hints: OptionalTypeHints):
patch_item = copy.deepcopy(prototype)
for column_number, column_value in enumerate(row.values()):
parsed_value = ItemTools.parse_item_value(column_value, apply_heuristics=self.apply_heuristics)
Expand Down Expand Up @@ -532,8 +609,9 @@ def compile_type_hints(self, tab_name: str) -> OptionalTypeHints:
for required_header in self._schema_required_headers(schema):
if required_header not in parsed_headers:
self.note_problem("Missing required header")
type_hints = [ItemTools.find_type_hint(parsed_header, schema, context=self) if schema else None
for parsed_header in parsed_headers]
positional_type_hints = [ItemTools.find_type_hint(parsed_header, schema, context=self) if schema else None
for parsed_header in parsed_headers]
type_hints = OptionalTypeHints(positional_type_hints, positional_breadcrumbs=parsed_headers)
return type_hints

@classmethod
Expand Down Expand Up @@ -577,7 +655,7 @@ def load_items(filename: str, tab_name: Optional[str] = None, escaping: Optional
error_summary = summary_of_data_validation_errors(problems)
if error_summary:
for item in error_summary:
print(item)
PRINT(item)
raise Exception("Validation problems were seen.")
# TODO: Maybe connect validation here. Although another option is to just call validation separately
# once this is successfully loaded. Needs thought. However, David's validation_utils can do
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "8.0.0.1-alpha.14" # to become "8.1.0"
version = "8.0.0.1-alpha.15" # to become "8.1.0"
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down
64 changes: 62 additions & 2 deletions test/test_bundle_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# High-level interfaces
load_table_structures, load_items,
# Low-level implementation
SchemaManager, ItemTools, TableChecker,
BoolHint,
SchemaManager, ItemTools, TableChecker, OptionalTypeHints,
BoolHint, NumHint,
# Probably we should test NumHint, TypeHint, EnumHint, RefHint, etc. as well. -kmp 23-Oct-2023
)
from dcicutils.common import AnyJsonData
Expand Down Expand Up @@ -49,6 +49,66 @@
)


def test_optional_type_hints():
    """
    Exercises OptionalTypeHints: lookup by position and by breadcrumbs, and the rules about assignment.
    """

    bool_hint = BoolHint()
    num_hint = NumHint()
    int_hint = NumHint(declared_type='int')

    # With no arguments, there are no hints of either kind.
    hints = OptionalTypeHints()
    assert hints.positional_hints == []
    assert hints.other_hints == {}
    for position in (0, 100):
        assert hints[position] is None
    with pytest.raises(ValueError) as exc:
        print(hints[-1])
    assert str(exc.value) == "Negative hint positions are not allowed: -1"

    # With only positional hints, lookup is by position and nothing is registered by breadcrumbs.
    hints = OptionalTypeHints([bool_hint, num_hint])
    assert hints.positional_hints == [bool_hint, num_hint]
    assert hints.other_hints == {}
    assert hints[0] is bool_hint
    assert hints[1] is num_hint
    assert hints[2] is None

    # With breadcrumbs as well, each positional hint can also be found by its breadcrumbs.
    hints = OptionalTypeHints([bool_hint, num_hint], positional_breadcrumbs=[('foo', 'x'), ('foo', 'y')])
    expected_other_hints = {('foo', 'x'): bool_hint, ('foo', 'y'): num_hint}
    assert hints.positional_hints == [bool_hint, num_hint]
    assert hints.other_hints == expected_other_hints
    assert hints[0] is bool_hint
    assert hints[1] is num_hint
    assert hints[2] is None
    assert hints[('something',)] is None
    assert hints[('foo', 'x')] is bool_hint
    assert hints[('foo', 'y')] is num_hint
    assert hints[('foo', 'z')] is None

    # Assignment by position is refused.
    with pytest.raises(ValueError) as exc:
        hints[2] = bool_hint
    assert str(exc.value) == "Cannot assign OptionalTypeHints by position after initial creation: 2"
    assert hints.positional_hints == [bool_hint, num_hint]

    # Assignment to a key that is not a tuple is refused.
    with pytest.raises(ValueError) as exc:
        hints['something'] = bool_hint
    assert str(exc.value) == "Attempt to set an OptionalTypeHints key to other than a breadcrumbs tuple: 'something'"
    assert hints.positional_hints == [bool_hint, num_hint]

    # Assignment to new breadcrumbs adds a table entry without disturbing the positional hints.
    hints[('something',)] = int_hint
    expected_other_hints[('something',)] = int_hint
    assert hints.positional_hints == [bool_hint, num_hint]
    assert hints.other_hints == expected_other_hints
    assert hints[('something',)] == int_hint

    # Assignment to breadcrumbs that already have an entry is refused.
    with pytest.raises(ValueError) as exc:
        hints[('something',)] = int_hint
    assert str(exc.value) == "Attempt to redefine OptionalTypeHint key ('something',)."


def test_item_tools_parse_sheet_header():
assert ItemTools.parse_sheet_header('.a') == ['a']
assert ItemTools.parse_sheet_header('a') == ['a']
Expand Down

0 comments on commit d9fb9f6

Please sign in to comment.