diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d8b85d0fb..317a7391b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,8 +6,13 @@ dcicutils Change Log ---------- +8.4.0 +===== +* More work related to SMaHT ingestion (bundle/sheet_utils, data_readers, etc). + + 8.3.0 -========= +===== * Updates for RAS to Redis API diff --git a/dcicutils/bundle_utils.py b/dcicutils/bundle_utils.py index fb76c74a5..0f9b73e1e 100644 --- a/dcicutils/bundle_utils.py +++ b/dcicutils/bundle_utils.py @@ -4,7 +4,7 @@ from .common import AnyJsonData from .env_utils import EnvUtils, public_env_name from .ff_utils import get_metadata -from .misc_utils import AbstractVirtualApp, ignored, ignorable, PRINT, to_camel_case +from .misc_utils import AbstractVirtualApp, ignored, ignorable, PRINT, remove_empty_properties, to_camel_case from .sheet_utils import ( LoadTableError, prefer_number, TabbedJsonSchemas, Header, Headers, TabbedHeaders, ParsedHeader, ParsedHeaders, TabbedParsedHeaders, SheetCellValue, TabbedSheetData, @@ -15,6 +15,7 @@ PatchPrototype = Dict TabbedPatchPrototypes = Dict[str, PatchPrototype] +ARRAY_VALUE_DELIMITER = "|" class TypeHintContext: @@ -44,7 +45,10 @@ def __init__(self, problems: Optional[dict] = None): class TypeHint: - def apply_hint(self, value): + def __init__(self): + self.is_array = False + + def apply_hint(self, value, src): return value def __str__(self): @@ -55,38 +59,41 @@ def __repr__(self): class BoolHint(TypeHint): + def __init__(self): + super().__init__() # We could use other ways to do this, such as initial substring, but this is more likely to be right. # Then again, we might want to consider accepting others like 'yes/no', 'y/n', 'on/off', '1/0'. TRUE_VALUES = ['true', 't'] FALSE_VALUES = ['false', 'f'] - def apply_hint(self, value): + def apply_hint(self, value, src): if isinstance(value, str) and value: l_value = value.lower() if l_value in self.TRUE_VALUES: return True elif l_value in self.FALSE_VALUES: return False - return super().apply_hint(value) + return super().apply_hint(value, src) class NumHint(TypeHint): - PREFERENCE_MAP = {'number': 'num', 'integer': 'int', 'float': 'float'} + PREFERENCE_MAP = {'number': 'number', 'integer': 'integer'} def __init__(self, declared_type: Optional[str] = None): if declared_type is None: - declared_type = 'num' + declared_type = 'number' self.preferred_type = self.PREFERENCE_MAP.get(declared_type) + super().__init__() - def apply_hint(self, value): + def apply_hint(self, value, src): if isinstance(value, str) and value: if self.preferred_type: return prefer_number(value, kind=self.preferred_type) else: return value - return super().apply_hint(value) + return super().apply_hint(value, src) class EnumHint(TypeHint): @@ -96,8 +103,9 @@ def __str__(self): def __init__(self, value_map): self.value_map = value_map + super().__init__() - def apply_hint(self, value): + def apply_hint(self, value, src): if isinstance(value, str): if value in self.value_map: result = self.value_map[value] @@ -112,7 +120,29 @@ def apply_hint(self, value): [only_found] = found result = self.value_map[only_found] return result - return super().apply_hint(value) + return super().apply_hint(value, src) + + +class ArrayHint(TypeHint): + def __init__(self): + super().__init__() + + def apply_hint(self, value, src): + if value is None or value == '': + return [] + if isinstance(value, str): + if not value: + return [] + return [value.strip() for value in value.split(ARRAY_VALUE_DELIMITER)] + return super().apply_hint(value, src) + + +class StringHint(TypeHint): + def 
__init__(self): + super().__init__() + + def apply_hint(self, value, src): + return str(value).strip() if value is not None else "" class RefHint(TypeHint): @@ -120,13 +150,29 @@ class RefHint(TypeHint): def __str__(self): return f"" - def __init__(self, schema_name: str, context: TypeHintContext): + def __init__(self, schema_name: str, required: bool, context: TypeHintContext): self.schema_name = schema_name self.context = context + self.required = required + super().__init__() + + def apply_hint(self, value, src): + if self.is_array and isinstance(value, str): + value = [value.strip() for value in value.split(ARRAY_VALUE_DELIMITER)] if value else [] + if self.is_array and isinstance(value, list): + for item in value: + self._apply_ref_hint(item, src) + else: + self._apply_ref_hint(value, src) + return value - def apply_hint(self, value): - if not self.context.validate_ref(item_type=self.schema_name, item_ref=value): - raise ValidationProblem(f"Unable to validate {self.schema_name} reference: {value!r}") + def _apply_ref_hint(self, value, src): + if not value and self.required: + raise ValidationProblem(f"No required reference (linkTo) value for: {self.schema_name}" + f"{f' from {src}' if src else ''}") + if value and not self.context.validate_ref(item_type=self.schema_name, item_ref=value): + raise ValidationProblem(f"Cannot resolve reference (linkTo) for: {self.schema_name}" + f"{f'/{value}' if value else ''}{f' from {src}' if src else ''}") return value @@ -228,7 +274,7 @@ def parse_sheet_headers(cls, headers: Headers) -> ParsedHeaders: def compute_patch_prototype(cls, parsed_headers: ParsedHeaders): prototype = {} for parsed_header in parsed_headers: - parsed_header0 = parsed_header[0] + parsed_header0 = parsed_header[0] if parsed_header else "" if isinstance(parsed_header0, int): raise LoadTableError(f"A header cannot begin with a numeric ref: {parsed_header0}") cls.assure_patch_prototype_shape(parent=prototype, keys=parsed_header) @@ -236,7 +282,9 @@ def compute_patch_prototype(cls, parsed_headers: ParsedHeaders): @classmethod def assure_patch_prototype_shape(cls, *, parent: Union[Dict, List], keys: ParsedHeader): - [key0, *more_keys] = keys + key0 = None + more_keys = None + [key0, *more_keys] = keys if keys else [None] key1 = more_keys[0] if more_keys else None if isinstance(key1, int): placeholder = [] @@ -288,8 +336,11 @@ def parse_item_value(cls, value: SheetCellValue, return True elif lvalue == 'false': return False - elif lvalue == 'null' or lvalue == '': + # elif lvalue == 'null' or lvalue == '': + elif lvalue == 'null': return None + elif lvalue == '': + return lvalue elif split_pipe and '|' in value: if value == '|': # Use '|' for [] return [] @@ -307,16 +358,18 @@ def parse_item_value(cls, value: SheetCellValue, @classmethod def set_path_value(cls, datum: Union[List, Dict], path: ParsedHeader, value: Any, force: bool = False): - if (value is None or value == '') and not force: + # if (value is None or value == '') and not force: + if value is None and not force: return - [key, *more_path] = path + [key, *more_path] = path if path else [None] if not more_path: datum[key] = value else: cls.set_path_value(datum[key], more_path, value) @classmethod - def find_type_hint_for_subschema(cls, subschema: Any, context: Optional[TypeHintContext] = None): + def find_type_hint_for_subschema(cls, subschema: Any, required: bool = False, + context: Optional[TypeHintContext] = None): if subschema is not None: t = subschema.get('type') if t == 'string': @@ -326,11 +379,19 @@ def 
find_type_hint_for_subschema(cls, subschema: Any, context: Optional[TypeHint return EnumHint(mapping) link_to = subschema.get('linkTo') if link_to and context.schema_exists(link_to): - return RefHint(schema_name=link_to, context=context) - elif t in ('integer', 'float', 'number'): + return RefHint(schema_name=link_to, required=required, context=context) + return StringHint() + elif t in ('integer', 'number'): return NumHint(declared_type=t) elif t == 'boolean': return BoolHint() + elif t == 'array': + array_type_hint = cls.find_type_hint_for_subschema(subschema.get("items"), + required=required, context=context) + if type(array_type_hint) is RefHint: + array_type_hint.is_array = True + return array_type_hint + return ArrayHint() @@ -344,7 +405,8 @@ def finder(subheader, subschema): if subschema.get('type') == 'object': subsubschema = subschema.get('properties', {}).get(key1) if not other_headers: - hint = cls.find_type_hint_for_subschema(subsubschema, context=context) + required = key1 and subschema and key1 in subschema.get('required', []) + hint = cls.find_type_hint_for_subschema(subsubschema, required=required, context=context) if hint: return hint else: @@ -453,7 +515,8 @@ def __init__(self, tabbed_sheet_data: TabbedSheetData, *, flattened: bool, self.portal_vapp = portal_vapp self.schema_manager: SchemaManager = SchemaManager(portal_env=portal_env, portal_vapp=portal_vapp, override_schemas=override_schemas) - self.schemas = self.schema_manager.fetch_relevant_schemas(self.tab_names) # , schemas=schemas) + schema_names_to_fetch = [key for key, value in tabbed_sheet_data.items() if value] + self.schemas = self.schema_manager.fetch_relevant_schemas(schema_names_to_fetch) self.lookup_tables_by_tab_name: Dict[str, Dict[str, Dict]] = { tab_name: self.build_lookup_table_for_tab(tab_name, rows=rows) for tab_name, rows in tabbed_sheet_data.items() @@ -474,6 +537,8 @@ def note_problem(self, problem: str): self._problems.append(problem) def build_lookup_table_for_tab(self, tab_name: str, *, rows: List[Dict]) -> Dict[str, Dict]: + if not self.schema_manager or not rows: + return {} identifying_properties = self.schema_manager.identifying_properties(schema_name=tab_name) if not identifying_properties: # Maybe issue a warning here that we're going to lose @@ -497,7 +562,15 @@ def contains_ref(self, item_type, item_ref): def resolve_ref(self, item_type, item_ref): lookup_table = self.lookup_tables_by_tab_name.get(item_type) if lookup_table: # Is it a type we're tracking? 
- return lookup_table.get(item_ref) or None + if isinstance(item_ref, list): + found = True + for item in item_ref: + if not lookup_table.get(item): + found = False + break + return True if found else None + else: + return lookup_table.get(item_ref) or None else: # Apparently some stray type not in our tables return None @@ -523,8 +596,18 @@ def validate_ref(self, item_type, item_ref): return True try: # TODO: This probably needs a cache - info = get_metadata(f"/{to_camel_case(item_type)}/{item_ref}", - ff_env=self.portal_env, vapp=self.portal_vapp) + if isinstance(item_ref, list): + found = True + for item in item_ref: + info = get_metadata(f"/{to_camel_case(item_type)}/{item}", + ff_env=self.portal_env, vapp=self.portal_vapp) + if not isinstance(info, dict) or 'uuid' not in info: + found = False + break + return found + else: + info = get_metadata(f"/{to_camel_case(item_type)}/{item_ref}", + ff_env=self.portal_env, vapp=self.portal_vapp) # Basically return True if there's a value at all, # but still check it's not an error message that didn't get raised. return isinstance(info, dict) and 'uuid' in info @@ -532,7 +615,7 @@ def validate_ref(self, item_type, item_ref): return False def schema_exists(self, schema_name: str) -> bool: - return self.schema_manager.schema_exists(schema_name) + return self.schema_manager.schema_exists(schema_name) if self.schema_manager else False def check_tab(self, tab_name: str): prototype = self.patch_prototypes_by_tab_name[tab_name] @@ -579,10 +662,15 @@ def check_flattened_row(self, row: Dict, *, tab_name: str, row_number: int, prot patch_item = copy.deepcopy(prototype) for column_number, column_value in enumerate(row.values()): parsed_value = ItemTools.parse_item_value(column_value, apply_heuristics=self.apply_heuristics) + column_name = (list(row.keys())[column_number] or "") if len(row) > column_number else "" + if column_name.endswith("#"): + if isinstance(parsed_value, str): + parsed_value = [value.strip() for value in parsed_value.split(ARRAY_VALUE_DELIMITER) if value] type_hint = type_hints[column_number] if type_hint: try: - parsed_value = type_hint.apply_hint(parsed_value) + src = f"{tab_name}{f'.{column_name}' if column_name else ''}" + parsed_value = type_hint.apply_hint(parsed_value, src) except ValidationProblem as e: headers = self.headers_by_tab_name[tab_name] column_name = headers[column_number] @@ -632,7 +720,7 @@ def create_tab_processor_state(self, tab_name: str) -> SheetState: type_hints=self.type_hints_by_tab_name[tab_name]) -check = TableChecker.check +# check = TableChecker.check def load_items(filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None, @@ -641,9 +729,11 @@ def load_items(filename: str, tab_name: Optional[str] = None, escaping: Optional # TODO: validate= is presently False (i.e., disabled) by default while being debugged, # but for production use maybe should not be? -kmp 25-Oct-2023 validate: bool = False, + retain_empty_properties: bool = False, + sheet_order: Optional[List[str]] = None, **kwargs): annotated_data = TableSetManager.load_annotated(filename=filename, tab_name=tab_name, escaping=escaping, - prefer_number=False, **kwargs) + prefer_number=False, sheet_order=sheet_order, **kwargs) tabbed_rows = annotated_data['content'] flattened = annotated_data['flattened'] if flattened: @@ -655,6 +745,8 @@ def load_items(filename: str, tab_name: Optional[str] = None, escaping: Optional # No fancy checking for things like .json, etc. for now. 
Only check things that came from # spreadsheet-like data, where structural datatypes are forced into strings. checked_items = tabbed_rows + if not retain_empty_properties: + remove_empty_properties(checked_items) if validate: problems = validate_data_against_schemas(checked_items, portal_env=portal_env, portal_vapp=portal_vapp, override_schemas=override_schemas) diff --git a/dcicutils/data_readers.py b/dcicutils/data_readers.py new file mode 100644 index 000000000..46a07a059 --- /dev/null +++ b/dcicutils/data_readers.py @@ -0,0 +1,147 @@ +import abc +import csv +import openpyxl +from typing import Any, Generator, Iterator, List, Optional, Type, Tuple, Union +from dcicutils.misc_utils import right_trim + + +class RowReader(abc.ABC): + + def __init__(self): + self.header = None + self.location = 0 + self._warning_empty_headers = False + self._warning_extra_values = [] # Line numbers. + self.open() + + def __iter__(self) -> Iterator: + for row in self.rows: + self.location += 1 + if self.is_comment_row(row): + continue + if self.is_terminating_row(row): + break + if len(self.header) < len(row): # Row values beyond what there are headers for are ignored. + self._warning_extra_values.append(self.location) + yield {column: self.cell_value(value) for column, value in zip(self.header, row)} + + def _define_header(self, header: List[Optional[Any]]) -> None: + self.header = [] + for index, column in enumerate(header or []): + if not (column := str(column).strip() if column is not None else ""): + self._warning_empty_headers = True + break # Empty header column signals end of header. + self.header.append(column) + + def rows(self) -> Generator[Union[List[Optional[Any]], Tuple[Optional[Any], ...]], None, None]: + yield + + def is_comment_row(self, row: Union[List[Optional[Any]], Tuple[Optional[Any]]]) -> bool: + return False + + def is_terminating_row(self, row: Union[List[Optional[Any]], Tuple[Optional[Any]]]) -> bool: + return False + + def cell_value(self, value: Optional[Any]) -> Optional[Any]: + return str(value).strip() if value is not None else "" + + def open(self) -> None: + pass + + @property + def issues(self) -> Optional[List[str]]: + issues = [] + if self._warning_empty_headers: + issues.append("Empty header column encountered; ignoring it and all subsequent columns.") + if self._warning_extra_values: + issues.extend([f"Extra column values on row [{row_number}]" for row_number in self._warning_extra_values]) + return issues if issues else None + + +class ListReader(RowReader): + + def __init__(self, rows: List[List[Optional[Any]]]) -> None: + self._rows = rows + super().__init__() + + @property + def rows(self) -> Generator[List[Optional[Any]], None, None]: + for row in self._rows[1:]: + yield row + + def open(self) -> None: + if not self.header: + self._define_header(self._rows[0] if self._rows else []) + + +class CsvReader(RowReader): + + def __init__(self, file: str) -> None: + self._file = file + self._file_handle = None + self._rows = None + super().__init__() + + @property + def rows(self) -> Generator[List[Optional[Any]], None, None]: + for row in self._rows: + yield right_trim(row) + + def open(self) -> None: + if self._file_handle is None: + self._file_handle = open(self._file) + self._rows = csv.reader(self._file_handle, delimiter="\t" if self._file.endswith(".tsv") else ",") + self._define_header(right_trim(next(self._rows, []))) + + def __del__(self) -> None: + if (file_handle := self._file_handle) is not None: + self._file_handle = None + file_handle.close() + + +class 
ExcelSheetReader(RowReader): + + def __init__(self, sheet_name: str, workbook: openpyxl.workbook.workbook.Workbook) -> None: + self.sheet_name = sheet_name or "Sheet1" + self._workbook = workbook + self._rows = None + super().__init__() + + @property + def rows(self) -> Generator[Tuple[Optional[Any], ...], None, None]: + for row in self._rows(min_row=2, values_only=True): + yield right_trim(row) + + def is_terminating_row(self, row: Tuple[Optional[Any]]) -> bool: + return all(cell is None for cell in row) # Empty row signals end of data. + + def open(self) -> None: + if not self._rows: + self._rows = self._workbook[self.sheet_name].iter_rows + self._define_header(right_trim(next(self._rows(min_row=1, max_row=1, values_only=True), []))) + + +class Excel: + + def __init__(self, file: str, reader_class: Optional[Type] = None) -> None: + self._file = file + self._workbook = None + self.sheet_names = None + if isinstance(reader_class, Type) and issubclass(reader_class, ExcelSheetReader): + self._reader_class = reader_class + else: + self._reader_class = ExcelSheetReader + self.open() + + def sheet_reader(self, sheet_name: str) -> ExcelSheetReader: + return self._reader_class(sheet_name=sheet_name, workbook=self._workbook) + + def open(self) -> None: + if self._workbook is None: + self._workbook = openpyxl.load_workbook(self._file, data_only=True) + self.sheet_names = self._workbook.sheetnames or [] + + def __del__(self) -> None: + if (workbook := self._workbook) is not None: + self._workbook = None + workbook.close() diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py index 97a17c21b..0484d6992 100644 --- a/dcicutils/misc_utils.py +++ b/dcicutils/misc_utils.py @@ -23,7 +23,7 @@ from collections import defaultdict from datetime import datetime as datetime_type from dateutil.parser import parse as dateutil_parse -from typing import Optional +from typing import Any, Callable, List, Optional, Tuple, Union # Is this the right place for this? I feel like this should be done in an application, not a library. 
@@ -978,6 +978,43 @@ def str_to_bool(x: Optional[str]) -> Optional[bool]: raise ValueError(f"An argument to str_or_bool must be a string or None: {x!r}") + +def to_integer(value: str, fallback: Optional[Any] = None) -> Optional[Any]: + try: + return int(value) + except Exception: + return fallback + + +def to_float(value: str, fallback: Optional[Any] = None) -> Optional[Any]: + try: + return float(value) + except Exception: + return fallback + + +def to_boolean(value: str, fallback: Optional[Any]) -> Optional[Any]: + if isinstance(value, str) and (value := value.strip().lower()): + if (lower_value := value.lower()) in ["true", "t"]: + return True + elif lower_value in ["false", "f"]: + return False + return fallback + + +def to_enum(value: str, enumerators: List[str]) -> Optional[str]: + matches = [] + if isinstance(value, str) and (value := value.strip()) and isinstance(enumerators, List): + enum_specifiers = {str(enum).lower(): enum for enum in enumerators} + if (enum_value := enum_specifiers.get(lower_value := value.lower())) is not None: + return enum_value + for enum_canonical, _ in enum_specifiers.items(): + if enum_canonical.startswith(lower_value): + matches.append(enum_canonical) + if len(matches) == 1: + return enum_specifiers[matches[0]] + return enum_specifiers[matches[0]] if len(matches) == 1 else value + + @contextlib.contextmanager def override_environ(**overrides): """ @@ -1107,6 +1144,18 @@ def remove_suffix(suffix: str, text: str, required: bool = False): return text[:len(text)-len(suffix)] + +def remove_empty_properties(data: Optional[Union[list, dict]]) -> None: + if isinstance(data, dict): + for key in list(data.keys()): + if (value := data[key]) in [None, "", {}, []]: + del data[key] + else: + remove_empty_properties(value) + elif isinstance(data, list): + for item in data: + remove_empty_properties(item) + + class ObsoleteError(Exception): pass @@ -1320,6 +1369,18 @@ def json_file_contents(filename): return json.load(fp) + +def load_json_if(s: str, is_array: bool = False, is_object: bool = False, + fallback: Optional[Any] = None) -> Optional[Any]: + if (isinstance(s, str) and + ((is_object and s.startswith("{") and s.endswith("}")) or + (is_array and s.startswith("[") and s.endswith("]")))): + try: + return json.loads(s) + except Exception: + pass + return fallback + + def camel_case_to_snake_case(s, separator='_'): """ Converts CamelCase to snake_case. @@ -1404,6 +1465,42 @@ def string_list(s): return [p for p in [part.strip() for part in s.split(",")] if p] + +def split_string(value: str, delimiter: str, escape: Optional[str] = None) -> List[str]: + """ + Splits the given string into an array of strings based on the given delimiter and an optional escape character. + """ + if not isinstance(value, str) or not (value := value.strip()): + return [] + if not isinstance(escape, str) or not escape: + return [item.strip() for item in value.split(delimiter)] + result = [] + item = r"" + escaped = False + for c in value: + if c == delimiter and not escaped: + result.append(item.strip()) + item = r"" + elif c == escape and not escaped: + escaped = True + else: + item += c + escaped = False + result.append(item.strip()) + return [item for item in result if item] + + +def right_trim(list_or_tuple: Union[List[Any], Tuple[Any]], + remove: Optional[Callable] = None) -> Union[List[Any], Tuple[Any]]: + """ + Removes trailing None (or empty string) values from the given tuple or list and returns the result; + does NOT change the given value. 
+ """ + i = len(list_or_tuple) - 1 + while i >= 0 and ((remove and remove(list_or_tuple[i])) or (not remove and list_or_tuple[i] in (None, ""))): + i -= 1 + return list_or_tuple[:i + 1] + + def is_c4_arn(arn: str) -> bool: """ Returns True iff the given (presumed) AWS ARN string value looks like it @@ -2050,6 +2147,32 @@ def merge_key_value_dict_lists(x, y): return [key_value_dict(k, v) for k, v in merged.items()] +def merge_objects(target: Union[dict, List[Any]], source: Union[dict, List[Any]], full: bool = False) -> dict: + """ + Merges the given source dictionary or list into the target dictionary or list. + This MAY well change the given target (dictionary or list) IN PLACE. + The the full argument is True then any target lists longer than the + source be will be filled out with the last element(s) of the source. + """ + if target is None: + return source + if isinstance(target, dict) and isinstance(source, dict) and source: + for key, value in source.items(): + target[key] = merge_objects(target[key], value, full) if key in target else value + elif isinstance(target, list) and isinstance(source, list) and source: + for i in range(max(len(source), len(target))): + if i < len(target): + if i < len(source): + target[i] = merge_objects(target[i], source[i], full) + elif full: + target[i] = merge_objects(target[i], source[len(source) - 1], full) + else: + target.append(source[i]) + elif source: + target = source + return target + + # Stealing topological sort classes below from python's graphlib module introduced # in v3.9 with minor refactoring. # Source: https://github.com/python/cpython/blob/3.11/Lib/graphlib.py diff --git a/dcicutils/qa_utils.py b/dcicutils/qa_utils.py index 0c9b019ed..ac18f3b19 100644 --- a/dcicutils/qa_utils.py +++ b/dcicutils/qa_utils.py @@ -3540,7 +3540,7 @@ def recurse(json1, json2, path=""): if not result: # out(f"Recursive failure at {path!r} in list comparison") pass - elif type(json1) == type(json2): + elif type(json1) is type(json2): result = json1 == json2 if not result: out(f"Failed at {path!r} due to value mismatch: {json.dumps(json1)} != {json.dumps(json2)}") diff --git a/dcicutils/s3_utils.py b/dcicutils/s3_utils.py index e34a3f2b7..ea1897beb 100644 --- a/dcicutils/s3_utils.py +++ b/dcicutils/s3_utils.py @@ -318,7 +318,7 @@ def get_key(self, keyfile_name: str = 'access_key_admin' SSECustomerKey=os.environ['S3_ENCRYPT_KEY'], SSECustomerAlgorithm='AES256') body_data: Union[bytes, str] = response['Body'].read() - auth_text: str = body_data.decode() if type(body_data) == bytes else body_data + auth_text: str = body_data.decode() if type(body_data) is bytes else body_data try: return json.loads(auth_text) except (ValueError, TypeError): diff --git a/dcicutils/sheet_utils.py b/dcicutils/sheet_utils.py index 66ce523fa..07b15a993 100644 --- a/dcicutils/sheet_utils.py +++ b/dcicutils/sheet_utils.py @@ -16,8 +16,8 @@ from tempfile import TemporaryFile, TemporaryDirectory from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union from .common import AnyJsonData, Regexp, JsonSchema -from .lang_utils import conjoined_list, disjoined_list, maybe_pluralize # , there_are -from .misc_utils import ignored, pad_to, JsonLinesReader, remove_suffix # , PRINT, AbstractVirtualApp +from .lang_utils import conjoined_list, disjoined_list, maybe_pluralize +from .misc_utils import ignored, pad_to, JsonLinesReader, remove_suffix, to_camel_case Header = str @@ -32,6 +32,7 @@ SheetData = List[dict] TabbedSheetData = Dict[str, SheetData] TabbedJsonSchemas = Dict[str, 
JsonSchema] +CommentRow = object() class LoadFailure(Exception): @@ -69,7 +70,7 @@ def unwanted_kwargs(*, context, kwargs, context_plural=False, detailed=False): f" {maybe_pluralize(unwanted, 'keyword argument')} {conjoined_list(unwanted)}.") -def prefer_number(value: SheetCellValue, kind='num'): +def prefer_number(value: SheetCellValue, kind='number'): """ Given a string, if the string has number syntax, returns the number it represents. Otherwise, returns its argument. (It follows from this that if given an int or a float, it just returns that argument.) @@ -78,7 +79,7 @@ def prefer_number(value: SheetCellValue, kind='num'): is coerced to, but it has no effect when the argument is a number, even a number of the wrong kind. :param value: a string, int, or float - :param kind: one of 'num', 'int', or 'float' + :param kind: one of 'number' or 'integer' :returns: the argument coerced to a number of the appropriate kind, if possible, or else the argument literally """ if isinstance(value, str): # the given value might be an int or float, in which case just fall through @@ -87,12 +88,12 @@ def prefer_number(value: SheetCellValue, kind='num'): value = value ch0 = value[0] if ch0 == '+' or ch0 == '-' or ch0.isdigit(): - if kind == 'num' or kind == 'int': + if kind == 'integer': try: return int(value) except Exception: pass - if kind == 'num' or kind == 'float': + if kind == 'number': try: return float(value) except Exception: @@ -198,10 +199,10 @@ def load(cls, filename: str, **kwargs) -> TabbedSheetData: raise NotImplementedError(f".load(...) is not implemented for {cls.__name__}.") # noQA @property - def tab_names(self) -> List[str]: + def tab_names(self, order: Optional[List[str]] = None) -> List[str]: raise NotImplementedError(f".tab_names is not implemented for {self.__class__.__name__}..") # noQA - def load_content(self) -> Any: + def load_content(self, sheet_order: Optional[List[str]] = None) -> Any: raise NotImplementedError(f".load_content() is not implemented for {self.__class__.__name__}.") # noQA @@ -290,18 +291,24 @@ def _process_row(self, tab_name: str, state: Any, row: List[SheetCellValue]) -> """ raise NotImplementedError(f"._process_row(...) is not implemented for {self.__class__.__name__}.") # noQA - def load_content(self) -> AnyJsonData: - for tab_name in self.tab_names: + def load_content(self, sheet_order: Optional[List[str]] = None) -> AnyJsonData: + for tab_name in self.tab_names(sheet_order): sheet_content = [] state = self._create_tab_processor_state(tab_name) for row_data in self._raw_row_generator_for_tab_name(tab_name): processed_row_data: AnyJsonData = self._process_row(tab_name, state, row_data) + if not processed_row_data: + break + if processed_row_data is CommentRow: + continue sheet_content.append(processed_row_data) - self.content_by_tab_name[tab_name] = sheet_content + self.content_by_tab_name[tab_name.replace(" ", "")] = sheet_content return self.content_by_tab_name - def parse_cell_value(self, value: SheetCellValue) -> AnyJsonData: - return prefer_number(value) if self.prefer_number else value + def parse_cell_value(self, value: SheetCellValue, override_prefer_number: Optional[bool] = None) -> AnyJsonData: + if override_prefer_number is None: + override_prefer_number = self.prefer_number + return prefer_number(value) if override_prefer_number else value class TableSetManagerRegistry: @@ -355,8 +362,9 @@ class XlsxManager(FlattenedTableSetManager): """ This implements the mechanism to get a series of rows out of the sheets in an XLSX file. 
""" - ALLOWED_FILE_EXTENSIONS = ['.xlsx'] + TERMINATE_ON_EMPTY_ROW = True + CONVERT_VALUES_TO_STRING = True @classmethod def _all_rows(cls, sheet: Worksheet): @@ -370,9 +378,20 @@ def _all_cols(cls, sheet: Worksheet): for col in range(1, col_max + 1): yield col - @property - def tab_names(self) -> List[str]: - return self.reader_agent.sheetnames + def tab_names(self, order: Optional[List[str]] = None) -> List[str]: + def ordered_sheet_names(sheet_names: List[str]) -> List[str]: + if not order: + return sheet_names + ordered_sheet_names = [] + for item in order: + for sheet_name in sheet_names: + if to_camel_case(item.replace(" ", "")) == to_camel_case(sheet_name.replace(" ", "")): + ordered_sheet_names.append(sheet_name) + for sheet_name in sheet_names: + if sheet_name not in ordered_sheet_names: + ordered_sheet_names.append(sheet_name) + return ordered_sheet_names + return ordered_sheet_names(self.reader_agent.sheetnames) def _get_reader_agent(self) -> Workbook: return openpyxl.load_workbook(self.filename) @@ -383,21 +402,68 @@ def _raw_row_generator_for_tab_name(self, tab_name: str) -> Iterable[SheetRow]: for row in self._all_rows(sheet)) def _get_raw_row_content_tuple(self, sheet: Worksheet, row: int) -> SheetRow: - return [sheet.cell(row=row, column=col).value - for col in self._all_cols(sheet)] + # return [str(sheet.cell(row=row, column=col).value) for col in self._all_cols(sheet)] + results = [] + for col in self._all_cols(sheet): + value = sheet.cell(row=row, column=col).value +# if value is not None and XlsxManager.CONVERT_VALUES_TO_STRING: +# value = str(value).strip() + if XlsxManager.CONVERT_VALUES_TO_STRING: + value = str(value).strip() if value is not None else "" + results.append(value) + return results def _create_tab_processor_state(self, tab_name: str) -> Headers: sheet = self.reader_agent[tab_name] + """ headers: Headers = [str(sheet.cell(row=1, column=col).value) for col in self._all_cols(sheet)] - self.headers_by_tab_name[sheet.title] = headers - return headers + """ + headers = [] + for col in self._all_cols(sheet): + cell = sheet.cell(row=1, column=col).value + if cell is not None and XlsxManager.CONVERT_VALUES_TO_STRING: + cell = str(cell).strip() + headers.append(cell) + self.headers_by_tab_name[sheet.title] = XlsxManager.remove_trailing_none_values(headers) + return self.headers_by_tab_name[sheet.title] def _process_row(self, tab_name: str, headers: Headers, row_data: SheetRow) -> AnyJsonData: ignored(tab_name) - return {headers[i]: self.parse_cell_value(row_datum) + if XlsxManager.is_terminating_row(row_data): + return None + if XlsxManager.is_comment_row(row_data): + return CommentRow + if len(headers) < len(row_data): + row_data = row_data[:len(headers)] + override_prefer_number = False if XlsxManager.CONVERT_VALUES_TO_STRING else None + return {headers[i]: self.parse_cell_value(row_datum, override_prefer_number=override_prefer_number) for i, row_datum in enumerate(row_data)} + @staticmethod + def is_terminating_row(row: List[Optional[Any]]) -> bool: + # TODO: This is may change; currently an all blank row signals the end of input. + return all(cell is None or cell == "" for cell in row) and XlsxManager.TERMINATE_ON_EMPTY_ROW + + @staticmethod + def is_comment_row(row: Tuple[Optional[Any]]) -> bool: + # TODO: This will probably change; currently a row starting only with #, *, or ^ signals a comment. 
+ for cell in row: + if cell is None: + continue + if (cell := str(cell)).startswith("#") or cell.startswith("*") or cell.startswith("^"): + return True + return False + + @staticmethod + def remove_trailing_none_values(values: List[Any]) -> List[Any]: + for index in range(len(values) - 1, -1, -1): + if values[index] is not None: + break + else: + return [] + return values[:index + 1] + def infer_tab_name_from_filename(filename): return os.path.basename(filename).split('.')[0] @@ -409,8 +475,7 @@ def __init__(self, filename: str, tab_name: Optional[str] = None, **kwargs): self._tab_name = tab_name or infer_tab_name_from_filename(filename) super().__init__(filename=filename, **kwargs) - @property - def tab_names(self) -> List[str]: + def tab_names(self, order: Optional[List[str]] = None) -> List[str]: return [self._tab_name] @@ -437,8 +502,7 @@ def _wrap_inserts_data(cls, filename: str, data: AnyJsonData) -> AnyJsonData: ignored(filename) return data - @property - def tab_names(self) -> List[str]: + def tab_names(self, order: Optional[List[str]] = None) -> List[str]: return list(self.content_by_tab_name.keys()) def _get_reader_agent(self) -> Any: @@ -457,7 +521,7 @@ def extract_tabbed_headers(cls, data: TabbedSheetData) -> TabbedHeaders: result[tab] = headers return result - def load_content(self) -> Dict[str, AnyJsonData]: + def load_content(self, sheet_order: Optional[List[str]] = None) -> Dict[str, AnyJsonData]: data = self._load_inserts_data(self.filename) self.content_by_tab_name = data self.headers_by_tab_name = self.extract_tabbed_headers(data) @@ -589,10 +653,10 @@ def _escape_cell_text(cls, cell_text): def _process_row(self, tab_name: str, headers: Headers, row_data: SheetRow) -> AnyJsonData: ignored(tab_name) if self.escaping: - return {headers[i]: self.parse_cell_value(self._escape_cell_text(cell_text)) + return {headers[i]: self.parse_cell_value(self._escape_cell_text(cell_text), override_prefer_number=False) for i, cell_text in enumerate(row_data)} else: - return {headers[i]: self.parse_cell_value(cell_text) + return {headers[i]: self.parse_cell_value(cell_text, override_prefer_number=False) for i, cell_text in enumerate(row_data)} @@ -688,6 +752,7 @@ def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[ @classmethod def load_annotated(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None, + retain_empty_properties: bool = False, sheet_order: Optional[List[str]] = None, **kwargs) -> Dict: """ Given a filename and various options @@ -696,7 +761,7 @@ def load_annotated(cls, filename: str, tab_name: Optional[str] = None, escaping: with maybe_unpack(filename) as filename: manager = cls.create_implementation_manager(filename=filename, tab_name=tab_name, escaping=escaping, **kwargs) - content: TabbedSheetData = manager.load_content() + content: TabbedSheetData = manager.load_content(sheet_order) return { 'filename': filename, 'content': content, @@ -709,3 +774,4 @@ def load_annotated(cls, filename: str, tab_name: Optional[str] = None, escaping: load_table_set = TableSetManager.load +load_table_annotated = TableSetManager.load_annotated diff --git a/dcicutils/validation_utils.py b/dcicutils/validation_utils.py index 9e80146a0..abe6bcb2e 100644 --- a/dcicutils/validation_utils.py +++ b/dcicutils/validation_utils.py @@ -63,10 +63,15 @@ def fetch_schema(self, schema_name: str): return override_schema schema: Optional[AnyJsonData] = self.SCHEMA_CACHE.get(schema_name) if schema is None and schema_name not in self.SCHEMA_CACHE: # If 
None is already stored, don't look it up again - schema = get_schema(schema_name, portal_env=self.portal_env, portal_vapp=self.portal_vapp) + schema_name = schema_name.replace(" ", "") + schema = SchemaManager.get_schema(schema_name, portal_env=self.portal_env, portal_vapp=self.portal_vapp) self.SCHEMA_CACHE[schema_name] = schema return schema + @staticmethod + def get_schema(name: str, portal_env: Optional[str] = None, portal_vapp: Optional[AbstractVirtualApp] = None): + return get_schema(name, portal_env=portal_env, portal_vapp=portal_vapp) + # Should not be needed, given that SCHEMA_CACHE is an instance variable. # # @classmethod @@ -86,16 +91,18 @@ def identifying_properties(self, schema: Optional[JsonSchema] = None, schema_nam return identifying_properties @classmethod # This operation doesn't actually use the schemas so is safe as a class method - def identifying_value(cls, data_item: Dict[str, AnyJsonData], identifying_properties) -> AnyJsonData: - if not identifying_properties: + def identifying_value(cls, data_item: Dict[str, AnyJsonData], + identifying_properties, raise_exception: bool = False) -> AnyJsonData: + if not identifying_properties and raise_exception: raise ValueError("No identifying properties were specified.") - for identifying_property in identifying_properties: + for identifying_property in identifying_properties or []: if identifying_property in data_item: return data_item[identifying_property] - raise ValueError(f'{there_are(identifying_properties, just_are=True)}' - f' no {maybe_pluralize(identifying_properties, "identifying property")}' - f' {disjoined_list([repr(x) for x in identifying_properties])}' - f' in {json.dumps(data_item)}.') + if raise_exception: + raise ValueError(f'{there_are(identifying_properties, just_are=True)}' + f' no {maybe_pluralize(identifying_properties, "identifying property")}' + f' {disjoined_list([repr(x) for x in identifying_properties])}' + f' in {json.dumps(data_item)}.') @staticmethod def get_identifying_properties(schema: dict) -> list: @@ -171,7 +178,9 @@ def validate_data_against_schemas(data: TabbedSheetData, *, schema_manager = SchemaManager(portal_env=portal_env, portal_vapp=portal_vapp, override_schemas=override_schemas) errors = [] - schemas = schema_manager.fetch_relevant_schemas(list(data.keys())) + + schema_names_to_fetch = [key for key, value in data.items() if value] + schemas = schema_manager.fetch_relevant_schemas(schema_names_to_fetch) for data_type in data: schema = schemas.get(data_type) diff --git a/dcicutils/zip_utils.py b/dcicutils/zip_utils.py new file mode 100644 index 000000000..e98e6ac70 --- /dev/null +++ b/dcicutils/zip_utils.py @@ -0,0 +1,79 @@ +from contextlib import contextmanager +import gzip +import os +import shutil +import tarfile +import tempfile +from typing import List, Optional, Union +import zipfile + + +@contextmanager +def unpack_zip_file_to_temporary_directory(file: str) -> str: + with temporary_directory() as tmp_directory_name: + with zipfile.ZipFile(file, "r") as zipf: + zipf.extractall(tmp_directory_name) + yield tmp_directory_name + + +@contextmanager +def unpack_tar_file_to_temporary_directory(file: str) -> str: + with temporary_directory() as tmp_directory_name: + with tarfile.open(file, "r") as tarf: + tarf.extractall(tmp_directory_name) + yield tmp_directory_name + + +def unpack_files(file: str, suffixes: Optional[List[str]] = None) -> Optional[str]: + unpack_file_to_tmp_directory = { + ".tar": unpack_tar_file_to_temporary_directory, + ".zip": unpack_zip_file_to_temporary_directory 
}.get(file[dot:]) if (dot := file.rfind(".")) > 0 else None + if unpack_file_to_tmp_directory is not None: + with unpack_file_to_tmp_directory(file) as tmp_directory_name: + for directory, _, files in os.walk(tmp_directory_name): # Ignore "." prefixed files. + for file in [file for file in files if not file.startswith(".")]: + if not suffixes or any(file.endswith(suffix) for suffix in suffixes): + yield os.path.join(directory, file) + + +@contextmanager +def unpack_gz_file_to_temporary_file(file: str, suffix: Optional[str] = None) -> str: + if (gz := file.endswith(".gz")) or file.endswith(".tgz"): # The .tgz suffix is simply short for .tar.gz. + with temporary_file(name=os.path.basename(file[:-3] if gz else file[:-4] + ".tar")) as tmp_file_name: + with open(tmp_file_name, "wb") as outputf: + with gzip.open(file, "rb") as inputf: + outputf.write(inputf.read()) + outputf.close() + yield tmp_file_name + + +@contextmanager +def temporary_directory() -> str: + try: + with tempfile.TemporaryDirectory() as tmp_directory_name: + yield tmp_directory_name + finally: + remove_temporary_directory(tmp_directory_name) + + +@contextmanager +def temporary_file(name: Optional[str] = None, suffix: Optional[str] = None, + content: Optional[Union[str, bytes, List[str]]] = None) -> str: + with temporary_directory() as tmp_directory_name: + tmp_file_name = os.path.join(tmp_directory_name, name or tempfile.mktemp(dir="")) + (suffix or "") + if content is not None: + with open(tmp_file_name, "wb" if isinstance(content, bytes) else "w") as tmp_file: + tmp_file.write("\n".join(content) if isinstance(content, list) else content) + yield tmp_file_name + + +def remove_temporary_directory(tmp_directory_name: str) -> None: + def is_temporary_directory(path: str) -> bool: + try: + tmpdir = tempfile.gettempdir() + return os.path.commonpath([path, tmpdir]) == tmpdir and os.path.exists(path) and os.path.isdir(path) + except Exception: + return False + if is_temporary_directory(tmp_directory_name): # Guard against errant deletion. + shutil.rmtree(tmp_directory_name) diff --git a/docs/source/dcicutils.rst b/docs/source/dcicutils.rst index 2d5b8058c..f8b902536 100644 --- a/docs/source/dcicutils.rst +++ b/docs/source/dcicutils.rst @@ -79,6 +79,13 @@ cloudformation_utils :members: +data_readers +^^^^^^^^^^^^ + +.. automodule:: dcicutils.data_readers + :members: + + data_utils ^^^^^^^^^^ @@ -335,3 +342,10 @@ variant_utils .. automodule:: dcicutils.variant_utils :members: + + +zip_utils +^^^^^^^^^ + +.. 
automodule:: dcicutils.zip_utils + :members: diff --git a/pyproject.toml b/pyproject.toml index 6d6847fd5..4ae62c250 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.3.0" +version = "8.4.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/test/data_files/escaping-false.json b/test/data_files/escaping-false.json index 84ab06993..3c95926d9 100644 --- a/test/data_files/escaping-false.json +++ b/test/data_files/escaping-false.json @@ -5,56 +5,56 @@ "unquoted": "\\\\", "doublequoted": "\\\\", "singlequoted": "'\\\\'", - "overflow": null + "overflow": "" }, { "name": "formfeed", "unquoted": "\\f", "doublequoted": "\\f", "singlequoted": "'\\f'", - "overflow": null + "overflow": "" }, { "name": "newline", "unquoted": "\\n", "doublequoted": "\\n", "singlequoted": "'\\n'", - "overflow": null + "overflow": "" }, { "name": "return", "unquoted": "\\r", "doublequoted": "\\r", "singlequoted": "'\\r'", - "overflow": null + "overflow": "" }, { "name": "tab", "unquoted": "\\t", "doublequoted": "\\t", "singlequoted": "'\\t'", - "overflow": null + "overflow": "" }, { "name": "misc", "unquoted": "\\m", "doublequoted": "\\m", "singlequoted": "'\\m'", - "overflow": null + "overflow": "" }, { "name": "quote1", "unquoted": "N/A", "doublequoted": "x,,z", "singlequoted": "N/A", - "overflow": null + "overflow": "" }, { "name": "quotelong", "unquoted": "N/A", "doublequoted": "x,,z,N/A\nquotlongcontinued,", "singlequoted": "N/A", - "overflow": null + "overflow": "" }, { "name": "comma", diff --git a/test/data_files/escaping-true.json b/test/data_files/escaping-true.json index 5d6c837a6..bae122afd 100644 --- a/test/data_files/escaping-true.json +++ b/test/data_files/escaping-true.json @@ -5,56 +5,56 @@ "unquoted": "\\", "doublequoted": "\\", "singlequoted": "'\\'", - "overflow": null + "overflow": "" }, { "name": "formfeed", "unquoted": "\\f", "doublequoted": "\\f", "singlequoted": "'\\f'", - "overflow": null + "overflow": "" }, { "name": "newline", "unquoted": "\n", "doublequoted": "\n", "singlequoted": "'\n'", - "overflow": null + "overflow": "" }, { "name": "return", "unquoted": "\r", "doublequoted": "\r", "singlequoted": "'\r'", - "overflow": null + "overflow": "" }, { "name": "tab", "unquoted": "\t", "doublequoted": "\t", "singlequoted": "'\t'", - "overflow": null + "overflow": "" }, { "name": "misc", "unquoted": "\\m", "doublequoted": "\\m", "singlequoted": "'\\m'", - "overflow": null + "overflow": "" }, { "name": "quote1", "unquoted": "N/A", "doublequoted": "x,,z", "singlequoted": "N/A", - "overflow": null + "overflow": "" }, { "name": "quotelong", "unquoted": "N/A", "doublequoted": "x,,z,N/A\nquotlongcontinued,", "singlequoted": "N/A", - "overflow": null + "overflow": "" }, { "name": "comma", diff --git a/test/data_files/sample_items2.json b/test/data_files/sample_items2.json index 7e084f908..0f3b22f90 100644 --- a/test/data_files/sample_items2.json +++ b/test/data_files/sample_items2.json @@ -1,6 +1,6 @@ [ {"name": "john", "sex": "Male", "member": false}, {"name": "juan", "sex": "Male", "member": true}, - {"name": "igor", "sex": "unknown", "member": null}, + {"name": "igor", "sex": "unknown", "member": ""}, {"name": "mary", "sex": "Female", "member": true} ] diff --git a/test/test_bundle_utils.py b/test/test_bundle_utils.py index 6e1da6685..008442430 100644 --- a/test/test_bundle_utils.py +++ b/test/test_bundle_utils.py @@ -4,6 +4,7 @@ import os 
import pytest import re +from typing import List, Optional, Union from dcicutils import ( bundle_utils as bundle_utils_module, @@ -172,7 +173,7 @@ def test_item_tools_parse_item_value_basic(): # Nulls (None, None), - ('', None), ('null', None), ('Null', None), ('NULL', None), + ('', ''), ('null', None), ('Null', None), ('NULL', None), # Booleans ('true', True), ('True', True), ('TRUE', True), @@ -191,9 +192,9 @@ def test_item_tools_parse_item_value_basic(): ('|', []), # special case: lone '|' means empty ('alpha|', ['alpha']), ('7|', [7]), # special case: trailing '|' means singleton # These follow from general case of '|' as separator of items recursively parsed - ('|alpha', [None, 'alpha']), ('|alpha|', [None, 'alpha']), ('|7', [None, 7]), + ('|alpha', ['', 'alpha']), ('|alpha|', ['', 'alpha']), ('|7', ['', 7]), ('alpha|beta|gamma', ['alpha', 'beta', 'gamma']), - ('alpha|true|false|null||7|1.5', ['alpha', True, False, None, None, 7, 1.5]) + ('alpha|true|false|null||7|1.5', ['alpha', True, False, None, '', 7, 1.5]) ] for input, heuristic_result in expectations: @@ -271,6 +272,7 @@ def test_load_table_structures(): print("loaded=", json.dumps(loaded, indent=2)) expected = SAMPLE_JSON_TABS_FILE_ITEM_CONTENT print("expected=", json.dumps(expected, indent=2)) + # assert loaded == expected assert loaded == expected with pytest.raises(LoadArgumentsError) as exc: @@ -292,9 +294,13 @@ def test_load_items(): # mock_get_schema.return_value = {} with no_schemas(): - assert load_items(SAMPLE_XLSX_FILE, apply_heuristics=True) == SAMPLE_XLSX_FILE_ITEM_CONTENT - assert load_items(SAMPLE_CSV_FILE, apply_heuristics=True) == SAMPLE_CSV_FILE_ITEM_CONTENT - assert load_items(SAMPLE_TSV_FILE, apply_heuristics=True) == SAMPLE_TSV_FILE_ITEM_CONTENT + # TODO: Some work still to do WRT sorting out between None and empty strings in general. 
+ assert load_items(SAMPLE_XLSX_FILE, apply_heuristics=True) == \ + remove_empty_properties(SAMPLE_XLSX_FILE_ITEM_CONTENT) + assert remove_empty_properties(load_items(SAMPLE_CSV_FILE, apply_heuristics=True), [None, '']) == \ + SAMPLE_CSV_FILE_ITEM_CONTENT + assert remove_empty_properties(load_items(SAMPLE_TSV_FILE, apply_heuristics=True), [None, '']) == \ + SAMPLE_TSV_FILE_ITEM_CONTENT with pytest.raises(LoadArgumentsError) as exc: load_items("something.else") @@ -320,7 +326,7 @@ def test_load_items(): SAMPLE_CSV_FILE2_SHEET_NAME: [ {"name": "john", "sex": "M", "member": "false"}, {"name": "juan", "sex": "male", "member": "true"}, - {"name": "igor", "sex": "unknown", "member": None}, + {"name": "igor", "sex": "unknown", "member": ""}, {"name": "mary", "sex": "Female", "member": "t"} ] } @@ -329,7 +335,7 @@ def test_load_items(): SAMPLE_CSV_FILE2_SHEET_NAME: [ {"name": "john", "sex": "M", "member": False}, {"name": "juan", "sex": "male", "member": True}, - {"name": "igor", "sex": "unknown", "member": None}, + {"name": "igor", "sex": "unknown"}, {"name": "mary", "sex": "Female", "member": "t"} ] } @@ -338,7 +344,7 @@ def test_load_items(): "Person": [ {"name": "john", "sex": "Male", "member": False}, {"name": "juan", "sex": "Male", "member": True}, - {"name": "igor", "sex": "unknown", "member": None}, + {"name": "igor", "sex": "unknown"}, {"name": "mary", "sex": "Female", "member": True} ] } @@ -717,8 +723,10 @@ def test_table_checker(): portal_env=mock_ff_env) checker.check_tabs() expected_problems = [ - f"User[0].project: Unable to validate Project reference: {SAMPLE_PROJECT_UUID!r}", - f"User[0].user_institution: Unable to validate Institution reference: {SAMPLE_INSTITUTION_UUID!r}" + f"User[0].project: Cannot resolve reference (linkTo) for:" + f" Project/{SAMPLE_PROJECT_UUID} from User.project", + f"User[0].user_institution: Cannot resolve reference (linkTo) for:" + f" Institution/{SAMPLE_INSTITUTION_UUID} from User.user_institution" ] expected_problem_lines = [f"Problem: {problem}" for problem in expected_problems] assert exc.value.problems == expected_problems @@ -733,3 +741,19 @@ def test_table_checker(): flattened=True, portal_env=mock_ff_env) checker.check_tabs() + + +def remove_empty_properties(data: Optional[Union[list, dict]], + empty: Optional[List[Optional[str]]] = None) -> Optional[Union[list, dict]]: + if not empty: + empty = [None] + if isinstance(data, dict): + for key in list(data.keys()): + if (value := data[key]) in empty: + del data[key] + else: + remove_empty_properties(value, empty) + elif isinstance(data, list): + for item in data: + remove_empty_properties(item, empty) + return data diff --git a/test/test_misc_utils.py b/test/test_misc_utils.py index b32a6d4ce..6bf0afebe 100644 --- a/test/test_misc_utils.py +++ b/test/test_misc_utils.py @@ -31,7 +31,7 @@ ObsoleteError, CycleError, TopologicalSorter, keys_and_values_to_dict, dict_to_keys_and_values, is_c4_arn, deduplicate_list, chunked, parse_in_radix, format_in_radix, managed_property, future_datetime, MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked, to_camel_case, json_file_contents, - pad_to, JsonLinesReader, + pad_to, JsonLinesReader, split_string, merge_objects ) from dcicutils.qa_utils import ( Occasionally, ControlledTime, override_environ as qa_override_environ, MockFileSystem, printed_output, @@ -3590,3 +3590,87 @@ def test_json_lines_reader_lists(): parsed = [line for line in JsonLinesReader(fp)] expected = [item1, item2] assert parsed == expected + + +def test_split_array_string(): + def 
split_array_string(value: str) -> List[str]: + return split_string(value, "|", "\\") + assert split_array_string(r"abc|def|ghi") == ["abc", "def", "ghi"] + assert split_array_string(r"abc\|def|ghi") == ["abc|def", "ghi"] + assert split_array_string(r"abc\\|def|ghi") == ["abc\\", "def", "ghi"] + assert split_array_string(r"abc\\\|def|ghi") == ["abc\\|def", "ghi"] + assert split_array_string(r"abc\\\|def\|ghi") == ["abc\\|def|ghi"] + assert split_array_string(r"abc\\|\\def\|ghi") == ["abc\\", "\\def|ghi"] + assert split_array_string(r"abc\\|\\def\|ghi||") == ["abc\\", "\\def|ghi"] + assert split_array_string(r"|abcdefghi|") == ["abcdefghi"] + assert split_array_string(r"|abcdefghi||xyzzy") == ["abcdefghi", "xyzzy"] + assert split_array_string(r"") == [] + assert split_array_string(None) == [] + assert split_array_string(r"|") == [] + assert split_array_string(r"\|") == ["|"] + assert split_array_string(r"\\|") == ["\\"] + + +def test_merge_objects_1(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": [None, None, None]} + source = {"xyzzy": [{"foo": None}, {"goo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}, None]} + merge_objects(target, source, False) + assert target == expected + + +def test_merge_objects_2(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": [None, None, None]} + source = {"xyzzy": [{"foo": None}, {"goo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}, {"goo": None}]} + merge_objects(target, source, True) + assert target == expected + + +def test_merge_objects_3(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": [None, None]} + source = {"xyzzy": [{"foo": None}, {"goo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}]} + merge_objects(target, source, False) + assert target == expected + + +def test_merge_objects_4(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": [None, None]} + source = {"xyzzy": [{"foo": None}, {"goo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}]} + merge_objects(target, source, True) + assert target == expected + + +def test_merge_objects_5(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": ["mno"]} + source = {"xyzzy": [{"foo": None}, {"goo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}]} + merge_objects(target, source, False) + assert target == expected + + +def test_merge_objects_6(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": ["mno"]} + source = {"xyzzy": [{"foo": None}, {"goo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}]} + merge_objects(target, source, True) + assert target == expected + + +def test_merge_objects_7(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": [None, None, "abc", "def", 123]} + source = {"xyzzy": [{"foo": None}, {"goo": None}, {"hoo": None}]} + expected = {"abc": {"def": {"ghi": None}}, "xyzzy": [{"foo": None}, {"goo": None}, {"hoo": None}, "def", 123]} + merge_objects(target, source, False) + assert target == expected + + +def test_merge_objects_8(): + target = {"abc": {"def": {"ghi": None}}, "xyzzy": [None, None, "abc", "def", 123]} + source = {"xyzzy": [{"foo": None}, {"goo": None}, {"hoo": None}]} + expected = {"abc": {"def": {"ghi": None}}, + "xyzzy": [{"foo": None}, {"goo": None}, {"hoo": None}, {"hoo": None}, {"hoo": None}]} + merge_objects(target, source, True) + assert target == expected diff --git 
a/test/test_qa_utils.py b/test/test_qa_utils.py index bed502166..6e9c606bf 100644 --- a/test/test_qa_utils.py +++ b/test/test_qa_utils.py @@ -568,7 +568,7 @@ def test_occasionally_errors(): error_message="something") fail_for_a_while(1) except Exception as e: - assert type(e) == SyntaxError + assert type(e) is SyntaxError assert str(e) == "something" diff --git a/test/test_sheet_utils.py b/test/test_sheet_utils.py index c012772c1..276b17b23 100644 --- a/test/test_sheet_utils.py +++ b/test/test_sheet_utils.py @@ -1,6 +1,7 @@ import json import os import pytest +from typing import Optional, Union from collections import namedtuple from dcicutils.sheet_utils import ( @@ -116,29 +117,52 @@ def test_table_set_manager_registry_manager_for_filename(): SAMPLE_XLSX_FILE = os.path.join(TEST_DIR, 'data_files/sample_items.xlsx') -SAMPLE_XLSX_FILE_RAW_CONTENT = { +backup_SAMPLE_XLSX_FILE_RAW_CONTENT = { "Sheet1": [ - {"x": 1, "y.a": 1, "y.z": 1}, - {"x": 1, "y.a": 2, "y.z": 3}, + {"x": "1", "y.a": "1", "y.z": "1"}, + {"x": "1", "y.a": "2", "y.z": "3"}, {"x": "alpha", "y.a": "beta", "y.z": "gamma|delta"}, ], "Sheet2": [ { - "name": "bill", "age": 23, - "mother.name": "mary", "mother.age": 58, - "father.name": "fred", "father.age": 63, - "friends#0.name": "sam", "friends#0.age": 22, - "friends#1.name": "arthur", "friends#1.age": 19, + "name": "bill", "age": "23", + "mother.name": "mary", "mother.age": "58", + "father.name": "fred", "father.age": "63", + "friends#0.name": "sam", "friends#0.age": "22", + "friends#1.name": "arthur", "friends#1.age": "19", }, { - "name": "joe", "age": 9, - "mother.name": "estrella", "mother.age": 35, - "father.name": "anthony", "father.age": 34, - "friends#0.name": "anders", "friends#0.age": 9, + "name": "joe", "age": "9", + "mother.name": "estrella", "mother.age": "35", + "father.name": "anthony", "father.age": "34", + "friends#0.name": "anders", "friends#0.age": "9", "friends#1.name": None, "friends#1.age": None, }, ] } +SAMPLE_XLSX_FILE_RAW_CONTENT = { + "Sheet1": [ + {"x": "1", "y.a": "1", "y.z": "1"}, + {"x": "1", "y.a": "2", "y.z": "3"}, + {"x": "alpha", "y.a": "beta", "y.z": "gamma|delta"}, + ], + "Sheet2": [ + { + "name": "bill", "age": "23", + "mother.name": "mary", "mother.age": "58", + "father.name": "fred", "father.age": "63", + "friends#0.name": "sam", "friends#0.age": "22", + "friends#1.name": "arthur", "friends#1.age": "19", + }, + { + "name": "joe", "age": "9", + "mother.name": "estrella", "mother.age": "35", + "father.name": "anthony", "father.age": "34", + "friends#0.name": "anders", "friends#0.age": "9", + "friends#1.name": "", "friends#1.age": "", + }, + ] +} SAMPLE_XLSX_FILE_INFLATED_CONTENT = { "Sheet1": [ @@ -162,7 +186,7 @@ def test_table_set_manager_registry_manager_for_filename(): "father": {"name": "anthony", "age": 34}, "friends": [ {"name": "anders", "age": 9}, - {"name": None, "age": None} + {"name": "", "age": ""} ] }, ], @@ -243,11 +267,11 @@ def test_xlsx_manager_load_csv(): def test_csv_manager_load_content(): wt = CsvManager(SAMPLE_CSV_FILE) - assert wt.load_content() == SAMPLE_CSV_FILE_RAW_CONTENT + assert convert_number_to_string(wt.load_content()) == SAMPLE_CSV_FILE_RAW_CONTENT def test_csv_manager_load(): - assert CsvManager.load(SAMPLE_CSV_FILE) == SAMPLE_CSV_FILE_RAW_CONTENT + assert convert_number_to_string(CsvManager.load(SAMPLE_CSV_FILE)) == SAMPLE_CSV_FILE_RAW_CONTENT def test_csv_manager_load_csv(): @@ -269,11 +293,11 @@ def test_csv_escaping(): def test_tsv_manager_load_content(): wt = TsvManager(SAMPLE_TSV_FILE) - assert 
wt.load_content() == SAMPLE_TSV_FILE_RAW_CONTENT + assert convert_number_to_string(wt.load_content()) == SAMPLE_TSV_FILE_RAW_CONTENT def test_tsv_manager_load(): - assert TsvManager.load(SAMPLE_TSV_FILE) == SAMPLE_TSV_FILE_RAW_CONTENT + assert convert_number_to_string(TsvManager.load(SAMPLE_TSV_FILE)) == SAMPLE_TSV_FILE_RAW_CONTENT def test_tsv_manager_load_csv(): @@ -281,3 +305,16 @@ def test_tsv_manager_load_csv(): TsvManager.load(SAMPLE_XLSX_FILE) assert str(exc.value).startswith('The TableSetManager subclass TsvManager' ' expects only .tsv or .tsv.txt filenames:') + + +def convert_number_to_string(data: Optional[Union[list, dict]]) -> Optional[Union[list, dict]]: + if isinstance(data, dict): + for key in list(data.keys()): + if isinstance(value := data[key], int): + data[key] = str(value) + else: + convert_number_to_string(value) + elif isinstance(data, list): + for item in data: + convert_number_to_string(item) + return data diff --git a/test/test_validation_utils.py b/test/test_validation_utils.py index 40878a222..9129e05c0 100644 --- a/test/test_validation_utils.py +++ b/test/test_validation_utils.py @@ -73,20 +73,20 @@ def test_schema_manager_with_schemas(schema_id): def test_schema_manager_identifying_value(): with pytest.raises(ValueError) as exc: - assert SchemaManager.identifying_value({'any': 'thing'}, []) + assert SchemaManager.identifying_value({'any': 'thing'}, [], raise_exception=True) assert str(exc.value) == "No identifying properties were specified." person_named_fred = {'age': 33, 'name': 'Fred', 'favorite-color': 'yellow'} - assert SchemaManager.identifying_value(person_named_fred, ['uuid', 'name']) == 'Fred' + assert SchemaManager.identifying_value(person_named_fred, ['uuid', 'name'], raise_exception=True) == 'Fred' person_nicknamed_fred = {'age': 33, 'nickname': 'Fred', 'favorite-color': 'yellow'} with pytest.raises(ValueError) as exc: - SchemaManager.identifying_value(person_nicknamed_fred, ['uuid', 'name']) + SchemaManager.identifying_value(person_nicknamed_fred, ['uuid', 'name'], raise_exception=True) assert str(exc.value) == ("""There are no identifying properties 'uuid' or 'name'""" """ in {"age": 33, "nickname": "Fred", "favorite-color": "yellow"}.""") with pytest.raises(ValueError) as exc: - SchemaManager.identifying_value(person_nicknamed_fred, ['name']) + SchemaManager.identifying_value(person_nicknamed_fred, ['name'], raise_exception=True) assert str(exc.value) == ("""There is no identifying property 'name'""" """ in {"age": 33, "nickname": "Fred", "favorite-color": "yellow"}.""")
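
A quick usage sketch (illustrative only, not part of the patch) of the new misc_utils helpers added in this diff; the expected values follow the tests added in test_misc_utils.py above:

    from dcicutils.misc_utils import merge_objects, remove_empty_properties, right_trim, split_string

    # split_string: split on a delimiter, honoring an optional escape character.
    assert split_string(r"abc\|def|ghi", "|", "\\") == ["abc|def", "ghi"]

    # right_trim: drops trailing None (or empty string) values; returns a new
    # list/tuple and does not modify its argument.
    assert right_trim(["a", "b", None, "", None]) == ["a", "b"]

    # merge_objects: merges source into target IN PLACE (and returns target);
    # with full=False, extra target elements are left alone.
    target = {"x": [None, None]}
    assert merge_objects(target, {"x": [{"foo": 1}]}) == {"x": [{"foo": 1}, None]}

    # remove_empty_properties: recursively deletes None/""/{}/[] values, in place.
    data = {"a": "", "b": {"c": None, "d": 1}}
    remove_empty_properties(data)
    assert data == {"b": {"d": 1}}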
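
And a minimal sketch of the new dcicutils.data_readers module; the workbook built here (the "people.xlsx" file and its Person sheet) is hypothetical, just to keep the example self-contained. Each row comes back as a dict keyed by the header row, with cell values normalized to stripped strings:

    import openpyxl
    from dcicutils.data_readers import Excel

    # Build a throwaway workbook to read back (hypothetical example data).
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "Person"
    sheet.append(["name", "age"])
    sheet.append(["alice", 23])
    sheet.append(["bob", 45])
    workbook.save("people.xlsx")

    excel = Excel("people.xlsx")
    for sheet_name in excel.sheet_names:
        reader = excel.sheet_reader(sheet_name)
        for row in reader:
            print(sheet_name, row)  # e.g. Person {'name': 'alice', 'age': '23'}
        print(reader.issues)  # None, or warnings (empty header columns, extra row values).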