Skip to content

Commit

Permalink
Merge pull request #415 from Renumics/fix/use-of-mixed-dtype
Browse files (browse the repository at this point in the history)
replace mixed with unknown in arrow datasource
  • Loading branch information
neindochoh authored Feb 8, 2024
2 parents f94ef40 + 7b3e883 commit f80f304
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions renumics/spotlight_plugins/core/arrow_dataset_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _convert_schema(self) -> spotlight_dtypes.DTypeMap:
def _convert_dtype(field: pa.Field) -> spotlight_dtypes.DType:
if field.type == pa.null():
# should we introduce a `null` dtype?
- return spotlight_dtypes.mixed_dtype
+ return spotlight_dtypes.unknown_dtype
if field.type == pa.bool_():
return spotlight_dtypes.bool_dtype
if field.type in PA_INTEGER_TYPES:
Expand Down Expand Up @@ -134,15 +134,15 @@ def _convert_dtype(field: pa.Field) -> spotlight_dtypes.DType:
if isinstance(field.type, pa.LargeListType):
return spotlight_dtypes.SequenceDType(_convert_dtype(field.type.value_field))
if isinstance(field.type, pa.MapType):
- return spotlight_dtypes.mixed_dtype
+ return spotlight_dtypes.unknown_dtype
if isinstance(field.type, pa.StructType):
- return spotlight_dtypes.mixed_dtype
+ return spotlight_dtypes.unknown_dtype
if isinstance(field.type, pa.DictionaryType):
if (field.type.index_type() in PA_INTEGER_TYPES) and (
field.type.value_type() in (pa.string(), pa.large_string())
):
return spotlight_dtypes.CategoryDType()
- return spotlight_dtypes.mixed_dtype
+ return spotlight_dtypes.unknown_dtype
if isinstance(field.type, pa.RunEndEncodedType):
return spotlight_dtypes.SequenceDType(
_convert_dtype(pa.field("", field.type.value_type))
Expand Down

0 comments on commit f80f304

Please sign in to comment.