Skip to content

Commit

Permalink
correctly convert pd.CategoricalDtype
Browse files Browse the repository at this point in the history
  • Loading branch information
neindochoh committed Sep 18, 2023
1 parent f5bcbc4 commit 4293013
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 9 deletions.
5 changes: 1 addition & 4 deletions renumics/spotlight/io/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,7 @@ def infer_dtype(column: pd.Series) -> dtypes.DType:
return dtypes.bool_dtype
if pd.api.types.is_categorical_dtype(column):
return dtypes.CategoryDType(
- {
- category: code
- for code, category in zip(column.cat.codes, column.cat.categories)
- }
+ {category: code for code, category in enumerate(column.cat.categories)}
)
if pd.api.types.is_integer_dtype(column) and not column.hasnans:
return dtypes.int_dtype
Expand Down
6 changes: 1 addition & 5 deletions renumics/spotlight_plugins/core/pandas_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def __init__(self, source: Union[Path, pd.DataFrame]):
for feature_name, feature_type in hf_dataset[
splits[0]
].features.items():
- print(feature_type)
if isinstance(feature_type, datasets.ClassLabel):
try:
df[feature_name] = pd.Categorical.from_codes(
Expand Down Expand Up @@ -212,10 +211,7 @@ def _determine_intermediate_dtype(column: pd.Series) -> dtypes.DType:
return dtypes.bool_dtype
if pd.api.types.is_categorical_dtype(column):
return dtypes.CategoryDType(
- {
- category: code
- for code, category in zip(column.cat.codes, column.cat.categories)
- }
+ {category: code for code, category in enumerate(column.cat.categories)}
)
if pd.api.types.is_integer_dtype(column):
return dtypes.int_dtype
Expand Down

0 comments on commit 4293013

Please sign in to comment.