diff --git a/kdp/stats.py b/kdp/stats.py index ff6c908..2426287 100644 --- a/kdp/stats.py +++ b/kdp/stats.py @@ -280,7 +280,12 @@ def _compute_final_statistics(self) -> dict[str, dict]: for feature in self.categorical_cols: # Convert TensorFlow string tensor to Python list for unique values - unique_values = self.categorical_stats[feature].get_unique_values() + _dtype = self.features_dtypes.get(feature, tf.string) + if _dtype == tf.int32: + unique_values = [int(_byte) for _byte in self.categorical_stats[feature].get_unique_values()] + unique_values.sort() + else: + unique_values = [_byte.decode("utf-8") for _byte in self.categorical_stats[feature].get_unique_values()] final_stats["categorical_stats"][feature] = { "size": len(unique_values), "vocab": unique_values,