Skip to content

Commit

Permalink
feat(KDP): adding string decoding and sorting
Browse files Browse the repository at this point in the history
  • Loading branch information
piotrlaczkowski committed Mar 11, 2024
1 parent e91495c commit c057516
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion kdp/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,12 @@ def _compute_final_statistics(self) -> dict[str, dict]:

for feature in self.categorical_cols:
# Convert TensorFlow string tensor to Python list for unique values
unique_values = self.categorical_stats[feature].get_unique_values()
_dtype = self.features_dtypes.get(feature, tf.string)
if _dtype == tf.int32:
unique_values = [int(_byte) for _byte in self.categorical_stats[feature].get_unique_values()]
unique_values.sort()
else:
unique_values = [_byte.decode("utf-8") for _byte in self.categorical_stats[feature].get_unique_values()]
final_stats["categorical_stats"][feature] = {
"size": len(unique_values),
"vocab": unique_values,
Expand Down

0 comments on commit c057516

Please sign in to comment.