From 18e3143c65b28980d2378e6f0256f61ae71cba2b Mon Sep 17 00:00:00 2001
From: piotrlaczkowski
Date: Sun, 14 Apr 2024 20:42:23 +0200
Subject: [PATCH] refactor(KDP): adding missing layers

---
 kdp/layers_factory.py |  4 ++--
 kdp/processor.py      | 17 ++++++++---------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/kdp/layers_factory.py b/kdp/layers_factory.py
index c892122..b6c748a 100644
--- a/kdp/layers_factory.py
+++ b/kdp/layers_factory.py
@@ -169,7 +169,7 @@ def crossing_layer(name: str, **kwargs) -> tf.keras.layers.Layer:
         )
 
     @staticmethod
-    def create_crossing_layer(name: str, **kwargs) -> tf.keras.layers.Layer:
+    def flatten_layer(name: str, **kwargs) -> tf.keras.layers.Layer:
         """Create a crossing layer.
 
         Args:
@@ -180,7 +180,7 @@ def create_crossing_layer(name: str, **kwargs) -> tf.keras.layers.Layer:
             An instance of the Flatten layer.
         """
         return PreprocessorLayerFactory.create_layer(
-            layer_class=tf.keras.layers.HashedCrossing,
+            layer_class=tf.keras.layers.Flatten,
             name=name,
             **kwargs,
         )
diff --git a/kdp/processor.py b/kdp/processor.py
index 7908093..df98178 100644
--- a/kdp/processor.py
+++ b/kdp/processor.py
@@ -443,7 +443,7 @@ def _add_pipeline_cross(self, stats: dict) -> None:
             _feature_name_crossed = f"{feature_a}_x_{feature_b}"
 
             preprocessor.add_processing_step(
-                layer_creator=PreprocessorLayerFactory.create_crossing_layer,
+                layer_creator=PreprocessorLayerFactory.crossing_layer,
                 nr_bins=nr_bins,
                 name=f"cross_{_feature_name_crossed}",
             )
@@ -466,28 +466,27 @@ def _add_pipeline_text(self, feature_name: str, input_layer) -> None:
             feature_name (str): The name of the feature to be preprocessed.
             input_layer: The input layer for the feature.
         """
+        # getting feature object
+        _feature = self.features_specs[feature_name]
+
+        # initializing preprocessor
         preprocessor = FeaturePreprocessor(name=feature_name)
-        # checking if we have custom setting per feature
-        _feature_config = self.text_features_config.get(feature_name) or self.text_features_config
 
         # getting stop words for text preprocessing
-        _stop_words = _feature_config.get("stop_words")
+        _stop_words = _feature.kwargs.get("stop_words", [])
         if _stop_words:
             preprocessor.add_processing_step(
-                layer_creator=PreprocessorLayerFactory.create_text_preprocessing_layer,
+                layer_creator=PreprocessorLayerFactory.text_preprocessing_layer,
                 stop_words=_stop_words,
                 name=f"text_preprocessor_{feature_name}",
            )
 
         preprocessor.add_processing_step(
-            layer_creator=PreprocessorLayerFactory.create_text_vectorization_layer,
-            conf=_feature_config,
+            layer_creator=PreprocessorLayerFactory.text_vectorization_layer,
             name=f"text_vactorizer_{feature_name}",
         )
 
         self.outputs[feature_name] = preprocessor.chain(input_layer=input_layer)
-        # updating output vector dim
-        self.output_dims += _feature_config["output_sequence_length"]
 
     def _prepare_outputs(self) -> None:
         """Preparing the outputs of the model.
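
For reviewers, a quick illustration (not part of the commit) of what the renamed factory method now produces. A minimal sketch, assuming flatten_layer() keeps the create_layer(layer_class, name, **kwargs) pass-through shown in the first hunk; the layer name here is made up:

    import tensorflow as tf
    from kdp.layers_factory import PreprocessorLayerFactory

    # flatten_layer() now wraps tf.keras.layers.Flatten instead of the
    # copy-pasted tf.keras.layers.HashedCrossing it returned before.
    flatten = PreprocessorLayerFactory.flatten_layer(name="flatten_demo")
    print(flatten(tf.zeros((2, 3, 4))).shape)  # -> (2, 12)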
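
The processor change follows the same idea: per-feature text settings now travel on the feature spec itself rather than on a shared text_features_config dict. A hypothetical sketch of that lookup, assuming only what the last hunk shows (feature specs live in self.features_specs and expose a .kwargs dict); the class and feature names are illustrative:

    # stand-in for whatever spec class KDP actually uses
    class TextFeatureSpec:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    features_specs = {
        "review_text": TextFeatureSpec(stop_words=["the", "a", "an"]),
    }

    # _add_pipeline_text() now resolves stop words per feature like this:
    stop_words = features_specs["review_text"].kwargs.get("stop_words", [])
    print(stop_words)  # -> ['the', 'a', 'an']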