From 52128ad3585ca4f1fb1e185bc37261a7020f7ed8 Mon Sep 17 00:00:00 2001
From: Stephan Finkensieper
Date: Wed, 15 Nov 2023 17:19:08 +0000
Subject: [PATCH 1/5] Update pre-commit hooks

---
 .pre-commit-config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 14c2fba..d263b82 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,13 +1,13 @@
 repos:
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
   - repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 23.11.0
     hooks:
       - id: black
   - repo: https://github.com/PyCQA/flake8
-    rev: 4.0.1
+    rev: 6.1.0
     hooks:
       - id: flake8

From ce45d9add19d4dcd2fa77a46f701c18594e06680 Mon Sep 17 00:00:00 2001
From: Stephan Finkensieper
Date: Wed, 15 Nov 2023 17:21:40 +0000
Subject: [PATCH 2/5] Fix flake8 config

---
 setup.cfg | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 17fae0e..60f4863 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,11 +5,16 @@ setup_requires =
 
 [flake8]
 ignore =
-    E203 # whitespace before ':' - doesn't work well with black
-    E402 # module level import not at top of file
-    E501 # line too long - let black worry about that
-    E731 # do not assign a lambda expression, use a def
-    W503 # line break before binary operator
+    # whitespace before ':' - doesn't work well with black
+    E203
+    # module level import not at top of file
+    E402
+    # line too long - let black worry about that
+    E501
+    # do not assign a lambda expression, use a def
+    E731
+    # line break before binary operator
+    W503
 exclude=
     .eggs
     doc

From 5462abd6c1c707f79ee6849c792fa40fc935e5ca Mon Sep 17 00:00:00 2001
From: Stephan Finkensieper
Date: Wed, 15 Nov 2023 17:21:49 +0000
Subject: [PATCH 3/5] Fix DataID access

---
 pygac_fdr/writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygac_fdr/writer.py b/pygac_fdr/writer.py
index 701a6a3..65ea4b7 100644
--- a/pygac_fdr/writer.py
+++ b/pygac_fdr/writer.py
@@ -360,7 +360,7 @@ def _get_encoding(self, scene):
         # Remove entries from the encoding dictionary if the corresponding dataset is not available.
         # The CF writer doesn't like that.
         enc_keys = set(self.encoding.keys())
-        scn_keys = set([key.name for key in scene.keys()])
+        scn_keys = set([key["name"] for key in scene.keys()])
         scn_keys = scn_keys.union(
             set([coord for key in scene.keys() for coord in scene[key].coords])
         )

From e0432dd02423b77be5cbf7d2236ce7193f095fae Mon Sep 17 00:00:00 2001
From: Stephan Finkensieper
Date: Wed, 15 Nov 2023 17:33:10 +0000
Subject: [PATCH 4/5] Refactor scene encoding

---
 pygac_fdr/writer.py | 54 +++++++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/pygac_fdr/writer.py b/pygac_fdr/writer.py
index 65ea4b7..e278f61 100644
--- a/pygac_fdr/writer.py
+++ b/pygac_fdr/writer.py
@@ -357,25 +357,8 @@ def _rename_datasets(self, scene):
 
     def _get_encoding(self, scene):
         """Get netCDF encoding for the datasets in the scene."""
-        # Remove entries from the encoding dictionary if the corresponding dataset is not available.
-        # The CF writer doesn't like that.
-        enc_keys = set(self.encoding.keys())
-        scn_keys = set([key["name"] for key in scene.keys()])
-        scn_keys = scn_keys.union(
-            set([coord for key in scene.keys() for coord in scene[key].coords])
-        )
-        encoding = dict(
-            [(key, self.encoding[key]) for key in enc_keys.intersection(scn_keys)]
-        )
-
-        # Make sure scale_factor and add_offset are both double
-        for enc in encoding.values():
-            if "scale_factor" in enc:
-                enc["scale_factor"] = np.float64(enc["scale_factor"])
-            if "add_offset" in enc:
-                enc["add_offset"] = np.float64(enc["add_offset"])
-
-        return encoding
+        enc = SceneEncoder(self.encoding)
+        return enc.get_encoding(scene)
 
     def _fix_global_attrs(self, filename, global_attrs):
         LOG.info("Fixing global attributes")
@@ -441,6 +424,39 @@ def _postproc_file(self, filename, gac_header, global_attrs):
         self._fix_global_attrs(filename, global_attrs)
 
 
+class SceneEncoder:
+    def __init__(self, encoding):
+        self.encoding = encoding
+
+    def get_encoding(self, scene):
+        enc = self._get_encoding_for_available_datasets(scene)
+        self._fix_dtypes(enc)
+        return enc
+
+    def _get_encoding_for_available_datasets(self, scene):
+        common_keys = self._get_keys_in_both_scene_and_encoding(scene)
+        return dict([(key, self.encoding[key]) for key in common_keys])
+
+    def _get_keys_in_both_scene_and_encoding(self, scene):
+        scn_keys = self._get_scene_keys(scene)
+        enc_keys = set(self.encoding.keys())
+        return enc_keys.intersection(scn_keys)
+
+    def _get_scene_keys(self, scene):
+        dataset_keys = set([key["name"] for key in scene.keys()])
+        coords_keys = set(
+            [coord for key in scene.keys() for coord in scene[key].coords]
+        )
+        return dataset_keys.union(coords_keys)
+
+    def _fix_dtypes(self, encoding):
+        for enc in encoding.values():
+            if "scale_factor" in enc:
+                enc["scale_factor"] = np.float64(enc["scale_factor"])
+            if "add_offset" in enc:
+                enc["add_offset"] = np.float64(enc["add_offset"])
+
+
 class GlobalAttributeComposer:
     """Compose global attributes."""
 

From f66c54149337d4b13e692d41de389f2ddb9174df Mon Sep 17 00:00:00 2001
From: Stephan Finkensieper
Date: Wed, 15 Nov 2023 17:51:17 +0000
Subject: [PATCH 5/5] Avoid extra index when grouping data frames

---
 pygac_fdr/metadata.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pygac_fdr/metadata.py b/pygac_fdr/metadata.py
index 1950d49..1e77303 100644
--- a/pygac_fdr/metadata.py
+++ b/pygac_fdr/metadata.py
@@ -227,7 +227,7 @@ def _sort_by_ascending_time(self, df):
 
     def _set_global_quality_flag(self, df):
         LOG.info("Computing quality flags")
-        grouped = df.groupby("platform", as_index=False)
+        grouped = df.groupby("platform", as_index=False, group_keys=False)
         return grouped.apply(
             lambda x: self._set_global_qual_flags_single_platform(x, x.name)
         )
@@ -355,7 +355,7 @@ def _set_too_long_flag(self, df, max_length=120):
 
     def _calc_overlap(self, df):
         LOG.info("Computing overlap")
-        grouped = df.groupby("platform", as_index=False)
+        grouped = df.groupby("platform", as_index=False, group_keys=False)
         return grouped.apply(self._calc_overlap_single_platform)
 
     def _calc_overlap_single_platform(self, df, open_end=False):
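
A note on PATCH 5/5: on recent pandas versions (2.x), GroupBy.apply() prepends the group keys as an extra index level to its result unless group_keys=False is passed, which is what both groupby("platform", ...) call sites now request. The sketch below is not part of the patches and uses made-up placeholder data purely to illustrate that behaviour:

    import pandas as pd

    # Toy stand-in for the per-file metadata frame (illustrative values only).
    df = pd.DataFrame(
        {"platform": ["NOAA-15", "NOAA-15", "NOAA-18"], "value": [1, 2, 3]}
    )

    # Default behaviour on recent pandas: apply() adds a group level to the
    # result index, so it no longer aligns with the original frame.
    with_group_keys = df.groupby("platform", as_index=False).apply(lambda x: x)

    # With group_keys=False, as in the patch, the original index is preserved.
    without_group_keys = df.groupby(
        "platform", as_index=False, group_keys=False
    ).apply(lambda x: x)

    print(with_group_keys.index.nlevels)     # typically 2 on pandas >= 2.0
    print(without_group_keys.index.nlevels)  # 1

Keeping the flat original index means the per-platform results returned by _set_global_quality_flag and _calc_overlap stay aligned with the input frame without an extra reset_index step.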