From b257fc14087b7b873173a494a57064a82962537f Mon Sep 17 00:00:00 2001 From: David Almeida Date: Tue, 26 Nov 2024 15:02:37 +0100 Subject: [PATCH] Add short documentation on utility function --- docs/updating-nuts.md | 15 +++++++++++++++ pysquirrel/core.py | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 docs/updating-nuts.md diff --git a/docs/updating-nuts.md b/docs/updating-nuts.md new file mode 100644 index 0000000..bfae572 --- /dev/null +++ b/docs/updating-nuts.md @@ -0,0 +1,15 @@ +# Updating the NUTS source file + +EUROSTAT occasionally updates the current NUTS classification spreadsheet. These updates may be minor and may not change any region names or codes, but because they do occur, it is important to ensure the package uses the most up-to-date version of the data. + +To this end, a weekly GitHub action compares pysquirrel's copy of the file with the version hosted on the EUROSTAT website using a hash check. The workflow fails if the hashes differ. + +In such a case, download the newest version of the spreadsheet and, using a local installation of pysquirrel, run: + +```python +from pysquirrel.core import nuts_to_yaml + +nuts_to_yaml("path/to/latest_nuts.xlsx", "path/to/output") +``` + +The function will parse the XLSX file and output the two corresponding YAML files (for NUTS regions and Statistical Regions). YAML files allow for easy tracking of changes in GitHub commits. \ No newline at end of file diff --git a/pysquirrel/core.py b/pysquirrel/core.py index f09aca5..089a7f0 100644 --- a/pysquirrel/core.py +++ b/pysquirrel/core.py @@ -25,10 +25,10 @@ def flatten(lst): yield i -def nuts_to_yaml(path: str, output_dir: str): +def nuts_to_yaml(file_path: str, output_dir: str): """Converts a NUTS .xlsx source file to YAML files.""" - workbook = load_workbook(path, read_only=True, data_only=True) + workbook = load_workbook(file_path, read_only=True, data_only=True) for sheet, file in { "NUTS2024": "NUTS2021-2024.yaml",