def nuts_to_yaml(path: str, output_dir: str):
    """Convert a NUTS .xlsx source file to YAML files.

    Reads the "NUTS2024" and "Statistical Regions" worksheets of the
    workbook at *path* and writes one YAML file per worksheet into
    *output_dir* ("NUTS2021-2024.yaml" and "SR2021-2024.yaml").

    Column names are taken from row ``COL_NAME_ROW``; data rows start at
    ``MIN_DATA_ROW`` and only the first ``MAX_DATA_COL`` columns are read.
    Each kept row becomes a mapping of column name to cell value.

    Args:
        path: Location of the .xlsx workbook to read.
        output_dir: Directory the YAML files are written to.
    """
    # Worksheet name -> name of the YAML file generated from it.
    sheet_to_file = {
        "NUTS2024": "NUTS2021-2024.yaml",
        "Statistical Regions": "SR2021-2024.yaml",
    }
    output_path = Path(output_dir)

    workbook = load_workbook(path, read_only=True, data_only=True)
    try:
        for sheet, file in sheet_to_file.items():
            worksheet = workbook[sheet]
            cols = [cell.value for cell in worksheet[COL_NAME_ROW]]
            # Rows with any empty/falsy cell are dropped.
            # NOTE(review): this also drops rows holding a literal 0 --
            # confirm that is intended for the source data.
            regions = [
                {col: cell.value for col, cell in zip(cols, row)}
                for row in worksheet.iter_rows(
                    min_row=MIN_DATA_ROW, max_col=MAX_DATA_COL
                )
                if all(cell.value for cell in row)
            ]

            # allow_unicode=True emits raw non-ASCII characters, so the file
            # must be UTF-8 regardless of the platform's default encoding.
            with open(output_path / file, "w", encoding="utf-8") as f:
                yaml.dump(regions, f, allow_unicode=True)
    finally:
        # Read-only workbooks keep the source file open until closed.
        workbook.close()