Commit

Merge pull request #68 from mtiemann-os-climate/tpi-cleanup
Minor notebook cleanups to make TPI nb more consistent with OECM nb
ModeSevenIndustrialSolutions authored May 14, 2024
2 parents 4d6a88a + 51de8f8 commit 8b15070
Showing 6 changed files with 80 additions and 71 deletions.
Binary file modified data/processed/OECM 20220504/benchmark_OECM_PC.xlsx
Binary file not shown.
Binary file modified data/processed/OECM 20220504/benchmark_OECM_S3.xlsx
Binary file not shown.
23 changes: 11 additions & 12 deletions notebooks/ITR-data-production.ipynb
@@ -36,9 +36,11 @@
"metadata": {},
"outputs": [],
"source": [
"from dotenv import dotenv_values, load_dotenv\n",
"import sys\n",
"import os\n",
"import pathlib\n",
"import pytest\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import trino\n",
@@ -76,12 +78,10 @@
"from common_units import ureg\n",
"\n",
"# openscm_units doesn't make it easy to set preprocessors. This is one way to do it.\n",
"unit_registry.preprocessors = [\n",
"ureg.preprocessors = [\n",
" lambda s1: s1.replace(\"passenger km\", \"passenger_km\"),\n",
" lambda s2: s2.replace(\"BoE\", \"boe\"),\n",
"]\n",
"import ITR\n",
"from ITR.interfaces import EScope\n",
"\n",
"Q_ = ureg.Quantity\n",
"PA_ = PintArray"
@@ -145,6 +145,9 @@
"metadata": {},
"outputs": [],
"source": [
"# We'll deal with CI/CD later.\n",
"pytest.skip(\"skipping this notebook\", allow_module_level=True)\n",
"\n",
"# Load environment variables from credentials.env\n",
"osc.load_credentials_dotenv()"
]
@@ -158,14 +161,10 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "491b3688-8a64-467c-a99e-6db65fa864e6",
"cell_type": "raw",
"id": "5ed16a89-aa93-4bcb-a579-b5348472dd4d",
"metadata": {},
"outputs": [],
"source": [
"import boto3\n",
"\n",
"s3_source = boto3.resource(\n",
" service_name=\"s3\",\n",
" endpoint_url=os.environ[\"S3_LANDING_ENDPOINT\"],\n",
@@ -1354,7 +1353,7 @@
"traj_df = {}\n",
"traj_mdf = {}\n",
"traj_udf = targets_df.unstack(level=\"year\")\n",
"for scope in [s.lower() for s in EScope.get_scopes()]:\n",
"for scope in [\"s1\", \"s2\", \"s3\", \"s1s2\", \"s1s2s3\"]:\n",
" # We start by copying the target data, but we will use only the historic and replace the projection\n",
" traj_df[scope] = traj_udf[f\"ei_{scope}_by_year\"].copy()\n",
" # By calculating 2014-2019, we miss the anomoly of 2020\n",
@@ -1936,7 +1935,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.4"
}
},
"nbformat": 4,
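A note on the preprocessor hunk above (@@ -76,12 +78,10 @@): pint applies each preprocessor to a quantity string before parsing it, which is how "passenger km" and "BoE" are normalized here. The following is a minimal, self-contained sketch of that mechanism using a plain pint registry rather than the notebook's common_units registry; the passenger_km definition is hypothetical and only for illustration.

# Minimal sketch (assumes plain pint, not the notebook's openscm_units-based ureg).
import pint

ureg = pint.UnitRegistry()
ureg.define("passenger_km = [passenger_distance]")  # hypothetical unit definition

# Each preprocessor rewrites the raw string before pint parses it,
# mirroring the two replacements added in the notebook.
ureg.preprocessors = [
    lambda s1: s1.replace("passenger km", "passenger_km"),
    lambda s2: s2.replace("BoE", "boe"),
]

q = ureg("10 passenger km")  # preprocessed to "10 passenger_km" before parsing
print(q)                     # 10 passenger_km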
72 changes: 38 additions & 34 deletions notebooks/OECM-2023-ingest.ipynb
@@ -853,7 +853,7 @@
"outputs": [],
"source": [
"def process_T_country(oecm_dir):\n",
" data_dir = os.path.join(oecm_dir, \"T_country\")\n",
" data_dir = os.path.join(oecm_dir, \"T_Country\")\n",
" sector_df_list = []\n",
" sector_sum_df_list = []\n",
" sector_share_df_list = []\n",
@@ -1255,6 +1255,7 @@
"for subsector, sector_elements in oecm_dict.items():\n",
" sheet = sector_elements[1]\n",
" ei_unit = sector_elements[6]\n",
" continue\n",
" for region, filename in region_dict.items():\n",
" df = pd.read_excel(f\"{benchmark_OECM_dir}/{filename}.xlsx\", sheet_name=sheet, dtype=str)\n",
" orig_df = df.applymap(lambda x: x.rstrip(), na_action=\"ignore\")\n",
@@ -1414,6 +1415,7 @@
"outputs": [],
"source": [
"for production_centric in [True, False]:\n",
" break\n",
" df_all = pd.concat([v for k, v in oil_and_gas_dict.items() if k[2] is production_centric])\n",
" for region in df_all.Region.unique():\n",
" df = df_all[df_all.Region == region]\n",
@@ -1549,13 +1551,13 @@
" return o\n",
"\n",
"\n",
"with open(\"benchmark_production_OECM.json\", \"w\") as f:\n",
" json.dump(round_floats(production_bm), sort_keys=False, indent=2, fp=f)\n",
"\n",
"with open(\"benchmark_EI_OECM_S3.json\", \"w\") as f:\n",
" json.dump(round_floats(ei_bms[False]), sort_keys=False, indent=2, fp=f)\n",
"with open(\"benchmark_EI_OECM_PC.json\", \"w\") as f:\n",
" json.dump(round_floats(ei_bms[True]), sort_keys=False, indent=2, fp=f)"
"if False:\n",
" with open(\"benchmark_production_OECM.json\", \"w\") as f:\n",
" json.dump(round_floats(production_bm), sort_keys=False, indent=2, fp=f)\n",
" with open(\"benchmark_EI_OECM_S3.json\", \"w\") as f:\n",
" json.dump(round_floats(ei_bms[False]), sort_keys=False, indent=2, fp=f)\n",
" with open(\"benchmark_EI_OECM_PC.json\", \"w\") as f:\n",
" json.dump(round_floats(ei_bms[True]), sort_keys=False, indent=2, fp=f)"
]
},
{
@@ -1565,32 +1567,33 @@
"metadata": {},
"outputs": [],
"source": [
"production_index = pd.MultiIndex.from_tuples(\n",
" [\n",
" (\n",
" production_bm[scope][\"benchmarks\"][bm][\"sector\"],\n",
" production_bm[scope][\"benchmarks\"][bm][\"region\"],\n",
" production_bm[scope][\"benchmarks\"][bm][\"benchmark_metric\"],\n",
" scope,\n",
" bm,\n",
" )\n",
" for scope in [\"AnyScope\"]\n",
" for bm in range(len(production_bm[scope][\"benchmarks\"]))\n",
" ],\n",
" names=[\"sector\", \"region\", \"benchmark_metric\", \"scope\", \"bm_idx\"],\n",
")\n",
"df_production = pd.DataFrame.from_dict(\n",
" {\n",
" (idx[0], idx[1], idx[2], idx[3]): {\n",
" projection[\"year\"]: projection[\"value\"]\n",
" for projection in production_bm[idx[3]][\"benchmarks\"][idx[4]][\"projections_nounits\"]\n",
" }\n",
" for idx in production_index\n",
" },\n",
" orient=\"index\",\n",
")\n",
"df_production.index = production_index.droplevel(\"bm_idx\")\n",
"df_production.sort_index(inplace=True)"
"if False:\n",
" production_index = pd.MultiIndex.from_tuples(\n",
" [\n",
" (\n",
" production_bm[scope][\"benchmarks\"][bm][\"sector\"],\n",
" production_bm[scope][\"benchmarks\"][bm][\"region\"],\n",
" production_bm[scope][\"benchmarks\"][bm][\"benchmark_metric\"],\n",
" scope,\n",
" bm,\n",
" )\n",
" for scope in [\"AnyScope\"]\n",
" for bm in range(len(production_bm[scope][\"benchmarks\"]))\n",
" ],\n",
" names=[\"sector\", \"region\", \"benchmark_metric\", \"scope\", \"bm_idx\"],\n",
" )\n",
" df_production = pd.DataFrame.from_dict(\n",
" {\n",
" (idx[0], idx[1], idx[2], idx[3]): {\n",
" projection[\"year\"]: projection[\"value\"]\n",
" for projection in production_bm[idx[3]][\"benchmarks\"][idx[4]][\"projections_nounits\"]\n",
" }\n",
" for idx in production_index\n",
" },\n",
" orient=\"index\",\n",
" )\n",
" df_production.index = production_index.droplevel(\"bm_idx\")\n",
" df_production.sort_index(inplace=True)"
]
},
{
@@ -1603,6 +1606,7 @@
"benchmark_scopes = [[\"S1\", \"S2\", \"S1S2\", \"S3\", \"S1S2S3\"], [\"S1\", \"S2\", \"S1S2\"]]\n",
"\n",
"for wb_filename, production_centric in [(\"benchmark_OECM_S3\", False), (\"benchmark_OECM_PC\", True)]:\n",
" continue\n",
" ei_index = pd.MultiIndex.from_tuples(\n",
" [\n",
" (\n",
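The edits in this notebook share one pattern: a bare continue at the top of a loop, a bare break, or an if False: wrapper leaves the ingest code in the cell but turns it into a no-op, so the notebook can execute end-to-end (for example under nbmake) without touching source spreadsheets or writing benchmark JSON. A schematic, self-contained illustration of the pattern; the names and the work being skipped are hypothetical, not taken from the notebook.

# Hypothetical illustration of the cell-disabling pattern used in this diff.
regions = ["Global", "OECD North America"]

for region in regions:
    continue  # the loop still runs, but the heavy body below is skipped
    print(f"would ingest benchmark data for {region}")  # never reached

if False:  # re-enable to actually write the output file
    with open("benchmark_example.json", "w") as f:
        f.write("{}")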
45 changes: 20 additions & 25 deletions notebooks/TPI-benchmark-ingest.ipynb
@@ -30,7 +30,6 @@
"metadata": {},
"outputs": [],
"source": [
"from dotenv import dotenv_values, load_dotenv\n",
"import os\n",
"import pathlib\n",
"import numpy as np\n",
@@ -42,25 +41,6 @@
"# import python_pachyderm"
]
},
{
"cell_type": "markdown",
"id": "016b9282-c249-45ca-adea-dd9e6f56056e",
"metadata": {},
"source": [
"Define Environment and Execution Variables"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44e0c077-bfbf-41d1-ab61-ac39856216b0",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables from credentials.env\n",
"osc.load_credentials_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -93,6 +73,23 @@
"PA_ = PintArray"
]
},
{
"cell_type": "markdown",
"id": "016b9282-c249-45ca-adea-dd9e6f56056e",
"metadata": {},
"source": [
"Define Environment and Execution Variables"
]
},
{
"cell_type": "raw",
"id": "9cbe8f9d-71cd-4d4f-a043-e16646dbf25b",
"metadata": {},
"source": [
"# Load environment variables from credentials.env\n",
"osc.load_credentials_dotenv()"
]
},
{
"cell_type": "markdown",
"id": "76dd9d51-0532-44b9-a3fc-a78a301edf4c",
@@ -128,11 +125,9 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8a756db-291f-48ad-8ae8-dad3da4af325",
"cell_type": "raw",
"id": "5c760e8b-f01e-4c0b-bbf7-8ed86f512e53",
"metadata": {},
"outputs": [],
"source": [
"ingest_catalog = \"osc_datacommons_dev\"\n",
"ingest_schema = \"sandbox\"\n",
@@ -401,7 +396,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.11.9"
}
},
"nbformat": 4,
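Here the credential-loading and Trino-ingest cells become raw cells, which notebook executors (nbmake/nbclient) do not run, while the cell source stays visible in the notebook. If one preferred to script that change rather than editing the JSON by hand, a hedged sketch with nbformat might look like the following; the path and the substring used to select cells are assumptions, not part of this diff.

# Sketch only: convert selected code cells to raw cells so they are not executed.
import nbformat

path = "notebooks/TPI-benchmark-ingest.ipynb"  # assumed relative path
nb = nbformat.read(path, as_version=4)

for cell in nb.cells:
    if cell.cell_type == "code" and "load_credentials_dotenv" in cell.source:
        cell.cell_type = "raw"             # raw cells are skipped by executors
        cell.pop("outputs", None)          # raw cells carry no outputs
        cell.pop("execution_count", None)  # ...and no execution counts

nbformat.write(nb, path)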
11 changes: 11 additions & 0 deletions tox.ini
@@ -29,10 +29,21 @@ commands =
allowlist_externals =
    pdm
    pytest
setenv =
    addopts = "--cov --cov-report html --cov-report term-missing --cov-fail-under 70"
commands =
    pdm install --dev
    pytest test

[testenv:notebooks]
description = "Notebooks build/test"
deps =
    pytest
    nbmake
commands =
    pdm install --dev
    pytest --nbmake -- notebooks/OECM-benchmark-ingest.ipynb notebooks/TPI-benchmark-ingest.ipynb

[testenv:lint]
description = Perform static analysis and style checks
passenv =
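The new [testenv:notebooks] environment runs pytest with the nbmake plugin against the two benchmark-ingest notebooks (locally, presumably via something like tox -e notebooks). The pytest.skip(..., allow_module_level=True) cell added to ITR-data-production.ipynb relies on pytest's module-level skip mechanism to opt a whole file out of a test run; a plain-pytest illustration of that mechanism follows (hypothetical file, not part of this diff).

# test_skipped_module.py -- hypothetical example of a module-level skip.
import pytest

pytest.skip("skipping this notebook", allow_module_level=True)

# Nothing below is collected or executed by pytest once the module is skipped.
def test_never_runs():
    assert False  # not reached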
