Skip to content

Commit

Permalink
Merge pull request #29 from NOAA-GFDL/CatalogBuilder
Browse files Browse the repository at this point in the history
catalog builder files
  • Loading branch information
bcc2761 authored Jan 29, 2024
2 parents c065a4c + 208845a commit bd12efb
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 15 deletions.
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include README.md
include LICENSE
include fre/frepp/schema.json
include fre/frepp/schema.json
include fre/frecatalog/cats/gfdl_test1.json
27 changes: 23 additions & 4 deletions fre/fre.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,30 @@ def testfunction(context, uppercase):
#############################################

# Registers a command on the freCatalog click group. Its body hands the call
# over to frecatalog.frecatalog.buildCatalog through click's context.forward.
# NOTE(review): this span reads like a rendered diff without +/- markers or
# indentation; the '--uppercase' option and the 'testfunction' definition look
# like the removed side of the change -- confirm against the committed fre/fre.py.
@freCatalog.command()
@click.option('--uppercase', '-u', is_flag=True, help = 'Print statement in uppercase.')
# Required, single-value option: -i / --input_path.
@click.option('-i',
'--input_path',
required=True,
nargs=1)
# Required, single-value option: -o / --output_path.
@click.option('-o',
'--output_path',
required=True,
nargs=1)
# Optional single-value filters; no default is declared for them here.
@click.option('--filter_realm',
nargs=1)
@click.option('--filter_freq',
nargs=1)
@click.option('--filter_chunk',
nargs=1)
# Boolean flags, both off unless given on the command line.
@click.option('--overwrite',
is_flag=True,
default=False)
@click.option('--append',
is_flag=True,
default=False)
# pass_context supplies the click context as the first argument so the body can forward it.
@click.pass_context
def testfunction(context, uppercase):
""" - Execute fre catalog testfunction """
context.forward(frecatalog.frecatalog.testfunction)
# The parameter list mirrors the option names declared by the decorators above.
def buildCatalog(context, input_path, output_path, filter_realm, filter_freq, filter_chunk, overwrite,append):
""" - Execute fre catalog build """
context.forward(frecatalog.frecatalog.buildCatalog)

#############################################

Expand Down
Empty file added fre/frecatalog/cats/__init__.py
Empty file.
111 changes: 111 additions & 0 deletions fre/frecatalog/cats/gfdl_test1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{
"esmcat_version": "0.0.1",
"attributes": [
{
"column_name": "activity_id",
"vocabulary": ""
},
{
"column_name": "institution_id",
"vocabulary": ""
},
{
"column_name": "source_id",
"vocabulary": ""
},
{
"column_name": "experiment_id",
"vocabulary": ""
},
{
"column_name": "frequency",
"vocabulary": ""
},
{
"column_name": "modeling_realm",
"vocabulary": ""
},
{
"column_name": "table_id",
"vocabulary": ""
},
{
"column_name": "member_id",
"vocabulary": ""
},
{
"column_name": "grid_label",
"vocabulary": ""
},
{
"column_name": "variable_id",
"vocabulary": ""
},
{
"column_name": "temporal_subset",
"vocabulary": ""
},
{
"column_name": "chunk_freq",
"vocabulary": ""
},
{
"column_name": "grid_label",
"vocabulary": ""
},
{
"column_name": "platform",
"vocabulary": ""
},
{
"column_name": "platform",
"vocabulary": ""
},
{
"column_name": "cell_methods",
"vocabulary": ""
},
{
"column_name": "path",
"vocabulary": ""
}
],
"assets": {
"column_name": "path",
"format": "netcdf",
"format_column_name": null
},
"aggregation_control": {
"variable_column_name": "variable_id",
"groupby_attrs": [
"source_id",
"experiment_id",
"frequency",
"member_id",
"modeling_realm",
"variable_id",
"chunk_freq"
],
"aggregations": [
{
"type": "union",
"attribute_name": "variable_id",
"options": {}
},
{
"type": "join_existing",
"attribute_name": "temporal_subset",
"options": {
"dim": "time",
"coords": "minimal",
"compat": "override"
}
}
]
},
"id": "esm_catalog_ESM4",
"description": null,
"title": null,
"last_updated": "2023-05-07T16:35:52Z",
"catalog_file": "gfdl_test1.csv"
}
36 changes: 26 additions & 10 deletions fre/frecatalog/frecatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,39 @@
## \author Bennett Chang
## \description Integration of CatalogBuilder to build a data catalog which can then be ingested in climate analysis scripts/workflow

import os
from pathlib import Path
import yaml
import click

from fre.frecatalog.catalogfile import *
from fre.frecatalog.get_intake_gfdl import *

# Click command group for this module; the commands below attach to it with
# @catalog.command(). The group body itself does nothing.
# NOTE(review): the repeated `pass` looks like the old and new sides of a
# rendered diff (indentation was lost) -- confirm only one exists in the file.
@click.group()
def catalog():
pass
pass

# Registers a command on the `catalog` group. Its body hands the call over to
# `build` through click's context.forward; `build` is not defined in this span
# (presumably it arrives via the star import of fre.frecatalog.get_intake_gfdl
# -- verify).
# NOTE(review): this span reads like a rendered diff without +/- markers or
# indentation; the '--uppercase' option and the 'testfunction' definition look
# like the removed side of the change -- confirm against the committed file.
@catalog.command()
@click.option('--uppercase', '-u', is_flag=True, help = 'Print statement in uppercase.')
# Required, single-value option: -i / --input_path.
@click.option('-i',
'--input_path',
required=True,
nargs=1)
# Required, single-value option: -o / --output_path.
@click.option('-o',
'--output_path',
required=True,
nargs=1)
# Optional single-value filters; no default is declared for them here.
@click.option('--filter_realm',
nargs=1)
@click.option('--filter_freq',
nargs=1)
@click.option('--filter_chunk',
nargs=1)
# Boolean flags, both off unless given on the command line.
@click.option('--overwrite',
is_flag=True,
default=False)
@click.option('--append',
is_flag=True,
default=False)
# pass_context supplies the click context as the first argument so the body can forward it.
@click.pass_context
def testfunction(context, uppercase):
""" - Execute fre catalog function """
context.forward(function)
# The parameter list mirrors the option names declared by the decorators above.
def buildCatalog(context, input_path, output_path, filter_realm, filter_freq, filter_chunk, overwrite, append):
""" - Execute fre catalog build """
context.forward(build)

# Script entry point: runs only when this module is executed directly.
# NOTE(review): `test()` and `catalog()` look like the old and new sides of a
# rendered diff; `catalog` is the click group defined in this module, while no
# `test` is visible here -- confirm the committed file calls only `catalog()`.
if __name__ == "__main__":
test()
catalog()
87 changes: 87 additions & 0 deletions fre/frecatalog/get_intake_gfdl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python

import json
import click
import os
from intakebuilder import gfdlcrawler, CSVwriter, builderconfig
from pathlib import Path
import logging
# Locate the bundled catalog template relative to this module's own directory,
# so the lookup does not depend on the current working directory.
package_dir = os.path.dirname(os.path.abspath(__file__))
template_path = os.path.join(package_dir, 'cats/gfdl_test1.json')

# Module logger, registered under the name 'local' with its level set to INFO.
logger = logging.getLogger('local')
logger.setLevel(logging.INFO)

#Setting up argument parsing/flags
@click.command()
@click.option('-i',
              '--input_path',
              required=True,
              nargs=1)
@click.option('-o',
              '--output_path',
              required=True,
              nargs=1)
@click.option('--filter_realm',
              nargs=1)
@click.option('--filter_freq',
              nargs=1)
@click.option('--filter_chunk',
              nargs=1)
@click.option('--overwrite',
              is_flag=True,
              default=False)
@click.option('--append',
              is_flag=True,
              default=False)
def build(input_path, output_path, filter_realm, filter_freq, filter_chunk, overwrite, append):
    """Build a CSV data catalog and its JSON descriptor from a directory tree.

    \f
    Crawls ``input_path`` with ``gfdlcrawler.crawlLocal``, writes the resulting
    rows to ``<output_path>.csv`` and writes ``<output_path>.json``, a copy of
    the bundled template whose ``catalog_file`` entry is set to the absolute
    path of that CSV. (Everything after the form feed above is kept out of the
    ``--help`` output by click.)

    Args:
        input_path: Root directory to crawl; trailing slashes are stripped.
        output_path: Output path without extension; ".csv" and ".json" are
            appended to it.
        filter_realm: If given, passed to the crawler as the "modeling_realm"
            filter.
        filter_freq: If given, passed to the crawler as the "frequency" filter.
        filter_chunk: If given, passed to the crawler as the "chunk_freq"
            filter.
        overwrite: Passed through to ``CSVwriter.listdict_to_csv``.
        append: Passed through to ``CSVwriter.listdict_to_csv``.
    """
    project_dir = input_path.rstrip("/")
    csv_path = output_path + ".csv"
    json_path = output_path + ".json"

    ######### SEARCH FILTERS ###########################
    # Only the filters actually supplied on the command line reach the crawler.
    dictFilter = {}
    dictFilterIgnore = {}
    if filter_realm:
        dictFilter["modeling_realm"] = filter_realm
    if filter_freq:
        dictFilter["frequency"] = filter_freq
    if filter_chunk:
        dictFilter["chunk_freq"] = filter_chunk

    # Override config if necessary for dev, e.g.:
    #   project_dir = "/archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_1pctCO2_D1/gfdl.ncrc4-intel16-prod-openmp/pp/"
    #   csvfile = "/nbhome/$USER/intakebuilder_cats/intake_gfdl2.csv"
    #   dictFilterIgnore = {}
    #   dictFilter["modeling_realm"] = 'atmos_cmip'
    #   dictFilter["frequency"] = "monthly"
    #   dictFilter["chunk_freq"] = "5yr"
    #   dictFilterIgnore["remove"] = 'DO_NOT_USE'
    #########################################################

    logger.info("Calling gfdlcrawler.crawlLocal")
    list_files = gfdlcrawler.crawlLocal(project_dir, dictFilter, dictFilterIgnore, logger)

    # Create the output directory before anything is written into it. A bare
    # file name has an empty dirname, which os.makedirs would reject, so only
    # call it when there is a directory component.
    output_dir = os.path.dirname(csv_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Grab the template JSON, point it at the CSV being generated, and dump it
    # next to the CSV. Both files are closed even if parsing or dumping fails.
    with open(template_path, "r") as json_template:
        data = json.load(json_template)
    data["catalog_file"] = os.path.abspath(csv_path)
    with open(json_path, "w") as json_file:
        json.dump(data, json_file, indent=2)

    headers = CSVwriter.getHeader()
    CSVwriter.listdict_to_csv(list_files, headers, csv_path, overwrite, append)

    print("JSON generated at:", os.path.abspath(json_path))
    print("CSV generated at:", os.path.abspath(csv_path))
    logger.info("CSV generated at %s", os.path.abspath(csv_path))

# Running this module directly invokes the click command defined above.
if __name__ == "__main__":
    build()

0 comments on commit bd12efb

Please sign in to comment.