From ed3f2119c8bb2d40dfee23ca6e831b113b96f8e6 Mon Sep 17 00:00:00 2001 From: Maximilian Staib Date: Fri, 3 Jan 2025 12:48:40 +0100 Subject: [PATCH] Update CDS retrieval to new API --- Project.toml | 3 +-- src/CDSAPI.jl | 36 ++++++++++++++++++------------------ test/py2ju.jl | 18 +++++++++--------- test/retrieve.jl | 11 ++++------- 4 files changed, 32 insertions(+), 36 deletions(-) diff --git a/Project.toml b/Project.toml index 1c28601..f3e96f8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,9 @@ name = "CDSAPI" uuid = "8a7b9de3-9c00-473e-88b4-7eccd7ef2fea" authors = ["Micky Yun Chan and contributors"] -version = "1.0.1" +version = "2.0.0" [deps] -Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" diff --git a/src/CDSAPI.jl b/src/CDSAPI.jl index e7a6e2d..827eb17 100644 --- a/src/CDSAPI.jl +++ b/src/CDSAPI.jl @@ -2,7 +2,6 @@ module CDSAPI using HTTP using JSON -using Base64 """ retrieve(name, params, filename; max_sleep = 120.) @@ -14,45 +13,46 @@ directory as `filename`. The client periodically requests the status of the retrieve request. `max_sleep` is the maximum time (in seconds) between the status updates. """ -function retrieve(name, params, filename; max_sleep = 120.) 
+function retrieve(name, params, filename; max_sleep=120.0) creds = Dict() - open(joinpath(homedir(),".cdsapirc")) do f + open(joinpath(homedir(), ".cdsapirc")) do f for line in readlines(f) - key, val = strip.(split(line,':', limit=2)) + key, val = strip.(split(line, ':', limit=2)) creds[key] = val end end - apikey = string("Basic ", base64encode(creds["key"])) response = HTTP.request( "POST", - creds["url"] * "/resources/$name", - ["Authorization" => apikey], - body=JSON.json(params), + creds["url"] * "/retrieve/v1/processes/$name/execute/", + ["PRIVATE-TOKEN" => creds["key"]], + body=JSON.json(Dict("inputs" => params)), verbose=1) resp_dict = JSON.parse(String(response.body)) - data = Dict("state" => "queued") - sleep_seconds = 1. + data = Dict("status" => "queued") + sleep_seconds = 1.0 - while data["state"] != "completed" - data = HTTP.request("GET", creds["url"] * "/tasks/" * string(resp_dict["request_id"]), ["Authorization" => apikey]) + while data["status"] != "successful" + data = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]), ["PRIVATE-TOKEN" => creds["key"]]) data = JSON.parse(String(data.body)) - println("request queue status ", data["state"]) + println("request queue status ", data["status"]) - if data["state"] == "failed" + if data["status"] == "failed" error("Request to dataset $name failed. 
Check " * "https://cds.climate.copernicus.eu/cdsapp#!/yourrequests " * "for more information (after login).") end - sleep_seconds = min(1.5 * sleep_seconds,max_sleep) - if data["state"] != "completed" + sleep_seconds = min(1.5 * sleep_seconds, max_sleep) + if data["status"] != "successful" sleep(sleep_seconds) end end - HTTP.download(data["location"], filename) + response = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]) * "/results/", ["PRIVATE-TOKEN" => creds["key"]]) + body = JSON.parse(String(response.body)) + HTTP.download(body["asset"]["value"]["href"], filename) return data end @@ -88,7 +88,7 @@ function py2ju(dictstr) # if there's no pair after the last comma if findnext(":", dictstr_cpy, lastcomma_pos) == nothing # remove the comma - dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos - 1)] * dictstr_cpy[(lastcomma_pos + 1):lastindex(dictstr_cpy)] + dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos-1)] * dictstr_cpy[(lastcomma_pos+1):lastindex(dictstr_cpy)] end # removes trailing comma from a list diff --git a/test/py2ju.jl b/test/py2ju.jl index db8e10f..a8b987c 100644 --- a/test/py2ju.jl +++ b/test/py2ju.jl @@ -1,6 +1,6 @@ @testset "Py2Ju" begin pydict_str = """{ - 'format': 'grib', + 'data_format': 'grib', 'product_type': 'monthly_averaged_reanalysis', 'variable': 'divergence', 'pressure_level': '1', @@ -12,14 +12,14 @@ ], 'time': '00:00', }""" - julia_dict = Dict("format"=> "grib", - "month" => "06", - "time" => "00:00", - "year" => "2020", - "pressure_level" => "1", - "area" => Any[90, -180, -90, 180], - "product_type" => "monthly_averaged_reanalysis", - "variable" => "divergence") + julia_dict = Dict("data_format" => "grib", + "month" => "06", + "time" => "00:00", + "year" => "2020", + "pressure_level" => "1", + "area" => Any[90, -180, -90, 180], + "product_type" => "monthly_averaged_reanalysis", + "variable" => "divergence") py2ju_result = CDSAPI.py2ju(pydict_str) @test 
typeof(py2ju_result) <: Dict diff --git a/test/retrieve.jl b/test/retrieve.jl index 606eab2..4f9d19d 100644 --- a/test/retrieve.jl +++ b/test/retrieve.jl @@ -1,11 +1,11 @@ @testset "Retrieve" begin - datadir = joinpath(@__DIR__,"data") + datadir = joinpath(@__DIR__, "data") @testset "ERA5 monthly preasure data" begin filepath = joinpath(datadir, "era5.grib") response = CDSAPI.retrieve("reanalysis-era5-pressure-levels-monthly-means", CDSAPI.py2ju("""{ - 'format': 'grib', + 'data_format': 'grib', 'product_type': 'monthly_averaged_reanalysis', 'variable': 'divergence', 'pressure_level': '1', @@ -20,7 +20,6 @@ filepath) @test typeof(response) <: Dict - @test response["content_type"] == "application/x-grib" @test isfile(filepath) GribFile(filepath) do datafile @@ -43,12 +42,11 @@ 'emissions_scenario': 'rcp_2_6', 'period': '2071_2100', 'return_period': '100', - 'format': 'zip', + 'data_format': 'zip', }"""), filepath) @test typeof(response) <: Dict - @test response["content_type"] == "application/zip" @test isfile(filepath) # extract contents @@ -76,12 +74,11 @@ 'time_aggregation': '1_year_average', 'vertical_level': '0_m', 'bias_correction': 'bias_adjustment_based_on_gamma_distribution', - 'format': 'tgz', + 'data_format': 'tgz', }"""), filepath) @test typeof(response) <: Dict - @test response["content_type"] == "application/gzip" @test isfile(filepath) # extract contents