From ed3f2119c8bb2d40dfee23ca6e831b113b96f8e6 Mon Sep 17 00:00:00 2001
From: Maximilian Staib <maximilian.staib@kerith.net>
Date: Fri, 3 Jan 2025 12:48:40 +0100
Subject: [PATCH] Update CDS retrieval to new API

---
 Project.toml     |  3 +--
 src/CDSAPI.jl    | 36 ++++++++++++++++++------------------
 test/py2ju.jl    | 18 +++++++++---------
 test/retrieve.jl | 11 ++++-------
 4 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/Project.toml b/Project.toml
index 1c28601..f3e96f8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,10 +1,9 @@
 name = "CDSAPI"
 uuid = "8a7b9de3-9c00-473e-88b4-7eccd7ef2fea"
 authors = ["Micky Yun Chan <michan@redhat.com> and contributors"]
-version = "1.0.1"
+version = "2.0.0"
 
 [deps]
-Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 
diff --git a/src/CDSAPI.jl b/src/CDSAPI.jl
index e7a6e2d..827eb17 100644
--- a/src/CDSAPI.jl
+++ b/src/CDSAPI.jl
@@ -2,7 +2,6 @@ module CDSAPI
 
 using HTTP
 using JSON
-using Base64
 
 """
     retrieve(name, params, filename; max_sleep = 120.)
@@ -14,45 +13,46 @@ directory as `filename`.
 The client periodically requests the status of the retrieve request.
 `max_sleep` is the maximum time (in seconds) between the status updates.
 """
-function retrieve(name, params, filename; max_sleep = 120.)
+function retrieve(name, params, filename; max_sleep=120.0)
     creds = Dict()
-    open(joinpath(homedir(),".cdsapirc")) do f
+    open(joinpath(homedir(), ".cdsapirc")) do f
         for line in readlines(f)
-            key, val = strip.(split(line,':', limit=2))
+            key, val = strip.(split(line, ':', limit=2))
             creds[key] = val
         end
     end
 
-    apikey = string("Basic ", base64encode(creds["key"]))
     response = HTTP.request(
         "POST",
-        creds["url"] * "/resources/$name",
-        ["Authorization" => apikey],
-        body=JSON.json(params),
+        creds["url"] * "/retrieve/v1/processes/$name/execute/",
+        ["PRIVATE-TOKEN" => creds["key"]],
+        body=JSON.json(Dict("inputs" => params)),
         verbose=1)
 
     resp_dict = JSON.parse(String(response.body))
-    data = Dict("state" => "queued")
-    sleep_seconds = 1.
+    data = Dict("status" => "queued")
+    sleep_seconds = 1.0
 
-    while data["state"] != "completed"
-        data = HTTP.request("GET", creds["url"] * "/tasks/" * string(resp_dict["request_id"]),  ["Authorization" => apikey])
+    while data["status"] != "successful"
+        data = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]), ["PRIVATE-TOKEN" => creds["key"]])
         data = JSON.parse(String(data.body))
-        println("request queue status ", data["state"])
+        println("request queue status ", data["status"])
 
-        if data["state"] == "failed"
+        if data["status"] == "failed"
             error("Request to dataset $name failed. Check " *
                   "https://cds.climate.copernicus.eu/cdsapp#!/yourrequests " *
                   "for more information (after login).")
         end
 
-        sleep_seconds = min(1.5 * sleep_seconds,max_sleep)
-        if data["state"] != "completed"
+        sleep_seconds = min(1.5 * sleep_seconds, max_sleep)
+        if data["status"] != "successful"
             sleep(sleep_seconds)
         end
     end
 
-    HTTP.download(data["location"], filename)
+    response = HTTP.request("GET", creds["url"] * "/retrieve/v1/jobs/" * string(resp_dict["jobID"]) * "/results/", ["PRIVATE-TOKEN" => creds["key"]])
+    body = JSON.parse(String(response.body))
+    HTTP.download(body["asset"]["value"]["href"], filename)
     return data
 end
 
@@ -88,7 +88,7 @@ function py2ju(dictstr)
     # if there's no pair after the last comma
     if findnext(":", dictstr_cpy, lastcomma_pos) == nothing
         # remove the comma
-        dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos - 1)] * dictstr_cpy[(lastcomma_pos + 1):lastindex(dictstr_cpy)]
+        dictstr_cpy = dictstr_cpy[firstindex(dictstr_cpy):(lastcomma_pos-1)] * dictstr_cpy[(lastcomma_pos+1):lastindex(dictstr_cpy)]
     end
 
     # removes trailing comma from a list
diff --git a/test/py2ju.jl b/test/py2ju.jl
index db8e10f..a8b987c 100644
--- a/test/py2ju.jl
+++ b/test/py2ju.jl
@@ -1,6 +1,6 @@
 @testset "Py2Ju" begin
     pydict_str = """{
-                'format': 'grib',
+                'data_format': 'grib',
                 'product_type': 'monthly_averaged_reanalysis',
                 'variable': 'divergence',
                 'pressure_level': '1',
@@ -12,14 +12,14 @@
                 ],
                 'time': '00:00',
             }"""
-    julia_dict = Dict("format"=> "grib",
-                    "month" => "06",
-                    "time" => "00:00",
-                    "year" => "2020",
-                    "pressure_level" => "1",
-                    "area" => Any[90, -180, -90, 180],
-                    "product_type" => "monthly_averaged_reanalysis",
-                    "variable" => "divergence")
+    julia_dict = Dict("data_format" => "grib",
+        "month" => "06",
+        "time" => "00:00",
+        "year" => "2020",
+        "pressure_level" => "1",
+        "area" => Any[90, -180, -90, 180],
+        "product_type" => "monthly_averaged_reanalysis",
+        "variable" => "divergence")
     py2ju_result = CDSAPI.py2ju(pydict_str)
 
     @test typeof(py2ju_result) <: Dict
diff --git a/test/retrieve.jl b/test/retrieve.jl
index 606eab2..4f9d19d 100644
--- a/test/retrieve.jl
+++ b/test/retrieve.jl
@@ -1,11 +1,11 @@
 @testset "Retrieve" begin
-    datadir = joinpath(@__DIR__,"data")
+    datadir = joinpath(@__DIR__, "data")
 
     @testset "ERA5 monthly preasure data" begin
         filepath = joinpath(datadir, "era5.grib")
         response = CDSAPI.retrieve("reanalysis-era5-pressure-levels-monthly-means",
             CDSAPI.py2ju("""{
-                'format': 'grib',
+                'data_format': 'grib',
                 'product_type': 'monthly_averaged_reanalysis',
                 'variable': 'divergence',
                 'pressure_level': '1',
@@ -20,7 +20,6 @@
             filepath)
 
         @test typeof(response) <: Dict
-        @test response["content_type"] == "application/x-grib"
         @test isfile(filepath)
 
         GribFile(filepath) do datafile
@@ -43,12 +42,11 @@
                 'emissions_scenario': 'rcp_2_6',
                 'period': '2071_2100',
                 'return_period': '100',
-                'format': 'zip',
+                'data_format': 'zip',
             }"""),
             filepath)
 
         @test typeof(response) <: Dict
-        @test response["content_type"] == "application/zip"
         @test isfile(filepath)
 
         # extract contents
@@ -76,12 +74,11 @@
                 'time_aggregation': '1_year_average',
                 'vertical_level': '0_m',
                 'bias_correction': 'bias_adjustment_based_on_gamma_distribution',
-                'format': 'tgz',
+                'data_format': 'tgz',
             }"""),
             filepath)
 
         @test typeof(response) <: Dict
-        @test response["content_type"] == "application/gzip"
         @test isfile(filepath)
 
         # extract contents