-
Notifications
You must be signed in to change notification settings - Fork 0
/
process_data.py
72 lines (56 loc) · 3.5 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import ncas_amof_netcdf_template as nant
import polars as pl
import datetime as dt
def read_data_year(input_file, year=2023):
    """Load the instrument CSV and return the rows from one calendar year.

    The file is read with "NA" treated as missing. Rows in which every
    column is missing are dropped, the "Date" column (text in
    day/month/year hour:minute form) is converted to datetimes, and missing
    "CO_ppb" / "H2_ppb" values are replaced by the fill value -1e20.

    Args:
        input_file: Path of the CSV file to read. It must contain the
            "Date", "CO_ppb" and "H2_ppb" columns.
        year: Calendar year to keep.

    Returns:
        A polars DataFrame restricted to rows with
        ``year-01-01 00:00 <= Date < (year + 1)-01-01 00:00``.

    Raises:
        ValueError: If a "Date" entry does not match "%d/%m/%Y %H:%M".
    """
    fill_value = -1e20

    df = pl.read_csv(input_file, null_values="NA")
    # Drop rows that carry no data at all.
    df = df.filter(~pl.all_horizontal(pl.all().is_null()))

    # Dates are parsed with datetime.strptime, as before, so the accepted
    # input text is unchanged. The explicit dtype keeps the column a
    # Datetime even when no rows are left to parse.
    parsed_dates = pl.Series(
        "Date",
        [dt.datetime.strptime(stamp, "%d/%m/%Y %H:%M") for stamp in df["Date"]],
        dtype=pl.Datetime("us"),
    )

    df = df.with_columns(
        parsed_dates,
        # Cast first so the fill value fits whatever dtype was inferred
        # from the CSV (for example an integer or an all-missing column).
        pl.col("CO_ppb").cast(pl.Float64).fill_null(fill_value),
        pl.col("H2_ppb").cast(pl.Float64).fill_null(fill_value),
    )

    # Half-open interval: a sample stamped exactly at midnight on 1 January
    # of the following year belongs to that year only, so it must not be
    # selected here as well.
    year_start = dt.datetime(year, 1, 1)
    next_year_start = dt.datetime(year + 1, 1, 1)
    return df.filter(
        (pl.col("Date") >= year_start) & (pl.col("Date") < next_year_start)
    )
def make_netcdf(
    input_file="ncas-rga3-1_growing.csv",
    output_location=".",
    product_version="1.0",
    year=2023,
    metadata_file="metadata.csv",
):
    """Write one year of H2 data from the CSV to an h2-concentration netCDF file.

    The selected year is read with ``read_data_year``; the output file is
    created by ``nant.create_netcdf.main`` for instrument "ncas-rga3-1" and
    product "h2-concentration". The time variables, the H2 values, the
    optional QC flag, the time-coverage attributes and the metadata from
    ``metadata_file`` are written, the file is closed, and finally
    ``nant.remove_empty_variables.main`` is run on it.

    Args:
        input_file: Path of the CSV file holding the measurements.
        output_location: Directory passed to ``nant.create_netcdf.main`` as
            ``file_location``.
        product_version: Product version string passed to
            ``nant.create_netcdf.main``.
        year: Calendar year of data to write.
        metadata_file: Path of the metadata CSV passed to
            ``nant.util.add_metadata_to_netcdf``. Defaults to
            "metadata.csv", the value that was previously hard-coded.
    """
    data_df = read_data_year(input_file, year=year)
    (
        unix_times,
        day_of_year,
        years,
        months,
        days,
        hours,
        minutes,
        seconds,
        time_coverage_start_unix,
        time_coverage_end_unix,
        file_date,
    ) = nant.util.get_times(data_df["Date"])

    nc = nant.create_netcdf.main(
        "ncas-rga3-1",
        date=file_date,
        dimension_lengths={"time": len(unix_times)},
        file_location=output_location,
        products="h2-concentration",
        product_version=product_version,
    )
    # Everything between creation and close sits in try/finally so the
    # dataset handle is released even if one of the writes fails.
    try:
        # Take the path from the dataset itself rather than rebuilding the
        # file name by hand, so it always matches the file that was created.
        # NOTE(review): assumes nc is a netCDF4.Dataset (filepath()) - confirm.
        output_path = nc.filepath()

        time_variables = (
            ("time", unix_times),
            ("day_of_year", day_of_year),
            ("year", years),
            ("month", months),
            ("day", days),
            ("hour", hours),
            ("minute", minutes),
            ("second", seconds),
        )
        for variable_name, values in time_variables:
            nant.util.update_variable(nc, variable_name, values)

        # Only H2 is written to this product; the CO_ppb / CO_Flag columns
        # of the input are not used here.
        h2_name = "mass_fraction_of_molecular_hydrogen_in_air"
        nant.util.update_variable(nc, h2_name, data_df["H2_ppb"])
        nc[h2_name].units = "1e-9"
        nc[h2_name].practical_units = "ppb"
        nc[h2_name].cell_methods = "time: point"

        # The QC flag is written only when at least one H2 value is
        # positive, i.e. not every value is the negative fill value.
        if data_df.filter(pl.col("H2_ppb") > 0).height > 0:
            nant.util.update_variable(nc, "qc_flag", data_df["H2_Flag"])

        coverage_attributes = (
            ("time_coverage_start", time_coverage_start_unix),
            ("time_coverage_end", time_coverage_end_unix),
        )
        for attribute_name, unix_time in coverage_attributes:
            stamp = dt.datetime.fromtimestamp(unix_time, dt.timezone.utc)
            nc.setncattr(attribute_name, stamp.strftime("%Y-%m-%dT%H:%M:%S"))

        nant.util.add_metadata_to_netcdf(nc, metadata_file)
    finally:
        nc.close()

    # Check for empty variables and remove them if necessary. This runs on
    # the closed file, and only when everything above succeeded.
    nant.remove_empty_variables.main(output_path)
if __name__ == "__main__":
    # Command-line entry point:
    #   process_data.py INPUT_FILE OUTPUT_LOCATION PRODUCT_VERSION YEAR
    # argparse keeps the same four positional arguments but reports a usage
    # message (instead of a bare IndexError/ValueError) when one is missing
    # or the year is not an integer.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert one year of ncas-rga3-1 CSV data to a netCDF file."
    )
    parser.add_argument("input_file", help="path of the input CSV file")
    parser.add_argument(
        "output_location", help="directory in which the netCDF file is written"
    )
    parser.add_argument("product_version", help="product version, e.g. 1.0")
    parser.add_argument("year", type=int, help="calendar year of data to process")
    args = parser.parse_args()

    make_netcdf(
        input_file=args.input_file,
        output_location=args.output_location,
        product_version=args.product_version,
        year=args.year,
    )