From 916227ec39f09a3bd0ee28dcd8615b2dcf8237a1 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Tue, 26 Mar 2024 10:03:00 -0700 Subject: [PATCH] Address review comments --- ext/DataHandlingExt.jl | 46 ++++++++++++++++++++++++++----------- ext/NCFileReaderExt.jl | 7 +++--- ext/TempestRegridderExt.jl | 2 ++ src/DataHandling.jl | 8 +++---- src/Regridders.jl | 2 +- test/Artifacts.toml | 7 ++++++ test/file_readers.jl | 16 +++++++++++++ test/time_varying_inputs.jl | 16 +++++++------ 8 files changed, 75 insertions(+), 29 deletions(-) diff --git a/ext/DataHandlingExt.jl b/ext/DataHandlingExt.jl index 31867c63..9357ff11 100644 --- a/ext/DataHandlingExt.jl +++ b/ext/DataHandlingExt.jl @@ -107,9 +107,11 @@ everything more type stable.) - `reference_date`: Calendar date corresponding to the start of the simulation. - `t_start`: Simulation time at the beginning of the simulation. Typically this is 0 (seconds), but if might be different if the simulation was restarted. -- `regridder_type`: What type of regridding to perform. Currently, the only one implemented - is `:tempest` to use `TempestRemap`. `TempestRemap` regrids everything - ahead of time and saves the result to HDF5 files. +- `regridder_type`: What type of regridding to perform. Currently, the ones implemented are + `:TempestRegridder` (using `TempestRemap`) and + `:InterpolationsRegridder` (using `Interpolations.jl`). `TempestRemap` + regrids everything ahead of time and saves the result to HDF5 files. + `Interpolations.jl` is online and GPU compatible but not conservative. """ function DataHandling.DataHandler( file_path::AbstractString, @@ -138,6 +140,8 @@ function DataHandling.DataHandler( _cached_regridded_fields = Dict{Dates.DateTime, ClimaCore.Fields.Field}() available_dates = file_reader.available_dates + # Second() is required to convert from DateTime to float. Also, Second(1) transforms + # from milliseconds to seconds. times_s = Second.(available_dates .- reference_date) ./ Second(1) available_times = times_s .- t_start @@ -183,6 +187,21 @@ function DataHandling.available_dates(data_handler::DataHandler) return data_handler.available_dates end +""" + time_to_date(data_handler::DataHandler, time::AbstractFloat) + +Convert the given time to a calendar date. + +``` +date = reference_date + t_start + time +``` +""" +function time_to_date(data_handler::DataHandler, time::AbstractFloat) + return data_handler.reference_date + + Second(data_handler.t_start) + + Second(time) +end + """ previous_time(data_handler::DataHandler, time::AbstractFloat) previous_time(data_handler::DataHandler, date::Dates.DateTime) @@ -194,15 +213,15 @@ function DataHandling.previous_time( data_handler::DataHandler, time::AbstractFloat, ) - time in data_handler.available_times && return time - index = searchsortedfirst(data_handler.available_times, time) - 1 - return data_handler.available_times[index] + date = time_to_date(data_handler, time) + return DataHandling.previous_time(data_handler, date) end function DataHandling.previous_time( data_handler::DataHandler, date::Dates.DateTime, ) + # We have to handle separately what happens when we are on the node if date in data_handler.available_dates index = searchsortedfirst(data_handler.available_dates, date) else @@ -219,12 +238,12 @@ Return the time in seconds of the snapshot after the given `time`. If `time` is one of the snapshots, return the next time. """ function DataHandling.next_time(data_handler::DataHandler, time::AbstractFloat) - index = searchsortedfirst(data_handler.available_times, time) - time in data_handler.available_times && (index += 1) - return data_handler.available_times[index] + date = time_to_date(data_handler, time) + return DataHandling.next_time(data_handler, date) end function DataHandling.next_time(data_handler::DataHandler, date::Dates.DateTime) + # We have to handle separately what happens when we are on the node if date in data_handler.available_dates index = searchsortedfirst(data_handler.available_dates, date) + 1 else @@ -253,7 +272,9 @@ function DataHandling.regridded_snapshot( ) # Dates.DateTime(0) is the cache key for static maps if date != Dates.DateTime(0) - date in data_handler.available_dates || error("date not available") + date in data_handler.available_dates || error( + "Date $date not available in file $(data_handler.file_reader.file_path)", + ) end regridder_type = nameof(typeof(data_handler.regridder)) @@ -276,10 +297,7 @@ function DataHandling.regridded_snapshot( data_handler::DataHandler, time::AbstractFloat, ) - date = - data_handler.reference_date + - Second(round(data_handler.t_start)) + - Second(round(time)) + date = time_to_date(data_handler, time) return DataHandling.regridded_snapshot(data_handler, date) end diff --git a/ext/NCFileReaderExt.jl b/ext/NCFileReaderExt.jl index 74b1c6cc..a07de654 100644 --- a/ext/NCFileReaderExt.jl +++ b/ext/NCFileReaderExt.jl @@ -45,7 +45,7 @@ struct NCFileReader{ """A vector of DateTime collecting all the available dates in the file""" available_dates::DATES - """NetCDF file opened by NCDataset""" + """NetCDF file opened by NCDataset. Don't forget to close the reader!""" dataset::NC """Optional function that is applied to the read dataset. Useful to do unit-conversion @@ -95,8 +95,9 @@ function FileReaders.NCFileReader( error("Could not find (unique) time dimension") time_index = time_index_vec[] - issorted(available_dates) || - error("Cannot process files that are not sorted in time") + issorted(available_dates) || error( + "Cannot process files that are not sorted in time ($file_path)", + ) # Remove time from the dim names dim_names = filter(!is_time, dim_names) diff --git a/ext/TempestRegridderExt.jl b/ext/TempestRegridderExt.jl index 866d6838..1a18fc41 100644 --- a/ext/TempestRegridderExt.jl +++ b/ext/TempestRegridderExt.jl @@ -45,6 +45,7 @@ end """ TempestRegridder(target_space::ClimaCore.Spaces.AbstractSpace, input_file::AbstractString, + regrid_dir::AbstractString, varname::AbstractString, regrid_dir::AbstractString; mono::Bool) @@ -57,6 +58,7 @@ Positional arguments - `target_space`: the ClimaCore Space where the simulation is being performed. - `input_file`: the path of the NetCDF file that has to be read and processed. +- `regrid_dir`: the path where to save the regrid files created by TempestRemap. - `varname`: the name of the variable in the NetCDF file. - `regrid_dir`: a folder where regridded Fields are saved as HDF5 files. diff --git a/src/DataHandling.jl b/src/DataHandling.jl index 2e130080..7c4973f5 100644 --- a/src/DataHandling.jl +++ b/src/DataHandling.jl @@ -11,7 +11,7 @@ This is no trivial task. Among the challenges: - CPU/GPU communication can be a bottleneck The `DataHandling` takes the divide and conquer approach: the various core tasks and -features and split into other independent modules (chiefly `FileReaders`, and `Regridders`). +features are split into other independent modules (chiefly `FileReaders`, and `Regridders`). Such modules can be developed, tested, and extended independently (as long as they maintain a consistent interface). For instance, if need arises, the `DataHandler` can be used (almost) directly to process files with a different format from NetCDF. @@ -25,9 +25,9 @@ exposes the following key functions: - `available_times` (`available_dates`): to list all the `times` (`dates`) over which the data is defined. - `previous_time(time/date)` (`next_time(time/date)`): to obtain the time of the snapshot - before the given `time` or `date`. This can be used to compute the - interpolation weight for linear interpolation, or in combination - with `regridded_snapshot` to read a particular snapshot + before/after the given `time` or `date`. This can be used to + compute the interpolation weight for linear interpolation, or in + combination with `regridded_snapshot` to read a particular snapshot Most `DataHandling` functions take either `time` or `date`, with the difference being that `time` is intended as "simulation time" and is expected to be in seconds; `date` is a calendar date (from `Dates.DateTime`). Conversion between time and date is performed using diff --git a/src/Regridders.jl b/src/Regridders.jl index b2a19b76..bccf641e 100644 --- a/src/Regridders.jl +++ b/src/Regridders.jl @@ -7,7 +7,7 @@ grids. Currently, the schemes implemented are `TempestRegridder`, which uses `ClimaCoreTempestRemap`, and `InterpolationsRegridder`, which uses `Interpolations.jl`. -The key function exposed by `Regridder` is the `regrid` method. +The key function exposed by `Regridders` is the `regrid` method. """ module Regridders diff --git a/test/Artifacts.toml b/test/Artifacts.toml index 6ef326b5..5aabd592 100644 --- a/test/Artifacts.toml +++ b/test/Artifacts.toml @@ -26,3 +26,10 @@ git-tree-sha1 = "e4044f4093e731021a3e62428f594ee7b5ea56e2" [[laskar2004.download]] sha256 = "8b3a9510ade307e1834c51968421ab803d0d570f1c27529cb417d7a185fbcc3e" url = "https://data.caltech.edu/records/v9dp1-jb227/files/laskar2004.tar.gz" + +[landseamask] +git-tree-sha1 = "31d4110d008fcfb1ecfd77eb9675c983b650eedf" + + [[landseamask.download]] + sha256 = "363c5c85f17458ce80a7e3944fcaf0539889e3e0e14f46df6752c7d9417f8fae" + url = "https://caltech.box.com/shared/static/vn1kh5x868mlo17x6ql2gmqj2flfemu3.gz" diff --git a/test/file_readers.jl b/test/file_readers.jl index b4609113..709a95f8 100644 --- a/test/file_readers.jl +++ b/test/file_readers.jl @@ -79,3 +79,19 @@ end close(ncreader) end end + +@testset "NCFileReader missing dimensions" begin + PATH = joinpath(artifact"landseamask", "means_2.5_new.nc") + NCDataset(PATH) do nc + ncreader = FileReaders.NCFileReader(PATH, "landsea_mask") + + @test ncreader.dimensions[1] == nc["lon"][:] + @test ncreader.dimensions[2] == nc["lat"][:] + + @test FileReaders.read(ncreader) == nc["u10n"][:, :] + + @test isempty(FileReaders.available_dates(ncreader)) + + close(ncreader) + end +end diff --git a/test/time_varying_inputs.jl b/test/time_varying_inputs.jl index 6b996fdb..970d70a2 100644 --- a/test/time_varying_inputs.jl +++ b/test/time_varying_inputs.jl @@ -46,7 +46,7 @@ include("TestTools.jl") @test Array(parent(column_field))[1] == fun2(10.0, 20.0; z = 30.0) end -@testset "InteprolatingTimeVaryingInput0D" begin +@testset "InterpolatingTimeVaryingInput0D" begin # Check times not sorted xs = [1.0, 0.0] ys = [1.0, 2.0] @@ -119,7 +119,7 @@ end end end -@testset "InteprolatingTimeVaryingInput23D" begin +@testset "InterpolatingTimeVaryingInput23D" begin PATH = joinpath(artifact"era5_example", "era5_t2m_sp_u10n_20210101.nc") regridder_types = (:InterpolationsRegridder, :TempestRegridder) @@ -198,11 +198,13 @@ end TimeVaryingInputs.evaluate!(dest, input_nearest, target_time) @test isequal( - parent(dest), - parent( - DataHandling.regridded_snapshot( - data_handler, - available_times[11], + Array(parent(dest)), + Array( + parent( + DataHandling.regridded_snapshot( + data_handler, + available_times[11], + ), ), ), )