Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Sbozzolo committed Mar 26, 2024
1 parent 521e15e commit 916227e
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 29 deletions.
46 changes: 32 additions & 14 deletions ext/DataHandlingExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,11 @@ everything more type stable.)
- `reference_date`: Calendar date corresponding to the start of the simulation.
- `t_start`: Simulation time at the beginning of the simulation. Typically this is 0
(seconds), but if might be different if the simulation was restarted.
- `regridder_type`: What type of regridding to perform. Currently, the only one implemented
is `:tempest` to use `TempestRemap`. `TempestRemap` regrids everything
ahead of time and saves the result to HDF5 files.
- `regridder_type`: What type of regridding to perform. Currently, the ones implemented are
`:TempestRegridder` (using `TempestRemap`) and
`:InterpolationsRegridder` (using `Interpolations.jl`). `TempestRemap`
regrids everything ahead of time and saves the result to HDF5 files.
`Interpolations.jl` is online and GPU compatible but not conservative.
"""
function DataHandling.DataHandler(
file_path::AbstractString,
Expand Down Expand Up @@ -138,6 +140,8 @@ function DataHandling.DataHandler(
_cached_regridded_fields = Dict{Dates.DateTime, ClimaCore.Fields.Field}()

available_dates = file_reader.available_dates
# Second() is required to convert from DateTime to float. Also, Second(1) transforms
# from milliseconds to seconds.
times_s = Second.(available_dates .- reference_date) ./ Second(1)
available_times = times_s .- t_start

Expand Down Expand Up @@ -183,6 +187,21 @@ function DataHandling.available_dates(data_handler::DataHandler)
return data_handler.available_dates
end

"""
time_to_date(data_handler::DataHandler, time::AbstractFloat)
Convert the given time to a calendar date.
```
date = reference_date + t_start + time
```
"""
function time_to_date(data_handler::DataHandler, time::AbstractFloat)
return data_handler.reference_date +
Second(data_handler.t_start) +
Second(time)
end

"""
previous_time(data_handler::DataHandler, time::AbstractFloat)
previous_time(data_handler::DataHandler, date::Dates.DateTime)
Expand All @@ -194,15 +213,15 @@ function DataHandling.previous_time(
data_handler::DataHandler,
time::AbstractFloat,
)
time in data_handler.available_times && return time
index = searchsortedfirst(data_handler.available_times, time) - 1
return data_handler.available_times[index]
date = time_to_date(data_handler, time)
return DataHandling.previous_time(data_handler, date)
end

function DataHandling.previous_time(
data_handler::DataHandler,
date::Dates.DateTime,
)
# We have to handle separately what happens when we are on the node
if date in data_handler.available_dates
index = searchsortedfirst(data_handler.available_dates, date)
else
Expand All @@ -219,12 +238,12 @@ Return the time in seconds of the snapshot after the given `time`.
If `time` is one of the snapshots, return the next time.
"""
function DataHandling.next_time(data_handler::DataHandler, time::AbstractFloat)
index = searchsortedfirst(data_handler.available_times, time)
time in data_handler.available_times && (index += 1)
return data_handler.available_times[index]
date = time_to_date(data_handler, time)
return DataHandling.next_time(data_handler, date)
end

function DataHandling.next_time(data_handler::DataHandler, date::Dates.DateTime)
# We have to handle separately what happens when we are on the node
if date in data_handler.available_dates
index = searchsortedfirst(data_handler.available_dates, date) + 1
else
Expand Down Expand Up @@ -253,7 +272,9 @@ function DataHandling.regridded_snapshot(
)
# Dates.DateTime(0) is the cache key for static maps
if date != Dates.DateTime(0)
date in data_handler.available_dates || error("date not available")
date in data_handler.available_dates || error(
"Date $date not available in file $(data_handler.file_reader.file_path)",
)
end

regridder_type = nameof(typeof(data_handler.regridder))
Expand All @@ -276,10 +297,7 @@ function DataHandling.regridded_snapshot(
data_handler::DataHandler,
time::AbstractFloat,
)
date =
data_handler.reference_date +
Second(round(data_handler.t_start)) +
Second(round(time))
date = time_to_date(data_handler, time)
return DataHandling.regridded_snapshot(data_handler, date)
end

Expand Down
7 changes: 4 additions & 3 deletions ext/NCFileReaderExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ struct NCFileReader{
"""A vector of DateTime collecting all the available dates in the file"""
available_dates::DATES

"""NetCDF file opened by NCDataset"""
"""NetCDF file opened by NCDataset. Don't forget to close the reader!"""
dataset::NC

"""Optional function that is applied to the read dataset. Useful to do unit-conversion
Expand Down Expand Up @@ -95,8 +95,9 @@ function FileReaders.NCFileReader(
error("Could not find (unique) time dimension")
time_index = time_index_vec[]

issorted(available_dates) ||
error("Cannot process files that are not sorted in time")
issorted(available_dates) || error(
"Cannot process files that are not sorted in time ($file_path)",
)

# Remove time from the dim names
dim_names = filter(!is_time, dim_names)
Expand Down
2 changes: 2 additions & 0 deletions ext/TempestRegridderExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ end
"""
TempestRegridder(target_space::ClimaCore.Spaces.AbstractSpace,
input_file::AbstractString,
regrid_dir::AbstractString,
varname::AbstractString,
regrid_dir::AbstractString;
mono::Bool)
Expand All @@ -57,6 +58,7 @@ Positional arguments
- `target_space`: the ClimaCore Space where the simulation is being performed.
- `input_file`: the path of the NetCDF file that has to be read and processed.
- `regrid_dir`: the path where to save the regrid files created by TempestRemap.
- `varname`: the name of the variable in the NetCDF file.
- `regrid_dir`: a folder where regridded Fields are saved as HDF5 files.
Expand Down
8 changes: 4 additions & 4 deletions src/DataHandling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This is no trivial task. Among the challenges:
- CPU/GPU communication can be a bottleneck
The `DataHandling` takes the divide and conquer approach: the various core tasks and
features and split into other independent modules (chiefly `FileReaders`, and `Regridders`).
features are split into other independent modules (chiefly `FileReaders`, and `Regridders`).
Such modules can be developed, tested, and extended independently (as long as they maintain
a consistent interface). For instance, if need arises, the `DataHandler` can be used
(almost) directly to process files with a different format from NetCDF.
Expand All @@ -25,9 +25,9 @@ exposes the following key functions:
- `available_times` (`available_dates`): to list all the `times` (`dates`) over which the
data is defined.
- `previous_time(time/date)` (`next_time(time/date)`): to obtain the time of the snapshot
before the given `time` or `date`. This can be used to compute the
interpolation weight for linear interpolation, or in combination
with `regridded_snapshot` to read a particular snapshot
before/after the given `time` or `date`. This can be used to
compute the interpolation weight for linear interpolation, or in
combination with `regridded_snapshot` to read a particular snapshot
Most `DataHandling` functions take either `time` or `date`, with the difference being that
`time` is intended as "simulation time" and is expected to be in seconds; `date` is a
calendar date (from `Dates.DateTime`). Conversion between time and date is performed using
Expand Down
2 changes: 1 addition & 1 deletion src/Regridders.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ grids.
Currently, the schemes implemented are `TempestRegridder`, which uses
`ClimaCoreTempestRemap`, and `InterpolationsRegridder`, which uses `Interpolations.jl`.
The key function exposed by `Regridder` is the `regrid` method.
The key function exposed by `Regridders` is the `regrid` method.
"""
module Regridders

Expand Down
7 changes: 7 additions & 0 deletions test/Artifacts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,10 @@ git-tree-sha1 = "e4044f4093e731021a3e62428f594ee7b5ea56e2"
[[laskar2004.download]]
sha256 = "8b3a9510ade307e1834c51968421ab803d0d570f1c27529cb417d7a185fbcc3e"
url = "https://data.caltech.edu/records/v9dp1-jb227/files/laskar2004.tar.gz"

[landseamask]
git-tree-sha1 = "31d4110d008fcfb1ecfd77eb9675c983b650eedf"

[[landseamask.download]]
sha256 = "363c5c85f17458ce80a7e3944fcaf0539889e3e0e14f46df6752c7d9417f8fae"
url = "https://caltech.box.com/shared/static/vn1kh5x868mlo17x6ql2gmqj2flfemu3.gz"
16 changes: 16 additions & 0 deletions test/file_readers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,19 @@ end
close(ncreader)
end
end

@testset "NCFileReader missing dimensions" begin
PATH = joinpath(artifact"landseamask", "means_2.5_new.nc")
NCDataset(PATH) do nc
ncreader = FileReaders.NCFileReader(PATH, "landsea_mask")

@test ncreader.dimensions[1] == nc["lon"][:]
@test ncreader.dimensions[2] == nc["lat"][:]

@test FileReaders.read(ncreader) == nc["u10n"][:, :]

@test isempty(FileReaders.available_dates(ncreader))

close(ncreader)
end
end
16 changes: 9 additions & 7 deletions test/time_varying_inputs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ include("TestTools.jl")
@test Array(parent(column_field))[1] == fun2(10.0, 20.0; z = 30.0)
end

@testset "InteprolatingTimeVaryingInput0D" begin
@testset "InterpolatingTimeVaryingInput0D" begin
# Check times not sorted
xs = [1.0, 0.0]
ys = [1.0, 2.0]
Expand Down Expand Up @@ -119,7 +119,7 @@ end
end
end

@testset "InteprolatingTimeVaryingInput23D" begin
@testset "InterpolatingTimeVaryingInput23D" begin
PATH = joinpath(artifact"era5_example", "era5_t2m_sp_u10n_20210101.nc")
regridder_types = (:InterpolationsRegridder, :TempestRegridder)

Expand Down Expand Up @@ -198,11 +198,13 @@ end
TimeVaryingInputs.evaluate!(dest, input_nearest, target_time)

@test isequal(
parent(dest),
parent(
DataHandling.regridded_snapshot(
data_handler,
available_times[11],
Array(parent(dest)),
Array(
parent(
DataHandling.regridded_snapshot(
data_handler,
available_times[11],
),
),
),
)
Expand Down

0 comments on commit 916227e

Please sign in to comment.