Skip to content

Commit

Permalink
Merge pull request CliMA#140 from climate-machine/system-tests
Browse files Browse the repository at this point in the history
Checkpointing, NetCDF, and golden master tests
  • Loading branch information
ali-ramadhan authored Mar 21, 2019
2 parents 7163560 + 371af73 commit ecc0e46
Show file tree
Hide file tree
Showing 13 changed files with 299 additions and 32 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ GPUifyLoops = "ba82f77b-6841-5d2e-bd9f-4daf811aec27"
JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
22 changes: 17 additions & 5 deletions env/cpu/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc"

[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "49269e311ffe11ac5b334681d212329002a9832a"
git-tree-sha1 = "195a3ffcb8b0762684b6821de18f83a16455c6ea"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "1.5.1"
version = "2.0.0"

[[Conda]]
deps = ["Compat", "JSON", "VersionParsing"]
Expand All @@ -66,6 +66,12 @@ git-tree-sha1 = "3b868935adf4ce2115f5487e789553507739014c"
uuid = "a9693cdc-2bc8-5703-a9cd-1da358117377"
version = "0.1.0"

[[Crayons]]
deps = ["Test"]
git-tree-sha1 = "416737eea5c50ee5a08c588ea73d77d5eebc94e7"
uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
version = "3.0.0"

[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand All @@ -75,7 +81,7 @@ deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"

[[Distributed]]
deps = ["LinearAlgebra", "Random", "Serialization", "Sockets"]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[[FFTW]]
Expand Down Expand Up @@ -121,7 +127,7 @@ uuid = "d9be37ee-ecc9-5288-90f1-b9ca67657a75"
version = "0.7.1"

[[InteractiveUtils]]
deps = ["LinearAlgebra", "Markdown"]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[JLD]]
Expand Down Expand Up @@ -261,14 +267,20 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[TimerOutputs]]
deps = ["Crayons", "Printf", "Test", "Unicode"]
git-tree-sha1 = "b80671c06f8f8bae08c55d67b5ce292c5ae2660c"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.0"

[[URIParser]]
deps = ["Test", "Unicode"]
git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69"
uuid = "30578b45-9adc-5946-b283-645ec420af67"
version = "0.4.0"

[[UUIDs]]
deps = ["Random"]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[[Unicode]]
Expand Down
3 changes: 3 additions & 0 deletions env/cpu/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@ GPUifyLoops = "ba82f77b-6841-5d2e-bd9f-4daf811aec27"
JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
20 changes: 16 additions & 4 deletions env/gpu/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ version = "0.2.0"

[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "49269e311ffe11ac5b334681d212329002a9832a"
git-tree-sha1 = "195a3ffcb8b0762684b6821de18f83a16455c6ea"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "1.5.1"
version = "2.0.0"

[[Conda]]
deps = ["Compat", "JSON", "VersionParsing"]
Expand All @@ -96,6 +96,12 @@ git-tree-sha1 = "3b868935adf4ce2115f5487e789553507739014c"
uuid = "a9693cdc-2bc8-5703-a9cd-1da358117377"
version = "0.1.0"

[[Crayons]]
deps = ["Test"]
git-tree-sha1 = "416737eea5c50ee5a08c588ea73d77d5eebc94e7"
uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
version = "3.0.0"

[[CuArrays]]
deps = ["AbstractFFTs", "Adapt", "CUDAapi", "CUDAdrv", "CUDAnative", "DiffRules", "ForwardDiff", "GPUArrays", "LinearAlgebra", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "SparseArrays", "Test"]
git-tree-sha1 = "c1cd8792ca783987fcba2ed0d6b3b58176e6b13e"
Expand Down Expand Up @@ -140,9 +146,9 @@ version = "1.0.5"

[[FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays", "Test"]
git-tree-sha1 = "471b7e33dc9c9c5b9170045dd57c8ba0927b2918"
git-tree-sha1 = "2def0123a4f3572234405b0e3d80bfe5d3e1a2a4"
uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
version = "0.4.0"
version = "0.5.0"

[[Formatting]]
deps = ["Compat"]
Expand Down Expand Up @@ -357,6 +363,12 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[TimerOutputs]]
deps = ["Crayons", "Printf", "Test", "Unicode"]
git-tree-sha1 = "b80671c06f8f8bae08c55d67b5ce292c5ae2660c"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.0"

[[URIParser]]
deps = ["Test", "Unicode"]
git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69"
Expand Down
4 changes: 4 additions & 0 deletions env/gpu/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ authors = ["Ali Ramadhan <ali.hh.ramadhan@gmail.com>"]
version = "0.1.0"

[deps]
CUDAapi = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
CUDAdrv = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde"
CUDAnative = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17"
CuArrays = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
Expand All @@ -12,5 +13,8 @@ GPUifyLoops = "ba82f77b-6841-5d2e-bd9f-4daf811aec27"
JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
22 changes: 20 additions & 2 deletions examples/deep_convection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,20 @@ function impose_cooling_disk!(model::Model)
# geometries, probably by being able to define, e.g. a forcing only at the
# top, etc.
@inline function cooling_disk(u, v, w, T, S, Nx, Ny, Nz, Δx, Δy, Δz, i, j, k)
ifelse(k == 1 && 0.2Nx < i < 0.8Nx && 0.2Ny < j < 0.8Ny, -4.5e-6, 0)
if k == 1
x = i*Δx
y = j*Δy
Lx = Nx*Δx
Ly = Ny*Δy
r² = (x - Lx/2)^2 + (y - Ly/2)^2
if r² < 600^2
return -4.5e-6
else
return 0
end
else
return 0
end
end

model.forcing = Forcing(nothing, nothing, nothing, cooling_disk, nothing)
Expand All @@ -75,6 +88,11 @@ impose_cooling_disk!(model)
nc_writer = NetCDFOutputWriter(dir=".", prefix="deep_convection_", frequency=20)
push!(model.output_writers, nc_writer)

time_step!(model; Nt=Nt, Δt=Δt)
# time_step!(model; Nt=Nt, Δt=Δt)
for i = 1:Nt
tic = time_ns()
time_step!(model, 1, Δt)
println("Time: $(model.clock.time) [$(prettytime(time_ns()-tic))]")
end

make_vertical_slice_movie(model, nc_writer, "T", Nt, Δt, 293.15, ceil(Int, Ny/2))
2 changes: 1 addition & 1 deletion src/models.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mutable struct Model
pressures::PressureFields
G::SourceTerms
Gp::SourceTerms
forcing::Forcing
forcing # ::Forcing # No type so we can set to nothing while checkpointing.
stepper_tmp::StepperTemporaryFields
poisson_solver # ::PoissonSolver or ::PoissonSolverGPU
clock::Clock
Expand Down
56 changes: 36 additions & 20 deletions src/output_writers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ mutable struct BinaryOutputWriter <: OutputWriter
padding::Int
end

"""
    Checkpointer(; dir=".", prefix="", frequency=1, padding=9)

Keyword-argument convenience constructor. Forwards `dir`, `prefix`, `frequency`
and `padding` positionally, in that order, to the positional `Checkpointer`
constructor.
"""
Checkpointer(; dir=".", prefix="", frequency=1, padding=9) =
    Checkpointer(dir, prefix, frequency, padding)

"""
    NetCDFOutputWriter(; dir=".", prefix="", frequency=1, padding=9,
                         compression=5, async=false)

Keyword-argument convenience constructor. Forwards `dir`, `prefix`, `frequency`,
`padding`, `compression` and `async` positionally, in that order, to the
positional `NetCDFOutputWriter` constructor.
"""
NetCDFOutputWriter(; dir=".", prefix="", frequency=1, padding=9,
                   compression=5, async=false) =
    NetCDFOutputWriter(dir, prefix, frequency, padding, compression, async)
Expand All @@ -39,7 +43,7 @@ ext(fw::NetCDFOutputWriter) = ".nc"
ext(fw::Checkpointer) = ".jld"

filename(fw, name, iteration) = fw.filename_prefix * name * lpad(iteration, fw.padding, "0") * ext(fw)
filename(fw::Checkpointer, name, iteration) = filename(fw, "model_checkpoint", iteration)
filename(fw::Checkpointer, iteration) = filename(fw, "model_checkpoint_", iteration)

#
# Checkpointing functions
Expand All @@ -48,25 +52,34 @@ filename(fw::Checkpointer, name, iteration) = filename(fw, "model_checkpoint", i
function write_output(model::Model, chk::Checkpointer)
filepath = joinpath(chk.dir, filename(chk, model.clock.iteration))

# Do not include the spectral solver parameters. We want to avoid serializing
forcing_functions = model.forcing

# Do not include forcing functions and FFT plans. We want to avoid serializing
# FFTW and CuFFT plans as serializing functions is not supported by JLD, and
# seems like a tricky business in general.
model.forcing = nothing
model.poisson_solver = nothing

println("WARNING: Forcing functions are not serialized!")

println("[Checkpointer] Serializing model to disk: $filepath")
f = JLD.jldopen(filepath, "w", compress=true)
JLD.@write f model
close(f)

# Reconstruct PoissonSolver struct with FFT plans ?
println("[Checkpointer] Reconstructing FFT plans...")
metadata, grid, stepper_tmp = model.metadata, model.grid, model.stepper_tmp
if metadata.arch == :cpu
if metadata.arch == :CPU
stepper_tmp.fCC1.data .= rand(metadata.float_type, grid.Nx, grid.Ny, grid.Nz)
poisson_solver = PoissonSolver(grid, stepper_tmp.fCC1, FFTW.PATIENT; verbose=true)
elseif metadata.arch == :gpu
model.poisson_solver = PoissonSolver(grid, stepper_tmp.fCC1, FFTW.PATIENT)
elseif metadata.arch == :GPU
stepper_tmp.fCC1.data .= CuArray{Complex{Float64}}(rand(metadata.float_type, grid.Nx, grid.Ny, grid.Nz))
poisson_solver = PoissonSolverGPU(grid, stepper_tmp.fCC1)
model.poisson_solver = PoissonSolverGPU(grid, stepper_tmp.fCC1)
end

# Putting back in the forcing functions.
model.forcing = forcing_functions

return nothing
end

Expand All @@ -76,16 +89,19 @@ function restore_from_checkpoint(filepath)
model = read(f, "model");
close(f)

# Reconstruct PoissonSolver struct with FFT plans.
println("Reconstructing FFT plans...")
metadata, grid, stepper_tmp = model.metadata, model.grid, model.stepper_tmp
if metadata.arch == :cpu
if metadata.arch == :CPU
stepper_tmp.fCC1.data .= rand(metadata.float_type, grid.Nx, grid.Ny, grid.Nz)
poisson_solver = PoissonSolver(grid, stepper_tmp.fCC1, FFTW.PATIENT; verbose=true)
elseif metadata.arch == :gpu
model.poisson_solver = PoissonSolver(grid, stepper_tmp.fCC1, FFTW.PATIENT)
elseif metadata.arch == :GPU
stepper_tmp.fCC1.data .= CuArray{Complex{Float64}}(rand(metadata.float_type, grid.Nx, grid.Ny, grid.Nz))
poisson_solver = PoissonSolverGPU(grid, stepper_tmp.fCC1)
model.poisson_solver = PoissonSolverGPU(grid, stepper_tmp.fCC1)
end

model.forcing = Forcing(nothing, nothing, nothing, nothing, nothing)
println("WARNING: Forcing functions have been set to nothing!")

return model
end

Expand All @@ -99,9 +115,9 @@ function write_output(model::Model, fw::BinaryOutputWriter)
filepath = joinpath(fw.dir, filename(fw, field_name, model.clock.iteration))

println("[BinaryOutputWriter] Writing $field_name to disk: $filepath")
if model.metadata == :cpu
if model.metadata == :CPU
write(filepath, field.data)
elseif model.metadata == :gpu
elseif model.metadata == :GPU
write(filepath, Array(field.data))
end
end
Expand Down Expand Up @@ -139,7 +155,7 @@ function write_output(model::Model, fw::NetCDFOutputWriter)
"T" => Array(model.tracers.T.data),
"S" => Array(model.tracers.S.data)
)

if fw.async
# Execute asynchronously on worker 2.
println("Using @async...")
Expand All @@ -156,10 +172,10 @@ end
function write_output_netcdf(fw::NetCDFOutputWriter, fields, iteration)
xC, yC, zC = fields["xC"], fields["yC"], fields["zC"]
xF, yF, zF = fields["xF"], fields["yF"], fields["zF"]

u, v, w = fields["u"], fields["v"], fields["w"]
T, S = fields["T"], fields["S"]

xC_attr = Dict("longname" => "Locations of the cell centers in the x-direction.", "units" => "m")
yC_attr = Dict("longname" => "Locations of the cell centers in the y-direction.", "units" => "m")
zC_attr = Dict("longname" => "Locations of the cell centers in the z-direction.", "units" => "m")
Expand All @@ -184,19 +200,19 @@ function write_output_netcdf(fw::NetCDFOutputWriter, fields, iteration)

isfile(filepath) && rm(filepath)

nccreate(filepath, "u", "xF", xF, xF_attr,
nccreate(filepath, "u", "xF", xC, xC_attr,
"yC", yC, yC_attr,
"zC", zC, zC_attr,
atts=u_attr, compress=fw.compression)

nccreate(filepath, "v", "xC", xC, xC_attr,
"yF", yF, yC_attr,
"yF", yC, yC_attr,
"zC", zC, zC_attr,
atts=v_attr, compress=fw.compression)

nccreate(filepath, "w", "xC", xC, xC_attr,
"yC", yC, yC_attr,
"zF", zF, zF_attr,
"zF", zC, zC_attr,
atts=w_attr, compress=fw.compression)

nccreate(filepath, "T", "xC", xC, xC_attr,
Expand Down
Binary file added test/deep_convection_golden_master_10.nc
Binary file not shown.
24 changes: 24 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,28 @@ float_types = [Float32, Float64]
end
end
end

@testset "Output writers" begin
include("test_output_writers.jl")

@testset "Checkpointing" begin
run_thermal_bubble_checkpointer_tests()
end

@testset "NetCDF" begin
run_thermal_bubble_netcdf_tests()
end
end

@testset "Golden master tests" begin
include("test_golden_master.jl")

@testset "Thermal bubble" begin
run_thermal_bubble_golden_master_tests()
end

@testset "Deep convection" begin
run_deep_convection_golden_master_tests()
end
end
end # Oceananigans tests
Loading

0 comments on commit ecc0e46

Please sign in to comment.