Skip to content

Commit

Permalink
Merge pull request #370 from NIEHS/container-01
Browse files Browse the repository at this point in the history
_targets.R, test and run bash scripts
  • Loading branch information
kyle-messier authored Oct 2, 2024
2 parents e460622 + 8790b40 commit 7d17aba
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 108 deletions.
209 changes: 108 additions & 101 deletions _targets.R
Original file line number Diff line number Diff line change
@@ -1,123 +1,80 @@
library(targets)
library(tarchetypes)
library(future)
library(future.batchtools)
library(dplyr)
library(
beethoven,
lib.loc = "/ddn/gs1/home/manwareme/R/x86_64-pc-linux-gnu-library/4.3"
)
library(tidymodels)
library(bonsai)
# library(
# torch,
# lib.loc = "/ddn/gs1/biotools/R/lib64/R/library"
# )
library(crew)
library(future)
library(beethoven)
library(amadeus)

Sys.setenv("LD_LIBRARY_PATH" = paste("/ddn/gs1/biotools/R/lib64/R/customlib", Sys.getenv("LD_LIBRARY_PATH"), sep = ":"))

# replacing yaml file.
# targets store location corresponds to _targets/ in the root of the project
tar_config_set(
store = "/ddn/gs1/home/manwareme/beethoven/beethoven_targets"
store = "/opt/_targets"
)

# crew controllers
# For now, one is set, but we can explore the use of multiple controllers
# Can also explore making the workers input for bash script or Rscript
#
# Local crew controller. tar_option_set() further down refers to it both as
# an object (crew_controller_group(geo_controller)) and by its name string
# ("geo_controller" in tar_resources_crew()), so keep the two in sync.
# NOTE(review): see the crew_controller_local() documentation for the exact
# semantics of workers / launch_max / seconds_idle before tuning them.
geo_controller <- crew_controller_local(
name = "geo_controller",
workers = 16L,
launch_max = 8L,
seconds_idle = 120
)

# maximum future exportable object size is set 50GB
# TODO: the maximum size error did not appear until recently
# and suddenly appeared. Need to investigate the cause.
# Should be removed after the investigation.
# options(future.globals.maxSize = 50 * 2^30)
options(future.globals.maxSize = 60 * 1024^3) # 60 GiB


generate_list_download <- FALSE
# Set up the NASA Earthdata credentials inside the container when no token
# has been exported through the NASA_EARTHDATA_TOKEN environment variable.
# NOTE(review): this path still needs to be tested inside the container.
if (!nzchar(Sys.getenv("NASA_EARTHDATA_TOKEN"))) {
  tar_source("/mnt/NASA_token_setup.R")
  # Previously file.exists() was called on each credential file and the
  # result was thrown away, so a failed setup went unnoticed. Check all of
  # them (relative to the current working directory, as before) and report
  # whichever are still missing.
  credential_files <- c(".netrc", ".urs_cookies", ".dodsrc")
  missing_files <- credential_files[!file.exists(credential_files)]
  if (length(missing_files) > 0) {
    warning(
      "NASA Earthdata credential file(s) not found in ", getwd(), ": ",
      paste(missing_files, collapse = ", "),
      call. = FALSE
    )
  }
}


arglist_download <-
set_args_download(
char_period = c("2018-01-01", "2022-12-31"),
char_input_dir = "input",
nasa_earth_data_token = NULL,#Sys.getenv("NASA_EARTHDATA_TOKEN"),
mod06_filelist = "inst/targets/mod06_links_2018_2022.csv",
export = generate_list_download,
path_export = "inst/targets/download_spec.qs"
char_input_dir = "/input",
nasa_earth_data_token = Sys.getenv("NASA_EARTHDATA_TOKEN"),
mod06_filelist = "/pipeline/targets/mod06_links_2018_2022.csv",
export = TRUE,
path_export = "/pipeline/targets/download_spec.qs"
)

generate_list_calc <- FALSE

arglist_common <-
set_args_calc(
char_siteid = "site_id",
char_timeid = "time",
char_period = c("2018-01-01", "2022-12-31"),
num_extent = c(-126, -62, 22, 52),
char_user_email = paste0(Sys.getenv("USER"), "@nih.gov"),
export = generate_list_calc,
path_export = "inst/targets/calc_spec.qs",
char_input_dir = "/ddn/gs1/group/set/Projects/NRT-AP-Model/input"
)

tar_source("inst/targets/targets_initialize.R")
tar_source("inst/targets/targets_download.R")
tar_source("inst/targets/targets_calculate_fit.R")
tar_source("inst/targets/targets_calculate_predict.R")
tar_source("inst/targets/targets_baselearner.R")
tar_source("inst/targets/targets_metalearner.R")
tar_source("inst/targets/targets_predict.R")


# bypass option
Sys.setenv("BTV_DOWNLOAD_PASS" = "TRUE")

#
# bind custom built GDAL
# Users should export the right path to the GDAL library
# by export LD_LIBRARY_PATH=.... command.
### NOTE: It is important to source the scripts after the global variables are defined from the set_args functions
#tar_source("/pipeline/targets/targets_aqs.R")
tar_source("/pipeline/targets/targets_download.R")

# arglist_common is generated above
plan(
list(
tweak(
future.batchtools::batchtools_slurm,
template = "inst/targets/template_slurm.tmpl",
resources =
list(
memory = 8,
log.file = "slurm_run.log",
ncpus = 1, partition = "geo", ntasks = 1,
email = arglist_common$char_user_email,
error.file = "slurm_error.log"
)
),
multicore
)
)
# Toy test functions - note that we will not normally define functions like
# this directly in the _targets.R file
# Toy helper: draw random values with rnorm() using its default mean and sd.
#
# @param n Passed unchanged to rnorm() as the number of observations.
# @return The numeric vector produced by rnorm(n).
my_fun_a <- function(n) {
  draws <- rnorm(n)
  draws
}

# Toy helper: square the input element-wise.
#
# @param x A numeric vector (or scalar).
# @return `x` raised to the power of 2, same length as `x`.
my_fun_b <- function(x) {
  squared <- x^2
  squared
}

# # invalidate any nodes older than 180 days: force running the pipeline
# tar_invalidate(any_of(tar_older(Sys.time() - as.difftime(180, units = "days"))))


# # nullify download target if bypass option is set
if (Sys.getenv("BTV_DOWNLOAD_PASS") == "TRUE") {
target_download <- NULL
}

# targets options
# For GPU support, users should be aware of setting environment
# variables and GPU versions of the packages.
# TODO: check if the controller and resources setting are required
tar_option_set(
packages = c(
"beethoven", "amadeus", "chopin", "targets", "tarchetypes",
"data.table", "sf", "terra", "exactextractr",
#"crew", "crew.cluster",
"tigris", "dplyr",
"future.batchtools", "qs", "collapse", "bonsai",
"tidymodels", "tune", "rsample", "torch", "brulee",
"glmnet", "xgboost",
"future", "future.apply", "future.callr", "callr",
"stars", "rlang", "parallelly"
),
library = c("/ddn/gs1/group/set/isong-archive/r-libs"),
repository = "local",
packages =
c( "amadeus", "targets", "tarchetypes",
"data.table", "sf", "terra", "exactextractr",
"dplyr", "qs", "callr", "stars", "rlang"),
controller = crew_controller_group(geo_controller),
resources = tar_resources(
crew = tar_resources_crew(controller = "geo_controller")
),
error = "abridge",
memory = "transient",
format = "qs",
Expand All @@ -127,15 +84,65 @@ tar_option_set(
seed = 202401L
)

# should run tar_make_future()
# Pipeline definition: the list of targets that _targets.R hands back.
# It holds three toy targets (A, B, save_input), one test download through
# amadeus, and the target_download object.
list(
# A: output of my_fun_a(100)
tar_target(name = A, command = my_fun_a(100)),
# B: my_fun_b() applied to A, declared with `pattern = A`
# NOTE(review): targets patterns are usually written as map(A); confirm a
# bare symbol is accepted here.
tar_target(name = B, command = my_fun_b(A), pattern = A),
# Writes B to /input/input.rds; the run scripts bind-mount $PWD/input to /input
# NOTE(review): saveRDS() returns NULL, so this target stores no value of its
# own; presumably only the file side effect is wanted here.
tar_target(name = save_input, command = saveRDS(B, "/input/input.rds")),
tar_target( # Test download data with amadeus
download_test,
amadeus::download_narr(
variables = c("weasd", "omega"),
year = c(2023, 2023),
directory_to_save = "/input/narr_monolevel",
acknowledgement = TRUE,
download = TRUE,
remove_command = TRUE
)
),
# presumably defined by the sourced /pipeline/targets/targets_download.R;
# verify it exists (and is not NULL) before running the pipeline
target_download
)


# The style below uses sourced scripts to define the targets for each pipeline step
# Note that variables created in _targets.R are in the same local
# environment as the sourced scripts

# list(
# target_init,
# target_download
# target_calculate_fit,
# target_baselearner#,
# target_metalearner,
# target_calculate_predict,
# target_predict,
# # documents and summary statistics
# targets::tar_target(
# summary_urban_rural,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "urbanrural"))
# ,
# targets::tar_target(
# summary_state,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "state"
# )
# )
# )

# targets::tar_visnetwork(targets_only = TRUE)
# END OF FILE

list(
target_init,
target_download,
target_calculate_fit,
target_baselearner,
target_metalearner,
target_calculate_predict#,
# list(
# target_init,
# target_download,
# target_calculate_fit,
# target_baselearner,
# target_metalearner,
# target_calculate_predict#,
# target_predict,
# # documents and summary statistics
# targets::tar_target(
Expand Down
10 changes: 5 additions & 5 deletions container/run_container_dl_calc.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#!/bin/bash
#SBATCH --job-name=beethoven_001
#SBATCH --job-name=download_calc
#SBATCH --partition=geo
#SBATCH --mem=128G
#SBATCH --cpus-per-task=4
#SBATCH --ntasks=16
#SBATCH --output=slurm_messages/slurm-%j.out
#SBATCH --error=slurm_messages/slurm-%j.err
#SBATCH --output=../slurm_messages/slurm-%j.out
#SBATCH --error=../slurm_messages/slurm-%j.err
#SBATCH --mail-user=kyle.messier@nih.gov
#SBATCH --mail-type=ALL




# Run the container
# The .sif file sits in "root/container", thus we need to go up one level with bind mounts
apptainer exec \
--bind $PWD/inst:/pipeline \
--bind $PWD/input:/input \
Expand Down
4 changes: 2 additions & 2 deletions container/run_dl_calc_local_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#SBATCH --mem=128G
#SBATCH --cpus-per-task=4
#SBATCH --ntasks=16
#SBATCH --output=slurm_messages/slurm-%j.out
#SBATCH --error=slurm_messages/slurm-%j.err
#SBATCH --output=../slurm_messages/slurm-%j.out
#SBATCH --error=../slurm_messages/slurm-%j.err
#SBATCH --mail-user=kyle.messier@nih.gov
#SBATCH --mail-type=ALL

Expand Down

0 comments on commit 7d17aba

Please sign in to comment.