Skip to content

Commit

Permalink
Merge pull request #377 from NIEHS/dev-0930
Browse files Browse the repository at this point in the history
0.4.2
  • Loading branch information
mitchellmanware authored Oct 10, 2024
2 parents 7d17aba + d7118ca commit 56f2a3a
Show file tree
Hide file tree
Showing 85 changed files with 2,109 additions and 1,300 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-container-dl-calc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Build the Apptainer container (if cache miss)
if: steps.cache-sif.outputs.cache-hit != 'true'
run: |
apptainer build --fakeroot beethoven_dl_calc.sif container/beethoven_dl_calc.def
apptainer build --force --fakeroot beethoven_dl_calc.sif container/beethoven_dl_calc.def
- name: Cache the .sif file
if: steps.cache-sif.outputs.cache-hit != 'true'
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ _targets
# future batchtools outputs
.future
slurm_error.log
slurm/

# SLURM messages and logs
**/*.err
Expand All @@ -110,4 +111,7 @@ targets_start.Rout

.netrc
.urs_cookies
beethoven_branching_notes.txt
beethoven_branching_notes.txt

# NASA Earthdata login credentials
inst/extdata/nasa_token.txt
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: beethoven
Title: Building an Extensible, rEproducible, Test-driven, Harmonized, Open-source, Versioned, ENsemble model for air quality
Version: 0.4.1
Version: 0.4.2
Authors@R: c(
person("Kyle", "Messier", , "kyle.messier@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9508-9623")),
person("Insang", "Song", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-8732-3256")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export(pred_colname)
export(predict_meta_learner)
export(process_geos_bulk)
export(process_narr2)
export(query_modis_files)
export(read_locs)
export(read_paths)
export(reduce_list)
Expand Down
36 changes: 33 additions & 3 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,18 @@ calc_geos_strict <-
return(rast_ext)

}
future::plan(future::multicore, workers = 10)
# future::plan(future::multicore, workers = 10)
# rast_summary <-
# future.apply::future_lapply(
# future_inserted,
# function(fs) summary_byvar(fs = fs)
# )
# future::plan(future::sequential)
rast_summary <-
future.apply::future_lapply(
lapply(
future_inserted,
function(fs) summary_byvar(fs = fs)
)
future::plan(future::sequential)
rast_summary <- data.table::rbindlist(rast_summary)

return(rast_summary)
Expand Down Expand Up @@ -542,3 +547,28 @@ par_narr <- function(domain, path, date, locs, nthreads = 24L) {
return(res)

}

#' Identify MODIS files
#' @description
#' This function identifies the relevant MODIS file paths based on
#' path, list of julian dates, and index. Designed to help set arguments
#' for the `inject_modis_par` function.
#' @keywords Calculation
#' @param path A character vector specifying the path to the MODIS data.
#' @param list A list of julian dates.
#' @param index An integer specifying the index of the julian date to use.
#' @return A character vector of MODIS file paths.
#' @export
query_modis_files <- function(path, list, index) {
grep_files <- list.files(
path,
full.names = TRUE,
recursive = TRUE
) |> grep(
pattern = paste0(
"A", list[[index]], collapse = "|"
),
value = TRUE
)
return(grep_files)
}
34 changes: 34 additions & 0 deletions R/helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Helper functions for checking SLURM jobs and nodes

# nocov start
job <- function(job_id) {
system(
paste0("sacct -j ", job_id, " --format=JobID,Elapsed,TotalCPU,MaxRSS")
)
}

kb_to_gb <- function(kb) {
gb <- kb / (1024^2)
return(gb)
}

geo <- function() {
system("srun --partition=geo --cpus-per-task=1 --pty top")
}

node <- function(node = "gn040815") {
system(paste0("scontrol show node ", node))
}

queue <- function() {
system("squeue -u $USER")
}

cancel <- function() {
system("scancel -u $USER")
}

batch <- function(file = "run.sh") {
system(paste0("sbatch ", file))
}
# nocov end
197 changes: 43 additions & 154 deletions _targets.R
Original file line number Diff line number Diff line change
@@ -1,166 +1,55 @@
library(targets)
library(tarchetypes)
library(dplyr)
library(crew)
library(future)
library(beethoven)
library(amadeus)


# targets store location corresponds to _targets/ in the root of the project
tar_config_set(
store = "/opt/_targets"
################################################################################
############################## BEETHOVEN #############################
##### Main file controlling the settings, options, and sourcing of targets
##### for the beethoven analysis pipeline.

############################# CONTROLLER #############################
default_controller <- crew::crew_controller_local(
name = "default_controller",
workers = 4,
seconds_idle = 30
)

# crew contollers
# For now, one is set, but we can explore the use of multiple controllers
# Can also explore making the workers input for bash script or Rscript
geo_controller <- crew_controller_local(
name = "geo_controller",
workers = 16L,
launch_max = 8L,
seconds_idle = 120
calc_controller <- crew::crew_controller_local(
name = "calc_controller",
workers = 96,
seconds_idle = 30
)

############################## STORE ##############################
targets::tar_config_set(store = "/opt/_targets")


# Setting up the NASA Earthdata token inside the container
# This needs to be tested
if (!nzchar(Sys.getenv("NASA_EARTHDATA_TOKEN"))){
tar_source("/mnt/NASA_token_setup.R")
file.exists(".netrc")
file.exists(".urs_cookies")
file.exists(".dodsrc")
}


arglist_download <-
set_args_download(
char_period = c("2018-01-01", "2022-12-31"),
char_input_dir = "/input",
nasa_earth_data_token = Sys.getenv("NASA_EARTHDATA_TOKEN"),
mod06_filelist = "/pipeline/targets/mod06_links_2018_2022.csv",
export = TRUE,
path_export = "/pipeline/targets/download_spec.qs"
)






### NOTE: It is important to source the scipts after the global variables are defined from the set_args functions
#tar_source("/pipeline/targets/targets_aqs.R")
tar_source("/pipeline/targets/targets_download.R")

# Toy test files - note we will not have functions defined like this directly in
# the _targets.R file
my_fun_a <- function(n) {
rnorm(n)
}

my_fun_b <- function(x) {
x^2
}




tar_option_set(
packages =
c( "amadeus", "targets", "tarchetypes",
"data.table", "sf", "terra", "exactextractr",
"dplyr", "qs", "callr", "stars", "rlang"),
controller = crew_controller_group(geo_controller),
resources = tar_resources(
crew = tar_resources_crew(controller = "geo_controller")
),
error = "abridge",
############################## OPTIONS ##############################
targets::tar_option_set(
packages = c(
"beethoven", "targets", "tarchetypes", "dplyr",
"data.table", "sf", "crew", "crew.cluster"
),
repository = "local",
error = "continue",
memory = "transient",
format = "qs",
storage = "worker",
deployment = "worker",
garbage_collection = TRUE,
seed = 202401L
)

list(
tar_target(name = A, command = my_fun_a(100)),
tar_target(name = B, command = my_fun_b(A), pattern = A),
tar_target(name = save_input, command = saveRDS(B, "/input/input.rds")),
tar_target( # Test download data with amadeus
download_test,
amadeus::download_narr(
variables = c("weasd", "omega"),
year = c(2023, 2023),
directory_to_save = "/input/narr_monolevel",
acknowledgement = TRUE,
download = TRUE,
remove_command = TRUE
)
),
target_download
seed = 202401L,
controller = crew::crew_controller_group(
default_controller,
calc_controller
)


# Style below that uses sources scripts for targets by pipeline step
# Note that variables created in _targets.R are in the same local
# environment as the sourced scripts

# list(
# target_init,
# target_download
# target_calculate_fit,
# target_baselearner#,
# target_metalearner,
# target_calculate_predict,
# target_predict,
# # documents and summary statistics
# targets::tar_target(
# summary_urban_rural,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "urbanrural"))
# ,
# targets::tar_target(
# summary_state,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "state"
# )
# )
# )

# targets::tar_visnetwork(targets_only = TRUE)
# END OF FILE

# list(
# target_init,
# target_download,
# target_calculate_fit,
# target_baselearner,
# target_metalearner,
# target_calculate_predict#,
# target_predict,
# # documents and summary statistics
# targets::tar_target(
# summary_urban_rural,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "urbanrural"))
# ,
# targets::tar_target(
# summary_state,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "state"
# )
# )
)

# targets::tar_visnetwork(targets_only = TRUE)
# END OF FILE
########################### SOURCE TARGETS ###########################
targets::tar_source("inst/targets/targets_critical.R")
targets::tar_source("inst/targets/targets_initiate.R")
targets::tar_source("inst/targets/targets_download.R")
targets::tar_source("inst/targets/targets_aqs.R")
targets::tar_source("inst/targets/targets_calculate_fit.R")

############################## PIPELINE ##############################
list(
target_critical,
target_initiate,
target_download,
target_aqs,
target_calculate_fit
)
2 changes: 1 addition & 1 deletion _targets.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
main:
store: /ddn/gs1/home/manwareme/beethoven/beethoven_targets
store: /opt/_targets
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions archive/narr_variables.csv
Git LFS file not shown
File renamed without changes.
10 changes: 3 additions & 7 deletions archive/run_interactive.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
#!/bin/bash

export PATH=$PATH:/ddn/gs1/tools/cuda11.8/bin
export LD_LIBRARY_PATH=/ddn/gs1/biotools/R/lib64/R/customlib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
if [ "$USER" != "songi2" ]; then
export R_LIBS_USER=/ddn/gs1/biotools/R/lib64/R/custompkg:/ddn/gs1/biotools/R/lib64/R/library
else
export R_LIBS_USER=/ddn/gs1/home/songi2/r-libs:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library
fi
export PATH=/ddn/gs1/tools/set/R432/bin/R:/ddn/gs1/tools/cuda11.8/bin:$PATH
export LD_LIBRARY_PATH=/ddn/gs1/tools/set/R432/lib64/R/lib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
export R_LIBS_USER=/ddn/gs1/tools/set/R432/lib64/R/library:$R_LIBS_USER

# Submit the pipeline as a background process with ./run.sh
# module load R # Uncomment if R is an environment module.
Expand Down
18 changes: 7 additions & 11 deletions archive/run_slurm.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
#!/bin/bash

#SBATCH --job-name=pipeline_bench
#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/pipeline/pipeline_out.out
#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/pipeline/pipeline_err.err
#SBATCH --job-name=beethoven
#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/output.out
#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/error.err
#SBATCH --mail-type=END,FAIL
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=32g
#SBATCH --partition=geo
#SBATCH --mail-user=manwareme@nih.gov

export PATH=$PATH:/ddn/gs1/tools/cuda11.8/bin
export LD_LIBRARY_PATH=/ddn/gs1/biotools/R/lib64/R/customlib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
if [ "$USER" != "songi2" ]; then
export R_LIBS_USER=/ddn/gs1/biotools/R/lib64/R/custompkg:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library
else
export R_LIBS_USER=/ddn/gs1/home/songi2/r-libs:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library
fi
export PATH=/ddn/gs1/tools/set/R432/bin/R:/ddn/gs1/tools/cuda11.8/bin:$PATH
export LD_LIBRARY_PATH=/ddn/gs1/tools/set/R432/lib64/R/lib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
export R_LIBS_USER=/ddn/gs1/tools/set/R432/lib64/R/library:$R_LIBS_USER

# modify it into the proper directory path. and output/error paths in the
# # SBATCH directives
# USER_PROJDIR=/ddn/gs1/home/$USER/projects
USER_PROJDIR=/ddn/gs1/home/manwareme/beethoven/

nohup nice -4 Rscript $USER_PROJDIR/beethoven/inst/targets/targets_start.R
nohup nice -4 /ddn/gs1/tools/set/R432/bin/Rscript $USER_PROJDIR/beethoven/inst/targets/targets_start.R
Loading

0 comments on commit 56f2a3a

Please sign in to comment.