Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.4.2 #377

Merged
merged 23 commits into from
Oct 10, 2024
Merged

0.4.2 #377

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-container-dl-calc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Build the Apptainer container (if cache miss)
if: steps.cache-sif.outputs.cache-hit != 'true'
run: |
apptainer build --fakeroot beethoven_dl_calc.sif container/beethoven_dl_calc.def
apptainer build --force --fakeroot beethoven_dl_calc.sif container/beethoven_dl_calc.def

- name: Cache the .sif file
if: steps.cache-sif.outputs.cache-hit != 'true'
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ _targets
# future batchtools outputs
.future
slurm_error.log
slurm/

# SLURM messages and logs
**/*.err
Expand All @@ -110,4 +111,7 @@ targets_start.Rout

.netrc
.urs_cookies
beethoven_branching_notes.txt
beethoven_branching_notes.txt

# NASA Earthdata login credentials
inst/extdata/nasa_token.txt
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: beethoven
Title: Building an Extensible, rEproducible, Test-driven, Harmonized, Open-source, Versioned, ENsemble model for air quality
Version: 0.4.1
Version: 0.4.2
Authors@R: c(
person("Kyle", "Messier", , "kyle.messier@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9508-9623")),
person("Insang", "Song", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-8732-3256")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export(pred_colname)
export(predict_meta_learner)
export(process_geos_bulk)
export(process_narr2)
export(query_modis_files)
export(read_locs)
export(read_paths)
export(reduce_list)
Expand Down
36 changes: 33 additions & 3 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,18 @@ calc_geos_strict <-
return(rast_ext)

}
future::plan(future::multicore, workers = 10)
# future::plan(future::multicore, workers = 10)
# rast_summary <-
# future.apply::future_lapply(
# future_inserted,
# function(fs) summary_byvar(fs = fs)
# )
# future::plan(future::sequential)
rast_summary <-
future.apply::future_lapply(
lapply(
future_inserted,
function(fs) summary_byvar(fs = fs)
)
future::plan(future::sequential)
rast_summary <- data.table::rbindlist(rast_summary)

return(rast_summary)
Expand Down Expand Up @@ -542,3 +547,28 @@ par_narr <- function(domain, path, date, locs, nthreads = 24L) {
return(res)

}

#' Identify MODIS files
#' @description
#' This function identifies the relevant MODIS file paths based on
#' path, list of julian dates, and index. Designed to help set arguments
#' for the `inject_modis_par` function.
#' @keywords Calculation
#' @param path A character vector specifying the path to the MODIS data.
#' @param list A list of julian dates.
#' @param index An integer specifying the index of the julian date to use.
#' @return A character vector of MODIS file paths.
#' @export
query_modis_files <- function(path, list, index) {
grep_files <- list.files(
path,
full.names = TRUE,
recursive = TRUE
) |> grep(
pattern = paste0(
"A", list[[index]], collapse = "|"
),
value = TRUE
)
return(grep_files)
}
34 changes: 34 additions & 0 deletions R/helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Helper functions for checking SLURM jobs and nodes

# nocov start
job <- function(job_id) {
system(
paste0("sacct -j ", job_id, " --format=JobID,Elapsed,TotalCPU,MaxRSS")
)
}

kb_to_gb <- function(kb) {
gb <- kb / (1024^2)
return(gb)
}

geo <- function() {
system("srun --partition=geo --cpus-per-task=1 --pty top")
}

node <- function(node = "gn040815") {
system(paste0("scontrol show node ", node))
}

queue <- function() {
system("squeue -u $USER")
}

cancel <- function() {
system("scancel -u $USER")
}

batch <- function(file = "run.sh") {
system(paste0("sbatch ", file))
}
# nocov end
197 changes: 43 additions & 154 deletions _targets.R
Original file line number Diff line number Diff line change
@@ -1,166 +1,55 @@
library(targets)
library(tarchetypes)
library(dplyr)
library(crew)
library(future)
library(beethoven)
library(amadeus)


# targets store location corresponds to _targets/ in the root of the project
tar_config_set(
store = "/opt/_targets"
################################################################################
############################## BEETHOVEN #############################
##### Main file controlling the settings, options, and sourcing of targets
##### for the beethoven analysis pipeline.

############################# CONTROLLER #############################
default_controller <- crew::crew_controller_local(
name = "default_controller",
workers = 4,
seconds_idle = 30
)

# crew contollers
# For now, one is set, but we can explore the use of multiple controllers
# Can also explore making the workers input for bash script or Rscript
geo_controller <- crew_controller_local(
name = "geo_controller",
workers = 16L,
launch_max = 8L,
seconds_idle = 120
calc_controller <- crew::crew_controller_local(
name = "calc_controller",
workers = 96,
seconds_idle = 30
)

############################## STORE ##############################
targets::tar_config_set(store = "/opt/_targets")


# Setting up the NASA Earthdata token inside the container
# This needs to be tested
if (!nzchar(Sys.getenv("NASA_EARTHDATA_TOKEN"))){
tar_source("/mnt/NASA_token_setup.R")
file.exists(".netrc")
file.exists(".urs_cookies")
file.exists(".dodsrc")
}


arglist_download <-
set_args_download(
char_period = c("2018-01-01", "2022-12-31"),
char_input_dir = "/input",
nasa_earth_data_token = Sys.getenv("NASA_EARTHDATA_TOKEN"),
mod06_filelist = "/pipeline/targets/mod06_links_2018_2022.csv",
export = TRUE,
path_export = "/pipeline/targets/download_spec.qs"
)






### NOTE: It is important to source the scipts after the global variables are defined from the set_args functions
#tar_source("/pipeline/targets/targets_aqs.R")
tar_source("/pipeline/targets/targets_download.R")

# Toy test files - note we will not have functions defined like this directly in
# the _targets.R file
my_fun_a <- function(n) {
rnorm(n)
}

my_fun_b <- function(x) {
x^2
}




tar_option_set(
packages =
c( "amadeus", "targets", "tarchetypes",
"data.table", "sf", "terra", "exactextractr",
"dplyr", "qs", "callr", "stars", "rlang"),
controller = crew_controller_group(geo_controller),
resources = tar_resources(
crew = tar_resources_crew(controller = "geo_controller")
),
error = "abridge",
############################## OPTIONS ##############################
targets::tar_option_set(
packages = c(
"beethoven", "targets", "tarchetypes", "dplyr",
"data.table", "sf", "crew", "crew.cluster"
),
repository = "local",
error = "continue",
memory = "transient",
format = "qs",
storage = "worker",
deployment = "worker",
garbage_collection = TRUE,
seed = 202401L
)

list(
tar_target(name = A, command = my_fun_a(100)),
tar_target(name = B, command = my_fun_b(A), pattern = A),
tar_target(name = save_input, command = saveRDS(B, "/input/input.rds")),
tar_target( # Test download data with amadeus
download_test,
amadeus::download_narr(
variables = c("weasd", "omega"),
year = c(2023, 2023),
directory_to_save = "/input/narr_monolevel",
acknowledgement = TRUE,
download = TRUE,
remove_command = TRUE
)
),
target_download
seed = 202401L,
controller = crew::crew_controller_group(
default_controller,
calc_controller
)


# Style below that uses sources scripts for targets by pipeline step
# Note that variables created in _targets.R are in the same local
# environment as the sourced scripts

# list(
# target_init,
# target_download
# target_calculate_fit,
# target_baselearner#,
# target_metalearner,
# target_calculate_predict,
# target_predict,
# # documents and summary statistics
# targets::tar_target(
# summary_urban_rural,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "urbanrural"))
# ,
# targets::tar_target(
# summary_state,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "state"
# )
# )
# )

# targets::tar_visnetwork(targets_only = TRUE)
# END OF FILE

# list(
# target_init,
# target_download,
# target_calculate_fit,
# target_baselearner,
# target_metalearner,
# target_calculate_predict#,
# target_predict,
# # documents and summary statistics
# targets::tar_target(
# summary_urban_rural,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "urbanrural"))
# ,
# targets::tar_target(
# summary_state,
# summary_prediction(
# grid_filled,
# level = "point",
# contrast = "state"
# )
# )
)

# targets::tar_visnetwork(targets_only = TRUE)
# END OF FILE
########################### SOURCE TARGETS ###########################
targets::tar_source("inst/targets/targets_critical.R")
targets::tar_source("inst/targets/targets_initiate.R")
targets::tar_source("inst/targets/targets_download.R")
targets::tar_source("inst/targets/targets_aqs.R")
targets::tar_source("inst/targets/targets_calculate_fit.R")

############################## PIPELINE ##############################
list(
target_critical,
target_initiate,
target_download,
target_aqs,
target_calculate_fit
)
2 changes: 1 addition & 1 deletion _targets.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
main:
store: /ddn/gs1/home/manwareme/beethoven/beethoven_targets
store: /opt/_targets
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions archive/narr_variables.csv
Git LFS file not shown
File renamed without changes.
10 changes: 3 additions & 7 deletions archive/run_interactive.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
#!/bin/bash

export PATH=$PATH:/ddn/gs1/tools/cuda11.8/bin
export LD_LIBRARY_PATH=/ddn/gs1/biotools/R/lib64/R/customlib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
if [ "$USER" != "songi2" ]; then
export R_LIBS_USER=/ddn/gs1/biotools/R/lib64/R/custompkg:/ddn/gs1/biotools/R/lib64/R/library
else
export R_LIBS_USER=/ddn/gs1/home/songi2/r-libs:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library
fi
export PATH=/ddn/gs1/tools/set/R432/bin/R:/ddn/gs1/tools/cuda11.8/bin:$PATH
export LD_LIBRARY_PATH=/ddn/gs1/tools/set/R432/lib64/R/lib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
export R_LIBS_USER=/ddn/gs1/tools/set/R432/lib64/R/library:$R_LIBS_USER

# Submit the pipeline as a background process with ./run.sh
# module load R # Uncomment if R is an environment module.
Expand Down
18 changes: 7 additions & 11 deletions archive/run_slurm.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,22 @@
#!/bin/bash

#SBATCH --job-name=pipeline_bench
#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/pipeline/pipeline_out.out
#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/pipeline/pipeline_err.err
#SBATCH --job-name=beethoven
#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/output.out
#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/error.err
#SBATCH --mail-type=END,FAIL
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=32g
#SBATCH --partition=geo
#SBATCH --mail-user=manwareme@nih.gov

export PATH=$PATH:/ddn/gs1/tools/cuda11.8/bin
export LD_LIBRARY_PATH=/ddn/gs1/biotools/R/lib64/R/customlib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
if [ "$USER" != "songi2" ]; then
export R_LIBS_USER=/ddn/gs1/biotools/R/lib64/R/custompkg:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library
else
export R_LIBS_USER=/ddn/gs1/home/songi2/r-libs:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library
fi
export PATH=/ddn/gs1/tools/set/R432/bin/R:/ddn/gs1/tools/cuda11.8/bin:$PATH
export LD_LIBRARY_PATH=/ddn/gs1/tools/set/R432/lib64/R/lib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH
export R_LIBS_USER=/ddn/gs1/tools/set/R432/lib64/R/library:$R_LIBS_USER

# modify it into the proper directory path. and output/error paths in the
# # SBATCH directives
# USER_PROJDIR=/ddn/gs1/home/$USER/projects
USER_PROJDIR=/ddn/gs1/home/manwareme/beethoven/

nohup nice -4 Rscript $USER_PROJDIR/beethoven/inst/targets/targets_start.R
nohup nice -4 /ddn/gs1/tools/set/R432/bin/Rscript $USER_PROJDIR/beethoven/inst/targets/targets_start.R
Loading
Loading