diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index dec2daf8..308a0d5b 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, dev] pull_request: - branches: [main, master] + branches: [main, master, dev] name: R-CMD-check diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index f60d0479..da54e138 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, dev] pull_request: - branches: [main, master] + branches: [main, master, dev] name: lint diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 184ae644..f57508e7 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, dev] pull_request: - branches: [main, master] + branches: [main, master, dev] release: types: [published] workflow_dispatch: diff --git a/.github/workflows/test-container-dl-calc.yaml b/.github/workflows/test-container-dl-calc.yaml new file mode 100644 index 00000000..792a8819 --- /dev/null +++ b/.github/workflows/test-container-dl-calc.yaml @@ -0,0 +1,73 @@ +name: Test Coverage for Download and Calculate via Apptainer + +on: + push: + branches: [main, master, dev] + pull_request: + branches: [main, master, dev] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Create run_dl_calc_tests.R dynamically + run: | + mkdir -p tests/testthat # Ensure the directory exists + echo 'testthat::test_file("tests/testthat/test_download.R")' > tests/testthat/run_dl_calc_tests.R + echo 'testthat::test_file("tests/testthat/test_calculate.R")' >> tests/testthat/run_dl_calc_tests.R + + - name: Install Apptainer dependencies + run: | + sudo apt-get update && sudo apt-get install -y \ + libseccomp-dev \ + squashfs-tools \ + cryptsetup \ + wget + + - name: Install Apptainer + run: | + wget https://github.com/apptainer/apptainer/releases/download/v1.1.0/apptainer-1.1.0.tar.gz + tar -xvf apptainer-1.1.0.tar.gz + cd apptainer-1.1.0 + ./mconfig && make -C builddir && sudo make -C builddir install + + - name: Restore .sif file from cache + id: cache-sif + uses: actions/cache@v3 + with: + path: beethoven_dl_calc.sif + key: sif-cache-${{ runner.os }}-${{ hashFiles('container/beethoven_dl_calc.def') }} + restore-keys: | + sif-cache-${{ runner.os }}- + + - name: Build the Apptainer container (if cache miss) + if: steps.cache-sif.outputs.cache-hit != 'true' + run: | + apptainer build --force --fakeroot beethoven_dl_calc.sif container/beethoven_dl_calc.def + + - name: Cache the .sif file + if: 
steps.cache-sif.outputs.cache-hit != 'true' + uses: actions/cache@v3 + with: + path: beethoven_dl_calc.sif + key: sif-cache-${{ runner.os }}-${{ hashFiles('container/beethoven_dl_calc.def') }} + + - name: Check if .sif file exists + run: | + if [ ! -f beethoven_dl_calc.sif ]; then + echo "Error: .sif file not found!" + exit 1 + fi + + - name: Run R tests + run: | + apptainer exec \ + --bind $PWD/inst:/pipeline \ + --bind $PWD/input:/input \ + --bind $PWD:/mnt \ + beethoven_dl_calc.sif \ + Rscript /mnt/tests/testthat/run_dl_calc_tests.R diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 220709d7..51f2bbb2 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -3,9 +3,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, master, dev] pull_request: - branches: [main, master] + branches: [main, master, dev] name: test-coverage-local diff --git a/.gitignore b/.gitignore index 593e14a5..0e8a6de6 100644 --- a/.gitignore +++ b/.gitignore @@ -95,6 +95,7 @@ _targets # future batchtools outputs .future slurm_error.log +slurm/ # SLURM messages and logs **/*.err @@ -110,4 +111,10 @@ targets_start.Rout .netrc .urs_cookies -beethoven_branching_notes.txt \ No newline at end of file +beethoven_branching_notes.txt + +# NASA Earthdata login credentials +inst/extdata/nasa_token.txt + +# flexible library for beethoven ad-hoc update +lib-flex \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 7a3d8313..0791bd60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: beethoven Title: Building an Extensible, rEproducible, Test-driven, Harmonized, Open-source, Versioned, ENsemble model for air quality -Version: 0.4.1 +Version: 0.4.2 Authors@R: c( person("Kyle", "Messier", , "kyle.messier@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9508-9623")), 
person("Insang", "Song", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-8732-3256")), diff --git a/NAMESPACE b/NAMESPACE index d82ed22b..6112bb24 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(fit_base_learner) export(fit_base_tune) export(fit_meta_learner) export(fl_dates) +export(fl_dates_flatten) export(generate_cv_index_sp) export(generate_cv_index_spt) export(generate_cv_index_ts) @@ -38,6 +39,7 @@ export(pred_colname) export(predict_meta_learner) export(process_geos_bulk) export(process_narr2) +export(query_modis_files) export(read_locs) export(read_paths) export(reduce_list) @@ -51,10 +53,10 @@ export(switch_generate_cv_rset) export(switch_model) export(unmarshal_function) export(vis_spt_rset) -importFrom(amadeus,calc_covariates) importFrom(amadeus,calc_prepare_locs) -importFrom(amadeus,calc_temporal_dummies) importFrom(amadeus,calc_worker) +importFrom(amadeus,calculate_covariates) +importFrom(amadeus,calculate_temporal_dummies) importFrom(amadeus,check_for_null_parameters) importFrom(amadeus,download_data) importFrom(amadeus,download_sanitize_path) diff --git a/R/calc_postprocessing.R b/R/calc_postprocessing.R index bbe2a708..bd4d7717 100644 --- a/R/calc_postprocessing.R +++ b/R/calc_postprocessing.R @@ -286,7 +286,7 @@ post_calc_df_year_expand <- function( #' @note This version assumes the time_id contains Date-like strings. 
#' @return data.frame #' @importFrom data.table merge.data.table -#' @importFrom amadeus calc_temporal_dummies +#' @importFrom amadeus calculate_temporal_dummies #' @export post_calc_merge_all <- function( @@ -314,7 +314,7 @@ post_calc_merge_all <- # need POSIXt class for amadeus function locs_merged[[time_id]] <- as.POSIXct(locs_merged[[time_id]]) locs_merged <- - amadeus::calc_temporal_dummies( + amadeus::calculate_temporal_dummies( locs = locs_merged, locs_id = locs_id, year = target_years diff --git a/R/calculate.R b/R/calculate.R index d6c29aca..d4a95525 100644 --- a/R/calculate.R +++ b/R/calculate.R @@ -40,7 +40,8 @@ calc_geos_strict <- paths <- list.files( path, pattern = "GEOS-CF.v01.rpl", - full.names = TRUE + full.names = TRUE, + recursive = TRUE ) paths <- paths[grep( ".nc4", @@ -203,13 +204,18 @@ calc_geos_strict <- return(rast_ext) } - future::plan(future::multicore, workers = 10) + # future::plan(future::multicore, workers = 10) + # rast_summary <- + # future.apply::future_lapply( + # future_inserted, + # function(fs) summary_byvar(fs = fs) + # ) + # future::plan(future::sequential) rast_summary <- - future.apply::future_lapply( + lapply( future_inserted, function(fs) summary_byvar(fs = fs) ) - future::plan(future::sequential) rast_summary <- data.table::rbindlist(rast_summary) return(rast_summary) @@ -519,10 +525,11 @@ par_narr <- function(domain, path, date, locs, nthreads = 24L) { if (!dir.exists(path)) { stop("The specified path does not exist.") } - future::plan(future::multicore, workers = nthreads) + # future::plan(future::multicore, workers = nthreads) res <- - future.apply::future_lapply( + # future.apply::future_lapply( + lapply( domain, function(x) { from <- process_narr2( @@ -538,7 +545,32 @@ par_narr <- function(domain, path, date, locs, nthreads = 24L) { }, future.seed = TRUE ) - future::plan(future::sequential) + # future::plan(future::sequential) return(res) } + +#' Identify MODIS files +#' @description +#' This function identifies the 
relevant MODIS file paths based on +#' path, list of julian dates, and index. Designed to help set arguments +#' for the `inject_modis_par` function. +#' @keywords Calculation +#' @param path A character vector specifying the path to the MODIS data. +#' @param list A list of julian dates. +#' @param index An integer specifying the index of the julian date to use. +#' @return A character vector of MODIS file paths. +#' @export +query_modis_files <- function(path, list, index) { + grep_files <- list.files( + path, + full.names = TRUE, + recursive = TRUE + ) |> grep( + pattern = paste0( + "A", list[[index]], collapse = "|" + ), + value = TRUE + ) + return(grep_files) +} diff --git a/R/helpers.R b/R/helpers.R new file mode 100644 index 00000000..711f0793 --- /dev/null +++ b/R/helpers.R @@ -0,0 +1,34 @@ +# Helper functions for checking SLURM jobs and nodes + +# nocov start +job <- function(job_id) { + system( + paste0("sacct -j ", job_id, " --format=JobID,Elapsed,TotalCPU,MaxRSS") + ) +} + +kb_to_gb <- function(kb) { + gb <- kb / (1024^2) + return(gb) +} + +geo <- function() { + system("srun --partition=geo --cpus-per-task=1 --pty top") +} + +node <- function(node = "gn040815") { + system(paste0("scontrol show node ", node)) +} + +queue <- function() { + system("squeue -u $USER") +} + +cancel <- function() { + system("scancel -u $USER") +} + +batch <- function(file = "run.sh") { + system(paste0("sbatch ", file)) +} +# nocov end diff --git a/R/injection.R b/R/injection.R index 8c486c10..d64ef1d3 100644 --- a/R/injection.R +++ b/R/injection.R @@ -92,12 +92,12 @@ set_target_years <- #' @param process_function Raw data processor. Default is #' [`amadeus::process_covariates`] #' @param calc_function Function to calculate covariates. -#' [`amadeus::calc_covariates`] +#' [`amadeus::calculate_covariates`] #' @param ... Arguments passed to `process_function` and `calc_function` #' @return A data.table object. 
#' @importFrom data.table rbindlist #' @importFrom rlang inject -#' @importFrom amadeus process_covariates calc_covariates +#' @importFrom amadeus process_covariates calculate_covariates #' @importFrom future plan sequential multicore #' @export calculate <- @@ -106,7 +106,7 @@ calculate <- domain_name = "year", nthreads = 1L, process_function = amadeus::process_covariates, - calc_function = amadeus::calc_covariates, + calc_function = amadeus::calculate_covariates, ... ) { if (is.null(domain)) { @@ -117,17 +117,18 @@ calculate <- domainlist <- split(domain, seq_along(domain)) years_data <- seq_along(domain) + 2017 - if (nthreads == 1L) { - future::plan(future::sequential) - } else { - future::plan(future::multicore, workers = nthreads) - } + # if (nthreads == 1L) { + # future::plan(future::sequential) + # } else { + # future::plan(future::multicore, workers = nthreads) + # } # double twists: list_iteration is made to distinguish # cases where a single radius is accepted or ones have no radius # argument. res_calc <- #try( - future.apply::future_mapply( + mapply( + # future.apply::future_mapply( function(domain_each, year_each) { # we assume that ... have no "year" and "from" arguments args_process <- c(arg = domain_each, list(...)) @@ -187,11 +188,11 @@ calculate <- } return(df_iteration_calc) }, - domainlist, years_data, SIMPLIFY = FALSE, - future.seed = TRUE + domainlist, years_data, + SIMPLIFY = FALSE ) - future::plan(future::sequential) + # future::plan(future::sequential) if (inherits(res_calc, "try-error")) { cat(paste0(attr(res_calc, "condition")$message, "\n")) stop("Results do not match expectations.") @@ -252,9 +253,9 @@ inject_calculate <- function(covariate, locs, injection) { #' @param locs A data frame containing the locations for which MODIS #' features need to be calculated. #' @param injection **List** of dditional parameters to be passed to the -#' `calc_modis_par` function. +#' `calculate_modis_par` function. 
#' @return MODIS/VIIRS feature data.frame. -#' @seealso [`amadeus::calc_modis_daily`], [`amadeus::calc_modis_par`] +#' @seealso [`amadeus::calculate_modis_daily`], [`amadeus::calculate_modis_par`] #' @importFrom rlang inject #' @examples #' \dontrun{ @@ -274,7 +275,7 @@ inject_calculate <- function(covariate, locs, injection) { #' @export inject_modis_par <- function(locs, injection) { rlang::inject( - amadeus::calc_modis_par( + amadeus::calculate_modis_par( locs = locs, locs_id = "site_id", !!!injection @@ -326,17 +327,16 @@ inject_geos <- function(locs, injection, ...) { #' Default is 4. #' @return A data frame containing the merged results of GMTED data #' for each location within different radii. -#' @importFrom future plan -#' @importFrom future.apply future_lapply #' @importFrom rlang inject #' @export inject_gmted <- function(locs, variable, radii, injection, nthreads = 4L) { - future::plan(future::multicore, workers = nthreads) + # future::plan(future::multicore, workers = nthreads) radii_list <- split(radii, seq_along(radii)) radii_rep <- - future.apply::future_lapply( + # future.apply::future_lapply( + lapply( radii_list, function(r) { rlang::inject( @@ -354,7 +354,7 @@ inject_gmted <- function(locs, variable, radii, injection, nthreads = 4L) { radii_rep <- lapply(radii_rep, function(x) as.data.frame(x)) radii_join <- beethoven::reduce_merge(radii_rep, "site_id") - future::plan(future::sequential) + # future::plan(future::sequential) return(radii_join) } @@ -443,5 +443,5 @@ inject_nlcd <- ) { args_ext <- list(...) 
args_ext <- c(args_ext, list(year = year, radius = radius)) - inject_match(amadeus::calc_nlcd, args_ext) + inject_match(amadeus::calculate_nlcd, args_ext) } diff --git a/R/prediction.R b/R/prediction.R index 3d041897..a031a242 100644 --- a/R/prediction.R +++ b/R/prediction.R @@ -62,14 +62,32 @@ split_dates <- function( return(dates_split) } -#' Extract the first and last elements of a list +#' Extract the first and last elements of a vector +#' @param dates vector. A vector of dates. +#' @return a character vector of length 2 with +#' the first and last dates from the vector. +#' @export +#' @keywords Utility +fl_dates <- function( + dates +) { + first <- dates[1] + last <- dates[length(dates)] + return(c(first, last)) +} + + +#' Extract the first and last elements of a list of date vectors +#' +#' It flattens the list first, then extracts the first and the last dates. #' @param dates list. A list of dates. #' @return a character vector with the first and last dates from the list. #' @export #' @keywords Utility -fl_dates <- function( +fl_dates_flatten <- function( dates ) { + dates <- unlist(dates) first <- dates[1] last <- dates[length(dates)] return(c(first, last)) diff --git a/_targets.R b/_targets.R index e90e0308..9315f7cd 100755 --- a/_targets.R +++ b/_targets.R @@ -1,159 +1,63 @@ -library(targets) -library(tarchetypes) -library(future) -library(future.batchtools) -library(dplyr) -library( - beethoven, - lib.loc = "/ddn/gs1/home/manwareme/R/x86_64-pc-linux-gnu-library/4.3" +################################################################################ +############################## BEETHOVEN ############################# +##### Main file controlling the settings, options, and sourcing of targets +##### for the beethoven analysis pipeline. 
+.libPaths( + c("/mnt/lib-flex", .libPaths()) ) -library(tidymodels) -library(bonsai) -# library( -# torch, -# lib.loc = "/ddn/gs1/biotools/R/lib64/R/library" -# ) -Sys.setenv("LD_LIBRARY_PATH" = paste("/ddn/gs1/biotools/R/lib64/R/customlib", Sys.getenv("LD_LIBRARY_PATH"), sep = ":")) - -# replacing yaml file. -tar_config_set( - store = "/ddn/gs1/home/manwareme/beethoven/beethoven_targets" +############################# CONTROLLER ############################# +default_controller <- crew::crew_controller_local( + name = "default_controller", + workers = 4, + seconds_idle = 30 ) - -# maximum future exportable object size is set 50GB -# TODO: the maximum size error did not appear until recently -# and suddenly appeared. Need to investigate the cause. -# Should be removed after the investigation. -# options(future.globals.maxSize = 50 * 2^30) -options(future.globals.maxSize = 60 * 1024^3) # 60 GiB - - -generate_list_download <- FALSE - -arglist_download <- - set_args_download( - char_period = c("2018-01-01", "2022-12-31"), - char_input_dir = "input", - nasa_earth_data_token = NULL,#Sys.getenv("NASA_EARTHDATA_TOKEN"), - mod06_filelist = "inst/targets/mod06_links_2018_2022.csv", - export = generate_list_download, - path_export = "inst/targets/download_spec.qs" - ) - -generate_list_calc <- FALSE - -arglist_common <- - set_args_calc( - char_siteid = "site_id", - char_timeid = "time", - char_period = c("2018-01-01", "2022-12-31"), - num_extent = c(-126, -62, 22, 52), - char_user_email = paste0(Sys.getenv("USER"), "@nih.gov"), - export = generate_list_calc, - path_export = "inst/targets/calc_spec.qs", - char_input_dir = "/ddn/gs1/group/set/Projects/NRT-AP-Model/input" - ) - -tar_source("inst/targets/targets_initialize.R") -tar_source("inst/targets/targets_download.R") -tar_source("inst/targets/targets_calculate_fit.R") -tar_source("inst/targets/targets_calculate_predict.R") -tar_source("inst/targets/targets_baselearner.R") -tar_source("inst/targets/targets_metalearner.R") 
-tar_source("inst/targets/targets_predict.R") - - -# bypass option -Sys.setenv("BTV_DOWNLOAD_PASS" = "TRUE") - -# -# bind custom built GDAL -# Users should export the right path to the GDAL library -# by export LD_LIBRARY_PATH=.... command. - -# arglist_common is generated above -plan( - list( - tweak( - future.batchtools::batchtools_slurm, - template = "inst/targets/template_slurm.tmpl", - resources = - list( - memory = 8, - log.file = "slurm_run.log", - ncpus = 1, partition = "geo", ntasks = 1, - email = arglist_common$char_user_email, - error.file = "slurm_error.log" - ) - ), - multicore - ) +calc_controller <- crew::crew_controller_local( + name = "calc_controller", + workers = 20, + seconds_idle = 30 ) -# # invalidate any nodes older than 180 days: force running the pipeline -# tar_invalidate(any_of(tar_older(Sys.time() - as.difftime(180, units = "days")))) +############################## STORE ############################## +targets::tar_config_set(store = "/opt/_targets") +############################## OPTIONS ############################## -# # nullify download target if bypass option is set -if (Sys.getenv("BTV_DOWNLOAD_PASS") == "TRUE") { - target_download <- NULL -} -# targets options -# For GPU support, users should be aware of setting environment -# variables and GPU versions of the packages. 
-# TODO: check if the controller and resources setting are required -tar_option_set( +targets::tar_option_set( packages = c( - "beethoven", "amadeus", "chopin", "targets", "tarchetypes", - "data.table", "sf", "terra", "exactextractr", - #"crew", "crew.cluster", - "tigris", "dplyr", - "future.batchtools", "qs", "collapse", "bonsai", - "tidymodels", "tune", "rsample", "torch", "brulee", - "glmnet", "xgboost", - "future", "future.apply", "future.callr", "callr", - "stars", "rlang", "parallelly" + "beethoven", "targets", "tarchetypes", "dplyr", + "data.table", "sf", "crew", "crew.cluster", + "amadeus" ), - library = c("/ddn/gs1/group/set/isong-archive/r-libs"), + # add + library = c("/mnt/lib-flex", .libPaths()), repository = "local", - error = "abridge", + error = "continue", memory = "transient", format = "qs", storage = "worker", deployment = "worker", garbage_collection = TRUE, - seed = 202401L + seed = 202401L, + controller = crew::crew_controller_group( + default_controller, + calc_controller + ) ) -# should run tar_make_future() +########################### SOURCE TARGETS ########################### +targets::tar_source("inst/targets/targets_critical.R") +targets::tar_source("inst/targets/targets_initiate.R") +targets::tar_source("inst/targets/targets_download.R") +targets::tar_source("inst/targets/targets_aqs.R") +targets::tar_source("inst/targets/targets_calculate_fit.R") +############################## PIPELINE ############################## list( - target_init, - target_download, - target_calculate_fit, - target_baselearner, - target_metalearner, - target_calculate_predict#, - # target_predict, - # # documents and summary statistics - # targets::tar_target( - # summary_urban_rural, - # summary_prediction( - # grid_filled, - # level = "point", - # contrast = "urbanrural")) - # , - # targets::tar_target( - # summary_state, - # summary_prediction( - # grid_filled, - # level = "point", - # contrast = "state" - # ) - # ) + target_critical, + target_initiate, + 
#target_download, + target_aqs, + target_calculate_fit ) - -# targets::tar_visnetwork(targets_only = TRUE) -# END OF FILE diff --git a/_targets.yaml b/_targets.yaml index 68fc8bc1..decffc09 100644 --- a/_targets.yaml +++ b/_targets.yaml @@ -1,2 +1,2 @@ main: - store: /ddn/gs1/home/manwareme/beethoven/beethoven_targets + store: /opt/_targets diff --git a/inst/targets/base_function_dev_demo.r b/archive/base_function_dev_demo.r similarity index 99% rename from inst/targets/base_function_dev_demo.r rename to archive/base_function_dev_demo.r index 2e4e33d7..fc30d9fa 100644 --- a/inst/targets/base_function_dev_demo.r +++ b/archive/base_function_dev_demo.r @@ -38,7 +38,7 @@ system.time( jj profvis::profvis( -kx <- amadeus::calc_covariates( +kx <- amadeus::calculate_covariates( covariate = "nlcd", from = amadeus::process_covariates(covariate = "nlcd", path = "input/nlcd/raw", year = 2019L), locs = tar_read(sf_feat_proc_aqs_sites)[1:200,], @@ -164,7 +164,7 @@ kk <- amadeus::process_ecoregion(path = "input/ecoregions/raw") kx <- amadeus::calc_ecoregion(from = kk, locs = tar_read(sf_feat_proc_aqs_sites) |> terra::vect()) kl <- amadeus::process_covariates("ecoregions", path = "input/ecoregions/raw") -kz <- amadeus::calc_covariates("ecoregions", from = kl, locs = tar_read(sf_feat_proc_aqs_sites) |> terra::vect()) +kz <- amadeus::calculate_covariates("ecoregions", from = kl, locs = tar_read(sf_feat_proc_aqs_sites) |> terra::vect()) as.data.table(kz) diff --git a/inst/targets/calc_spec.qs b/archive/calc_spec.qs similarity index 100% rename from inst/targets/calc_spec.qs rename to archive/calc_spec.qs diff --git a/inst/targets/download_spec.qs b/archive/download_spec.qs similarity index 100% rename from inst/targets/download_spec.qs rename to archive/download_spec.qs diff --git a/init_targets_storage.sh b/archive/init_targets_storage.sh similarity index 100% rename from init_targets_storage.sh rename to archive/init_targets_storage.sh diff --git a/archive/narr_variables.csv 
b/archive/narr_variables.csv new file mode 100644 index 00000000..dd27f589 --- /dev/null +++ b/archive/narr_variables.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ddfcf0681fdfac2c53036479b7fbfc397c2986f627260bf9b03f4a71c4229d +size 470 diff --git a/inst/targets/run_impute.sh b/archive/run_impute.sh similarity index 100% rename from inst/targets/run_impute.sh rename to archive/run_impute.sh diff --git a/archive/run_interactive.sh b/archive/run_interactive.sh new file mode 100644 index 00000000..a4b92afe --- /dev/null +++ b/archive/run_interactive.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +export PATH=/ddn/gs1/tools/set/R432/bin/R:/ddn/gs1/tools/cuda11.8/bin:$PATH +export LD_LIBRARY_PATH=/ddn/gs1/tools/set/R432/lib64/R/lib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH +export R_LIBS_USER=/ddn/gs1/tools/set/R432/lib64/R/library:$R_LIBS_USER + +# Submit the pipeline as a background process with ./run.sh +# module load R # Uncomment if R is an environment module. +nohup nice -4 R CMD BATCH inst/targets/targets_start.R & \ No newline at end of file diff --git a/archive/run_slurm.sh b/archive/run_slurm.sh new file mode 100644 index 00000000..3401c1e4 --- /dev/null +++ b/archive/run_slurm.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +#SBATCH --job-name=beethoven +#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/output.out +#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/error.err +#SBATCH --mail-type=END,FAIL +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=2 +#SBATCH --mem-per-cpu=32g +#SBATCH --partition=geo +#SBATCH --mail-user=manwareme@nih.gov + +export PATH=/ddn/gs1/tools/set/R432/bin/R:/ddn/gs1/tools/cuda11.8/bin:$PATH +export LD_LIBRARY_PATH=/ddn/gs1/tools/set/R432/lib64/R/lib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH +export R_LIBS_USER=/ddn/gs1/tools/set/R432/lib64/R/library:$R_LIBS_USER + +# modify it into the proper directory path. 
and output/error paths in the +# # SBATCH directives +# USER_PROJDIR=/ddn/gs1/home/$USER/projects +USER_PROJDIR=/ddn/gs1/home/manwareme/beethoven/ + +nohup nice -4 /ddn/gs1/tools/set/R432/bin/Rscript $USER_PROJDIR/beethoven/inst/targets/targets_start.R diff --git a/setup_hook.sh b/archive/setup_hook.sh similarity index 100% rename from setup_hook.sh rename to archive/setup_hook.sh diff --git a/archive/targets_calculate_fit_DEPRICATED.R b/archive/targets_calculate_fit_DEPRICATED.R new file mode 100644 index 00000000..855521f8 --- /dev/null +++ b/archive/targets_calculate_fit_DEPRICATED.R @@ -0,0 +1,327 @@ + +# meta_run, located in tools/pipeline/pipeline_base_functions.R, +# is a function that returns a list of parameters for the pipeline +# for users' convenience and to make the pipeline less prone to errors. + +target_calculate_fit <- + list( + tarchetypes::tar_files_input( + name = file_prep_calc_args, + files = + list.files("inst/targets", pattern = "^calc*.*.qs$", full.names = TRUE), + format = "file", + iteration = "vector", + description = "Calculation arguments in QS file" + ) + , + targets::tar_target( + chr_iter_calc_features, + command = c("hms", "tri", "nei", + "ecoregions", "koppen", "population", "groads"), + iteration = "list", + description = "Base features" + ) + , + # "year" is included: tri, nlcd, nei + # "time" is included: hms + targets::tar_target( + chr_iter_calc_nasa, + command = c( + "mod11", "mod06", "mod13", + "mcd19_1km", "mcd19_5km", "mod09", "viirs" + ), + iteration = "list", + description = "MODIS/VIIRS features" + ) + , + targets::tar_target( + chr_iter_calc_geoscf, + command = c("geoscf_chm", "geoscf_aqc"), + iteration = "vector", + description = "GEOS-CF features" + ) + , + targets::tar_target( + name = chr_iter_calc_gmted_vars, + command = c( + "Breakline Emphasis", "Systematic Subsample", + "Median Statistic", "Minimum Statistic", + "Mean Statistic", "Maximum Statistic", + "Standard Deviation Statistic" + ), + iteration = "list", 
+ description = "GMTED features" + ) + , + targets::tar_target( + list_feat_calc_base, + command = + inject_calculate( + covariate = chr_iter_calc_features, + locs = sf_feat_proc_aqs_sites, + injection = loadargs(file_prep_calc_args, chr_iter_calc_features)), + pattern = cross(file_prep_calc_args, chr_iter_calc_features), + iteration = "list", + description = "Calculate base features (fit)", + priority = 1 + ) + , + targets::tar_target( + list_feat_calc_base_flat, + command = lapply(list_feat_calc_base, + function(x) { + if (length(x) == 1) { + x[[1]] + } else if ( + sum(grepl("light|medium|heavy", + sapply(x, \(t) names(t)))) == 3) { + xr <- lapply(x, \(dt) { + dta <- data.table::copy(dt) + dta <- dta[, time := as.character(time)] + return(dta) + }) + xrr <- Reduce( + function(x, y) { + collapse::join(x, y, on = c("site_id", "time"), how = "full") }, + xr) + return(xrr) + } else { + collapse::rowbind(x, use.names = TRUE, fill = TRUE) + } + }), + description = "Calculated base feature list (all dt) (fit)" + ) + , + targets::tar_target( + name = df_feat_calc_nlcd_params, + command = expand.grid( + year = loadargs(file_prep_calc_args, "nlcd")$domain, + radius = loadargs(file_prep_calc_args, "nlcd")$radius + ) %>% + split(1:nrow(.)), + iteration = "list", + description = "NLCD features" + ) + , + targets::tar_target( + name = list_feat_calc_nlcd, + command = inject_nlcd(year = df_feat_calc_nlcd_params$year, + radius = df_feat_calc_nlcd_params$radius, + from = amadeus::process_nlcd( + path = loadargs(file_prep_calc_args, "nlcd")$path, + year = df_feat_calc_nlcd_params$year + ), + locs = sf_feat_proc_aqs_sites, + locs_id = arglist_common$char_siteid, + nthreads = 10L, + mode = "exact", + max_cells = 3e7 + ), + pattern = cross(file_prep_calc_args, df_feat_calc_nlcd_params), + iteration = "list", + description = "Calculate NLCD features (fit)", + resources = set_slurm_resource( + ntasks = 1, ncpus = 10, memory = 8 + ) + ) + , + targets::tar_target( + name = 
dt_feat_calc_nlcd, + command = + list_feat_calc_nlcd %>% + collapse::rowbind(fill = TRUE) %>% + collapse::funique() %>% + collapse::pivot( + ids = c(arglist_common$char_siteid, arglist_common$char_timeid), + values = names(.)[!names(.) %in% c(arglist_common$char_siteid, arglist_common$char_timeid)] + ) %>% + .[!is.na(.[["value"]]),] %>% + collapse::pivot( + ids = c("site_id", "time"), + values = c("value"), + how = "wider" + ), + description = "NLCD feature list (all dt) (fit)" + ) + , + targets::tar_target( + list_feat_calc_nasa, + command = + inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = loadargs(file_prep_calc_args, chr_iter_calc_nasa)), + pattern = cross(file_prep_calc_args, chr_iter_calc_nasa), + resources = set_slurm_resource( + ntasks = 1, ncpus = arglist_common$nthreads_nasa, memory = 8 + ), + iteration = "list", + description = "Calculate MODIS/VIIRS features (fit)" + ) + , + targets::tar_target( + list_feat_calc_geoscf, + inject_geos( + locs = sf_feat_proc_aqs_sites, + injection = loadargs(file_prep_calc_args, chr_iter_calc_geoscf) + ), + pattern = cross(file_prep_calc_args, chr_iter_calc_geoscf), + iteration = "list", + resources = set_slurm_resource( + ntasks = 1, ncpus = arglist_common$nthreads_geoscf, memory = 4 + ), + description = "Calculate GEOS-CF features (fit)" + ) + , + targets::tar_target( + name = list_feat_calc_gmted, + command = inject_gmted( + locs = sf_feat_proc_aqs_sites, + variable = chr_iter_calc_gmted_vars, + radii = c(0, 1e3, 1e4, 5e4), + injection = loadargs(file_prep_calc_args, "gmted") + ), + iteration = "list", + pattern = cross(file_prep_calc_args, chr_iter_calc_gmted_vars), + resources = set_slurm_resource( + ntasks = 1, ncpus = arglist_common$nthreads_gmted, memory = 8 + ), + description = "Calculate GMTED features (fit)" + ) + , + targets::tar_target( + name = list_feat_calc_narr, + command = #rlang::inject( + par_narr( + domain = loadargs(file_prep_calc_args, "narr")$domain, + path = 
loadargs(file_prep_calc_args, "narr")$path, + date = arglist_common$char_period, + locs = sf_feat_proc_aqs_sites, + nthreads = arglist_common$nthreads_narr + ) + , + pattern = map(file_prep_calc_args), + iteration = "list", + resources = set_slurm_resource( + ntasks = 1, ncpus = arglist_common$nthreads_narr, memory = 20 + ), + description = "Calculate NARR features (fit)" + ) + , + # targets::tar_target( + # name = list_feat_calc_narr_apptainer, + # command = #rlang::inject( + # par_narr_appt( + # domain = loadargs(file_prep_calc_args, "narr")$domain_appt, + # period = arglist_common$char_period + # ), + # pattern = map(file_prep_calc_args), + # iteration = "list", + # resources = set_slurm_resource( + # ntasks = 1, ncpus = 2, memory = 40 + # ) + # ) + # , + targets::tar_target( + dt_feat_calc_gmted, + command = reduce_merge(list_feat_calc_gmted, "site_id"), + description = "data.table of GMTED features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_nasa, + command = reduce_merge(list_feat_calc_nasa), + description = "data.table of MODIS/VIIRS features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_geoscf, + command = reduce_merge(list_feat_calc_geoscf), + description = "data.table of GEOS-CF features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_narr, + command = reduce_merge(list_feat_calc_narr, by = NULL), + description = "data.table of NARR features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_date, + command = + Reduce( + post_calc_autojoin, + list( + dt_feat_calc_narr, + dt_feat_calc_geoscf, + dt_feat_calc_nasa + ) + ), + description = "data.table of all daily features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_base, + command = + Reduce( + post_calc_autojoin, + c( + list(dt_feat_proc_aqs_sites_time), + list_feat_calc_base_flat, + list(dt_feat_calc_gmted), + list(dt_feat_calc_nlcd) + ) + ), + description = "Base features with PM2.5" + ) + , + targets::tar_target( + dt_feat_calc_design, + command = + post_calc_autojoin( + 
dt_feat_calc_base, + dt_feat_calc_date, + year_start = as.integer(substr(arglist_common$char_period[1], 1, 4)), + year_end = as.integer(substr(arglist_common$char_period[2], 1, 4)) + ), + description = "data.table of all features with PM2.5" + ) + # , + # tar_target( + # dt_feat_fit_pm, + # post_calc_join_pm25_features( + # df_pm = sf_feat_proc_aqs_pm25, + # df_covar = dt_feat_fit_x, + # locs_id = "site_id", + # time_id = "time" + # ), + # description = "data.table of all features with PM2.5" + # ) + , + targets::tar_target( + dt_feat_calc_cumulative, + command = append_predecessors( + path_qs = "output/qs", + period_new = arglist_common$char_period, + input_new = dt_feat_calc_design, + nthreads = arglist_common$nthreads_append + ), + description = "Cumulative feature calculation", + resources = set_slurm_resource( + ntasks = 1, ncpus = arglist_common$nthreads_append, memory = 16 + ) + ), + targets::tar_target( + dt_feat_calc_imputed, + command = + impute_all( + dt_feat_calc_cumulative, + period = arglist_common$char_period, + nthreads_dt = arglist_common$nthreads_impute, + nthreads_collapse = arglist_common$nthreads_impute, + nthreads_imputation = arglist_common$nthreads_impute), + description = "Imputed features + lags", + resources = set_slurm_resource( + ntasks = 1, ncpus = arglist_common$nthreads_impute, memory = 8 + ) + ) + ) diff --git a/archive/targets_download_DEPRICATED.R b/archive/targets_download_DEPRICATED.R new file mode 100644 index 00000000..0392e1ba --- /dev/null +++ b/archive/targets_download_DEPRICATED.R @@ -0,0 +1,59 @@ +target_download <- + list( + tarchetypes::tar_files_input( + name = file_prep_download_args, + files = + list.files("inst/", pattern = "download_spec.qs$", full.names = TRUE), + format = "file", + iteration = "vector", + description = "Download arguments in QS file" + ) + , + targets::tar_target( + char_rawdir_download, + command = + sprintf("dir_input_%s", + c( + "aqs", + "nei", + "narr_monolevel", "narr_p_levels", + 
paste0("modis_", + c("mod11", "mod13", "mcd19", "mod06", "mod09") + ), + "viirs", + "nlcd", + "ecoregions", + "koppen", + "gmted", + "population", + "groads", + "hms", + "tri", + "geoscf_chm", "geoscf_aqc" + # add covariate lists below if necessary + ) + ), + iteration = "vector" + ) + , + # each dataset is branched + targets::tar_target( + lgl_rawdir_download, + command = feature_raw_download( + path = file_prep_download_args, + dataset_name = char_rawdir_download + ), + pattern = cross( + file_prep_download_args, + char_rawdir_download + ), + iteration = "list", + resources = tar_resources( + crew = tar_resources_crew( + controller = "download_controller" + ) + ) + ) + ) +## Status up to here is stored in meta as hash and rds/qs files +## How do we know if downloaded files were exactly what we expected? \ No newline at end of file diff --git a/inst/targets/template_slurm.tmpl b/archive/template_slurm.tmpl similarity index 85% rename from inst/targets/template_slurm.tmpl rename to archive/template_slurm.tmpl index 72405605..443a34fa 100644 --- a/inst/targets/template_slurm.tmpl +++ b/archive/template_slurm.tmpl @@ -42,7 +42,7 @@ error.file = fs::path_expand(resources$error.file) ## this line can be ignored if users' environments are not restricted ## such that GDAL 3.3.3+ is equipped as default # the profile below includes environment variables to link custom build GDAL 3.8.5 -. 
/ddn/gs1/home/songi2/.profile +# custom build GDAL now in /ddn/gs1/tools/set/R432/lib64/R/lib ## Export value of DEBUGME environemnt var to slave # export DEBUGME=<%= Sys.getenv("DEBUGME") %> @@ -53,5 +53,4 @@ error.file = fs::path_expand(resources$error.file) ## Run R: ## we merge R output with stdout from SLURM, which gets then logged via --output option -# Rscript -e '.libPaths(c("/ddn/gs1/biotools/R/lib64/R/custompkg", "/ddn/gs1/home/songi2/r-libs", .libPaths()));batchtools::doJobCollection("<%= uri %>")' -Rscript -e '.libPaths(c("/ddn/gs1/biotools/R/lib64/R/custompkg", "/ddn/gs1/home/manwareme/R/x86_64-pc-linux-gnu-library/4.3", "/ddn/gs1/home/songi2/r-libs", .libPaths()));batchtools::doJobCollection("<%= uri %>")' +/ddn/gs1/tools/set/R432/bin/Rscript -e '.libPaths(c("/ddn/gs1/tools/set/R432/lib64/R/library"));batchtools::doJobCollection("<%= uri %>")' diff --git a/container/beethoven_dl_calc.def b/container/beethoven_dl_calc.def new file mode 100644 index 00000000..29c869e2 --- /dev/null +++ b/container/beethoven_dl_calc.def @@ -0,0 +1,46 @@ +BootStrap: docker +From: rocker/geospatial:latest + +%post + # Update package list + apt-get update + + # Install locales and generate the necessary locale + apt-get install -y locales + locale-gen en_US.UTF-8 + + # Install fonts for Unicode support + apt-get install -y fonts-dejavu fonts-liberation fonts-noto \ + fonts-unifont + + # Install SSL certificates + apt-get install -y ca-certificates + + # Set locale for the environment + echo "LANG=en_US.UTF-8" >> /etc/default/locale + echo "LC_ALL=en_US.UTF-8" >> /etc/default/locale + export LANG=en_US.UTF-8 + export LC_ALL=en_US.UTF-8 + + # Create directories + mkdir /pipeline + mkdir /input + mkdir /opt/_targets + + # Install R packages + Rscript -e "install.packages(c('pak', 'targets', 'tarchetypes', 'crew', \ + 'crew.cluster', 'testthat', 'tidymodels', 'bonsai'))" + Rscript -e "pak::pak('NIEHS/amadeus')" + Rscript -e "pak::pak('NIEHS/beethoven@isong-exp')" + 
+%environment + # Set locale for the container environment + export LANG=en_US.UTF-8 + export LC_ALL=en_US.UTF-8 + export TERM=xterm-256color + +%runscript + +%labels + Description basic geospatial with targets and crew + Notes unicode text support so the target progress prints nicely diff --git a/container/build_dl_calc.sh b/container/build_dl_calc.sh new file mode 100755 index 00000000..b66b1a0b --- /dev/null +++ b/container/build_dl_calc.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# usage: sh build_dl_calc.sh (run from within the container/ directory; takes no arguments) +# builds beethoven_dl_calc.sif in the current directory from beethoven_dl_calc.def +# file permissions of the built image are left at their defaults (no chmod is run) + +# Recommended to run this script interactively via `sh build_dl_calc.sh` +apptainer build --fakeroot beethoven_dl_calc.sif beethoven_dl_calc.def \ No newline at end of file diff --git a/container/run_container_dl_calc.sh b/container/run_container_dl_calc.sh new file mode 100644 index 00000000..5d0909bb --- /dev/null +++ b/container/run_container_dl_calc.sh @@ -0,0 +1,23 @@ +#!/bin/bash +#SBATCH --job-name=download_calc +#SBATCH --partition=geo +#SBATCH --mem=128G +#SBATCH --cpus-per-task=4 +#SBATCH --ntasks=16 +#SBATCH --output=../slurm_messages/slurm-%j.out +#SBATCH --error=../slurm_messages/slurm-%j.err +#SBATCH --mail-user=kyle.messier@nih.gov +#SBATCH --mail-type=ALL + + +# Run the container +# NOTE(review): binds below use $PWD without "../" (unlike run_dl_calc_local_tests.sh), so submit from the repo root with the .sif there - confirm +apptainer exec \ + --bind $PWD/inst:/pipeline \ + --bind $PWD/input:/input \ + --bind $PWD/_targets:/opt/_targets \ + --bind $PWD:/mnt \ + beethoven_dl_calc.sif \ + Rscript /mnt/run.R + + diff --git a/container/run_dl_calc_local_tests.sh b/container/run_dl_calc_local_tests.sh new file mode 100644 index 00000000..72c81a20 --- /dev/null +++ b/container/run_dl_calc_local_tests.sh @@ -0,0 +1,23 @@ +#!/bin/bash +#SBATCH --job-name=dl_calc_tests +#SBATCH --partition=geo +#SBATCH --mem=128G +#SBATCH --cpus-per-task=4 +#SBATCH
--ntasks=16 +#SBATCH --output=../slurm_messages/slurm-%j.out +#SBATCH --error=../slurm_messages/slurm-%j.err +#SBATCH --mail-user=kyle.messier@nih.gov +#SBATCH --mail-type=ALL + + + + +apptainer exec \ + --bind $PWD/../inst:/pipeline \ + --bind $PWD/../input:/input \ + --bind $PWD/../_targets:/opt/_targets \ + --bind $PWD/..:/mnt \ + beethoven_dl_calc.sif \ + Rscript /mnt/tests/testthat/test_download.R + + diff --git a/debug_container.sh b/debug_container.sh new file mode 100755 index 00000000..46812799 --- /dev/null +++ b/debug_container.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# usage: sbatch debug_container.sh TARGET_NAME (reads one stored target inside the container) +#SBATCH --job-name=beethoven +#SBATCH --mail-user=manwareme@nih.gov +#SBATCH --mail-type=END,FAIL +#SBATCH --partition=geo +#SBATCH --ntasks=1 +#SBATCH --mem=100G +#SBATCH --cpus-per-task=50 +#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.err +#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.out +# sbatch stops reading #SBATCH directives at the first command, so the argument is read only after them +DEBUG_TARGET="${1:?usage: debug_container.sh TARGET_NAME}" + +# run pipeline in the container +apptainer exec \ + --bind $PWD:/mnt \ + --bind $PWD/inst:/inst \ + --bind /ddn:/input \ + --bind $PWD/_targets:/opt/_targets \ + beethoven_dl_calc.sif \ + Rscript --no-init-file -e "targets::tar_read('$DEBUG_TARGET')" + # Rscript --no-init-file -e "targets::tar_meta(fields = error, complete_only = TRUE)" + # Rscript --no-init-file -e "sf::st_write(targets::tar_read('$DEBUG_TARGET'), '/mnt/sf_base.gpkg')" + +# run interactive R session in the container +# apptainer exec --bind $PWD/inst:/inst --bind /ddn/gs1/group/set/Projects/NRT-AP-Model/input:/input --bind $PWD:/mnt beethoven_dl_calc.sif R \ No newline at end of file diff --git a/inst/targets/mod06_links_2018_2022.csv b/inst/extdata/mod06_links_2018_2022.csv similarity index 100% rename from inst/targets/mod06_links_2018_2022.csv rename to inst/extdata/mod06_links_2018_2022.csv diff --git a/inst/targets/prediction_grid.qs b/inst/extdata/prediction_grid.qs similarity index 100% rename from inst/targets/prediction_grid.qs rename to
inst/extdata/prediction_grid.qs diff --git a/inst/extdata/qs_feat_calc_ecoregions.qs b/inst/extdata/qs_feat_calc_ecoregions.qs new file mode 100644 index 00000000..d620d1d6 Binary files /dev/null and b/inst/extdata/qs_feat_calc_ecoregions.qs differ diff --git a/inst/targets/README.md b/inst/targets/README.md index 8c2f5a56..ada7b60b 100644 --- a/inst/targets/README.md +++ b/inst/targets/README.md @@ -1,23 +1,37 @@ # Developer's guide ## Preamble -The objective of this document is to provide developers with the current implementation of `beethoven` pipeline as of July 20, 2024 (version 0.3.7) +The objective of this document is to provide developers with the current implementation of `beethoven` pipeline as of October 9, 2024 (version 0.4.2) We assume the potential users have basic knowledge of `targets` and `tarchetypes` packages as well as functional programming and metaprogramming. It is recommended to read Advanced R (by Hadley Wickham)'s chapters for these topics. ## Before running the pipeline -For the future release and tests on various environments, one should check several lines across R and shell script files: +For developing and running the pipeline, users should check several lines across R and shell script files: -- Shell script - - `/tar_run.sh`: all system variables including `PATH` and `LD_LIBRARY_PATH` to align with the current system environment. The lines in the provided file are set for NIEHS HPC. - - `inst/targets/run.sh`: project directory path - - `inst/targets/run_impute.sh` (if necessary when the imputation target is dispatched separately): project directory path -- R script - - `/targets.R`: Lines 10-12, `tar_config_set(store = ...)` should be reviewed if it is set properly not to overwrite successfully run targets. - - `/targets.R`: `set_args_download` and `set_args_calc` functions, i.e., `char_input_dir` argument and `char_period`. 
- - `/targets.R`: `library` argument value in `tar_option_set` to match the current system environment +- Container + - `/beethoven_dl_calc.sif`: The container image is not hosted on GitHub due to the large file size, so the image must be built by each user. The definition file can be found at `/container/beethoven_dl_calc.def`, and the container image can be built by running `sh build_dl_calc.sh` **from within the `/container` folder**. Once the container image is built, it can be copied or moved to the repository root for running the pipeline (`mv beethoven_dl_calc.sif ../` or `cp beethoven_dl_calc.sif ../`). +- Shell + - `/run_container.sh`: This file controls SLURM submission details (e.g., `--mem`, `--cpus-per-task`) and **which local directories are mounted to the container**. Local directories which must be explicitly mounted to the container at run time are (1) the group data store (line 17) and (2) the local targets store (line 18). For ongoing development, a local directory is used for the targets store. +- R + - `/_targets.R`: Ensure the sum of controller-specific workers (line 9 + line 14) is equal to the total number of workers requested in `/run_container.sh` (line 9). + - `/_targets.R`: Ensure the targets store (line 18) matches the mount location in `/run_container.sh` (line 18 **after the colon**). + - `/inst/targets/targets_critical.R`: Critical targets are those which will require changes between users (`chr_nasa_token`), for development (`num_dates_split`), manual updates (`/inst/extdata/mod06_links_2018_2022.csv` called via `chr_mod06_links`), and mounted data path (`chr_input_dir`). **Most importantly**, critical target `chr_daterange` controls the entire temporal range of the downstream pipeline. Time-related specifications (dates, months, years, Julian dates, etc.) are defined relative to `chr_daterange`.
+ +## Refactor (October 9, 2024) +Targets have been refactored to decrease reliance on "injection" functions, which obscured the source-specific inputs and relied on an external (non-target) file. Refactoring from generalized injection functions to source-specific functions adds code, but is easier to follow, debug, and develop. To compare original to refactored code, see `/archive/targets_download_DEPRICATED.R` and `/inst/targets/targets_download.R`. + +## Ongoing development +- `dt_feat_calc_ecoregions` + - Calculating ecoregion covariates with the `amadeus::process_ecoregion` and `amadeus::calc_ecoregion` functions returns errors. See [discussion](https://github.com/orgs/NIEHS/projects/8/views/1?layout=board&pane=issue&itemId=82653782&issue=NIEHS%7Cbeethoven%7C376). + - For development purposes, ecoregion covariates have been manually calculated for all AQS sites from 2018 to 2024. Sites are filtered to relevant sites after import. + - `/inst/targets/targets_calculate_fit.R` (line 746) + +- `download_[modis]` + - The current container definition file does not support SSL certificate verification, which is required for downloading MODIS/VIIRS `.hdf` files. See [discussion](https://github.com/orgs/NIEHS/projects/8/views/1?layout=board&pane=issue&itemId=82627613&issue=NIEHS%7Cbeethoven%7C375). + - For development purposes, the `download_modis_clean` target deletes all corrupted downloaded data files to ensure downstream targets complete. + - `/inst/targets/targets_download.R` (line 234) ## Basic structure of branches We will call "grand target" as a set of branches if any branching technique is applied at a target.
diff --git a/inst/targets/narr_variables.csv b/inst/targets/narr_variables.csv deleted file mode 100644 index 2181c01a..00000000 --- a/inst/targets/narr_variables.csv +++ /dev/null @@ -1,25 +0,0 @@ -"dirs" -"input/narr/air.sfc" -"input/narr/albedo" -"input/narr/apcp" -"input/narr/dswrf" -"input/narr/evap" -"input/narr/hcdc" -"input/narr/hpbl" -"input/narr/lcdc" -"input/narr/lhtfl" -"input/narr/mcdc" -"input/narr/omega" -"input/narr/pr_wtr" -"input/narr/prate" -"input/narr/pres.sfc" -"input/narr/shtfl" -"input/narr/shum" -"input/narr/snowc" -"input/narr/soilm" -"input/narr/tcdc" -"input/narr/ulwrf.sfc" -"input/narr/uwnd.10m" -"input/narr/vis" -"input/narr/vwnd.10m" -"input/narr/weasd" diff --git a/inst/targets/targets_aqs.R b/inst/targets/targets_aqs.R new file mode 100644 index 00000000..8e9f4af1 --- /dev/null +++ b/inst/targets/targets_aqs.R @@ -0,0 +1,50 @@ +################################################################################ +##### Import US EPA AQS data +target_aqs <- + list( + ########################### AQS ########################### + targets::tar_target( + sf_feat_proc_aqs_sites, + command = { + # input: daily_88101_YYYY.csv files under <char_input_dir>/aqs/data_files; returned as sf (mode = "location") + amadeus::process_aqs( + path = list.files( + path = file.path( + arglist_common$char_input_dir, + "aqs", + "data_files" + ), + pattern = "daily_88101_[0-9]{4}\\.csv$", + full.names = TRUE + ), + date = arglist_common$char_period, + mode = "location", + return_format = "sf" + ) + }, + description = "AQS sites" + ) + , + targets::tar_target( + dt_feat_proc_aqs_sites_time, + command = { + # same input files as above; returned as a data.table carrying the requested data fields + amadeus::process_aqs( + path = list.files( + path = file.path( + arglist_common$char_input_dir, + "aqs", + "data_files" + ), + pattern = "daily_88101_[0-9]{4}\\.csv$", + full.names = TRUE + ), + date = arglist_common$char_period, + mode = "available-data", + data_field = c("Arithmetic.Mean", "Event.Type"), + return_format = "data.table" + ) + }, + description = "AQS sites with time" + ) + ) diff --git a/inst/targets/targets_arglist.R
b/inst/targets/targets_arglist.R deleted file mode 100644 index d4c7a4b6..00000000 --- a/inst/targets/targets_arglist.R +++ /dev/null @@ -1,519 +0,0 @@ -library(dplyr) -library(qs) -# basic idea: a list that provides -# anything needed for the pipeline -# ultimately the name "dataset" is a key for iteration to make -# the outermost pipeline look as simple as possible - -# for reference, full list of parameters in amadeus::process_* -# and amadeus::calc_*. -# This list will be useful for entering parameters by rlang::inject. -# amadeusArgs <- list( -# path = NULL, -# date = NULL, -# variable = NULL, -# year = NULL, -# county = NULL, -# variables = NULL, -# from = NULL, -# locs = NULL, -# locs_id = NULL, -# radius = NULL, -# fun = NULL, -# name_extracted = NULL, -# fun_summary = NULL, -# max_cells = NULL, -# preprocess = NULL, -# name_covariates = NULL, -# subdataset = NULL, -# nthreads = NULL, -# package_list_add = NULL, -# export_list_add = NULL, -# sedc_bandwidth = NULL, -# target_fields = NULL -# ) - -library(beethoven) - -## This file is to define required parameters that are reused -## throughout the pipeline. -## First, let's start with the list of objects generated in this code. -## arglist_common: common information for running the pipeline -## site identifiers, time identifier, time period of computation, -## spatial extent of computation, and an email address (for receiving -## computing node status). - - -#' Set arguments for the calculation process -#' -#' This function sets the arguments for the calculation process. It takes several parameters -#' including site ID, time ID, time period, extent, user email, export path, and input path. -#' It returns a list of arguments for the calculation process. -#' -#' @param char_siteid Character string specifying the site ID. -#' Default is "site_id". -#' @param char_timeid Character string specifying the time ID. -#' Default is "time". -#' @param char_period Character vector specifying the time period. 
-#' Default is c("2018-01-01", "2022-10-31"). -#' @param extent Numeric vector specifying the extent. -#' Default is c(-126, -62, 22, 52). -#' @param user_email Character string specifying the user email. -#' Default is the current user's email. -#' @param path_export Character string specifying the export path. -#' Default is "inst/targets/punchcard_calc.qs". -#' If `NULL`, a list object "arglist_common" is exported to the global -#' environment and returns a list of arguments for the calculation process. -#' @param path_input Character string specifying the input path. -#' Default is "input". -#' -#' @return A list of arguments for the calculation process. -#' @importFrom qs qsave -#' @export -set_args_calc <- - function( - char_siteid = "site_id", - char_timeid = "time", - char_period = c("2018-01-01", "2022-10-31"), - extent = c(-126, -62, 22, 52), - user_email = paste0(Sys.getenv("USER"), "@nih.gov"), - path_export = "inst/targets/punchcard_calc.qs", - path_input = "input" - ) { - list_common <- - list( - char_siteid = char_siteid, - char_timeid = char_timeid, - char_period = char_period, - extent = num_extent, - user_email = char_user_email - ) - ain <- function(x) file.path(path_input, x) - list_paths <- - list( - mod11 = load_modis_files(ain("modis/raw/61/MOD11A1"), date = arglist_common$char_period), - mod06 = load_modis_files(ain("modis/raw/61/MOD06_L2"), date = arglist_common$char_period), - mod09 = load_modis_files(ain("modis/raw/61/MOD09GA"), date = arglist_common$char_period), - mcd19 = load_modis_files(ain("modis/raw/61/MCD19A2"), date = arglist_common$char_period), - mod13 = load_modis_files(ain("modis/raw/61/MOD13A2"), date = arglist_common$char_period), - viirs = load_modis_files(ain("modis/raw/5000/VNP46A2"), "h5$", date = arglist_common$char_period) - ) - - list_proccalc <- - list( - aqs = list(path = ain("aqs")), - mod11 = list(from = list_paths$mod11, - name_covariates = sprintf("MOD_SFCT%s_0_", c("D", "N")), - subdataset = "^LST_", - nthreads 
= 14L, - radius = c(1e3, 1e4, 5e4)), - mod06 = list(from = list_paths$mod06, - name_covariates = sprintf("MOD_CLCV%s_0_", c("D", "N")), - subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), - nthreads = 14L, - preprocess = amadeus::process_modis_swath, - radius = c(1e3, 1e4, 5e4)), - mod09 = list(from = list_paths$mod09, - name_covariates = sprintf("MOD_SFCRF_%d_", seq(1, 7)), - subdataset = "^sur_refl_", - nthreads = 14L, - radius = c(1e3, 1e4, 5e4)), - mcd19_1km = list(from = list_paths$mcd19, - name_covariates = sprintf("MOD_AD%dTA_0_", c(4, 5)), - subdataset = "^Optical_Depth", - nthreads = 14L, - radius = c(1e3, 1e4, 5e4)), - mcd19_5km = list(from = list_paths$mcd19, - name_covariates = sprintf("MOD_%sAN_0_", c("CSZ", "CVZ", "RAZ", "SCT", "GLN")), - subdataset = "cos|RelAZ|Angle", - nthreads = 14L, - radius = c(1e3, 1e4, 5e4)), - mod13 = list(from = list_paths$mod13, - name_covariates = "MOD_NDVIV_0_", - subdataset = "(NDVI)", - nthreads = 14L, - radius = c(1e3, 1e4, 5e4)), - viirs = list(from = list_paths$viirs, - name_covariates = "MOD_LGHTN_0_", - subdataset = 3, - nthreads = 14L, - preprocess = amadeus::process_bluemarble, - radius = c(1e3, 1e4, 5e4)), - geoscf_aqc = list(date = list_common$char_period, - path = ain("geos/aqc_tavg_1hr_g1440x721_v1")), - geoscf_chm = list(date = list_common$char_period, - path = ain("geos/chm_tavg_1hr_g1440x721_v1")), - # base class covariates start here - hms = list(path = ain("HMS_Smoke/data"), - date = list_common$char_period, - covariate = "hms", - domain = c("Light", "Medium", "Heavy"), - nthreads = 3L, - domain_name = "variable"), - gmted = list( - path = ain("gmted"), - covariate = "gmted" - ), - nei = list( - domain = c(2017, 2020), - domain_name = "year", - path = ain("nei"), - covariate = "nei" - ), - tri = list( - domain = seq(2018, 2022), - domain_name = "year", - path = ain("tri"), - radius = c(1e3, 1e4, 5e4), - covariate = "tri", - nthreads = 5L - ), - nlcd = list( - domain = c(2019, 2021), - 
domain_name = "year", - path = ain("nlcd/raw"), - covariate = "nlcd", - radius = c(1e3, 1e4, 5e4), - nthreads = 2L, - max_cells = 1e8 - ), - koppen = list(path = ain("koppen_geiger/raw/Beck_KG_V1_present_0p0083.tif"), - covariate = "koppen", - nthreads = 1L), - ecoregions = list(path = ain("ecoregions/raw/us_eco_l3_state_boundaries.shp"), - covariate = "ecoregions", - nthreads = 1L), - narr = list( - path = ain("narr"), - covariate = "narr", - domain_reduced = c("air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", - "hpbl", "lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", - "pres.sfc", "shtfl", "snowc", "soilm", - "tcdc", "ulwrf.sfc", "uwnd.10m", "vis", "vwnd.10m", "weasd"), - domain_appt = c("prate", "shum"), - domain = c("air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", - "hpbl", "lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", - "prate", "pres.sfc", "shtfl", "shum", "snowc", "soilm", - "tcdc", "ulwrf.sfc", "uwnd.10m", "vis", "vwnd.10m", "weasd"), - domain_name = "variable", - date = list_common$char_period, - process_function = process_narr2, - calc_function = calc_narr2, - nthreads = 24L - ), - groads = list( - path = ain("sedac_groads/groads-v1-americas-gdb/gROADS-v1-americas.gdb"), - covariate = "groads", - radius = c(1e3, 1e4, 5e4), - nthreads = 3L), - population = list( - path = ain("sedac_population/gpw_v4_population_density_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec.tif"), - covariate = "population", fun = "mean", - radius = c(1e3, 1e4, 5e4), - nthreads = 3L - ) - ) - - attr(list_proccalc, "description") <- - tibble::tribble( - ~dataset, ~description, - "mod11", "MODIS Land Surface Temperature Day/Night", - "mod06", "MODIS Cloud Fraction Day/Night", - "mod09", "MODIS Surface Reflectance", - "mcd19_1km", "MCD19A2 1km", - "mcd19_5km", "MCD19A2 5km", - "mod13", "MODIS Normalized Difference Vegetation Indexß", - "viirs", "VIIRS Nighttime Lights", - "hms", "NOAA Hazard Mapping System Smoke", - "geoscf_aqc", "GEOS-CF AQC", - "geoscf_chm", "GEOS-CF CHM", 
- "gmted", "GMTED elevation", - "nei", "National Emission Inventory", - "tri", "Toxic Release Inventory", - "nlcd", "National Land Cover Database", - "koppen", "Koppen-Geiger Climate Classification", - "ecoregions", "EPA Ecoregions", - "narr", "NARR", - "groads", "SEDAC Global Roads", - "population", "SEDAC Population Density" - ) - if (is.null(path_export)) { - assign("arglist_common", list_common, envir = .GlobalEnv) - return(list_proccalc) - } else { - qs::qsave( - list_proccalc, - path_export - ) - return(list_common) - } - } - - -arglist_common <- - list( - char_siteid = "site_id", - char_timeid = "time", - char_period = c("2018-01-01", "2022-10-31"), - extent = c(-126, -62, 22, 52), - user_email = Sys.getenv("USER_EMAIL") - ) - -## arglist_paths will include large lists of MODIS/VIIRS product files -## we keep the list of files since the functions for the products -## operate at file paths to automate preprocessing and calculation. -## by using load_modis_files macro, users can predefine the scope of -## file lists, which will result in reducing the number of function calls -## in the main pipeline run. -# arglist_paths <- -# list( -# mod11 = load_modis_files("input/modis/raw/61/MOD11A1", date = arglist_common$char_period), -# mod06 = load_modis_files("input/modis/raw/61/MOD06_L2", date = arglist_common$char_period), -# mod09 = load_modis_files("input/modis/raw/61/MOD09GA", date = arglist_common$char_period), -# mcd19 = load_modis_files("input/modis/raw/61/MCD19A2", date = arglist_common$char_period), -# mod13 = load_modis_files("input/modis/raw/61/MOD13A2", date = arglist_common$char_period), -# viirs = load_modis_files("input/modis/raw/5000/VNP46A2", "h5$", date = arglist_common$char_period) -# ) - -# arglist_proccalc: calculation parameters by raw dataset -# This named and nested list object includes raw data names -# with lists inside. The inner list will be passed to rlang::inject -# to fit in the generalized workflow for most covariate groups. 
-# Names in the inner lists are usually present in amadeus::process_* -# or amadeus::calc_* functions. Otherwise, some modified functions -# will have their own arguments. Users should know what arguments -# are used in each function in the pipeline and make sure that the -# arguments are available in the target functions to which they are -# passed. Most amadeus functions have the same arguments, but -# a potential issue is that __all__ functions have ellipsis (...) arguments. -# This means that the functions can take any arguments, but the functions -# in the pipeline may not be able to handle them or it could entail -# unexpected consequences or errors. Always consult the original -# function documentation to know what arguments are available and -# the correct format of arguments. -# arglist_proccalc <- -# list( -# aqs = list(path = "input/aqs"), -# mod11 = list(from = arglist_paths$mod11, -# name_covariates = sprintf("MOD_SFCT%s_0_", c("D", "N")), -# subdataset = "^LST_", -# nthreads = 14L, -# radius = c(1e3, 1e4, 5e4)), -# mod06 = list(from = arglist_paths$mod06, -# name_covariates = sprintf("MOD_CLCV%s_0_", c("D", "N")), -# subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), -# nthreads = 14L, -# preprocess = amadeus::process_modis_swath, -# radius = c(1e3, 1e4, 5e4)), -# mod09 = list(from = arglist_paths$mod09, -# name_covariates = sprintf("MOD_SFCRF_%d_", seq(1, 7)), -# subdataset = "^sur_refl_", -# nthreads = 14L, -# radius = c(1e3, 1e4, 5e4)), -# mcd19_1km = list(from = arglist_paths$mcd19, -# name_covariates = sprintf("MOD_AD%dTA_0_", c(4, 5)), -# subdataset = "^Optical_Depth", -# nthreads = 14L, -# radius = c(1e3, 1e4, 5e4)), -# mcd19_5km = list(from = arglist_paths$mcd19, -# name_covariates = sprintf("MOD_%sAN_0_", c("CSZ", "CVZ", "RAZ", "SCT", "GLN")), -# subdataset = "cos|RelAZ|Angle", -# nthreads = 14L, -# radius = c(1e3, 1e4, 5e4)), -# mod13 = list(from = arglist_paths$mod13, -# name_covariates = "MOD_NDVIV_0_", -# subdataset = "(NDVI)", 
-# nthreads = 14L, -# radius = c(1e3, 1e4, 5e4)), -# viirs = list(from = arglist_paths$viirs, -# name_covariates = "MOD_LGHTN_0_", -# subdataset = 3, -# nthreads = 14L, -# preprocess = amadeus::process_bluemarble, -# radius = c(1e3, 1e4, 5e4)), -# geoscf_aqc = list(date = arglist_common$char_period, -# path = "input/geos/aqc_tavg_1hr_g1440x721_v1"), -# geoscf_chm = list(date = arglist_common$char_period, -# path = "input/geos/chm_tavg_1hr_g1440x721_v1"), -# # base class covariates start here -# hms = list(path = "input/HMS_Smoke/data", -# date = arglist_common$char_period, -# covariate = "hms", -# domain = c("Light", "Medium", "Heavy"), -# nthreads = 3L, -# domain_name = "variable"), -# gmted = list( -# path = "input/gmted", -# covariate = "gmted"#, -# # domain = -# # expand.grid( -# # variables = c( -# # "Breakline Emphasis", "Systematic Subsample", -# # "Median Statistic", "Minimum Statistic", -# # "Mean Statistic", "Maximum Statistic", -# # "Standard Deviation Statistic" -# # ), -# # resolution = sprintf("%s arc-seconds", c("7.5")) -# # ) %>% -# # split(., seq_len(nrow(.))) %>% -# # lapply(unlist) %>% -# # lapply(unname) %>% -# # lapply(as.character), -# # domain_name = "variable" -# ), -# nei = list( -# domain = c(2017, 2020), -# domain_name = "year", -# path = "input/nei", -# covariate = "nei" -# ), -# tri = list( -# domain = seq(2018, 2022), -# domain_name = "year", -# path = "input/tri", -# radius = c(1e3, 1e4, 5e4), -# covariate = "tri", -# nthreads = 5L -# #year = seq(2018, 2022) -# ), -# nlcd = list( -# domain = c(2019, 2021), # rep(_,c(3,2)) -- how to parametrize? 
-# domain_name = "year", -# path = "input/nlcd/raw", -# covariate = "nlcd", -# radius = c(1e3, 1e4, 5e4), -# nthreads = 2L, -# max_cells = 1e8 -# #years = c(2019, 2021) -# ), -# koppen = list(path = "input/koppen_geiger/raw/Beck_KG_V1_present_0p0083.tif", -# covariate = "koppen", -# nthreads = 1L), -# ecoregions = list(path = "input/ecoregions/raw/us_eco_l3_state_boundaries.shp", -# covariate = "ecoregions", -# nthreads = 1L), -# narr = list( -# path = "input/narr", -# covariate = "narr", -# domain_reduced = c("air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", -# "hpbl", "lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", -# "pres.sfc", "shtfl", "snowc", "soilm", -# "tcdc", "ulwrf.sfc", "uwnd.10m", "vis", "vwnd.10m", "weasd"), -# domain_appt = c("prate", "shum"), -# domain = c("air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", -# "hpbl", "lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", -# "prate", "pres.sfc", "shtfl", "shum", "snowc", "soilm", -# "tcdc", "ulwrf.sfc", "uwnd.10m", "vis", "vwnd.10m", "weasd"), -# domain_name = "variable", -# date = arglist_common$char_period, -# process_function = process_narr2, -# calc_function = calc_narr2, -# nthreads = 24L -# ), -# groads = list( -# path = "input/sedac_groads/groads-v1-americas-gdb/gROADS-v1-americas.gdb", -# covariate = "groads", -# radius = c(1e3, 1e4, 5e4), -# nthreads = 3L), -# population = list( -# path = "input/sedac_population/gpw_v4_population_density_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec.tif", -# covariate = "population", fun = "mean", -# radius = c(1e3, 1e4, 5e4), -# nthreads = 3L -# ) -# ) - -# attr(arglist_proccalc, "description") <- -# tibble::tribble( -# ~dataset, ~description, -# "mod11", "MODIS Land Surface Temperature Day/Night", -# "mod06", "MODIS Cloud Fraction Day/Night", -# "mod09", "MODIS Surface Reflectance", -# "mcd19_1km", "MCD19A2 1km", -# "mcd19_5km", "MCD19A2 5km", -# "mod13", "MODIS Normalized Difference Vegetation Indexß", -# "viirs", "VIIRS Nighttime Lights", -# "hms", 
"NOAA Hazard Mapping System Smoke", -# "geoscf_aqc", "GEOS-CF AQC", -# "geoscf_chm", "GEOS-CF CHM", -# "gmted", "GMTED elevation", -# "nei", "National Emission Inventory", -# "tri", "Toxic Release Inventory", -# "nlcd", "National Land Cover Database", -# "koppen", "Koppen-Geiger Climate Classification", -# "ecoregions", "EPA Ecoregions", -# "narr", "NARR", -# "groads", "SEDAC Global Roads", -# "population", "SEDAC Population Density" -# ) - - - -# arglist_download <- -# list( - -# mod11 = list(name_covariates = sprintf("MOD_SFCT%s_0_", c("D", "N")), -# subdataset = sprintf("LST_%s_", c("Day", "Night"))), -# mod06 = list(name_covariates = sprintf("MOD_CLCV%s_0_", c("Day", "Night")), -# subdataset = sprintf("Cloud_Fraction_%s", c("Day", "Night"))), -# mod09 = list(name_covariates = sprintf("MOD_SFCRF_%d_", seq(1, 7)), -# subdataset = seq(2, 8)), -# mcd19 = list(name_covariates = -# list( -# res1km = sprintf("MOD_AD%dTA_0_", c(4, 5)), -# res5km = sprintf("MOD_%sAN_0_", c("CSZ", "CVZ", "RAZ", "SCT", "GLN")) -# ), -# subdataset = -# list( -# res1km = c("Optical_Depth"), -# res5km = c("cos|RelAZ|Angle") -# ) -# ), -# mod13 = list(name_covariates = "MOD_NDVIV_0_", -# subdataset = "(NDVI)"), -# viirs = list(name_covariates = "MOD_LGHTN_0_", -# subdataset = 3), -# hms = list(levels = c("Light", "Medium", "Heavy")), -# geoscf = list(element = list(aqc = "aqc", chm = "chm")), -# gmted = list( -# variables = c( -# "Breakline Emphasis", "Systematic Subsample", -# "Median Statistic", "Minimum Statistic", -# "Mean Statistic", "Maximum Statistic", -# "Standard Deviation Statistic" -# ), -# resolution = sprintf("%s arc-seconds", c("7.5", "15", "30")) -# ), -# nei = list( -# years = c(2017, 2020), -# county = process_counties(year = 2020) -# ), -# tri = list( -# years = seq(2018, 2022) -# ), -# nlcd = list( -# years = c(2019, 2021) -# ), -# koppen = list(), -# ecoregions = list(), -# narr = list( -# variables = c("air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", -# "hpbl", 
"lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", -# "prate", "pres.sfc", "shtfl", "shum", "snowc", "soilm", -# "tcdc", "ulwrf.sfc", "uwnd.10m", "vis", "vwnd.10m", "weasd") -# ), -# groads = list(), -# population = list(fun = "mean") -# ) - - -# export -# time_create <- gsub("[[:punct:]]|[[:blank:]]", "", Sys.time()) - -# Generated argument list for the pipeline is stored to -# the desired location. The current pipeline setting accepts -# the argument list in RDS format. -# qs::qsave( -# arglist_proccalc, -# "./inst/targets/punchcard_calc.qs" -# ) diff --git a/inst/targets/targets_calculate_fit.R b/inst/targets/targets_calculate_fit.R index c7347e02..9585772a 100644 --- a/inst/targets/targets_calculate_fit.R +++ b/inst/targets/targets_calculate_fit.R @@ -1,46 +1,476 @@ - -# meta_run, resting in the tools/pipeline/pipeline_base_functions.R, -# is a function that returns a list of parameters for the pipeline -# for users' convenience and make the pipeline less prone to errors. - +################################################################################ +##### Calculate covariates at US EPA AQS sites target_calculate_fit <- list( - tarchetypes::tar_files_input( - name = file_prep_calc_args, - files = list.files("inst/targets", pattern = "^calc*.*.qs$", full.names = TRUE), - # cue = tar_invalidate(tar_older(Sys.time() - as.difftime(4, units = "weeks"))), - format = "file", - iteration = "vector", - description = "Calculation arguments in QS file" + ########################### GEOS ########################### + targets::tar_target( + chr_iter_calc_geos, + command = c( + "aqc_tavg_1hr_g1440x721_v1", + "chm_tavg_1hr_g1440x721_v1" + ), + iteration = "list", + description = "GEOS-CF features" ) , + # targets::tar_target( + # chr_iter_calc_geos, + # command = c("geoscf_chm", "geoscf_aqc"), + # iteration = "vector", + # description = "GEOS-CF feature calculation" + # ) + # , targets::tar_target( - chr_iter_calc_features, - command = c("hms", "tri", "nei", - "ecoregions", 
"koppen", "population", "groads"), + list_feat_calc_geos, + command = { + # download_geos + inject_geos( + locs = sf_feat_proc_aqs_sites, + injection = list( + date = fl_dates(list_dates), + path = file.path( + arglist_common$char_input_dir, + "geos", + chr_iter_calc_geos + ), + nthreads = 1 + ) + ) + }, + #pattern = map(chr_iter_calc_geos), + pattern = cross(chr_iter_calc_geos, list_dates), iteration = "list", - description = "Base features" + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate GEOS-CF features (fit)" ) , - # "year" is included: tri, nlcd, nei - # "time" is included: hms targets::tar_target( - chr_iter_calc_nasa, - command = c( - "mod11", "mod06", "mod13", - "mcd19_1km", "mcd19_5km", "mod09", "viirs" + dt_feat_calc_geos, + command = reduce_merge(reduce_list(list_feat_calc_geos)), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "data.table of GEOS-CF features (fit)" + ) + , + ########################### NARR ########################### + targets::tar_target( + name = chr_iter_calc_narr, + command = c("air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", + "hpbl", "lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", + "pres.sfc", "shtfl", "snowc", "soilm", + "tcdc", "ulwrf.sfc", "uwnd.10m", "vis", "vwnd.10m", "weasd"), + iteration = "vector" + ) + , + targets::tar_target( + list_feat_calc_narr, + command = { + # download_narr + par_narr( + domain = chr_iter_calc_narr, + path = paste0(arglist_common$char_input_dir, "/narr/"), + date = fl_dates(unlist(list_dates)), + locs = sf_feat_proc_aqs_sites, + nthreads = 1 + ) + }, + pattern = cross(list_dates, chr_iter_calc_narr), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = 
"never"), + description = "Calculate NARR features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_narr, + command = reduce_merge( + lapply( + list(list_feat_calc_narr), + function(x) reduce_merge(reduce_list(lapply(x, "[[", 1))) + ), + by = c("site_id", "time") + ), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "data.table of NARR features (fit)" + ) + , + ########################### HMS ########################### + targets::tar_target( + list_feat_calc_hms, + command = { + # download_hms + inject_calculate( + covariate = "hms", + locs = sf_feat_proc_aqs_sites, + injection = list( + path = paste0( + arglist_common$char_input_dir, + "/hms/data_files/" + ), + date = fl_dates(unlist(list_dates)), + covariate = "hms" + ) + ) + }, + pattern = map(list_dates), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate HMS features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_hms, + command = reduce_merge( + lapply( + list(list_feat_calc_hms), + function(x) reduce_merge(reduce_list(lapply(x, "[[", 1))) + ), + by = c("site_id", "time") + ), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "data.table of HMS features (fit)" + ) + , + ########################### MODIS - MOD11 ###################### + targets::tar_target( + list_args_calc_mod11, + command = { + # download_modis_clean # download_mod11 + list( + from = query_modis_files( + paste0(arglist_common$char_input_dir, "/modis/raw/61/MOD11A1/"), + list_dates_julian, + chr_dates ), + name_covariates = c("MOD_SFCTD_0_", "MOD_SFCTN_0_"), + subdataset = "^LST_", + nthreads = 1, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), iteration = "list", - description = "MODIS/VIIRS 
features" + cue = targets::tar_cue(mode = "never"), + description = "MODIS - MOD11 arguments" ) , targets::tar_target( - chr_iter_calc_geoscf, - command = c("geoscf_chm", "geoscf_aqc"), - iteration = "vector", - description = "GEOS-CF features" + list_feat_calc_mod11, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_mod11 + ), + pattern = map(list_args_calc_mod11), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate MODIS - MOD11 features (fit)" + ) + , + ########################### MODIS - MOD06 ###################### + targets::tar_target( + list_args_calc_mod06, + command = { + # download_modis_clean # download_mod06 + list( + from = query_modis_files( + paste0(arglist_common$char_input_dir, "/modis/raw/61/MOD06_L2/"), + list_dates_julian, + chr_dates + ), + name_covariates = c("MOD_CLCVD_0_", "MOD_CLCVN_0_"), + subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"), + nthreads = 1, + preprocess = amadeus::process_modis_swath, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), + iteration = "list", + cue = targets::tar_cue(mode = "never"), + description = "MODIS - MOD06 arguments" + ) + , + targets::tar_target( + list_feat_calc_mod06, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_mod06 + ), + pattern = map(list_args_calc_mod06), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate MODIS - MOD06 features (fit)" + ) + , + ########################### MODIS - MOD13 ###################### + targets::tar_target( + list_args_calc_mod13, + command = { + # download_modis_clean # download_mod13 + list( + from = query_modis_files( + 
paste0(arglist_common$char_input_dir, "/modis/raw/61/MOD13A2/"), + list_dates_julian, + chr_dates + ), + name_covariates = "MOD_NDVIV_0_", + subdataset = "(NDVI)", + nthreads = 1, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), + iteration = "list", + cue = targets::tar_cue(mode = "never"), + description = "MODIS - MOD13 arguments" + ) + , + targets::tar_target( + list_feat_calc_mod13, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_mod13 + ), + pattern = map(list_args_calc_mod13), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate MODIS - MOD13 features (fit)" + ) + , + ########################### MODIS - MCD19_1km ###################### + targets::tar_target( + list_args_calc_mcd19_1km, + command = { + # download_modis_clean # download_mcd19 + list( + from = query_modis_files( + paste0(arglist_common$char_input_dir, "/modis/raw/61/MCD19A2/"), + list_dates_julian, + chr_dates + ), + name_covariates = c("MOD_AD4TA_0_", "MOD_AD5TA_0_"), + subdataset = "^Optical_Depth", + nthreads = 1, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), + iteration = "list", + cue = targets::tar_cue(mode = "never"), + description = "MODIS - MCD19_1km arguments" + ) + , + targets::tar_target( + list_feat_calc_mcd19_1km, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_mcd19_1km + ), + pattern = map(list_args_calc_mcd19_1km), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate MODIS - MCD19_1km features (fit)" + ) + , + ########################### MODIS - MCD19_5km ###################### + targets::tar_target( + list_args_calc_mcd19_5km, + command = { + # 
download_modis_clean # download_mcd19 + list( + from = query_modis_files( + paste0(arglist_common$char_input_dir, "/modis/raw/61/MCD19A2/"), + list_dates_julian, + chr_dates + ), + name_covariates = c( + "MOD_CSZAN_0_", "MOD_CVZAN_0_", "MOD_RAZAN_0_", + "MOD_SCTAN_0_", "MOD_GLNAN_0_" + ), + subdataset = "cos|RelAZ|Angle", + nthreads = 1, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), + iteration = "list", + cue = targets::tar_cue(mode = "never"), + description = "MODIS - MCD19_5km arguments" + ) + , + targets::tar_target( + list_feat_calc_mcd19_5km, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_mcd19_5km + ), + pattern = map(list_args_calc_mcd19_5km), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate MODIS - MCD19_5km features (fit)" + ) + , + ########################### MODIS - MOD09 ###################### + targets::tar_target( + list_args_calc_mod09, + command = { + # download_modis_clean # download_mod09 + list( + from = query_modis_files( + paste0(arglist_common$char_input_dir, "/modis/raw/61/MOD09GA/"), + list_dates_julian, + chr_dates + ), + name_covariates = c( + "MOD_SFCRF_1_", "MOD_SFCRF_2_", "MOD_SFCRF_3_", "MOD_SFCRF_4_", + "MOD_SFCRF_5_", "MOD_SFCRF_6_", "MOD_SFCRF_7_" + ), + subdataset = "^sur_refl_", + nthreads = 1, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), + iteration = "list", + cue = targets::tar_cue(mode = "never"), + description = "MODIS - MOD09 arguments" + ) + , + targets::tar_target( + list_feat_calc_mod09, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_mod09 + ), + pattern = map(list_args_calc_mod09), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = 
targets::tar_cue(mode = "never"), + description = "Calculate MODIS - MOD09 features (fit)" + ) + , + ########################### MODIS - VIIRS ###################### + targets::tar_target( + list_args_calc_viirs, + command = { + # download_modis_clean # download_viirs + list( + from = query_modis_files( + paste0(arglist_common$char_input_dir, "/modis/raw/5000/VNP46A2/"), + list_dates_julian, + chr_dates + ), + name_covariates = "MOD_LGHTN_0_", + subdataset = 3, + preprocess = amadeus::process_blackmarble, + nthreads = 1, + radius = chr_iter_radii + ) + }, + pattern = map(chr_dates), + iteration = "list", + cue = targets::tar_cue(mode = "never"), + description = "MODIS - VIIRS arguments" ) , + targets::tar_target( + list_feat_calc_viirs, + command = inject_modis_par( + locs = sf_feat_proc_aqs_sites, + injection = list_args_calc_viirs + ), + pattern = map(list_args_calc_viirs), + iteration = "list", + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + cue = targets::tar_cue(mode = "never"), + description = "Calculate MODIS - VIIRS features (fit)" + ) + , + ########################### MODIS/VIIRS ###################### + targets::tar_target( + dt_feat_calc_nasa, + command = reduce_merge( + lapply( + list( + list_feat_calc_mod11, + list_feat_calc_mod06, + list_feat_calc_mod13, + list_feat_calc_mcd19_1km, + list_feat_calc_mcd19_5km, + list_feat_calc_mod09, + list_feat_calc_viirs + ), + function(x) data.table::data.table(reduce_list(x)[[1]]) + ), + by = NULL + ), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "data.table of MODIS/VIIRS features (fit)" + ) + , + ########################### GMTED ########################### targets::tar_target( name = chr_iter_calc_gmted_vars, command = c( @@ -50,278 +480,477 @@ target_calculate_fit <- "Standard Deviation Statistic" ), iteration = "list", - description = "GMTED features" 
+ description = "GMTED variables" ) , targets::tar_target( - list_feat_calc_base, - command = - inject_calculate( - covariate = chr_iter_calc_features, + chr_iter_calc_gmted_radii, + command = c(0, 1e3, 1e4), + description = "GMTED radii" + ) + , + targets::tar_target( + list_feat_calc_gmted, + command = { + # download_gmted + inject_gmted( locs = sf_feat_proc_aqs_sites, - injection = loadargs(file_prep_calc_args, chr_iter_calc_features)), - pattern = cross(file_prep_calc_args, chr_iter_calc_features), + variable = chr_iter_calc_gmted_vars, + radii = chr_iter_calc_gmted_radii, + injection = list( + path = paste0(arglist_common$char_input_dir, "/gmted/data_files"), + covariate = "gmted" + ) + ) + }, iteration = "list", - description = "Calculate base features (fit)", - priority = 1 + pattern = cross(chr_iter_calc_gmted_vars, chr_iter_calc_gmted_radii), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "Calculate GMTED features (fit)" ) , targets::tar_target( - list_feat_calc_base_flat, - command = lapply(list_feat_calc_base, - function(x) { - if (length(x) == 1) { - x[[1]] - } else if ( - sum(grepl("light|medium|heavy", - sapply(x, \(t) names(t)))) == 3) { - xr <- lapply(x, \(dt) { - dta <- data.table::copy(dt) - dta <- dta[, time := as.character(time)] - return(dta) - }) - xrr <- Reduce( - function(x, y) { - collapse::join(x, y, on = c("site_id", "time"), how = "full") }, - xr) - return(xrr) - } else { - collapse::rowbind(x, use.names = TRUE, fill = TRUE) - } - }), - description = "Calculated base feature list (all dt) (fit)" + dt_feat_calc_gmted, + command = reduce_merge(list_feat_calc_gmted, by = "site_id"), + description = "data.table of GMTED features (fit)" ) , + ########################### NLCD ########################### targets::tar_target( - name = df_feat_calc_nlcd_params, + df_feat_calc_nlcd_params, command = expand.grid( - year = loadargs(file_prep_calc_args, 
"nlcd")$domain, - radius = loadargs(file_prep_calc_args, "nlcd")$radius + year = chr_iter_calc_nlcd, + radius = chr_iter_radii ) %>% - split(1:nrow(.)), + split(seq_len(nrow(.))), iteration = "list", description = "NLCD features" ) , targets::tar_target( - name = list_feat_calc_nlcd, - command = inject_nlcd(year = df_feat_calc_nlcd_params$year, - radius = df_feat_calc_nlcd_params$radius, - from = amadeus::process_nlcd( - path = loadargs(file_prep_calc_args, "nlcd")$path, - year = df_feat_calc_nlcd_params$year - ), - locs = sf_feat_proc_aqs_sites, - locs_id = arglist_common$char_siteid, - nthreads = 10L, - mode = "exact", - max_cells = 3e7 - ), - pattern = cross(file_prep_calc_args, df_feat_calc_nlcd_params), + chr_iter_calc_nlcd, + command = c(2019, 2021), + description = "NLCD years" + ) + , + targets::tar_target( + list_feat_calc_nlcd, + command = { + # download_nlcd + inject_nlcd( + locs = sf_feat_proc_aqs_sites, + locs_id = arglist_common$char_siteid, + year = df_feat_calc_nlcd_params$year, + radius = df_feat_calc_nlcd_params$radius, + from = amadeus::process_nlcd( + path = paste0(arglist_common$char_input_dir, "/nlcd/data_files/"), + year = df_feat_calc_nlcd_params$year + ), + nthreads = 1, + mode = "exact", + max_cells = 3e7 + ) + }, iteration = "list", - description = "Calculate NLCD features (fit)", - resources = set_slurm_resource( - ntasks = 1, ncpus = 10, memory = 8 - ) + pattern = map(df_feat_calc_nlcd_params), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "Calculate NLCD features (fit)" ) , targets::tar_target( name = dt_feat_calc_nlcd, - command = - list_feat_calc_nlcd %>% - collapse::rowbind(fill = TRUE) %>% - collapse::funique() %>% - collapse::pivot( - ids = c(arglist_common$char_siteid, arglist_common$char_timeid), - values = names(.)[!names(.) 
%in% c(arglist_common$char_siteid, arglist_common$char_timeid)] - ) %>% - .[!is.na(.[["value"]]),] %>% - collapse::pivot( - ids = c("site_id", "time"), - values = c("value"), - how = "wider" - ), + command = list_feat_calc_nlcd %>% + collapse::rowbind(fill = TRUE) %>% + collapse::funique() %>% + collapse::pivot( + ids = c(arglist_common$char_siteid, arglist_common$char_timeid), + values = names(.)[!names(.) %in% c( + arglist_common$char_siteid, + arglist_common$char_timeid + )] + ) %>% + .[!is.na(.[["value"]]),] %>% + collapse::pivot( + ids = c("site_id", "time"), + values = c("value"), + how = "wider" + ), description = "NLCD feature list (all dt) (fit)" ) , + ########################### KOPPEN ########################### targets::tar_target( - list_feat_calc_nasa, - command = - inject_modis_par( + dt_feat_calc_koppen, + command = { + # download_koppen + inject_calculate( + covariate = "koppen", locs = sf_feat_proc_aqs_sites, - injection = loadargs(file_prep_calc_args, chr_iter_calc_nasa)), - pattern = cross(file_prep_calc_args, chr_iter_calc_nasa), - resources = set_slurm_resource( - ntasks = 1, ncpus = arglist_common$nthreads_nasa, memory = 8 - ), - iteration = "list", - description = "Calculate MODIS/VIIRS features (fit)" + injection = list( + path = paste0( + arglist_common$char_input_dir, + "/koppen_geiger", + "/data_files", + "/Beck_KG_V1_present_0p0083.tif" + ), + nthreads = 1, + covariate = "koppen" + ) + ) + }, + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "Calculate Koppen Geiger features (fit)" ) , + ########################### POPULATION ########################### targets::tar_target( - list_feat_calc_geoscf, - inject_geos( - locs = sf_feat_proc_aqs_sites, - injection = loadargs(file_prep_calc_args, chr_iter_calc_geoscf) - ), - pattern = cross(file_prep_calc_args, chr_iter_calc_geoscf), + list_feat_calc_pop, + command = { + # download_population + inject_calculate( 
+ covariate = "population", + locs = sf_feat_proc_aqs_sites, + injection = list( + path = paste0( + arglist_common$char_input_dir, + "/population", + "/data_files", + "/gpw_v4_population_density_adjusted_to_", + "2015_unwpp_country_totals_rev11_2020_30_sec.tif" + ), + fun = "mean", + radius = chr_iter_radii, + nthreads = 1, + covariate = "population" + ) + ) + }, + pattern = map(chr_iter_radii), iteration = "list", - resources = set_slurm_resource( - ntasks = 1, ncpus = arglist_common$nthreads_geoscf, memory = 4 - ), - description = "Calculate GEOS-CF features (fit)" + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "Calculate population features (fit)" ) , targets::tar_target( - name = list_feat_calc_gmted, - command = inject_gmted( - locs = sf_feat_proc_aqs_sites, - variable = chr_iter_calc_gmted_vars, - radii = c(0, 1e3, 1e4, 5e4), - injection = loadargs(file_prep_calc_args, "gmted") + dt_feat_calc_pop, + command = reduce_merge( + lapply( + list_feat_calc_pop, + function(x) data.table::data.table(reduce_list(x)[[1]]) + ), + c("site_id", "time", "population_year") ), + description = "data.table of population features (fit)" + ) + , + ########################### TRI ########################### + targets::tar_target( + df_feat_calc_tri_params, + command = expand.grid(year = chr_years, radius = chr_iter_radii) %>% + split(seq_len(nrow(.))), iteration = "list", - pattern = cross(file_prep_calc_args, chr_iter_calc_gmted_vars), - resources = set_slurm_resource( - ntasks = 1, ncpus = arglist_common$nthreads_gmted, memory = 8 - ), - description = "Calculate GMTED features (fit)" + description = "TRI features" ) , targets::tar_target( - name = list_feat_calc_narr, - command = #rlang::inject( - par_narr( - domain = loadargs(file_prep_calc_args, "narr")$domain, - path = loadargs(file_prep_calc_args, "narr")$path, - date = arglist_common$char_period, + list_feat_calc_tri, + command = { + # 
download_tri + inject_calculate( + covariate = "tri", locs = sf_feat_proc_aqs_sites, - nthreads = arglist_common$nthreads_narr + injection = list( + domain = df_feat_calc_tri_params$year, + domain_name = "year", + path = paste0(arglist_common$char_input_dir, "/tri/"), + variables = c(1, 13, 12, 14, 3 + c(20, 34, 36, 47, 48, 49)), + radius = df_feat_calc_tri_params$radius, + nthreads = 1, + covariate = "tri" + ) ) - , - pattern = map(file_prep_calc_args), + }, iteration = "list", - resources = set_slurm_resource( - ntasks = 1, ncpus = arglist_common$nthreads_narr, memory = 20 + pattern = map(df_feat_calc_tri_params), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) ), - description = "Calculate NARR features (fit)" + description = "Calculate TRI features (fit)" ) , - # targets::tar_target( - # name = list_feat_calc_narr_apptainer, - # command = #rlang::inject( - # par_narr_appt( - # domain = loadargs(file_prep_calc_args, "narr")$domain_appt, - # period = arglist_common$char_period - # ), - # pattern = map(file_prep_calc_args), - # iteration = "list", - # resources = set_slurm_resource( - # ntasks = 1, ncpus = 2, memory = 40 - # ) - # ) - # , targets::tar_target( - dt_feat_calc_gmted, - command = reduce_merge(list_feat_calc_gmted, "site_id"), - description = "data.table of GMTED features (fit)" + dt_feat_calc_tri, + command = reduce_merge( + lapply( + list_feat_calc_tri, + function(x) data.table::data.table(reduce_list(x)[[1]]) + ), + c("site_id", "time") + ), + description = "data.table of TRI features (fit)" ) , + ########################### NEI ########################### targets::tar_target( - dt_feat_calc_nasa, - command = reduce_merge(list_feat_calc_nasa), - description = "data.table of MODIS/VIIRS features (fit)" + chr_iter_calc_nei, + command = c("2017", "2020"), + #iteration = "list", + description = "NEI domain dummy" ) , targets::tar_target( - dt_feat_calc_geoscf, - command = 
reduce_merge(list_feat_calc_geoscf), - description = "data.table of GEOS-CF features (fit)" + list_feat_calc_nei, + command = { + # download_nei + inject_calculate( + covariate = "nei", + locs = sf_feat_proc_aqs_sites, + injection = list( + domain = chr_iter_calc_nei, + domain_name = "year", + path = paste0(arglist_common$char_input_dir, "/nei/data_files"), + covariate = "nei" + ) + ) + }, + #iteration = "list", + #pattern = map(chr_iter_calc_nei), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "Calculate NEI features (fit)" ) , targets::tar_target( - dt_feat_calc_narr, - command = reduce_merge(list_feat_calc_narr, by = NULL), - description = "data.table of NARR features (fit)" + dt_feat_calc_nei, + command = reduce_list( + lapply( + list_feat_calc_nei, + function(x) data.table::data.table(reduce_list(x)[[1]]) + ) + )[[1]], + description = "data.table of NEI features (fit)" + ) + , + ############################################################################ + ############################################################################ + ########################### ECOREGIONS ########################### + ##### Ecoregions covariates have been calculated manually due to ongoing + ##### issues with the `process_ecoregions` and `calc_ecoregions` functions. + ##### Covariates have been calculated at all sites from 2018 to 2024, and + ##### are filtered to the relevant sites after import. 
+ ##### amadeus::download_aqs( + ##### year = c(2018, 2024), + ##### directory_to_save = paste0(arglist_common$char_input_dir, "/aqs"), + ##### unzip = TRUE, + ##### remove_zip = TRUE, + ##### acknowledgement = TRUE, + ##### download = TRUE + ##### ) + ##### sf_aqs_2018_2024 <- amadeus::process_aqs( + ##### path = list.files( + ##### path = paste0(arglist_common$char_input_dir, "/aqs/data_files"), + ##### full.names = TRUE, + ##### recursive = TRUE + ##### ), + ##### date = c("2018-01-01", "2024-12-31"), + ##### mode = "location", + ##### return_format = "sf" + ##### ) + ##### qs_feat_calc_ecoregions <- data.table::data.table( + ##### amadeus::calc_ecoregion( + ##### from = amadeus::process_ecoregion( + ##### path = paste0( + ##### paste0( + ##### arglist_common$char_input_dir, + ##### "ecoregions/", + ##### "data_files/us_eco_l3_state_boundaries.shp" + ##### ) + ##### ) + ##### ), + ##### locs = sf_aqs_2018_2024, + ##### locs_id = "site_id", + ##### ) + ##### ) + ##### qs::qsave( + ##### qs_feat_calc_ecoregions, + ##### file = "./inst/extdata/dt_feat_calc_ecoregion.qs" + ##### ) + targets::tar_target( + qs_feat_calc_ecoregions, + command = qs::qread("/mnt/inst/extdata/qs_feat_calc_ecoregions.qs"), + description = "Import calculated ecoregion features (2018 - 2024)" + ) + , + targets::tar_target( + dt_feat_calc_ecoregions, + command = qs_feat_calc_ecoregions[ + qs_feat_calc_ecoregions$site_id %in% sf_feat_proc_aqs_sites$site_id, + ], + description = "data.table of Ecoregions features (fit)" + ) + ############################################################################ + ############################################################################ + ############################################################################ + , + ########################### GROADS ########################### + targets::tar_target( + list_feat_calc_groads, + command = { + # download_groads + inject_calculate( + covariate = "groads", + locs = sf_feat_proc_aqs_sites, + injection = 
list( + path = paste0( + arglist_common$char_input_dir, + "/groads/data_files", + "/gROADS-v1-americas.gdb" + ), + radius = chr_iter_radii, + nthreads = 1, + covariate = "groads" + ) + ) + }, + iteration = "list", + pattern = map(chr_iter_radii), + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "Calculate gRoads features (fit)" + ) + , + targets::tar_target( + dt_feat_calc_groads, + command = reduce_merge( + lapply( + list_feat_calc_groads, + function(x) data.table::data.table(reduce_list(x)[[1]]) + ), + by = c("site_id", "groads_year", "description") + ), + description = "data.table of gRoads features (fit)" ) , + ######################## DATE FEATURES ######################### targets::tar_target( dt_feat_calc_date, - command = - Reduce( + command = Reduce( post_calc_autojoin, list( + dt_feat_calc_geos, dt_feat_calc_narr, - dt_feat_calc_geoscf, dt_feat_calc_nasa ) ), - description = "data.table of all daily features (fit)" + resources = targets::tar_resources( + crew = targets::tar_resources_crew( + controller = "calc_controller" + ) + ), + description = "data.table of all features (fit)" + ) + , + ######################## BASE FEATURES ######################### + targets::tar_target( + list_feat_calc_base_flat, + command = lapply( + list( + list(dt_feat_calc_hms), + list(dt_feat_calc_tri), + list(dt_feat_calc_nei), + list(dt_feat_calc_ecoregions), + dt_feat_calc_koppen, + list(dt_feat_calc_pop), + list(dt_feat_calc_groads) + ), + function(x) { + if (length(x) == 1) { + x[[1]] + } else if ( + sum(grepl("light|medium|heavy", sapply(x, \(t) names(t)))) == 3 + ) { + xr <- lapply(x, \(dt) { + dta <- data.table::copy(dt) + dta <- dta[, time := as.character(time)] + return(dta) + }) + xrr <- Reduce( + function(x, y) { + collapse::join(x, y, on = c("site_id", "time"), how = "full") + }, + xr + ) + return(xrr) + } else { + collapse::rowbind(x, use.names = TRUE, fill = TRUE) + } + }), + 
description = "Calculated base feature list (all dt) (fit)" ) , targets::tar_target( dt_feat_calc_base, - command = - Reduce( + command = Reduce( post_calc_autojoin, c( list(dt_feat_proc_aqs_sites_time), list_feat_calc_base_flat, list(dt_feat_calc_gmted), - list(dt_feat_calc_nlcd) + list(data.table::data.table(dt_feat_calc_nlcd)) ) ), description = "Base features with PM2.5" ) , + ####################### CUMULATIVE FEATURES ####################### targets::tar_target( dt_feat_calc_design, - command = - post_calc_autojoin( - dt_feat_calc_base, - dt_feat_calc_date, - year_start = as.integer(substr(arglist_common$char_period[1], 1, 4)), - year_end = as.integer(substr(arglist_common$char_period[2], 1, 4)) - ), + command = post_calc_autojoin( + dt_feat_calc_base, + dt_feat_calc_date, + year_start = as.integer(substr(arglist_common$char_period[1], 1, 4)), + year_end = as.integer(substr(arglist_common$char_period[2], 1, 4)) + ), description = "data.table of all features with PM2.5" ) - # , - # tar_target( - # dt_feat_fit_pm, - # post_calc_join_pm25_features( - # df_pm = sf_feat_proc_aqs_pm25, - # df_covar = dt_feat_fit_x, - # locs_id = "site_id", - # time_id = "time" + # targets::tar_target( + # dt_feat_calc_imputed, + # command = impute_all( + # dt_feat_calc_design, + # period = arglist_common$char_period, + # nthreads_dt = 1, + # nthreads_collapse = 1, + # nthreads_imputation = 1 # ), - # description = "data.table of all features with PM2.5" + # description = "Imputed features + lags", + # resources = targets::tar_resources( + # crew = targets::tar_resources_crew( + # controller = "calc_controller" + # ) + # ) # ) - , - targets::tar_target( - dt_feat_calc_cumulative, - command = append_predecessors( - path_qs = "output/qs", - period_new = arglist_common$char_period, - input_new = dt_feat_calc_design, - nthreads = arglist_common$nthreads_append - ), - description = "Cumulative feature calculation", - resources = set_slurm_resource( - ntasks = 1, ncpus = 
arglist_common$nthreads_append, memory = 16 - ) - ), - targets::tar_target( - dt_feat_calc_imputed, - command = - impute_all( - dt_feat_calc_cumulative, - period = arglist_common$char_period, - nthreads_dt = arglist_common$nthreads_impute, - nthreads_collapse = arglist_common$nthreads_impute, - nthreads_imputation = arglist_common$nthreads_impute), - description = "Imputed features + lags", - resources = set_slurm_resource( - ntasks = 1, ncpus = arglist_common$nthreads_impute, memory = 8 - ) - ) ) diff --git a/inst/targets/targets_calculate_predict.R b/inst/targets/targets_calculate_predict.R index 2fc9b3bc..97d4844e 100644 --- a/inst/targets/targets_calculate_predict.R +++ b/inst/targets/targets_calculate_predict.R @@ -6,6 +6,14 @@ target_calculate_predict <- list( + targets::tar_target( + library, + command = .Library + ), + targets::tar_target( + libPaths, + command = .libPaths() + ), targets::tar_target( df_pred_calc_grid, command = qs::qread( @@ -133,12 +141,12 @@ target_calculate_predict <- # # # description = "List of HMS features" # # # ) # # # , - targets::tar_target( - chr_iter_pred_features, - command = chr_iter_calc_features |> base::setdiff(c("hms", "ecoregions")), - description = "Drop HMS and ecoregions from base features" - ) - , + # targets::tar_target( + # chr_iter_pred_features, + # command = chr_iter_calc_features |> base::setdiff(c("hms", "ecoregions")), + # description = "Drop HMS and ecoregions from base features" + # ) + # , # targets::tar_target( # list_pred_split_calc_base, # command = @@ -271,24 +279,24 @@ target_calculate_predict <- # description = "NLCD feature list (all dt) (pred)" # ) # , - targets::tar_target( - list_pred_split_calc_nasa, - command = - inject_modis_par( - locs = list_pred_calc_grid_DEV[[chr_pred_calc_grid_DEV]], - injection = loadargs(file_prep_calc_args, chr_iter_calc_nasa) - ), - pattern = cross( - file_prep_calc_args, - chr_iter_calc_nasa, - chr_pred_calc_grid_DEV - ), - resources = set_slurm_resource( - ntasks = 
1, ncpus = arglist_common$nthreads_nasa, memory = 8 - ), - iteration = "list", - description = "Calculate MODIS/VIIRS features with branched sublists (pred)" - ) + # targets::tar_target( + # list_pred_split_calc_nasa, + # command = + # inject_modis_par( + # locs = list_pred_calc_grid_DEV[[chr_pred_calc_grid_DEV]], + # injection = loadargs(file_prep_calc_args, chr_iter_calc_nasa) + # ), + # pattern = cross( + # file_prep_calc_args, + # chr_iter_calc_nasa, + # chr_pred_calc_grid_DEV + # ), + # resources = set_slurm_resource( + # ntasks = 1, ncpus = arglist_common$nthreads_nasa, memory = 8 + # ), + # iteration = "list", + # description = "Calculate MODIS/VIIRS features with branched sublists (pred)" + # ) # , # targets::tar_target( # name = list_pred_calc_nasa, @@ -360,43 +368,44 @@ target_calculate_predict <- # description = "data.table of GEOS-CF features (pred)" # ) # , - # # # targets::tar_target( - # # # chr_pred_calc_gmted_radii, - # # # command = c(0, 1e3, 1e4, 5e4), - # # # description = "Radii for GMTED features" - # # # ) - # # # , - # # # targets::tar_target( - # # # name = list_pred_split_calc_gmted, - # # # command = inject_gmted( - # # # locs = list_pred_calc_grid[[chr_pred_calc_grid]], - # # # variable = chr_iter_calc_gmted_vars, - # # # radii = chr_pred_calc_gmted_radii, - # # # injection = loadargs(file_prep_calc_args, "gmted") - # # # ), - # # # iteration = "list", - # # # pattern = cross( - # # # file_prep_calc_args, - # # # chr_iter_calc_gmted_vars, - # # # chr_pred_calc_grid, - # # # chr_pred_calc_gmted_radii - # # # ), - # # # resources = set_slurm_resource( - # # # ntasks = 1, - # # # ncpus = arglist_common$nthreads_gmted, - # # # memory = 8 - # # # ), - # # # description = "Calculate GMTED features with branched sublists (pred)" - # # # ) - # # # , - # # # targets::tar_target( - # # # dt_pred_calc_gmted, - # # # command = reduce_merge( - # # # reduce_list(list_pred_split_calc_gmted), - # # # "site_id" - # # # ), - # # # description = 
"data.table of GMTED features (pred)" - # # # ) + targets::tar_target( + chr_pred_calc_gmted_radii, + # command = c(0, 1e3, 1e4, 5e4), + command = c(200), + description = "Radii for GMTED features" + ) + , + targets::tar_target( + name = list_pred_split_calc_gmted, + command = inject_gmted( + locs = list_pred_calc_grid_DEV[[chr_pred_calc_grid_DEV]], + variable = chr_iter_calc_gmted_vars, + radii = chr_pred_calc_gmted_radii, + injection = loadargs(file_prep_calc_args, "gmted") + ), + iteration = "list", + pattern = cross( + file_prep_calc_args, + chr_iter_calc_gmted_vars, + chr_pred_calc_grid_DEV, + chr_pred_calc_gmted_radii + ), + resources = set_slurm_resource( + ntasks = 1, + ncpus = arglist_common$nthreads_gmted, + memory = 8 + ), + description = "Calculate GMTED features with branched sublists (pred)" + ) + , + targets::tar_target( + dt_pred_calc_gmted, + command = reduce_merge( + reduce_list(list_pred_split_calc_gmted), + "site_id" + ), + description = "data.table of GMTED features (pred)" + ) # # # , # # # targets::tar_target( # # # chr_pred_calc_narrmono, diff --git a/inst/targets/targets_critical.R b/inst/targets/targets_critical.R new file mode 100644 index 00000000..67bdd545 --- /dev/null +++ b/inst/targets/targets_critical.R @@ -0,0 +1,63 @@ +################################################################################ +##### Define critical targets +target_critical <- + list( + ############################################################################ + ############################################################################ + ########################### CRITICAL TARGETS ###################### + ##### 1. chr_daterange controls all time-related targets for the entire + ##### pipeline. This is the only target that needs to be changed to + ##### update the pipeline with a new temporal range. Month and year + ##### specific arguments are derived from the time range defined by + ##### chr_daterange.
+ targets::tar_target( + chr_daterange, + command = c("2021-12-01", "2021-12-31"), + description = "Date range" + ) + , + ##### 2. chr_nasa_token sets the file path to the user's NASA Earthdata + ##### account credentials. We can create a group credential file, + ##### but this target is still critical since the CREDENTIALS + ##### EXPIRE AT ~90 DAY INTERVALS. Regardless of the user or group + ##### credential file, the token must be updated every 90 days. + targets::tar_target( + chr_nasa_token, + command = readLines("/inst/extdata/nasa_token.txt"), + description = "NASA Earthdata token" + ) + , + ##### 3. chr_mod06_links is the file path to the MOD06 links file. These + ##### links must be manually downloaded per the `amadeus::download_modis` + ##### function. The links are then stored in a CSV file that is read + ##### by the function. The new file with links must be updated to match + ##### the new date range. + targets::tar_target( + chr_mod06_links, + command = "/inst/extdata/mod06_links_2018_2022.csv", + description = "File of MOD06 links" + ) + , + ##### 4. chr_input_dir is the file path to the input directory. This target + ##### controls where the raw data files are downloaded to and imported + ##### from. This file path **MUST** be mounted to the container at run + ##### time in the `run_container.sh` script. + targets::tar_target( + chr_input_dir, + command = "/input", + description = "Input directory" + ) + , + ##### 5. num_dates_split controls the size of temporal splits. Splitting the + ##### temporal range into smaller chunks allows for parallel processing + ##### across multiple workers. It also allows for dispatching new dynamic + ##### branches when the temporal range is updated.
+ targets::tar_target( + num_dates_split, + command = 10, + description = "Number of days to include in each temporal split" + ) + ############################################################################ + ############################################################################ + ############################################################################ + ) \ No newline at end of file diff --git a/inst/targets/targets_download.R b/inst/targets/targets_download.R index eae4f782..6ea939df 100644 --- a/inst/targets/targets_download.R +++ b/inst/targets/targets_download.R @@ -1,40 +1,421 @@ target_download <- list( - tarchetypes::tar_files_input( - name = file_prep_download_args, - files = list.files("inst/targets", pattern = "download_spec.qs$", full.names = TRUE), - # cue = tar_invalidate(tar_older(Sys.time() - as.difftime(4, units = "weeks"))), - format = "file", - iteration = "vector", - description = "Download arguments in QS file" - ) - , - targets::tar_target( - char_rawdir_download, - command = - sprintf("dir_input_%s", - c("aqs", "nei", "narr_monolevel", "narr_p_levels", - paste0("modis_", - c("mod11", "mod13", "mcd19", "mod06", "mod09") - ), - "viirs", "nlcd", "ecoregions", "koppen", "gmted", - "population", "groads", - "hms", "tri", "geoscf_chm", "geoscf_aqc" # add covariate lists below if necessary - ) + targets::tar_target( + list_download_args, + command = list( + unzip = TRUE, + remove_zip = FALSE, + remove_command = TRUE, + acknowledgement = TRUE, + download = TRUE, + hash = TRUE + ), + description = "Common download arguments" + ) + , + ########################### AQS ########################### + targets::tar_target( + download_aqs, + command = amadeus::download_aqs( + directory_to_save = paste0(arglist_common$char_input_dir, "/aqs/"), + year = chr_years, + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = 
list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_years), + iteration = "vector", + description = "Download AQS data" + ) + , + ########################### GEOS ########################### + targets::tar_target( + chr_iter_calc_geos, + command = c( + "aqc_tavg_1hr_g1440x721_v1", + "chm_tavg_1hr_g1440x721_v1" + ), + iteration = "vector", + description = "GEOS-CF features" + ) + , + targets::tar_target( + download_geos, + command = amadeus::download_geos( + collection = chr_iter_calc_geos, + directory_to_save = paste0(arglist_common$char_input_dir, "/geos/"), + date = fl_dates(list_dates[[chr_dates]]), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = cross(chr_iter_calc_geos, chr_dates), + iteration = "vector", + description = "Download GEOS-CF data" + ) + , + ########################### NARR ########################### + targets::tar_target( + chr_iter_calc_narr, + command = c( + "air.sfc", "weasd" + # "air.sfc", "albedo", "apcp", "dswrf", "evap", "hcdc", "hpbl", + # "lcdc", "lhtfl", "mcdc", "omega", "pr_wtr", "prate", "pres.sfc", + # "shtfl", "shum", "snowc", "soilm", "tcdc", "ulwrf.sfc", "uwnd.10m", + # "vis", "vwnd.10m", "weasd" + ), + iteration = "vector", + description = "NARR features" + ) + , + targets::tar_target( + download_narr, + command = amadeus::download_narr( + variables = chr_iter_calc_narr, + directory_to_save = paste0(arglist_common$char_input_dir, "/narr/"), + year = chr_years, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = cross(chr_iter_calc_narr, chr_years), + iteration = "vector", + description = "Download NARR data" + ) + , + ########################### 
HMS ########################### + targets::tar_target( + download_hms, + command = amadeus::download_hms( + directory_to_save = paste0(arglist_common$char_input_dir, "/hms/"), + date = fl_dates(list_dates[[chr_dates]]), + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + iteration = "vector", + description = "Download HMS data" + ) + , + ########################### MODIS - MOD11 ###################### + targets::tar_target( + download_mod11, + command = amadeus::download_modis( + product = "MOD11A1", + nasa_earth_data_token = chr_nasa_token, + date = fl_dates(list_dates[[chr_dates]]), + directory_to_save = paste0( + arglist_common$char_input_dir, "/modis/raw/61/MOD11A1" + ), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + description = "Download MODIS - MOD11 data" + ) + , + ########################### MODIS - MOD06 ###################### + targets::tar_target( + download_mod06, + command = amadeus::download_modis( + product = "MOD06_L2", + nasa_earth_data_token = chr_nasa_token, + mod06_links = chr_mod06_links, + date = fl_dates(list_dates[[chr_dates]]), + directory_to_save = paste0( + arglist_common$char_input_dir, "/modis/raw/61/MOD06_L2" + ), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + description = "Download MODIS - MOD06 data" + ) + , + ########################### MODIS - MOD13 ###################### + targets::tar_target( + download_mod13, + command = 
amadeus::download_modis( + product = "MOD13A2", + nasa_earth_data_token = chr_nasa_token, + date = fl_dates(list_dates[[chr_dates]]), + directory_to_save = paste0( + arglist_common$char_input_dir, "/modis/raw/61/MOD13A2" + ), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + description = "Download MODIS - MOD13 data" + ) + , + ########################### MODIS - MCD19 ###################### + targets::tar_target( + download_mcd19, + command = amadeus::download_modis( + product = "MCD19A2", + nasa_earth_data_token = chr_nasa_token, + date = fl_dates(list_dates[[chr_dates]]), + directory_to_save = paste0( + arglist_common$char_input_dir, "/modis/raw/61/MCD19A2" + ), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + description = "Download MODIS - MCD19 data" + ) + , + ########################### MODIS - MOD09 ###################### + targets::tar_target( + download_mod09, + command = amadeus::download_modis( + product = "MOD09GA", + nasa_earth_data_token = chr_nasa_token, + date = fl_dates(list_dates[[chr_dates]]), + directory_to_save = paste0( + arglist_common$char_input_dir, "/modis/raw/61/MOD09GA" + ), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + description = "Download MODIS - MOD09 data" + ) + , + ########################### MODIS - VIIRS ###################### + targets::tar_target( + download_viirs, + command = amadeus::download_modis( + product = "VNP46A2", + version = "5000", + nasa_earth_data_token = chr_nasa_token, + date = 
fl_dates(list_dates[[chr_dates]]), + directory_to_save = paste0( + arglist_common$char_input_dir, "/modis/raw/5000/VNP46A2" + ), + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_dates), + description = "Download MODIS - VIIRS data" + ) + , + ############################################################################ + ############################################################################ + ########################### MODIS - DEBUG ###################### + ##### This target detects corrupt MODIS files which were improperly + ##### downloaded and are empty. The files are not downloaded due to missing + ##### SSL certification abilities of the container. Corrupt files are removed + ##### to ensure the downstream targets still dispatch and complete without + ##### errors. + targets::tar_target( + download_modis_clean, + command = { + download_mod06 + download_mod09 + download_mod11 + download_mod13 + download_mcd19 + download_viirs + modis_corrupt_files <- list.files( + paste0(arglist_common$char_input_dir, "/modis/raw/"), + recursive = TRUE, + full.names = TRUE, + pattern = ".h07v03.061."
+ ) + for (c in seq_along(modis_corrupt_files)) { + if (file.size(modis_corrupt_files[c]) == 0) { + file.remove(modis_corrupt_files[c]) + } + } + }, + description = "Clean corrupt MODIS files [DEBUG]" + ) + ############################################################################ + ############################################################################ + ############################################################################ + , + ########################### GMTED ########################### + targets::tar_target( + chr_iter_calc_gmted_vars, + command = c( + "Breakline Emphasis", "Systematic Subsample", + "Median Statistic", "Minimum Statistic", + "Mean Statistic", "Maximum Statistic", + "Standard Deviation Statistic" + ), + description = "GMTED features" + ) + , + targets::tar_target( + download_gmted, + command = amadeus::download_gmted( + statistic = chr_iter_calc_gmted_vars, + resolution = "7.5 arc-seconds", + directory_to_save = paste0(arglist_common$char_input_dir, "/gmted/"), + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_iter_calc_gmted_vars), + iteration = "vector", + description = "Download GMTED data" + ) + , + ########################### NLCD ########################### + targets::tar_target( + chr_iter_calc_nlcd, + command = c(2019, 2021), + description = "NLCD years" + ) + , + targets::tar_target( + download_nlcd, + command = amadeus::download_nlcd( + year = chr_iter_calc_nlcd, + directory_to_save = paste0(arglist_common$char_input_dir, "/nlcd/"), + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = 
list_download_args$hash + ), + pattern = map(chr_iter_calc_nlcd), + iteration = "vector", + description = "Download NLCD data" + ) + , + ########################### KOPPEN ########################### + targets::tar_target( + download_koppen, + command = amadeus::download_koppen_geiger( + data_resolution = "0.0083", + time_period = "Present", + directory_to_save = paste0( + arglist_common$char_input_dir, "/koppen_geiger/" ), - iteration = "vector" + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + description = "Download Koppen-Geiger data" + ) + , + ########################### POPULATION ########################### + targets::tar_target( + download_population, + command = amadeus::download_sedac_population( + data_resolution = "30 second", + data_format = "GeoTIFF", + year = "2020", + directory_to_save = + paste0(arglist_common$char_input_dir, "/population/"), + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + description = "Download population data" + ) + , + ########################### TRI ########################### + targets::tar_target( + download_tri, + command = amadeus::download_tri( + directory_to_save = paste0(arglist_common$char_input_dir, "/tri/"), + year = chr_years, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_years), + description = "Download TRI data" + ) + , + ########################### NEI ########################### + targets::tar_target( + 
chr_iter_calc_nei, + command = c(2017, 2020), + iteration = "list", + description = "NEI features" + ) + , + targets::tar_target( + download_nei, + command = amadeus::download_nei( + directory_to_save = paste0(arglist_common$char_input_dir, "/nei/"), + year = chr_iter_calc_nei, + unzip = list_download_args$unzip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + pattern = map(chr_iter_calc_nei), + description = "Download NEI data" + ) + , + ########################### ECOREGIONS ########################### + targets::tar_target( + download_ecoregions, + command = amadeus::download_ecoregion( + directory_to_save = paste0(arglist_common$char_input_dir, "/ecoregions/"), + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + description = "Download ecoregions data" ) , - # each dataset is branched + ########################### GROADS ########################### targets::tar_target( - lgl_rawdir_download, - command = - feature_raw_download( - path = file_prep_download_args, - dataset_name = char_rawdir_download), - pattern = cross(file_prep_download_args, char_rawdir_download), - iteration = "list" + download_groads, + command = amadeus::download_sedac_groads( + data_region = "Americas", + data_format = "Geodatabase", + directory_to_save = paste0(arglist_common$char_input_dir, "/groads/"), + unzip = list_download_args$unzip, + remove_zip = list_download_args$remove_zip, + remove_command = list_download_args$remove_command, + acknowledgement = list_download_args$acknowledgement, + download = list_download_args$download, + hash = list_download_args$hash + ), + description = "Download gRoads data" ) ) -## Status up to 
here is stored in meta as hash and rds/qs files -## How do we know if downloaded files were exactly what we expected? \ No newline at end of file diff --git a/inst/targets/targets_initialize.R b/inst/targets/targets_initialize.R deleted file mode 100644 index a60f3fe0..00000000 --- a/inst/targets/targets_initialize.R +++ /dev/null @@ -1,34 +0,0 @@ -target_init <- - list( - targets::tar_target( - sf_feat_proc_aqs_sites, - read_locs( - export = FALSE, - path = list.files( - path = file.path(arglist_common$char_input_dir, "aqs", "data_files"), - pattern = "daily_88101_[0-9]{4}.csv", - full.names = TRUE - ), - date = arglist_common$char_period, - mode = "location", - return_format = "sf" - ), - description = "AQS sites" - ) - , - targets::tar_target( - dt_feat_proc_aqs_sites_time, - read_locs( - path = list.files( - path = file.path(arglist_common$char_input_dir, "aqs", "data_files"), - pattern = "daily_88101_[0-9]{4}.csv", - full.names = TRUE - ), - date = arglist_common$char_period, - mode = "available-data", - data_field = c("Arithmetic.Mean", "Event.Type"), - return_format = "data.table" - ), - description = "AQS sites with time" - ) - ) diff --git a/inst/targets/targets_initiate.R b/inst/targets/targets_initiate.R new file mode 100644 index 00000000..1cb35879 --- /dev/null +++ b/inst/targets/targets_initiate.R @@ -0,0 +1,69 @@ +################################################################################ +##### Initiate pipeline arguments +target_initiate <- + list( + targets::tar_target( + chr_years, + command = seq( + as.numeric(substr(chr_daterange[1], 1, 4)), + as.numeric(substr(chr_daterange[2], 1, 4)) + ), + description = "Year range" + ) + , + targets::tar_target( + list_dates, + command = beethoven::split_dates( + dates = chr_daterange, + n = num_dates_split + ), + description = "Split date range into list" + ) + , + targets::tar_target( + chr_dates, + command = names(list_dates), + description = "Names of date list" + ) + , + targets::tar_target( + 
chr_dates_julian, + command = format( + amadeus::generate_date_sequence( + chr_daterange[1], + chr_daterange[2], + FALSE + ), + "%Y%j" + ), + description = "Julian dates" + ) + , + targets::tar_target( + list_dates_julian, + command = split( + chr_dates_julian, + ceiling(seq_along(chr_dates_julian) / num_dates_split) + ) + ) + , + targets::tar_target( + chr_iter_radii, + command = c(1000, 10000), + # command = c(1000, 10000, 50000), + description = "Buffer radii" + ) + , + targets::tar_target( + arglist_common, + command = set_args_calc( + char_siteid = "site_id", + char_timeid = "time", + char_period = chr_daterange, + num_extent = c(-126, -62, 22, 52), + char_user_email = paste0(Sys.getenv("USER"), "@nih.gov"), + char_input_dir = chr_input_dir + ), + description = "Set calculation arguments" + ) + ) diff --git a/inst/targets/targets_start.R b/inst/targets/targets_start.R index a2ecb51c..0b5acc26 100644 --- a/inst/targets/targets_start.R +++ b/inst/targets/targets_start.R @@ -1,30 +1,19 @@ -# load targets -library( - beethoven, - lib.loc = "/ddn/gs1/home/manwareme/R/x86_64-pc-linux-gnu-library/4.3" +################################################################################ +############################## LIBPATHS ############################# +.libPaths( + grep( + paste0("biotools|", Sys.getenv("USER")), .libPaths(), + value = TRUE, + invert = TRUE + ) ) -library(targets) -# assume that the working directory is beethoven git repository directory -# only runs after package deployment -# file.copy( -# from = system.file("targets", "_targets.R", package = "beethoven"), -# to = "_targets.R" -# ) -tar_make_future( - workers = 16 +.libPaths( + c("/mnt/lib-flex", .libPaths()) ) -# TODO: should find a way of auto-invalidate feat_calc_(modis|viirs|geoscf) -# when the date range changes in the configuration. 
-# manual example includes: -# targets::tar_invalidate( -# matches("feat_calc_(modis|viirs|geoscf)") -# ) -# selective execution, mix with time components -# status saving with timestamp? editable log/config file? -# tar_make_future( -# names = contains("download") -# ) +cat("Active library paths:\n") +.libPaths() -# tar_visnetwork(targets_only = TRUE) +############################ RUN PIPELINE ############################ +targets::tar_make() diff --git a/invalidate_container.sh b/invalidate_container.sh new file mode 100755 index 00000000..0c638548 --- /dev/null +++ b/invalidate_container.sh @@ -0,0 +1,25 @@ +#!/bin/bash +#SBATCH --job-name=beethoven +#SBATCH --mail-user=manwareme@nih.gov +#SBATCH --mail-type=END,FAIL +#SBATCH --partition=geo +#SBATCH --ntasks=1 +#SBATCH --mem=100G +#SBATCH --cpus-per-task=50 +#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.err +#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.out + +# Read the target name after the #SBATCH block: sbatch ignores directives that follow the first command. +DEBUG_TARGET=$1 + +# run pipeline in the container +apptainer exec \ + --bind $PWD:/mnt \ + --bind $PWD/inst:/inst \ + --bind /ddn:/input \ + --bind $PWD/_targets:/opt/_targets \ + beethoven_dl_calc.sif \ + Rscript --no-init-file -e "targets::tar_invalidate('$DEBUG_TARGET')" + +# run interactive R session in the container +# apptainer exec --bind $PWD/inst:/inst --bind /ddn/gs1/group/set/Projects/NRT-AP-Model/input:/input --bind $PWD:/mnt beethoven_dl_calc.sif R \ No newline at end of file diff --git a/man/calculate.Rd b/man/calculate.Rd index 681ad209..b17d04df 100644 --- a/man/calculate.Rd +++ b/man/calculate.Rd @@ -9,7 +9,7 @@ calculate( domain_name = "year", nthreads = 1L, process_function = amadeus::process_covariates, - calc_function = amadeus::calc_covariates, + calc_function = amadeus::calculate_covariates, ...
) } @@ -26,7 +26,7 @@ Nullable; If \code{NULL}, it will be set to \code{c(1)}.} \code{\link[amadeus:process_covariates]{amadeus::process_covariates}}} \item{calc_function}{Function to calculate covariates. -\code{\link[amadeus:calc_covariates]{amadeus::calc_covariates}}} +\code{\link[amadeus:calculate_covariates]{amadeus::calculate_covariates}}} \item{...}{Arguments passed to \code{process_function} and \code{calc_function}} } diff --git a/man/fl_dates.Rd b/man/fl_dates.Rd index 4e6678f2..ccd05af9 100644 --- a/man/fl_dates.Rd +++ b/man/fl_dates.Rd @@ -2,17 +2,18 @@ % Please edit documentation in R/prediction.R \name{fl_dates} \alias{fl_dates} -\title{Extract the first and last elements of a list} +\title{Extract the first and last elements of a vector} \usage{ fl_dates(dates) } \arguments{ -\item{dates}{list. A list of dates.} +\item{dates}{vector. A vector of dates.} } \value{ -a character vector with the first and last dates from the list. +a character vector of length 2 with +the first and last dates from the list. } \description{ -Extract the first and last elements of a list +Extract the first and last elements of a vector } \keyword{Utility} diff --git a/man/fl_dates_flatten.Rd b/man/fl_dates_flatten.Rd new file mode 100644 index 00000000..35557c0a --- /dev/null +++ b/man/fl_dates_flatten.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prediction.R +\name{fl_dates_flatten} +\alias{fl_dates_flatten} +\title{Extract the first and last elements of a list of date vectors} +\usage{ +fl_dates_flatten(dates) +} +\arguments{ +\item{dates}{list. A list of dates.} +} +\value{ +a character vector with the first and last dates from the list. +} +\description{ +It flattens the list first, then extracts the first and the last dates. 
+} +\keyword{Utility} diff --git a/man/inject_modis_par.Rd b/man/inject_modis_par.Rd index 42fcf01a..9a3f8b27 100644 --- a/man/inject_modis_par.Rd +++ b/man/inject_modis_par.Rd @@ -11,7 +11,7 @@ inject_modis_par(locs, injection) features need to be calculated.} \item{injection}{\strong{List} of dditional parameters to be passed to the -\code{calc_modis_par} function.} +\code{calculate_modis_par} function.} } \value{ MODIS/VIIRS feature data.frame. @@ -36,6 +36,6 @@ inject_modis_par( } } \seealso{ -\code{\link[amadeus:calc_modis_daily]{amadeus::calc_modis_daily}}, \code{\link[amadeus:calc_modis_par]{amadeus::calc_modis_par}} +\code{\link[amadeus:calculate_modis_daily]{amadeus::calculate_modis_daily}}, \code{\link[amadeus:calculate_modis_par]{amadeus::calculate_modis_par}} } \keyword{Calculation} diff --git a/man/query_modis_files.Rd b/man/query_modis_files.Rd new file mode 100644 index 00000000..941999cf --- /dev/null +++ b/man/query_modis_files.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate.R +\name{query_modis_files} +\alias{query_modis_files} +\title{Identify MODIS files} +\usage{ +query_modis_files(path, list, index) +} +\arguments{ +\item{path}{A character vector specifying the path to the MODIS data.} + +\item{list}{A list of julian dates.} + +\item{index}{An integer specifying the index of the julian date to use.} +} +\value{ +A character vector of MODIS file paths. +} +\description{ +This function identifies the relevant MODIS file paths based on +path, list of julian dates, and index. Designed to help set arguments +for the \code{inject_modis_par} function. 
+} +\keyword{Calculation} diff --git a/run_container.sh b/run_container.sh new file mode 100755 index 00000000..7892d7c6 --- /dev/null +++ b/run_container.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +#SBATCH --job-name=beethoven +#SBATCH --mail-user=manwareme@nih.gov +#SBATCH --mail-type=END,FAIL +#SBATCH --partition=geo +#SBATCH --ntasks=1 +#SBATCH --mem=100G +#SBATCH --cpus-per-task=50 +#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.err +#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.out + +# run pipeline in the container +apptainer exec \ + --bind $PWD:/mnt \ + --bind $PWD/inst:/inst \ + --bind /ddn:/input \ + --bind $PWD/_targets:/opt/_targets \ + beethoven_dl_calc.sif \ + Rscript --no-init-file /mnt/inst/targets/targets_start.R + +# run interactive R session in the container +# apptainer exec --bind $PWD/inst:/inst --bind /ddn/gs1/group/set/Projects/NRT-AP-Model/input:/input --bind $PWD:/mnt beethoven_dl_calc.sif R \ No newline at end of file diff --git a/run_container_one.sh b/run_container_one.sh new file mode 100755 index 00000000..4b8e5cd0 --- /dev/null +++ b/run_container_one.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#SBATCH --job-name=beethoven +#SBATCH --mail-user=manwareme@nih.gov +#SBATCH --mail-type=END,FAIL +#SBATCH --partition=geo +#SBATCH --ntasks=1 +#SBATCH --mem=100G +#SBATCH --cpus-per-task=50 +#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.err +#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/beethoven/slurm/beethoven_%j.out + +# Read the target name after the #SBATCH block: sbatch ignores directives that follow the first command. +DEBUG_TARGET=$1 + +# if [ "$(whoami)" = "isong" ]; then +# mount /ddn +# fi + +# run pipeline in the container +apptainer exec \ + --bind $PWD:/mnt \ + --bind $PWD/inst:/inst \ + --bind /ddn:/input \ + --bind $PWD/_targets:/opt/_targets \ + beethoven_dl_calc.sif \ + Rscript --no-init-file -e "targets::tar_make('$DEBUG_TARGET')" + +# run interactive R session in the container +# apptainer exec --bind $PWD/inst:/inst --bind
/ddn/gs1/group/set/Projects/NRT-AP-Model/input:/input --bind $PWD:/mnt beethoven_dl_calc.sif R \ No newline at end of file diff --git a/run_interactive.sh b/run_interactive.sh deleted file mode 100644 index d08151cb..00000000 --- a/run_interactive.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -export PATH=$PATH:/ddn/gs1/tools/cuda11.8/bin -export LD_LIBRARY_PATH=/ddn/gs1/biotools/R/lib64/R/customlib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH -if [ "$USER" != "songi2" ]; then - export R_LIBS_USER=/ddn/gs1/biotools/R/lib64/R/custompkg:/ddn/gs1/biotools/R/lib64/R/library -else - export R_LIBS_USER=/ddn/gs1/home/songi2/r-libs:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library -fi - -# Submit the pipeline as a background process with ./run.sh -# module load R # Uncomment if R is an environment module. -nohup nice -4 R CMD BATCH inst/targets/targets_start.R & \ No newline at end of file diff --git a/run_slurm.sh b/run_slurm.sh deleted file mode 100644 index 51750dd2..00000000 --- a/run_slurm.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -#SBATCH --job-name=pipeline_bench -#SBATCH --output=/ddn/gs1/home/manwareme/beethoven/pipeline/pipeline_out.out -#SBATCH --error=/ddn/gs1/home/manwareme/beethoven/pipeline/pipeline_err.err -#SBATCH --mail-type=END,FAIL -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=2 -#SBATCH --mem-per-cpu=32g -#SBATCH --partition=geo -#SBATCH --mail-user=manwareme@nih.gov - -export PATH=$PATH:/ddn/gs1/tools/cuda11.8/bin -export LD_LIBRARY_PATH=/ddn/gs1/biotools/R/lib64/R/customlib:/ddn/gs1/tools/cuda11.8/lib64:$LD_LIBRARY_PATH -if [ "$USER" != "songi2" ]; then - export R_LIBS_USER=/ddn/gs1/biotools/R/lib64/R/custompkg:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library -else - export R_LIBS_USER=/ddn/gs1/home/songi2/r-libs:$R_LIBS_USER:/ddn/gs1/biotools/R/lib64/R/library -fi - -# modify it into the proper directory path. 
and output/error paths in the -# # SBATCH directives -# USER_PROJDIR=/ddn/gs1/home/$USER/projects -USER_PROJDIR=/ddn/gs1/home/manwareme/beethoven/ - -nohup nice -4 Rscript $USER_PROJDIR/beethoven/inst/targets/targets_start.R diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h07v05.061.2021295010454.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h07v05.061.2021295010454.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h07v06.061.2021295010322.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h07v06.061.2021295010322.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v03.061.2021295010420.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v03.061.2021295010420.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v04.061.2021295010503.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v04.061.2021295010503.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v05.061.2021295010353.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v05.061.2021295010353.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v06.061.2021295010322.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h08v06.061.2021295010322.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v03.061.2021295010540.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v03.061.2021295010540.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v04.061.2021295010748.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v04.061.2021295010748.hdf new file mode 100644 index 
00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v05.061.2021295010550.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v05.061.2021295010550.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v06.061.2021295010314.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h09v06.061.2021295010314.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v03.061.2021295010847.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v03.061.2021295010847.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v04.061.2021295010743.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v04.061.2021295010743.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v05.061.2021295010413.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v05.061.2021295010413.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v06.061.2021295010321.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h10v06.061.2021295010321.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v03.061.2021295010914.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v03.061.2021295010914.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v04.061.2021295010734.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v04.061.2021295010734.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v05.061.2021295010608.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v05.061.2021295010608.hdf new file mode 
100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v06.061.2021295010608.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h11v06.061.2021295010608.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h12v03.061.2021295010627.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h12v03.061.2021295010627.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h12v04.061.2021295010714.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h12v04.061.2021295010714.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h12v05.061.2021295010455.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h12v05.061.2021295010455.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h13v03.061.2021295010658.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h13v03.061.2021295010658.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h13v04.061.2021295010545.hdf b/tests/testdata/calculate/modis/001/MOD09GA.A2018001.h13v04.061.2021295010545.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h07v05.061.2021295015118.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h07v05.061.2021295015118.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h07v06.061.2021295015149.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h07v06.061.2021295015149.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v03.061.2021295015453.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v03.061.2021295015453.hdf new 
file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v04.061.2021295015438.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v04.061.2021295015438.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v05.061.2021295015304.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v05.061.2021295015304.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v06.061.2021295015352.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h08v06.061.2021295015352.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v03.061.2021295020225.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v03.061.2021295020225.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v04.061.2021295020131.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v04.061.2021295020131.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v05.061.2021295015958.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v05.061.2021295015958.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v06.061.2021295015921.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h09v06.061.2021295015921.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v03.061.2021295020552.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v03.061.2021295020552.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v04.061.2021295020334.hdf 
b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v04.061.2021295020334.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v05.061.2021295020232.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v05.061.2021295020232.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v06.061.2021295015951.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h10v06.061.2021295015951.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v03.061.2021295020528.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v03.061.2021295020528.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v04.061.2021295020132.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v04.061.2021295020132.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v05.061.2021295020108.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v05.061.2021295020108.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v06.061.2021295015950.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h11v06.061.2021295015950.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h12v03.061.2021295020413.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h12v03.061.2021295020413.hdf new file mode 100644 index 00000000..e69de29b diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h12v04.061.2021295020235.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h12v04.061.2021295020235.hdf new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h12v05.061.2021295020328.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h12v05.061.2021295020328.hdf
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h13v03.061.2021295020730.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h13v03.061.2021295020730.hdf
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h13v04.061.2021295020435.hdf b/tests/testdata/calculate/modis/002/MOD09GA.A2018002.h13v04.061.2021295020435.hdf
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R
index 4b6e49ee..dfec2ea4 100644
--- a/tests/testthat/test-calculate.R
+++ b/tests/testthat/test-calculate.R
@@ -92,3 +92,27 @@ testthat::test_that("par_narr (weasd + omega)", {
   )
 
 })
+
+################################################################################
+##### query_modis_files
+testthat::test_that("query_modis_files", {
+
+  path <- testthat::test_path("..", "testdata", "calculate", "modis")
+  list <- list(
+    c("2018001"), c("2018002")
+  )
+
+  # index = 1 selects julian date 2018001: query must not error
+  testthat::expect_no_error(
+    files1 <- query_modis_files(path, list, index = 1)
+  )
+  # 23 fixture tiles exist under modis/001
+  testthat::expect_length(files1, 23)
+
+  # index = 2 selects julian date 2018002: query must not error
+  testthat::expect_no_error(
+    files2 <- query_modis_files(path, list, index = 2)
+  )
+  # 23 fixture tiles exist under modis/002
+  testthat::expect_length(files2, 23)
+})
diff --git a/tests/testthat/test_calculate.R b/tests/testthat/test_calculate.R
new file mode 100644
index 00000000..94a48719
--- /dev/null
+++ b/tests/testthat/test_calculate.R
@@ -0,0 +1,13 @@
+################################################################################
+##### unit and integration tests for covariate calculations
+##### Currently arbitrary placeholders
+#####
+
+################################################################################
+##### +testthat::test_that("calculate targets work", { + + a <- 1 + testthat::expect_equal(a, 1) + +}) diff --git a/tests/testthat/test_download.R b/tests/testthat/test_download.R new file mode 100644 index 00000000..fc706b8d --- /dev/null +++ b/tests/testthat/test_download.R @@ -0,0 +1,16 @@ +################################################################################ +##### unit and integration tests for data download +##### Currently arbitrary placeholders +##### + +################################################################################ +##### loadargs +testthat::test_that("download targets work", { + + a <- 1 + testthat::expect_equal(a, 1) + + b <- 2 + testthat::expect_equal(b, 2) + +}) diff --git a/tools/targets-old/blueprint.csv b/tools/targets-old/blueprint.csv index a1506346..6f042804 100644 --- a/tools/targets-old/blueprint.csv +++ b/tools/targets-old/blueprint.csv @@ -1,47 +1,3 @@ -dataset,buffers,input -mod11,1000,input/modis/raw/61/MOD11A1 -mod11,10000,input/modis/raw/61/MOD11A1 -mod11,50000,input/modis/raw/61/MOD11A1 -mod13,1000,input/modis/raw/61/MOD13A2 -mod13,10000,input/modis/raw/61/MOD13A2 -mod13,50000,input/modis/raw/61/MOD13A2 -mcd19,1000,input/modis/raw/61/MCD19A2 -mcd19,10000,input/modis/raw/61/MCD19A2 -mcd19,50000,input/modis/raw/61/MCD19A2 -mod06,1000,input/modis/raw/61/MOD06_L2 -mod06,10000,input/modis/raw/61/MOD06_L2 -mod06,50000,input/modis/raw/61/MOD06_L2 -mod09,1000,input/modis/raw/61/MOD09GA -mod09,10000,input/modis/raw/61/MOD09GA -mod09,50000,input/modis/raw/61/MOD09GA -vnp46,1000,input/viirs/raw/61/VNP46A2 -vnp46,10000,input/viirs/raw/61/VNP46A2 -vnp46,50000,input/viirs/raw/61/VNP46A2 -ecoregion,-1,input/ecoregions -tri,1000,input/tri -tri,10000,input/tri -tri,50000,input/tri -nei,-1,input/nei -hms,-1,input/hms -koppen,0,input/koppen_geiger -groads,1000,input/sedac_groads -groads,10000,input/sedac_groads -groads,50000,input/sedac_groads -pop,1000,input/sedac_population -pop,10000,input/sedac_population 
-pop,50000,input/sedac_population -nlcd,1000,input/nlcd -nlcd,10000,input/nlcd -nlcd,50000,input/nlcd -geos,1000,input/geos -geos,10000,input/geos -geos,50000,input/geos -gmted,1000,input/gmted -gmted,10000,input/gmted -gmted,50000,input/gmted -narr_monolevel,1000,input/narr/monolevel -narr_monolevel,10000,input/narr/monolevel -narr_monolevel,50000,input/narr/monolevel -narr_plevels,1000,input/narr/p_levels -narr_plevels,10000,input/narr/p_levels -narr_plevels,50000,input/narr/p_levels +version https://git-lfs.github.com/spec/v1 +oid sha256:69f6f6e008ead22acb51d9ab3e87c87a5b8efbea5bc46802b1ad308004c81d8e +size 1508 diff --git a/tools/targets-old/narr_variables.csv b/tools/targets-old/narr_variables.csv index 2181c01a..dd27f589 100644 --- a/tools/targets-old/narr_variables.csv +++ b/tools/targets-old/narr_variables.csv @@ -1,25 +1,3 @@ -"dirs" -"input/narr/air.sfc" -"input/narr/albedo" -"input/narr/apcp" -"input/narr/dswrf" -"input/narr/evap" -"input/narr/hcdc" -"input/narr/hpbl" -"input/narr/lcdc" -"input/narr/lhtfl" -"input/narr/mcdc" -"input/narr/omega" -"input/narr/pr_wtr" -"input/narr/prate" -"input/narr/pres.sfc" -"input/narr/shtfl" -"input/narr/shum" -"input/narr/snowc" -"input/narr/soilm" -"input/narr/tcdc" -"input/narr/ulwrf.sfc" -"input/narr/uwnd.10m" -"input/narr/vis" -"input/narr/vwnd.10m" -"input/narr/weasd" +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ddfcf0681fdfac2c53036479b7fbfc397c2986f627260bf9b03f4a71c4229d +size 470 diff --git a/tools/targets-old/pipeline_base_functions.R b/tools/targets-old/pipeline_base_functions.R index 316e8330..998b6ccd 100644 --- a/tools/targets-old/pipeline_base_functions.R +++ b/tools/targets-old/pipeline_base_functions.R @@ -242,7 +242,7 @@ process_counties <- #' @param process_function Raw data processor. Default is #' [`amadeus::process_covariates`] #' @param calc_function Covariate calculator. Default is -#' [`amadeus::calc_covariates`] +#' [`amadeus::calculate_covariates`] #' @param ... 
Arguments passed to `calc_function` #' @return Nothing. It will automatically save xz-compressed #' RDS file to `outpath` @@ -251,7 +251,7 @@ process_counties <- calculate_single <- function( process_function = amadeus::process_covariates, - calc_function = amadeus::calc_covariates, + calc_function = amadeus::calculate_covariates, ... ) { prep_calc <- @@ -286,7 +286,7 @@ calculate_single <- #' @param process_function Raw data processor. Default is #' [`amadeus::process_covariates`] #' @param calc_function Function to calculate covariates. -#' [`amadeus::calc_covariates`] +#' [`amadeus::calculate_covariates`] #' @param ... Arguments passed to `process_function` and `calc_function` #' @return A data.table object. #' @importFrom data.table rbindlist @@ -299,7 +299,7 @@ calculate_multi <- # outpath = NULL, domain = NULL, process_function = amadeus::process_covariates, - calc_function = amadeus::calc_covariates, + calc_function = amadeus::calculate_covariates, ... ) { domainlist <- split(domain, seq_along(domain)) diff --git a/tools/targets-old/punchcard.csv b/tools/targets-old/punchcard.csv index de511813..bf165f5d 100644 --- a/tools/targets-old/punchcard.csv +++ b/tools/targets-old/punchcard.csv @@ -1,109 +1,3 @@ -last_updated,varname,value,index,class,command -2024-03-15,root_custom,../../../../group/set/Projects/NRT-AP-Model/,0,path,paste0 -2024-03-15,root_absolute,missing,1,path,getwd -2024-03-15,root_relative,.,2,path,paste0 -2024-03-15,dir_input,input,3,path,file.path -2024-03-15,dir_output,output,4,path,file.path -2024-03-15,dir_input_aqs,input/aqs,5,path,file.path -2024-03-15,dir_input_nei,input/nei,6,path,file.path -2024-03-15,dir_input_narr,input/narr,7,path,file.path -2024-03-15,file_narr_variables,tools/pipeline/narr_variables.csv,8,path,file.path -2024-03-15,dir_input_modis,input/modis,9,path,file.path -2024-03-15,dir_input_nlcd,input/nlcd/raw,10,path,file.path -2024-03-15,dir_input_ecoregion,input/ecoregions,11,path,file.path 
-2024-03-15,dir_input_koppen,input/koppen_geiger,12,path,file.path -2024-03-15,dir_input_gmted,input/gmted,13,path,file.path -2024-03-15,dir_input_sedac_population,input/sedac_population,14,path,file.path -2024-03-15,dir_input_sedac_groads,input/sedac_groads,15,path,file.path -2024-03-15,dir_input_hms,input/HMS_Smoke,16,path,file.path -2024-03-15,dir_input_tri,input/tri,17,path,file.path -2024-03-15,dir_input_geos,input/geos,18,path,file.path -2024-03-15,dir_input_modis_mod11,input/modis/raw/61/MOD11A1,19,path,file.path -2024-03-15,dir_input_modis_mod13,input/modis/raw/61/MOD13A2,20,path,file.path -2024-03-15,dir_input_modis_mcd19,input/modis/raw/61/MCD19A2,21,path,file.path -2024-03-15,dir_input_modis_mod09,input/modis/raw/61/MOD09GA,22,path,file.path -2024-03-15,dir_input_modis_mod06,input/modis/raw/61/MOD06_L2,23,path,file.path -2024-03-15,dir_input_modis_vnp46,input/modis/raw/5000/VNP46A2,24,path,file.path -2024-03-15,y2018,2018,25,indicator,paste0 -2024-03-15,y2019,2019,26,indicator,paste0 -2024-03-15,y2020,2020,27,indicator,paste0 -2024-03-15,y2021,2021,28,indicator,paste0 -2024-03-15,y2022,2022,29,indicator,paste0 -2024-03-15,format_aqs,*.csv$,30,extension,paste0 -2024-03-15,format_nei,*.csv$,31,extension,paste0 -2024-03-15,format_narrmono,*.nc$,32,extension,paste0 -2024-03-15,format_narrplevels,*.nc$,33,extension,paste0 -2024-03-15,format_modis,*.hdf$,34,extension,paste0 -2024-03-15,format_viirs,*.h5$,35,extension,paste0 -2024-03-15,format_nlcd,*.img$,36,extension,paste0 -2024-03-15,format_ecoregion,*.shp$,37,extension,paste0 -2024-03-15,format_koppen,*.tif$,38,extension,paste0 -2024-03-15,format_gmted,*.nc$,39,extension,paste0 -2024-03-15,format_sedac_population,*.shp$,40,extension,paste0 -2024-03-15,format_sedac_groads,*.tif$,41,extension,paste0 -2024-03-15,format_hms,*.shp$,42,extension,paste0 -2024-03-15,format_tri,*.nc$,43,extension,paste0 -2024-03-15,format_geos,*.nc$,44,extension,paste0 
-2024-03-15,file_list_mod11,mod11_static_list.txt,45,status,paste0 -2024-03-15,file_list_mod13,mod13_static_list.txt,46,status,paste0 -2024-03-15,file_list_mcd19,mcd19_static_list.txt,47,status,paste0 -2024-03-15,file_list_mod06,mod06_static_list.txt,48,status,paste0 -2024-03-15,file_list_mod09,mod09_static_list.txt,49,status,paste0 -2024-03-15,file_list_tri,tri_static_list.txt,50,status,paste0 -2024-03-15,file_covar_modis_mod11,covar_modis_mod11.rds,51,status,paste0 -2024-03-15,file_covar_modis_mod09,covar_modis_mod09.rds,52,status,paste0 -2024-03-15,file_covar_modis_mod06,covar_modis_mod06.rds,53,status,paste0 -2024-03-15,file_covar_modis_mod13,covar_modis_mod13.rds,54,status,paste0 -2024-03-15,file_covar_modis_mcd19,covar_modis_mcd19.rds,55,status,paste0 -2024-03-15,file_covar_modis_vnp46,covar_modis_vnp46.rds,56,status,paste0 -2024-03-15,file_covar_tri,covar_tri.rds,57,status,paste0 -2024-03-15,file_covar_nei,covar_nei.rds,58,status,paste0 -2024-03-15,file_covar_nlcd,covar_nlcd.rds,59,status,paste0 -2024-03-15,file_covar_ecoregion,covar_ecoregion.rds,60,status,paste0 -2024-03-15,file_covar_gmted,covar_gmted.rds,61,status,paste0 -2024-03-15,file_covar_koppen,covar_koppen.rds,62,status,paste0 -2024-03-15,file_covar_geos,covar_geos.rds,63,status,paste0 -2024-03-15,file_covar_hms,covar_hms.rds,64,status,paste0 -2024-03-15,file_covar_sedac_population,covar_sedac_population.rds,65,status,paste0 -2024-03-15,file_covar_sedac_groads,covar_sedac_groads.rds,66,status,paste0 -2024-03-15,pointid,site_id,67,identifier,paste0 -2024-03-15,timeid,time,68,identifier,paste0 -2024-03-15,file_covar_dummies,covar_dummies.rds,69,status,paste0 -2024-03-15,file_aqs_pm,aqs_cleaned.rds,70,status,paste0 -2024-03-15,name_dep,pm2.5,71,label,paste0 -2024-03-15,file_name_indep,list_covariate_names.txt,72,label,readLines -2024-03-15,file_grid_prediction,prediction_grid.rds,73,status,paste0 -2024-03-15,nthreads_predict,16,74,setting,as.integer 
-2024-03-15,file_covar_predict_modis_mod11,covar_predict_modis_mod11.rds,75,status,paste0 -2024-03-15,file_covar_predict_modis_mod09,covar_predict_modis_mod09.rds,76,status,paste0 -2024-03-15,file_covar_predict_modis_mod06,covar_predict_modis_mod06.rds,77,status,paste0 -2024-03-15,file_covar_predict_modis_mod13,covar_predict_modis_mod13.rds,78,status,paste0 -2024-03-15,file_covar_predict_modis_mcd19,covar_predict_modis_mcd19.rds,79,status,paste0 -2024-03-15,file_covar_predict_modis_vnp46,covar_predict_modis_vnp46.rds,80,status,paste0 -2024-03-15,file_covar_predict_tri,covar_predict_tri.rds,81,status,paste0 -2024-03-15,file_covar_predict_nei,covar_predict_nei.rds,82,status,paste0 -2024-03-15,file_covar_predict_nlcd,covar_predict_nlcd.rds,83,status,paste0 -2024-03-15,file_covar_predict_ecoregion,covar_predict_ecoregion.rds,84,status,paste0 -2024-03-15,file_covar_predict_gmted,covar_predict_gmted.rds,85,status,paste0 -2024-03-15,file_covar_predict_koppen,covar_predict_koppen.rds,86,status,paste0 -2024-03-15,file_covar_predict_geos,covar_predict_geos.rds,87,status,paste0 -2024-03-15,file_covar_predict_hms,covar_predict_hms.rds,88,status,paste0 -2024-03-15,file_covar_predict_sedac_population,covar_predict_sedac_population.rds,89,status,paste0 -2024-03-15,file_covar_predict_sedac_groads,covar_predict_sedac_groads.rds,90,status,paste0 -2024-03-15,dir_input_nei2017,input/nei/nei_onroad_byregions_2017,91,path,file.path -2024-03-15,dir_input_nei2020,input/nei/nei_onroad_byregions_2020,92,path,file.path -2024-03-15,file_input_sedac_groads,groads-v1-americas-gdb/gROADS-v1-americas.gdb,96,path,paste0 -2024-03-15,file_input_sedac_population,gpw_v4_population_density_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec.tif,97,path,paste0 -2024-03-15,date_start,2020-04-01,93,domain,as.Date -2024-03-15,date_end,2020-04-15,94,domain,as.Date -2024-03-15,extent,-126|-62|22|52,95,domain,strsplit -2024-03-29,nei_year_sequence,2017|2017|2020|2020|2020,98,domain,strsplit 
-2024-03-29,nlcd_year_sequence,2019|2019|2019|2021|2021,99,domain,strsplit -2024-03-29,sedac_population_year,2020|2020|2020|2020|2020,101,domain,strsplit -2024-03-29,nei_year_sequence_test,2020,1001,domain,paste0 -2024-03-29,nlcd_year_sequence_test,2019,1002,domain,paste0 -2024-03-29,sedac_population_year_test,2020,1003,domain,as.integer -2024-03-29,tri_year_sequence_test,2020|2020,1004,domain,strsplit -2024-03-29,nthreads_calc,20,100,setting,as.integer -2024-03-29,tri_year_sequence,2018|2019|2020|2021|2022,102,domain,strsplit -2024-03-29,slurm_user_email,songi2@nih.gov,103,supplement,paste0 +version https://git-lfs.github.com/spec/v1 +oid sha256:8df74412e1068d8e7ea0576f974a74b92d4c36f3453372b80d4e663c4fc15055 +size 7098