From 39151b0fe65925d9afcf71261956b954227b520d Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Fri, 17 Nov 2023 13:41:33 -0500 Subject: [PATCH 01/17] add 'unzip' argument --- .../download_geos_cf_data.R | 12 +++--- .../download_functions/download_gmted_data.R | 39 ++++++++++--------- .../download_koppen_geiger_data.R | 36 +++++++++-------- .../download_functions/download_merra2_data.R | 15 +++---- .../download_functions/download_modis_data.R | 18 +++------ .../download_narr_monolevel_data.R | 10 ++--- .../download_narr_p_levels_data.R | 13 +++---- .../download_functions/download_nlcd_data.R | 30 +++++++------- .../download_sedac_groads_data.R | 34 ++++++++-------- .../download_sedac_population_data.R | 31 ++++++++------- 10 files changed, 118 insertions(+), 120 deletions(-) diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index 62c409a4..1767f0c3 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -32,11 +32,10 @@ download_geos_cf_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE. ", - "Please acknowledge that the data downloaded using this ", - "function may be very large and use lots of machine storage ", - "and memory.\n")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE. ", + "Please acknowledge that the data downloaded using this ", + "function may be very large and use lots of machine storage ", + "and memory.\n")) } #### 2. check for collection if (is.null(collection) == TRUE) { @@ -49,8 +48,7 @@ download_geos_cf_data <- function( "xgc_tavg_1hr_g1440x721_x1", "chm_inst_1hr_g1440x721_p23", "met_inst_1hr_g1440x721_p23") if (!(collection %in% collections)) { - cat(paste0("Requested collection is not recognized.\n")) - stop() + stop(paste0("Requested collection is not recognized.\n")) } #### 4. define date sequence date_start_date_format <- as.Date(date_start, format = "%Y-%m-%d") diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 59e1629f..18ac0bb8 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -22,8 +22,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very #' large and use lots of machine storage and memory. -#' @param remove_download logical(1). Remove download files in -#' directory_to_download. +#' @param unzip logical(1). Unzip zip files. Default = `TRUE`. +#' @param remove_zip logical(1). Remove zip file from directory_to_download. +#' Default = `FALSE`. #' @author Mitchell Manware #' @return NULL; #' @export @@ -33,7 +34,8 @@ download_gmted_data <- function( directory_to_download = "./input/gmted2010/raw/", directory_to_save = "./input/gmted2010/raw/", data_download_acknowledgement = FALSE, - remove_download = FALSE + unzip = TRUE, + remove_zip = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -50,16 +52,14 @@ download_gmted_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE.", - "Please acknowledge that the data downloaded using this", - "function may be very large and use lots of machine storage", - "and memory.")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE.", + "Please acknowledge that the data downloaded using this", + "function may be very large and use lots of machine storage", + "and memory.")) } #### 3. check for statistic if (is.null(statistic) == TRUE) { - cat(paste0("Please select a GMTED2010 statistic.\n")) - stop() + stop(paste0("Please select a GMTED2010 statistic.\n")) } #### 4. check for valid statistic valid_statistics <- c("Breakline Emphasis", "Systematic Subsample", @@ -67,19 +67,16 @@ download_gmted_data <- function( "Mean Statistic", "Maximum Statistic", "Standard Deviation Statistic") if (!(statistic %in% valid_statistics)) { - cat(paste0("Requested statistic is not recognized.\n")) - stop() + stop(paste0("Requested statistic is not recognized.\n")) } #### 5. check for resolution if (is.null(resolution) == TRUE) { - cat(paste0("Please select a data resolution.\n")) - stop() + stop(paste0("Please select a data resolution.\n")) } #### 6. check for valid resolution valid_resolutions <- c("7.5 arc-seconds", "15 arc-seconds", "30 arc-seconds") if (!(resolution %in% valid_resolutions)) { - cat(paste0("Requested resolution is not recognized.\n")) - stop() + stop(paste0("Requested resolution is not recognized.\n")) } #### 7. define URL base base <- paste0("https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/topo", @@ -118,15 +115,19 @@ download_gmted_data <- function( system(command = system_command) Sys.sleep(5L) cat(paste0("Requested file downloaded.\n")) - #### 14. unzip downlaoded data + #### 14. end if unzip == FALSE + if (unzip == FALSE) { + return(cat(paste0("Downloaded files will not be unzipped.\n"))) + } + #### 15. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 15. remove zip files - if (remove_download == TRUE) { + #### 16. remove zip files + if (remove_zip == TRUE) { cat(paste0("Removing download files...\n")) file.remove(download_name) cat(paste0("Download files removed.\n")) diff --git a/input/Rinput/download_functions/download_koppen_geiger_data.R b/input/Rinput/download_functions/download_koppen_geiger_data.R index 5ec620ce..dfd0e10d 100644 --- a/input/Rinput/download_functions/download_koppen_geiger_data.R +++ b/input/Rinput/download_functions/download_koppen_geiger_data.R @@ -25,8 +25,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very #' large and use lots of machine storage and memory. -#' @param remove_download logical(1). Remove download files in -#' directory_to_download. +#' @param unzip logical(1). Unzip zip files. Default = `TRUE`. +#' @param remove_zip logical(1). Remove zip files from directory_to_download. +#' Default = `FALSE`. #' @author Mitchell Manware #' @return NULL; #' @export @@ -36,7 +37,8 @@ download_koppen_geiger_data <- function( directory_to_download = "./input/koppen_geiger/raw/", directory_to_save = "./input/koppen_geiger/raw/", data_download_acknowledgement = FALSE, - remove_download = TRUE + unzip = TRUE, + remove_zip = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -53,26 +55,22 @@ download_koppen_geiger_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE.", - "Please acknowledge that the data downloaded using this", - "function may be very large and use lots of machine storage", - "and memory.")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE.", + "Please acknowledge that the data downloaded using this", + "function may be very large and use lots of machine storage", + "and memory.")) } #### 3. check for data resolution if (is.null(data_resolution)) { - cat(paste0("Please select a data resolution.\n")) - stop() + stop(paste0("Please select a data resolution.\n")) } #### 4. check for valid time period if (!(time_period %in% c("Present", "Future"))) { - cat(paste0("Requested time period is not recognized.\n")) - stop() + stop(paste0("Requested time period is not recognized.\n")) } #### 5. check for valid data resolution if (!(data_resolution %in% c("0.0083", "0.083", "0.5"))) { - cat(paste0("Requested time period is not recognized.\n")) - stop() + stop(paste0("Requested time period is not recognized.\n")) } #### 6. define time period period <- tolower(time_period) @@ -111,7 +109,11 @@ download_koppen_geiger_data <- function( cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 13. remove unwanted files + #### 13. end if unzip == FALSE + if (unzip == FALSE) { + return(cat(paste0("Downlaoded files will not be unzipped.\n"))) + } + #### 14. remove unwanted files unwanted_names <- list.files(path = directory_to_save, pattern = "Beck_KG", full.names = TRUE) @@ -128,8 +130,8 @@ download_koppen_geiger_data <- function( unwanted_names, invert = TRUE)] file.remove(unwanted_names) - #### 14. remove zip files - if (remove_download == TRUE) { + #### 15. remove zip files + if (remove_zip == TRUE) { cat(paste0("Removing download files...\n")) file.remove(download_name) cat(paste0("Download files removed.\n")) diff --git a/input/Rinput/download_functions/download_merra2_data.R b/input/Rinput/download_functions/download_merra2_data.R index 7629e669..12b4b31a 100644 --- a/input/Rinput/download_functions/download_merra2_data.R +++ b/input/Rinput/download_functions/download_merra2_data.R @@ -32,16 +32,14 @@ download_merra2_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE. ", - "Please acknowledge that the data downloaded using this ", - "function may be very large and use lots of machine storage ", - "and memory.\n")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE. ", + "Please acknowledge that the data downloaded using this ", + "function may be very large and use lots of machine storage ", + "and memory.\n")) } #### 2. check for collection if (is.null(collection) == TRUE) { - cat(paste0("Please select a MERRA2 collection.\n")) - stop() + stop(paste0("Please select a MERRA2 collection.\n")) } #### 3. check if collection is recognized identifiers <- paste0("inst1_2d_asm_Nx M2I1NXASM 10.5067/3Z173KIE2TPD,", @@ -91,8 +89,7 @@ download_merra2_data <- function( } colnames(identifiers_df) <- c("collection_id", "estd_name", "DOI") if (!(collection %in% identifiers_df$collection_id)) { - cat(paste0("Requested collection is not recognized.\n")) - stop() + stop(paste0("Requested collection is not recognized.\n")) } #### 4. define date sequence date_start_date_format <- as.Date(date_start, format = "%Y-%m-%d") diff --git a/input/Rinput/download_functions/download_modis_data.R b/input/Rinput/download_functions/download_modis_data.R index 214d7e3a..760d2066 100644 --- a/input/Rinput/download_functions/download_modis_data.R +++ b/input/Rinput/download_functions/download_modis_data.R @@ -39,41 +39,35 @@ download_modis_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0( + stop(paste0( "Data download acknowledgement is set to FALSE. ", "Please acknowledge that the data downloaded using this ", "function may be very large and use lots of machine storage ", "and memory.\n" )) - stop() } #### 3. check for NASA earth data token if (nasa_earth_data_token == FALSE) { - cat(paste0("Please provide NASA EarthData Login token.\n")) - stop() + stop(paste0("Please provide NASA EarthData Login token.\n")) } #### 4. check for product if (is.null(product) == TRUE) { - cat(paste0("Please select a MODIS product.\n")) - stop() + stop(paste0("Please select a MODIS product.\n")) } #### 5. check for version if (is.null(version) == TRUE) { - cat(paste0("Please select a data version.\n")) - stop() + stop(paste0("Please select a data version.\n")) } #### 6. check for valid horizontal tiles for (h in seq_along(horizontal_tiles)) { if (horizontal_tiles[h] < 0 || horizontal_tiles[h] > 35) { - cat(paste0("Horizontal tiles invalid.\n")) - stop() + stop(paste0("Horizontal tiles invalid.\n")) } } #### 7. check for valid vertical tiles for (v in seq_along(vertical_tiles)) { if (vertical_tiles[v] < 0 || vertical_tiles[v] > 17) { - cat(paste0("Vertical tiles invalid.\n")) - stop() + stop(paste0("Vertical tiles invalid.\n")) } } #### 8. define date sequence diff --git a/input/Rinput/download_functions/download_narr_monolevel_data.R b/input/Rinput/download_functions/download_narr_monolevel_data.R index fe6f6f82..a74e1a15 100644 --- a/input/Rinput/download_functions/download_narr_monolevel_data.R +++ b/input/Rinput/download_functions/download_narr_monolevel_data.R @@ -40,15 +40,13 @@ download_narr_monolevel_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE. Please", - "acknowledge that the data downloaded using this function may", - "be very large and use lots of machine storage and memory.")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE. Please", + "acknowledge that the data downloaded using this function may", + "be very large and use lots of machine storage and memory.")) } #### 3. check for variables if (is.null(variables) == TRUE) { - cat(paste0("Please select an NCEP-NARR variable.\n")) - stop() + stop(paste0("Please select an NCEP-NARR variable.\n")) } #### 4. define years sequence years <- seq(year_start, year_end, 1) diff --git a/input/Rinput/download_functions/download_narr_p_levels_data.R b/input/Rinput/download_functions/download_narr_p_levels_data.R index bdda4ab4..d597746d 100644 --- a/input/Rinput/download_functions/download_narr_p_levels_data.R +++ b/input/Rinput/download_functions/download_narr_p_levels_data.R @@ -23,6 +23,7 @@ #' @author Mitchell Manware #' @return NULL; NCEP North American Regional Reanalysis pressure levels #' meteorological data will be returned to the designated saving directory. +#' @importFrom stringr str_pad #' @export download_narr_p_levels_data <- function( year_start = 2022, @@ -31,7 +32,6 @@ download_narr_p_levels_data <- function( directory_to_save = "./input/ncep_narr_pressure_levels/raw/", data_download_acknowledgement = FALSE ) { - library(stringr) #### 1. directory setup chars_dir_save <- nchar(directory_to_save) if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { @@ -41,15 +41,14 @@ download_narr_p_levels_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat("Data download acknowledgement is set to FALSE. Please acknowledge that - the data downloaded using this function may be very large and use lots - of machine storage and memory.") - stop() + stop(cat(paste0("Data download acknowledgement is set to FALSE. Please ", + "acknowledge that the data downloaded using this function ", + "may be very large and use lots of machine storage and ", + "memory."))) } #### 3. check for variables if (is.null(variables) == TRUE) { - cat(paste0("Please select an NCEP-NARR variable.\n")) - stop() + stop(paste0("Please select an NCEP-NARR variable.\n")) } #### 4. define years sequence years <- seq(year_start, year_end, 1) diff --git a/input/Rinput/download_functions/download_nlcd_data.R b/input/Rinput/download_functions/download_nlcd_data.R index 17d795dd..9c5e44eb 100644 --- a/input/Rinput/download_functions/download_nlcd_data.R +++ b/input/Rinput/download_functions/download_nlcd_data.R @@ -20,8 +20,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very #' large and use lots of machine storage and memory. -#' @param remove_download logical(1). Remove download files in -#' directory_to_download. +#' @param unzip logical(1). Unzip zip files. Default = `TRUE`. +#' @param remove_zip logical(1). Remove zip files from directory_to_download. +#' Default = `FALSE`. #' @author Mitchell Manware #' @return NULL; #' @export @@ -31,7 +32,8 @@ download_nlcd_data <- function( directory_to_download = "./input/nlcd/raw/", directory_to_save = "./input/nlcd/raw/", data_download_acknowledgement = FALSE, - remove_download = FALSE + unzip = TRUE, + remove_zip = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -48,17 +50,15 @@ download_nlcd_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE.", - "Please acknowledge that the data downloaded using this", - "function may be very large and use lots of machine storage", - "and memory.")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE.", + "Please acknowledge that the data downloaded using this", + "function may be very large and use lots of machine storage", + "and memory.")) } #### 2. check for valid years valid_years <- c(2001, 2004, 2006, 2008, 2011, 2013, 2016, 2019, 2021) if (!(year %in% valid_years)) { - cat(paste0("Requested year is not recognized.\n")) - stop() + stop(paste0("Requested year is not recognized.\n")) } #### 3. define URL base base <- "https://s3-us-west-2.amazonaws.com/mrlc/" @@ -103,15 +103,19 @@ download_nlcd_data <- function( system(command = system_command) Sys.sleep(5L) cat(paste0("Requested file downloaded.\n")) - #### 10. unzip downloaded data + #### 10. end if unzip == FALSE + if (unzip == FALSE) { + return(cat(paste0("Downloaded files will not be unzipped.\n"))) + } + #### 11. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 11. remove zip files - if (remove_download == TRUE) { + #### 12. remove zip files + if (remove_zip == TRUE) { cat(paste0("Removing download files...\n")) file.remove(download_name) cat(paste0("Download files removed.\n")) diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index b1bc9fdf..b1dac783 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -23,8 +23,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the user #' acknowledge that the data downloaded using this function may be very large #' and use lots of machine storage and memory. -#' @param remove_download logical(1). Remove download files in -#' directory_to_download. +#' @param unzip logical(1). Unzip zip files. Default = `TRUE`. +#' @param remove_zip logical(1). Remove zip files from directory_to_download. +#' Default = `FALSE`. #' @author Mitchell Manware #' @return NULL; NASA UN WPP-Adjusted Population Density, v4.11 data will be #' returned to the designated saving directory in the indicated format. @@ -35,7 +36,8 @@ download_sedac_groads_data <- function( directory_to_download = "./input/sedac_groads/raw/", directory_to_save = "./input/sedac_groads/raw/", data_download_acknowledgement = FALSE, - remove_download = FALSE + unzip = TRUE, + remove_zip = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -52,29 +54,25 @@ download_sedac_groads_data <- function( } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE.", - "Please acknowledge that the data downloaded using this", - "function may be very large and use lots of machine storage", - "and memory.")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE.", + "Please acknowledge that the data downloaded using this", + "function may be very large and use lots of machine storage", + "and memory.")) } #### 3. check for region if (is.null(data_region) == TRUE) { - cat(paste0("Please select a data region.\n")) - stop() + stop(paste0("Please select a data region.\n")) } #### 4. check if region is valid regions <- c("Global", "Africa", "Asia", "Europe", "Americas", "Oceania East", "Oceania West") if (!(data_region %in% regions)) { - cat(paste0("Requested region not recognized.\n")) - stop() + stop(paste0("Requested region not recognized.\n")) } #### 5. check for data format formats <- c("Shapefile", "Geodatabase") if (!(data_format %in% formats)) { - cat(paste0("Requested data format not recognized.\n")) - stop() + stop(paste0("Requested data format not recognized.\n")) } #### 6. define URL base base <- paste0("https://sedac.ciesin.columbia.edu/downloads/data/groads/", @@ -112,14 +110,18 @@ download_sedac_groads_data <- function( cat(paste0("Downloading requested file...\n")) system(command = system_command) cat(paste0("Requested file downloaded.\n")) - #### 13. unzip downloaded data + #### 13. end if unzip == FALSE + if (unzip == FALSE) { + return(cat(paste0("Downloaded files will not be unzipped.\n"))) + } + #### 14. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) #### 14. remove zip file - if (remove_download == TRUE) { + if (remove_zip == TRUE) { cat(paste0("Removing downloaded zip file...\n")) file.remove(download_name) cat(paste0("Downloaded zip files deleted.\n")) diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index 363bf022..e9ac28fe 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -25,8 +25,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the user #' acknowledge that the data downloaded using this function may be very large #' and use lots of machine storage and memory. -#' @param remove_download logical(1). Remove download files in -#' directory_to_download. +#' @param unzip logical(1). Unzip zip files. Default = `TRUE`. +#' @param remove_zip logical(1). Remove zip files from directory_to_download. +#' Default = `FALSE`. #' @author Mitchell Manware #' @return NULL; NASA UN WPP-Adjusted Population Density, v4.11 data will be #' returned to the designated saving directory in the indicated format. @@ -38,7 +39,8 @@ download_sedac_population_data <- function( directory_to_download = "./input/nasa_sedac/raw/", directory_to_save = "./input/nasa_sedac/raw/", data_download_acknowledgement = FALSE, - remove_download = FALSE + unzip = TRUE, + remove_zip = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -55,21 +57,18 @@ download_sedac_population_data <- function( } #### 2. check for data acknowledgement if (data_download_acknowledgement == FALSE) { - cat(paste0("Data download acknowledgement is set to FALSE.", - "Please acknowledge that the data downloaded using this", - "function may be very large and use lots of machine storage", - "and memory.")) - stop() + stop(paste0("Data download acknowledgement is set to FALSE.", + "Please acknowledge that the data downloaded using this", + "function may be very large and use lots of machine storage", + "and memory.")) } #### 3. check for data format if (is.null(data_format)) { - cat(paste0("Please select a data format.\n")) - stop() + stop(paste0("Please select a data format.\n")) } #### 4. check for data resolution if (is.null(data_resolution)) { - cat(paste0("Please select a data resolution.\n")) - stop() + stop(paste0("Please select a data resolution.\n")) } #### 5. define URL base base <- paste0("https://sedac.ciesin.columbia.edu/downloads/data/gpw-v4/") @@ -139,14 +138,18 @@ download_sedac_population_data <- function( cat(paste0("Downloading requested file...\n")) system(command = download_command) cat(paste0("Requested file downloaded.\n")) - #### 13. unzip downloaded data + #### 13. end if unzip == FALSE + if (unzip == FALSE) { + return(cat(paste0("Downloaded files will not be unzipped.\n"))) + } + #### 14. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) #### 14. remove zip file from download directory - if (remove_download == TRUE) { + if (remove_zip == TRUE) { cat(paste0("Deleting downloaded zip files...\n")) file.remove(download_name) cat(paste0("Downloaded zip files deleted.\n")) From ee9f80025fe9df31f5f807162d4eb63975f01f4e Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Fri, 17 Nov 2023 13:51:25 -0500 Subject: [PATCH 02/17] lintr download_aqs_data.R --- .../download_functions/download_aqs_data.R | 234 +++++++++++------- 1 file changed, 143 insertions(+), 91 deletions(-) diff --git a/input/Rinput/download_functions/download_aqs_data.R b/input/Rinput/download_functions/download_aqs_data.R index bb75249d..d2a28868 100644 --- a/input/Rinput/download_functions/download_aqs_data.R +++ b/input/Rinput/download_functions/download_aqs_data.R @@ -6,105 +6,157 @@ ################################################################################ #' download_aqs_data: download daily data from AQS datamart #' -#' @param parameter_code integer(1). length of 5. EPA pollutant parameter code. For details, please refer to https://aqs.epa.gov/aqsweb/documents/codetables/parameters.html +#' @param parameter_code integer(1). length of 5. EPA pollutant parameter code. +#' For details, please refer to +#' https://aqs.epa.gov/aqsweb/documents/codetables/parameters.html #' @param year_start integer(1). length of 4. Start year for downloading data. #' @param year_end integer(1). length of 4. End year for downloading data. -#' @param resolution_temporal character(1). Name of column containing POC values. Currently, no value other than "daily" works. -#' @param directory_to_download character(1). Directory to download zip files from AQS data mart. -#' @param directory_to_save character(1). Directory to decompress zip files +#' @param resolution_temporal character(1). Name of column containing POC +#' values. Currently, no value other than "daily" works. +#' @param directory_to_download character(1). Directory to download zip files +#' from AQS data mart. +#' @param directory_to_save character(1). Directory to decompress zip files #' @param url_aqs_download character(1). URL to the AQS pre-generated datasets. #' @param remove_zips logical(1). remove zip files in directory_to_download. #' @author Mariana Kassien, Insang Song -#' @return NULL; Separate comma-separated value (CSV) files of monitors and the daily representative values will be stored in directory_to_save. +#' @return NULL; Separate comma-separated value (CSV) files of monitors and the +#' daily representative values will be stored in directory_to_save. #' @export download_aqs_data <- function( - parameter_code = 88101, - year_start = 2018, - year_end = 2022, - resolution_temporal = "daily", - directory_to_download = "./input/aqs/", - directory_to_save = "./input/aqs/", - url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", - remove_zips = FALSE + parameter_code = 88101, + year_start = 2018, + year_end = 2022, + resolution_temporal = "daily", + directory_to_download = "./input/aqs/", + directory_to_save = "./input/aqs/", + url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", + remove_zips = FALSE ) { - #nocov start - chars_dir_download = nchar(directory_to_download) - chars_dir_save = nchar(directory_to_save) - - if (substr(directory_to_download, chars_dir_download, chars_dir_download) != "/") { - directory_to_download = paste(directory_to_download, "/", sep = "") - } - if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { - directory_to_save = paste(directory_to_save, "/", sep = "") - } - - #### 1. define measurement data paths - year_sequence = seq(year_start, year_end, 1) - file_urls = sprintf(paste(url_aqs_download, resolution_temporal, "_", parameter_code, "_%.0f.zip", sep = ""), year_sequence) - download_names = sprintf(paste(directory_to_download, "download_output_%.0f.zip", sep = ""), year_sequence) - - #### 2. Downloading data - # Download zip files from website - if (!any(file.exists(download_names))) { - download.file(file_urls, download_names, method = "libcurl") - } - - # Construct string with unzipped file names - csv_names = sprintf(paste(directory_to_download, resolution_temporal, "_", parameter_code, "_%.0f.csv", sep = ""), year_sequence) - #### 3. Processing data - # Unzip and read in .csv files, process and join in one dataframe. - # The unique site identifier "ID.Monitor" is a string with the structure State-County-Site-Parameter-POC - for (n in seq(1, length(file_urls))) { - unzip(download_names[n], exdir = directory_to_save) - - # Read in dataframe - cat(paste("reading and processing file: ", csv_names[n], "...\n") ) - data = read.csv(csv_names[n], stringsAsFactors = F) - - #Make unique site identifier: State-County-Site-Parameter-POC - # data$ID.Monitor=paste(data$State.Code,data$County.Code,data$Site.Num,data$Parameter.Code,data$POC, sep="-") - # ISong: Some POCs are two digits, so here I changed POC slot to zero-padded two digits. - data$ID.Monitor = sprintf("%02d-%03d-%04d-%05d-%02d", - data$State.Code, data$County.Code, data$Site.Num, data$Parameter.Code, data$POC) - - #Concatenate with other years - if (n == 1) { - data_all = data - } else { - data_all = rbind(data_all, data) - } - } - - cat(paste("Downloading monitor metadata...\n")) - #### 4. Downloading monitor metadata file and filter for relevant sites - # Download monitors file - dest_monitors = paste(directory_to_download, "aqs_monitors.zip", sep = "") - if (!file.exists(dest_monitors)) { - download.file(sprintf("%saqs_monitors.zip", url_aqs_download), dest_monitors) + # nocov start + chars_dir_download <- nchar(directory_to_download) + chars_dir_save <- nchar(directory_to_save) + if (substr(directory_to_download, + chars_dir_download, + chars_dir_download) != "/") { + directory_to_download <- paste(directory_to_download, "/", sep = "") + } + if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { + directory_to_save <- paste(directory_to_save, "/", sep = "") + } + #### 1. define measurement data paths + year_sequence <- seq(year_start, year_end, 1) + file_urls <- sprintf(paste(url_aqs_download, + resolution_temporal, + "_", + parameter_code, + "_%.0f.zip", + sep = ""), + year_sequence) + download_names <- sprintf(paste(directory_to_download, + "download_output_%.0f.zip", + sep = ""), + year_sequence) + #### 2. Downloading data + # Download zip files from website + if (!any(file.exists(download_names))) { + download.file(file_urls, download_names, method = "libcurl") + } + # Construct string with unzipped file names + csv_names <- sprintf(paste(directory_to_download, + resolution_temporal, + "_", + parameter_code, + "_%.0f.csv", + sep = ""), + year_sequence) + #### 3. Processing data + # Unzip and read in .csv files, process and join in one dataframe. + # The unique site identifier "ID.Monitor" is a string with the structure + # State-County-Site-Parameter-POC + for (n in seq(1, length(file_urls))) { + unzip(download_names[n], exdir = directory_to_save) + # Read in dataframe + cat(paste("reading and processing file: ", csv_names[n], "...\n")) + data <- read.csv(csv_names[n], stringsAsFactors = FALSE) + # Make unique site identifier: State-County-Site-Parameter-POC + # ISong: Some POCs are two digits, so here I changed POC slot to zero-padded + # two digits. + data$ID.Monitor <- sprintf( + "%02d-%03d-%04d-%05d-%02d", + data$State.Code, + data$County.Code, + data$Site.Num, + data$Parameter.Code, + data$POC + ) + # Concatenate with other years + if (n == 1) { + data_all <- data + } else { + data_all <- rbind(data_all, data) } - # Unzip and read in - unzip(dest_monitors, exdir = directory_to_save) - monitors = read.csv(sprintf("%saqs_monitors.csv", directory_to_save), stringsAsFactors = F) - - # Create site identifier - monitors$State.Code = as.numeric(monitors$State.Code) # Convert from string to numeric to get rid of leading zeros, the NAs introduced are from monitors in Canada with site number="CC" - monitors$ID.Monitor = sprintf("%02d-%03d-%04d-%05d-%02d", - monitors$State.Code, monitors$County.Code, monitors$Site.Num, monitors$Parameter.Code, monitors$POC) - # Filter monitors file to include only monitors in our csv - monitors_filter = monitors[which(monitors$ID.Monitor %in% data_all$ID.Monitor),] - #### 5. Uploading data to desired folder - cat(paste("All requested files were downloaded. Write the cleaned data to ", directory_to_save, "...\n", sep = "")) - write.csv(data_all, paste(directory_to_save, resolution_temporal, "_", parameter_code, "_", year_start, "-", year_end, ".csv", sep="")) - write.csv(monitors_filter, paste(directory_to_save, "monitors_", parameter_code, "_", year_start, "-", year_end, ".csv", sep="")) - - if (remove_zips) { - cat(paste("Delete zip files ... \n")) - path_zips = list.files(pattern = ".(zip|ZIP)$", - path = directory_to_download, - full.names = TRUE) - for (zipfile in path_zips) { - file.remove(zipfile) - } + } + cat(paste("Downloading monitor metadata...\n")) + #### 4. Downloading monitor metadata file and filter for relevant sites + # Download monitors file + dest_monitors <- paste(directory_to_download, "aqs_monitors.zip", sep = "") + if (!file.exists(dest_monitors)) { + download.file(sprintf("%saqs_monitors.zip", + url_aqs_download), + dest_monitors) + } + # Unzip and read in + unzip(dest_monitors, exdir = directory_to_save) + monitors <- read.csv(sprintf("%saqs_monitors.csv", + directory_to_save), + stringsAsFactors = FALSE) + # Create site identifier + monitors$State.Code <- as.numeric(monitors$State.Code) + # Convert from string to numeric to get rid of leading zeros, + # the NAs introduced are from monitors in Canada with site number="CC" + monitors$ID.Monitor <- sprintf( + "%02d-%03d-%04d-%05d-%02d", + monitors$State.Code, + monitors$County.Code, + monitors$Site.Num, + monitors$Parameter.Code, + monitors$POC + ) + # Filter monitors file to include only monitors in our csv + monitors_filter <- + monitors[which(monitors$ID.Monitor %in% data_all$ID.Monitor), ] + #### 5. Uploading data to desired folder + cat(paste("All requested files were downloaded. Write the cleaned data to ", + directory_to_save, "...\n", sep = "")) + write.csv(data_all, paste(directory_to_save, + resolution_temporal, + "_", + parameter_code, + "_", + year_start, + "-", + year_end, + ".csv", + sep = "")) + write.csv(monitors_filter, paste(directory_to_save, + "monitors_", + parameter_code, + "_", + year_start, + "-", + year_end, + ".csv", + sep = "")) + if (remove_zips) { + cat(paste("Delete zip files ... \n")) + path_zips <- list.files( + pattern = ".(zip|ZIP)$", + path = directory_to_download, + full.names = TRUE + ) + for (zipfile in path_zips) { + file.remove(zipfile) } - #nocov end + } + # nocov end } From 4d87d8d6ff4e1b83242d2718bd94aaf106021941 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Mon, 20 Nov 2023 14:05:32 -0500 Subject: [PATCH 03/17] uniform roxygen descriptions --- input/Rinput/download_functions/download_aqs_data.R | 1 - .../download_functions/download_geos_cf_data.R | 13 +++++++++---- .../Rinput/download_functions/download_gmted_data.R | 4 ++-- .../download_functions/download_merra2_data.R | 10 ++++++++-- .../Rinput/download_functions/download_modis_data.R | 3 ++- .../download_narr_monolevel_data.R | 13 ++++++------- .../download_narr_p_levels_data.R | 13 ++++++------- .../download_noaa_hms_smoke_data.R | 5 +---- .../download_functions/download_sedac_groads_data.R | 3 +-- .../download_sedac_population_data.R | 3 +-- 10 files changed, 36 insertions(+), 32 deletions(-) diff --git a/input/Rinput/download_functions/download_aqs_data.R b/input/Rinput/download_functions/download_aqs_data.R index d2a28868..8d242612 100644 --- a/input/Rinput/download_functions/download_aqs_data.R +++ b/input/Rinput/download_functions/download_aqs_data.R @@ -5,7 +5,6 @@ ################################################################################ #' download_aqs_data: download daily data from AQS datamart -#' #' @param parameter_code integer(1). length of 5. EPA pollutant parameter code. #' For details, please refer to #' https://aqs.epa.gov/aqsweb/documents/codetables/parameters.html diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index 1767f0c3..4e28c128 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -4,19 +4,25 @@ ################################################################################ ################################################################################ -#' download_geos_cf_data: +#' download_geos_cf_data: download atmospheric composition data from the NASA +#' Global Earth Observing System (GEOS) model. #' @description +#' The `download_goes_cf_data()` function accesses and downloads various +#' atmospheric composition collections from the [NASA Global Earth Observing] +#' [System (GEOS) model](https://gmao.gsfc.nasa.gov/GEOS_systems/). #' @param date_start character(1). length of 10. Start date for downloading #' data. Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). #' @param date_end character(1). length of 10. End date for downloading data. #' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). -#' @param collection character(1). +#' @param collection character(1). GEOS-CF data collection file name. #' @param directory_to_save character(1). Directory to save data. #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very #' large and use lots of machine storage and memory. #' @author Mitchell Manware #' @return NULL; +#' @importFrom stringr str_sub +#' @importFrom stringr str_pad #' @export download_geos_cf_data <- function( date_start = "2023-09-01", @@ -39,8 +45,7 @@ download_geos_cf_data <- function( } #### 2. check for collection if (is.null(collection) == TRUE) { - cat(paste0("Please select a GEOS-CF collection.\n")) - stop() + stop(paste0("Please select a GEOS-CF collection.\n")) } #### 3. check if collection is valid collections <- c("htf_inst_15mn_g1440x721_x1", "aqc_tavg_1hr_g1440x721_v1", diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 18ac0bb8..59783a74 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -7,8 +7,8 @@ #' download_gmted_data: download global elevation data from the Global Multi- #' resolution Terrain Elevation Data (GMTED2010). #' @description -#' The `download_gmted_data()` download Global Multi-resolution Terrain -#' Elevation Data (GMTED2010) from +#' The `download_gmted_data()` function acesses and downloads Global +#' Multi-resolution Terrain Elevation Data (GMTED2010) from #' [U.S. Geological Survey and National Geospatial-Intelligence Agency] #' (https://www.usgs.gov/coastal-changes-and-impacts/gmted2010). #' @param statistic character(1). Available statistics include "Breakline diff --git a/input/Rinput/download_functions/download_merra2_data.R b/input/Rinput/download_functions/download_merra2_data.R index 12b4b31a..8647228e 100644 --- a/input/Rinput/download_functions/download_merra2_data.R +++ b/input/Rinput/download_functions/download_merra2_data.R @@ -4,13 +4,19 @@ ################################################################################ ################################################################################ -#' download_merra2_data: +#' download_merra2_data: download meteorological and atmospheric data from the +#' Modern-Era Retrospective analysis for Research and Applications, Version 2 +#' (MERRA-2) model. #' @description +#' The `download_merra2_data()` function accesses and downloads various +#' meteorological and atmospheric collections from the [Modern-Era] +#' [Retrospective analysis for Research and Applications, Version 2 (MERRA-2)] +#' (https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/). #' @param date_start character(1). length of 10. Start date for downloading #' data. Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). #' @param date_end character(1). length of 10. End date for downloading data. #' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). -#' @param collection character(1). +#' @param collection character(1). MERRA-2 data collection file name. #' @param directory_to_save character(1). Directory to save data. #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very diff --git a/input/Rinput/download_functions/download_modis_data.R b/input/Rinput/download_functions/download_modis_data.R index 760d2066..d96de770 100644 --- a/input/Rinput/download_functions/download_modis_data.R +++ b/input/Rinput/download_functions/download_modis_data.R @@ -4,7 +4,7 @@ ################################################################################ ################################################################################ -#' download_modis_data: +#' download_modis_data: #' @description #' @param date_start character(1). length of 10. Start date for downloading #' data. Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). @@ -21,6 +21,7 @@ #' large and use lots of machine storage and memory. #' @author Mitchell Manware #' @return NULL; +#' @importFrom stringr str_pad #' @export download_modis_data <- function( date_start = "2023-09-01", diff --git a/input/Rinput/download_functions/download_narr_monolevel_data.R b/input/Rinput/download_functions/download_narr_monolevel_data.R index a74e1a15..129deaa7 100644 --- a/input/Rinput/download_functions/download_narr_monolevel_data.R +++ b/input/Rinput/download_functions/download_narr_monolevel_data.R @@ -4,25 +4,24 @@ ################################################################################ ################################################################################ -#' download_narr_monolevel_data: download daily monolevel meteorological -#' data from NOAA NCEP North American Regional Reanalysis +#' download_narr_monolevel_data: download monolevel meteorological data from +#' NOAA NCEP North American Regional Reanalysis (NARR) model. #' @description #' The `download_narr_monolevel_data` function accesses and downloads -#' monolevel meteorological data. +#' monolevel meteorological data from [NOAA NCEP North American Regional] +#' [Reanalysis (NARR)](https://psl.noaa.gov/data/gridded/data.narr.html). #' @param year_start integer(1). length of 4. Start of year range for #' downloading data. #' @param year_end integer(1). length of 4. End of year range for downloading #' data. -#' @param variables character(1). Variable code(s) that should be downloaded. -#' For full list of variables and variable codes see ***. +#' @param variables character(1). Variable(s) name acronym. #' @param directory_to_save character(1). Directory(s) to save downloaded data #' files. #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the user #' acknowledge that the data downloaded using this function may be very large #' and use lots of machine storage and memory. #' @author Mitchell Manware -#' @return NULL; NCEP North American Regional Reanalysis monolevel -#' meteorological data will be returned to the designated saving directory. +#' @return NULL; #' @export download_narr_monolevel_data <- function( year_start = 2022, diff --git a/input/Rinput/download_functions/download_narr_p_levels_data.R b/input/Rinput/download_functions/download_narr_p_levels_data.R index d597746d..1eb69ab8 100644 --- a/input/Rinput/download_functions/download_narr_p_levels_data.R +++ b/input/Rinput/download_functions/download_narr_p_levels_data.R @@ -4,25 +4,24 @@ ################################################################################ ################################################################################ -#' download_narr_p_levels_data: download daily pressure levels -#' meteorological data from NOAA NCEP North American Regional Reanalysis +#' download_narr_p_levels_data: download pressure level meteorological data from +#' NOAA NCEP North American Regional Reanalysis (NARR) model. #' @description #' The `download_narr_p_levels_data` function accesses and downloads -#' pressure levels meteorological data. +#' pressure level meteorological data from [NOAA NCEP North American Regional] +#' [Reanalysis (NARR)](https://psl.noaa.gov/data/gridded/data.narr.html). #' @param year_start integer(1). length of 4. Start of year range for #' downloading data. #' @param year_end integer(1). length of 4. End of year range for downloading #' data. -#' @param variables character(1). Variable code(s) that should be downloaded. -#' For full list of variables and variable codes see ***. +#' @param variables character(1). Variable(s) name acronym. #' @param directory_to_save character(1). Directory(s) to save downloaded data #' files. #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the user #' acknowledge that the data downloaded using this function may be very large #' and use lots of machine storage and memory. #' @author Mitchell Manware -#' @return NULL; NCEP North American Regional Reanalysis pressure levels -#' meteorological data will be returned to the designated saving directory. +#' @return NULL; #' @importFrom stringr str_pad #' @export download_narr_p_levels_data <- function( diff --git a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R index da8cea55..f90c2c21 100644 --- a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R +++ b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R @@ -6,13 +6,11 @@ ################################################################################ #' download_noaa_hms_smoke_data: download daily wildfire smoke plume data from #' NOAA Hazard Mapping System Fire and Smoke Product -#' #' @description #' The `download_noaa_hms_smoke_data()` function accesses and downloads wildfire #' smoke plume coverage data from the National Oceanic and Atmospheric #' Administration's (NOAA) [Hazard Mapping System Fire and Smoke Product] #' (https://www.ospo.noaa.gov/Products/land/hms.html#0). -#' #' @param date_start character(1). length of 10. Start date for downloading #' data. Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). #' @param date_end character(1). length of 10. End date for downloading data. @@ -31,8 +29,7 @@ #' @param remove_zip logical(1). Remove zip files from #' directory_to_download. Default = `FALSE`. (Ignored if `data_format = "KML"`.) #' @author Mitchell Manware, Insang Song -#' @return NULL; NOAA Hazard Mapping System Fire and Smoke Product data will be -#' returned to the designated saving directory in the indicated format. +#' @return NULL; #' @export download_noaa_hms_smoke_data <- function( date_start = "2023-09-01", diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index b1dac783..592e723f 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -27,8 +27,7 @@ #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. #' @author Mitchell Manware -#' @return NULL; NASA UN WPP-Adjusted Population Density, v4.11 data will be -#' returned to the designated saving directory in the indicated format. +#' @return NULL; #' @export download_sedac_groads_data <- function( data_format = "Shapefile", diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index e9ac28fe..1a05f1a5 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -29,8 +29,7 @@ #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. #' @author Mitchell Manware -#' @return NULL; NASA UN WPP-Adjusted Population Density, v4.11 data will be -#' returned to the designated saving directory in the indicated format. +#' @return NULL; #' @export download_sedac_population_data <- function( year = "2020", From 2c31bd017ad363a1c76bd2d5613a0afcae0677ea Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Tue, 21 Nov 2023 07:58:15 -0500 Subject: [PATCH 04/17] create directory_to_download and directory_to_save if they do not exist --- .../Rinput/download_functions/download_aqs_data.R | 15 +++++++++++---- .../download_functions/download_geos_cf_data.R | 3 +++ .../download_functions/download_gmted_data.R | 6 ++++++ .../download_koppen_geiger_data.R | 6 ++++++ .../download_functions/download_merra2_data.R | 3 +++ .../download_functions/download_modis_data.R | 3 +++ .../download_narr_monolevel_data.R | 3 +++ .../download_narr_p_levels_data.R | 3 +++ .../download_functions/download_nlcd_data.R | 6 ++++++ .../download_noaa_hms_smoke_data.R | 6 ++++++ .../download_sedac_groads_data.R | 6 ++++++ .../download_sedac_population_data.R | 6 ++++++ 12 files changed, 62 insertions(+), 4 deletions(-) diff --git a/input/Rinput/download_functions/download_aqs_data.R b/input/Rinput/download_functions/download_aqs_data.R index 8d242612..e86313f7 100644 --- a/input/Rinput/download_functions/download_aqs_data.R +++ b/input/Rinput/download_functions/download_aqs_data.R @@ -32,6 +32,7 @@ download_aqs_data <- function( remove_zips = FALSE ) { # nocov start + #### 1. directory setup chars_dir_download <- nchar(directory_to_download) chars_dir_save <- nchar(directory_to_save) if (substr(directory_to_download, @@ -42,7 +43,13 @@ download_aqs_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } - #### 1. define measurement data paths + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } + #### 2. define measurement data paths year_sequence <- seq(year_start, year_end, 1) file_urls <- sprintf(paste(url_aqs_download, resolution_temporal, @@ -55,7 +62,7 @@ download_aqs_data <- function( "download_output_%.0f.zip", sep = ""), year_sequence) - #### 2. Downloading data + #### 3. Downloading data # Download zip files from website if (!any(file.exists(download_names))) { download.file(file_urls, download_names, method = "libcurl") @@ -68,7 +75,7 @@ download_aqs_data <- function( "_%.0f.csv", sep = ""), year_sequence) - #### 3. Processing data + #### 4. Processing data # Unzip and read in .csv files, process and join in one dataframe. # The unique site identifier "ID.Monitor" is a string with the structure # State-County-Site-Parameter-POC @@ -96,7 +103,7 @@ download_aqs_data <- function( } } cat(paste("Downloading monitor metadata...\n")) - #### 4. Downloading monitor metadata file and filter for relevant sites + #### 5. Downloading monitor metadata file and filter for relevant sites # Download monitors file dest_monitors <- paste(directory_to_download, "aqs_monitors.zip", sep = "") if (!file.exists(dest_monitors)) { diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index 4e28c128..ff534f91 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -36,6 +36,9 @@ download_geos_cf_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste0(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE. ", diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 59783a74..98a90a5c 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -50,6 +50,12 @@ download_gmted_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_koppen_geiger_data.R b/input/Rinput/download_functions/download_koppen_geiger_data.R index dfd0e10d..b57a493c 100644 --- a/input/Rinput/download_functions/download_koppen_geiger_data.R +++ b/input/Rinput/download_functions/download_koppen_geiger_data.R @@ -53,6 +53,12 @@ download_koppen_geiger_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_merra2_data.R b/input/Rinput/download_functions/download_merra2_data.R index 8647228e..63c13b48 100644 --- a/input/Rinput/download_functions/download_merra2_data.R +++ b/input/Rinput/download_functions/download_merra2_data.R @@ -36,6 +36,9 @@ download_merra2_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste0(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE. ", diff --git a/input/Rinput/download_functions/download_modis_data.R b/input/Rinput/download_functions/download_modis_data.R index d96de770..bf949d48 100644 --- a/input/Rinput/download_functions/download_modis_data.R +++ b/input/Rinput/download_functions/download_modis_data.R @@ -38,6 +38,9 @@ download_modis_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste0(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0( diff --git a/input/Rinput/download_functions/download_narr_monolevel_data.R b/input/Rinput/download_functions/download_narr_monolevel_data.R index 129deaa7..8829f83d 100644 --- a/input/Rinput/download_functions/download_narr_monolevel_data.R +++ b/input/Rinput/download_functions/download_narr_monolevel_data.R @@ -37,6 +37,9 @@ download_narr_monolevel_data <- function( "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE. Please", diff --git a/input/Rinput/download_functions/download_narr_p_levels_data.R b/input/Rinput/download_functions/download_narr_p_levels_data.R index 1eb69ab8..d1077dca 100644 --- a/input/Rinput/download_functions/download_narr_p_levels_data.R +++ b/input/Rinput/download_functions/download_narr_p_levels_data.R @@ -38,6 +38,9 @@ download_narr_p_levels_data <- function( "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(cat(paste0("Data download acknowledgement is set to FALSE. Please ", diff --git a/input/Rinput/download_functions/download_nlcd_data.R b/input/Rinput/download_functions/download_nlcd_data.R index 9c5e44eb..01a0e1f9 100644 --- a/input/Rinput/download_functions/download_nlcd_data.R +++ b/input/Rinput/download_functions/download_nlcd_data.R @@ -48,6 +48,12 @@ download_nlcd_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R index f90c2c21..54cc2edd 100644 --- a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R +++ b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R @@ -53,6 +53,12 @@ download_noaa_hms_smoke_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0( diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index 592e723f..e0c023e2 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -51,6 +51,12 @@ download_sedac_groads_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index 1a05f1a5..1e042d72 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -54,6 +54,12 @@ download_sedac_population_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0("Data download acknowledgement is set to FALSE.", From 4cd10ed8eecd928be3de31c7eac75866a7beaaba Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Tue, 21 Nov 2023 08:13:47 -0500 Subject: [PATCH 05/17] default directories ../../data/covariates/ --- input/Rinput/download_functions/download_aqs_data.R | 4 ++-- input/Rinput/download_functions/download_geos_cf_data.R | 2 +- input/Rinput/download_functions/download_gmted_data.R | 4 ++-- input/Rinput/download_functions/download_koppen_geiger_data.R | 4 ++-- input/Rinput/download_functions/download_merra2_data.R | 2 +- input/Rinput/download_functions/download_modis_data.R | 2 +- .../Rinput/download_functions/download_narr_monolevel_data.R | 2 +- input/Rinput/download_functions/download_narr_p_levels_data.R | 2 +- input/Rinput/download_functions/download_nlcd_data.R | 4 ++-- .../Rinput/download_functions/download_noaa_hms_smoke_data.R | 4 ++-- input/Rinput/download_functions/download_sedac_groads_data.R | 4 ++-- .../download_functions/download_sedac_population_data.R | 4 ++-- 12 files changed, 19 insertions(+), 19 deletions(-) diff --git a/input/Rinput/download_functions/download_aqs_data.R b/input/Rinput/download_functions/download_aqs_data.R index e86313f7..9151c3d3 100644 --- a/input/Rinput/download_functions/download_aqs_data.R +++ b/input/Rinput/download_functions/download_aqs_data.R @@ -26,8 +26,8 @@ download_aqs_data <- function( year_start = 2018, year_end = 2022, resolution_temporal = "daily", - directory_to_download = "./input/aqs/", - directory_to_save = "./input/aqs/", + directory_to_download = "../../data/aqs/", + directory_to_save = "../../data/aqs/", url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", remove_zips = FALSE ) { diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index ff534f91..14d1b3f7 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -28,7 +28,7 @@ download_geos_cf_data <- function( date_start = "2023-09-01", date_end = "2023-09-01", collection = NULL, - directory_to_save = "./input/geos_cf/raw/", + directory_to_save = "../../data/covariates/geos_cf/", data_download_acknowledgement = FALSE ) { #### 1. directory setup diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 98a90a5c..e7d154d1 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -31,8 +31,8 @@ download_gmted_data <- function( statistic = NULL, resolution = NULL, - directory_to_download = "./input/gmted2010/raw/", - directory_to_save = "./input/gmted2010/raw/", + directory_to_download = "../../data/covariates/gmted/", + directory_to_save = "../../data/covariates/gmted/", data_download_acknowledgement = FALSE, unzip = TRUE, remove_zip = FALSE diff --git a/input/Rinput/download_functions/download_koppen_geiger_data.R b/input/Rinput/download_functions/download_koppen_geiger_data.R index b57a493c..68473e6b 100644 --- a/input/Rinput/download_functions/download_koppen_geiger_data.R +++ b/input/Rinput/download_functions/download_koppen_geiger_data.R @@ -34,8 +34,8 @@ download_koppen_geiger_data <- function( time_period = "Present", data_resolution = NULL, - directory_to_download = "./input/koppen_geiger/raw/", - directory_to_save = "./input/koppen_geiger/raw/", + directory_to_download = "../../data/covariates/koppen_geiger/", + directory_to_save = "../../data/covariates/koppen_geiger/", data_download_acknowledgement = FALSE, unzip = TRUE, remove_zip = FALSE diff --git a/input/Rinput/download_functions/download_merra2_data.R b/input/Rinput/download_functions/download_merra2_data.R index 63c13b48..f3274e85 100644 --- a/input/Rinput/download_functions/download_merra2_data.R +++ b/input/Rinput/download_functions/download_merra2_data.R @@ -28,7 +28,7 @@ download_merra2_data <- function( date_start = "2023-09-01", date_end = "2023-09-01", collection = NULL, - directory_to_save = "./input/merra2/raw/", + directory_to_save = "../../data/covariates/merra2/", data_download_acknowledgement = FALSE ) { #### 1. directory setup diff --git a/input/Rinput/download_functions/download_modis_data.R b/input/Rinput/download_functions/download_modis_data.R index bf949d48..4fe06807 100644 --- a/input/Rinput/download_functions/download_modis_data.R +++ b/input/Rinput/download_functions/download_modis_data.R @@ -31,7 +31,7 @@ download_modis_data <- function( horizontal_tiles = c(7, 13), vertical_tiles = c(3, 6), nasa_earth_data_token = NULL, - directory_to_save = "./input/modis/raw/", + directory_to_save = "../../data/covariates/modis/", data_download_acknowledgement = FALSE) { #### 1. directory setup chars_dir_save <- nchar(directory_to_save) diff --git a/input/Rinput/download_functions/download_narr_monolevel_data.R b/input/Rinput/download_functions/download_narr_monolevel_data.R index 8829f83d..88e68c0b 100644 --- a/input/Rinput/download_functions/download_narr_monolevel_data.R +++ b/input/Rinput/download_functions/download_narr_monolevel_data.R @@ -27,7 +27,7 @@ download_narr_monolevel_data <- function( year_start = 2022, year_end = 2022, variables = NULL, - directory_to_save = "./input/ncep_narr_monolevel/raw/", + directory_to_save = "../../data/covariates/narr/", data_download_acknowledgement = FALSE ) { #### 1. directory setup diff --git a/input/Rinput/download_functions/download_narr_p_levels_data.R b/input/Rinput/download_functions/download_narr_p_levels_data.R index d1077dca..dd7120f5 100644 --- a/input/Rinput/download_functions/download_narr_p_levels_data.R +++ b/input/Rinput/download_functions/download_narr_p_levels_data.R @@ -28,7 +28,7 @@ download_narr_p_levels_data <- function( year_start = 2022, year_end = 2022, variables = NULL, - directory_to_save = "./input/ncep_narr_pressure_levels/raw/", + directory_to_save = "../../data/covariates/narr/", data_download_acknowledgement = FALSE ) { #### 1. directory setup diff --git a/input/Rinput/download_functions/download_nlcd_data.R b/input/Rinput/download_functions/download_nlcd_data.R index 01a0e1f9..1105d390 100644 --- a/input/Rinput/download_functions/download_nlcd_data.R +++ b/input/Rinput/download_functions/download_nlcd_data.R @@ -29,8 +29,8 @@ download_nlcd_data <- function( year = 2021, collection = "Coterminous United States", - directory_to_download = "./input/nlcd/raw/", - directory_to_save = "./input/nlcd/raw/", + directory_to_download = "../../data/covariates/nlcd/", + directory_to_save = "../../data/covariates/nlcd/", data_download_acknowledgement = FALSE, unzip = TRUE, remove_zip = FALSE diff --git a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R index 54cc2edd..bbc401ef 100644 --- a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R +++ b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R @@ -35,8 +35,8 @@ download_noaa_hms_smoke_data <- function( date_start = "2023-09-01", date_end = "2023-09-01", data_format = "Shapefile", - directory_to_download = "./input/noaa_hms/raw/", - directory_to_save = "./input/noaa_hms/raw/", + directory_to_download = "../../data/covariates/noaa_hms/", + directory_to_save = "../../data/covariates/noaa_hms/", data_download_acknowledgement = FALSE, unzip = TRUE, remove_zip = FALSE diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index e0c023e2..60716c3f 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -32,8 +32,8 @@ download_sedac_groads_data <- function( data_format = "Shapefile", data_region = "Americas", - directory_to_download = "./input/sedac_groads/raw/", - directory_to_save = "./input/sedac_groads/raw/", + directory_to_download = "../../data/covariates/sedac_groads/", + directory_to_save = "../../data/covariates/sedac_groads/", data_download_acknowledgement = FALSE, unzip = TRUE, remove_zip = FALSE diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index 1e042d72..32ace53b 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -35,8 +35,8 @@ download_sedac_population_data <- function( year = "2020", data_format = "GeoTIFF", data_resolution = "60 minute", - directory_to_download = "./input/nasa_sedac/raw/", - directory_to_save = "./input/nasa_sedac/raw/", + directory_to_download = "../../data/covariates/sedac_population/", + directory_to_save = "../../data/covariates/sedac_population/", data_download_acknowledgement = FALSE, unzip = TRUE, remove_zip = FALSE From f6c59108dfa7641d35fed021ede8d5904e8a90ee Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Fri, 17 Nov 2023 13:51:25 -0500 Subject: [PATCH 06/17] lintr download_aqs_data.R --- .../download_functions/download_aqs_data.R | 234 +++++++++++------- 1 file changed, 143 insertions(+), 91 deletions(-) diff --git a/input/Rinput/download_functions/download_aqs_data.R b/input/Rinput/download_functions/download_aqs_data.R index bb75249d..d2a28868 100644 --- a/input/Rinput/download_functions/download_aqs_data.R +++ b/input/Rinput/download_functions/download_aqs_data.R @@ -6,105 +6,157 @@ ################################################################################ #' download_aqs_data: download daily data from AQS datamart #' -#' @param parameter_code integer(1). length of 5. EPA pollutant parameter code. For details, please refer to https://aqs.epa.gov/aqsweb/documents/codetables/parameters.html +#' @param parameter_code integer(1). length of 5. EPA pollutant parameter code. +#' For details, please refer to +#' https://aqs.epa.gov/aqsweb/documents/codetables/parameters.html #' @param year_start integer(1). length of 4. Start year for downloading data. #' @param year_end integer(1). length of 4. End year for downloading data. -#' @param resolution_temporal character(1). Name of column containing POC values. Currently, no value other than "daily" works. -#' @param directory_to_download character(1). Directory to download zip files from AQS data mart. -#' @param directory_to_save character(1). Directory to decompress zip files +#' @param resolution_temporal character(1). Name of column containing POC +#' values. Currently, no value other than "daily" works. +#' @param directory_to_download character(1). Directory to download zip files +#' from AQS data mart. +#' @param directory_to_save character(1). Directory to decompress zip files #' @param url_aqs_download character(1). URL to the AQS pre-generated datasets. #' @param remove_zips logical(1). remove zip files in directory_to_download. #' @author Mariana Kassien, Insang Song -#' @return NULL; Separate comma-separated value (CSV) files of monitors and the daily representative values will be stored in directory_to_save. +#' @return NULL; Separate comma-separated value (CSV) files of monitors and the +#' daily representative values will be stored in directory_to_save. #' @export download_aqs_data <- function( - parameter_code = 88101, - year_start = 2018, - year_end = 2022, - resolution_temporal = "daily", - directory_to_download = "./input/aqs/", - directory_to_save = "./input/aqs/", - url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", - remove_zips = FALSE + parameter_code = 88101, + year_start = 2018, + year_end = 2022, + resolution_temporal = "daily", + directory_to_download = "./input/aqs/", + directory_to_save = "./input/aqs/", + url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", + remove_zips = FALSE ) { - #nocov start - chars_dir_download = nchar(directory_to_download) - chars_dir_save = nchar(directory_to_save) - - if (substr(directory_to_download, chars_dir_download, chars_dir_download) != "/") { - directory_to_download = paste(directory_to_download, "/", sep = "") - } - if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { - directory_to_save = paste(directory_to_save, "/", sep = "") - } - - #### 1. define measurement data paths - year_sequence = seq(year_start, year_end, 1) - file_urls = sprintf(paste(url_aqs_download, resolution_temporal, "_", parameter_code, "_%.0f.zip", sep = ""), year_sequence) - download_names = sprintf(paste(directory_to_download, "download_output_%.0f.zip", sep = ""), year_sequence) - - #### 2. Downloading data - # Download zip files from website - if (!any(file.exists(download_names))) { - download.file(file_urls, download_names, method = "libcurl") - } - - # Construct string with unzipped file names - csv_names = sprintf(paste(directory_to_download, resolution_temporal, "_", parameter_code, "_%.0f.csv", sep = ""), year_sequence) - #### 3. Processing data - # Unzip and read in .csv files, process and join in one dataframe. - # The unique site identifier "ID.Monitor" is a string with the structure State-County-Site-Parameter-POC - for (n in seq(1, length(file_urls))) { - unzip(download_names[n], exdir = directory_to_save) - - # Read in dataframe - cat(paste("reading and processing file: ", csv_names[n], "...\n") ) - data = read.csv(csv_names[n], stringsAsFactors = F) - - #Make unique site identifier: State-County-Site-Parameter-POC - # data$ID.Monitor=paste(data$State.Code,data$County.Code,data$Site.Num,data$Parameter.Code,data$POC, sep="-") - # ISong: Some POCs are two digits, so here I changed POC slot to zero-padded two digits. - data$ID.Monitor = sprintf("%02d-%03d-%04d-%05d-%02d", - data$State.Code, data$County.Code, data$Site.Num, data$Parameter.Code, data$POC) - - #Concatenate with other years - if (n == 1) { - data_all = data - } else { - data_all = rbind(data_all, data) - } - } - - cat(paste("Downloading monitor metadata...\n")) - #### 4. Downloading monitor metadata file and filter for relevant sites - # Download monitors file - dest_monitors = paste(directory_to_download, "aqs_monitors.zip", sep = "") - if (!file.exists(dest_monitors)) { - download.file(sprintf("%saqs_monitors.zip", url_aqs_download), dest_monitors) + # nocov start + chars_dir_download <- nchar(directory_to_download) + chars_dir_save <- nchar(directory_to_save) + if (substr(directory_to_download, + chars_dir_download, + chars_dir_download) != "/") { + directory_to_download <- paste(directory_to_download, "/", sep = "") + } + if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { + directory_to_save <- paste(directory_to_save, "/", sep = "") + } + #### 1. define measurement data paths + year_sequence <- seq(year_start, year_end, 1) + file_urls <- sprintf(paste(url_aqs_download, + resolution_temporal, + "_", + parameter_code, + "_%.0f.zip", + sep = ""), + year_sequence) + download_names <- sprintf(paste(directory_to_download, + "download_output_%.0f.zip", + sep = ""), + year_sequence) + #### 2. Downloading data + # Download zip files from website + if (!any(file.exists(download_names))) { + download.file(file_urls, download_names, method = "libcurl") + } + # Construct string with unzipped file names + csv_names <- sprintf(paste(directory_to_download, + resolution_temporal, + "_", + parameter_code, + "_%.0f.csv", + sep = ""), + year_sequence) + #### 3. Processing data + # Unzip and read in .csv files, process and join in one dataframe. + # The unique site identifier "ID.Monitor" is a string with the structure + # State-County-Site-Parameter-POC + for (n in seq(1, length(file_urls))) { + unzip(download_names[n], exdir = directory_to_save) + # Read in dataframe + cat(paste("reading and processing file: ", csv_names[n], "...\n")) + data <- read.csv(csv_names[n], stringsAsFactors = FALSE) + # Make unique site identifier: State-County-Site-Parameter-POC + # ISong: Some POCs are two digits, so here I changed POC slot to zero-padded + # two digits. + data$ID.Monitor <- sprintf( + "%02d-%03d-%04d-%05d-%02d", + data$State.Code, + data$County.Code, + data$Site.Num, + data$Parameter.Code, + data$POC + ) + # Concatenate with other years + if (n == 1) { + data_all <- data + } else { + data_all <- rbind(data_all, data) } - # Unzip and read in - unzip(dest_monitors, exdir = directory_to_save) - monitors = read.csv(sprintf("%saqs_monitors.csv", directory_to_save), stringsAsFactors = F) - - # Create site identifier - monitors$State.Code = as.numeric(monitors$State.Code) # Convert from string to numeric to get rid of leading zeros, the NAs introduced are from monitors in Canada with site number="CC" - monitors$ID.Monitor = sprintf("%02d-%03d-%04d-%05d-%02d", - monitors$State.Code, monitors$County.Code, monitors$Site.Num, monitors$Parameter.Code, monitors$POC) - # Filter monitors file to include only monitors in our csv - monitors_filter = monitors[which(monitors$ID.Monitor %in% data_all$ID.Monitor),] - #### 5. Uploading data to desired folder - cat(paste("All requested files were downloaded. Write the cleaned data to ", directory_to_save, "...\n", sep = "")) - write.csv(data_all, paste(directory_to_save, resolution_temporal, "_", parameter_code, "_", year_start, "-", year_end, ".csv", sep="")) - write.csv(monitors_filter, paste(directory_to_save, "monitors_", parameter_code, "_", year_start, "-", year_end, ".csv", sep="")) - - if (remove_zips) { - cat(paste("Delete zip files ... \n")) - path_zips = list.files(pattern = ".(zip|ZIP)$", - path = directory_to_download, - full.names = TRUE) - for (zipfile in path_zips) { - file.remove(zipfile) - } + } + cat(paste("Downloading monitor metadata...\n")) + #### 4. Downloading monitor metadata file and filter for relevant sites + # Download monitors file + dest_monitors <- paste(directory_to_download, "aqs_monitors.zip", sep = "") + if (!file.exists(dest_monitors)) { + download.file(sprintf("%saqs_monitors.zip", + url_aqs_download), + dest_monitors) + } + # Unzip and read in + unzip(dest_monitors, exdir = directory_to_save) + monitors <- read.csv(sprintf("%saqs_monitors.csv", + directory_to_save), + stringsAsFactors = FALSE) + # Create site identifier + monitors$State.Code <- as.numeric(monitors$State.Code) + # Convert from string to numeric to get rid of leading zeros, + # the NAs introduced are from monitors in Canada with site number="CC" + monitors$ID.Monitor <- sprintf( + "%02d-%03d-%04d-%05d-%02d", + monitors$State.Code, + monitors$County.Code, + monitors$Site.Num, + monitors$Parameter.Code, + monitors$POC + ) + # Filter monitors file to include only monitors in our csv + monitors_filter <- + monitors[which(monitors$ID.Monitor %in% data_all$ID.Monitor), ] + #### 5. Uploading data to desired folder + cat(paste("All requested files were downloaded. Write the cleaned data to ", + directory_to_save, "...\n", sep = "")) + write.csv(data_all, paste(directory_to_save, + resolution_temporal, + "_", + parameter_code, + "_", + year_start, + "-", + year_end, + ".csv", + sep = "")) + write.csv(monitors_filter, paste(directory_to_save, + "monitors_", + parameter_code, + "_", + year_start, + "-", + year_end, + ".csv", + sep = "")) + if (remove_zips) { + cat(paste("Delete zip files ... \n")) + path_zips <- list.files( + pattern = ".(zip|ZIP)$", + path = directory_to_download, + full.names = TRUE + ) + for (zipfile in path_zips) { + file.remove(zipfile) } - #nocov end + } + # nocov end } From 08f9380996bbf6a7e3f5ea611f202478857a3e31 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Tue, 21 Nov 2023 07:58:15 -0500 Subject: [PATCH 07/17] create directory_to_download and directory_to_save if they do not exist --- .../Rinput/download_functions/download_aqs_data.R | 15 +++++++++++---- .../download_functions/download_geos_cf_data.R | 3 +++ .../download_functions/download_gmted_data.R | 6 ++++++ .../download_koppen_geiger_data.R | 6 ++++++ .../download_functions/download_merra2_data.R | 3 +++ .../download_functions/download_modis_data.R | 3 +++ .../download_narr_monolevel_data.R | 3 +++ .../download_narr_p_levels_data.R | 3 +++ .../download_functions/download_nlcd_data.R | 6 ++++++ .../download_noaa_hms_smoke_data.R | 6 ++++++ .../download_sedac_groads_data.R | 6 ++++++ .../download_sedac_population_data.R | 6 ++++++ 12 files changed, 62 insertions(+), 4 deletions(-) diff --git a/input/Rinput/download_functions/download_aqs_data.R b/input/Rinput/download_functions/download_aqs_data.R index d2a28868..bb0f43f4 100644 --- a/input/Rinput/download_functions/download_aqs_data.R +++ b/input/Rinput/download_functions/download_aqs_data.R @@ -33,6 +33,7 @@ download_aqs_data <- function( remove_zips = FALSE ) { # nocov start + #### 1. directory setup chars_dir_download <- nchar(directory_to_download) chars_dir_save <- nchar(directory_to_save) if (substr(directory_to_download, @@ -43,7 +44,13 @@ download_aqs_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } - #### 1. define measurement data paths + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } + #### 2. define measurement data paths year_sequence <- seq(year_start, year_end, 1) file_urls <- sprintf(paste(url_aqs_download, resolution_temporal, @@ -56,7 +63,7 @@ download_aqs_data <- function( "download_output_%.0f.zip", sep = ""), year_sequence) - #### 2. Downloading data + #### 3. Downloading data # Download zip files from website if (!any(file.exists(download_names))) { download.file(file_urls, download_names, method = "libcurl") @@ -69,7 +76,7 @@ download_aqs_data <- function( "_%.0f.csv", sep = ""), year_sequence) - #### 3. Processing data + #### 4. Processing data # Unzip and read in .csv files, process and join in one dataframe. # The unique site identifier "ID.Monitor" is a string with the structure # State-County-Site-Parameter-POC @@ -97,7 +104,7 @@ download_aqs_data <- function( } } cat(paste("Downloading monitor metadata...\n")) - #### 4. Downloading monitor metadata file and filter for relevant sites + #### 5. Downloading monitor metadata file and filter for relevant sites # Download monitors file dest_monitors <- paste(directory_to_download, "aqs_monitors.zip", sep = "") if (!file.exists(dest_monitors)) { diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index 62c409a4..f25f3f61 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -30,6 +30,9 @@ download_geos_cf_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste0(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE. ", diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 59e1629f..0a2e6106 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -48,6 +48,12 @@ download_gmted_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_koppen_geiger_data.R b/input/Rinput/download_functions/download_koppen_geiger_data.R index 5ec620ce..cb341155 100644 --- a/input/Rinput/download_functions/download_koppen_geiger_data.R +++ b/input/Rinput/download_functions/download_koppen_geiger_data.R @@ -51,6 +51,12 @@ download_koppen_geiger_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_merra2_data.R b/input/Rinput/download_functions/download_merra2_data.R index 7629e669..1d4aeb48 100644 --- a/input/Rinput/download_functions/download_merra2_data.R +++ b/input/Rinput/download_functions/download_merra2_data.R @@ -30,6 +30,9 @@ download_merra2_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste0(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE. ", diff --git a/input/Rinput/download_functions/download_modis_data.R b/input/Rinput/download_functions/download_modis_data.R index 76cdc939..3b6a5676 100644 --- a/input/Rinput/download_functions/download_modis_data.R +++ b/input/Rinput/download_functions/download_modis_data.R @@ -55,6 +55,9 @@ download_modis_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste0(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop( diff --git a/input/Rinput/download_functions/download_narr_monolevel_data.R b/input/Rinput/download_functions/download_narr_monolevel_data.R index fe6f6f82..f7a6bb43 100644 --- a/input/Rinput/download_functions/download_narr_monolevel_data.R +++ b/input/Rinput/download_functions/download_narr_monolevel_data.R @@ -38,6 +38,9 @@ download_narr_monolevel_data <- function( "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE. Please", diff --git a/input/Rinput/download_functions/download_narr_p_levels_data.R b/input/Rinput/download_functions/download_narr_p_levels_data.R index bdda4ab4..49d94311 100644 --- a/input/Rinput/download_functions/download_narr_p_levels_data.R +++ b/input/Rinput/download_functions/download_narr_p_levels_data.R @@ -39,6 +39,9 @@ download_narr_p_levels_data <- function( "/", sep = "") } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat("Data download acknowledgement is set to FALSE. Please acknowledge that diff --git a/input/Rinput/download_functions/download_nlcd_data.R b/input/Rinput/download_functions/download_nlcd_data.R index 17d795dd..34877ca7 100644 --- a/input/Rinput/download_functions/download_nlcd_data.R +++ b/input/Rinput/download_functions/download_nlcd_data.R @@ -46,6 +46,12 @@ download_nlcd_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R index da8cea55..07a10d4f 100644 --- a/input/Rinput/download_functions/download_noaa_hms_smoke_data.R +++ b/input/Rinput/download_functions/download_noaa_hms_smoke_data.R @@ -56,6 +56,12 @@ download_noaa_hms_smoke_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { stop(paste0( diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index b1bc9fdf..6e241d6e 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -50,6 +50,12 @@ download_sedac_groads_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data download acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE.", diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index 363bf022..e5c6614e 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -53,6 +53,12 @@ download_sedac_population_data <- function( if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { directory_to_save <- paste(directory_to_save, "/", sep = "") } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } #### 2. check for data acknowledgement if (data_download_acknowledgement == FALSE) { cat(paste0("Data download acknowledgement is set to FALSE.", From 0eca522f2216723182d1cc9797b53ac23630c358 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Wed, 29 Nov 2023 13:44:21 -0500 Subject: [PATCH 08/17] add function parameter --- .../download_geos_cf_data.R | 15 ++++++++++---- .../download_functions/download_gmted_data.R | 16 ++++++++++----- .../download_functions/download_merra2_data.R | 15 ++++++++++---- .../download_narr_monolevel_data.R | 20 ++++++++++++------- .../download_narr_p_levels_data.R | 20 ++++++++++++------- .../download_functions/download_nlcd_data.R | 16 ++++++++++----- .../download_sedac_groads_data.R | 14 +++++++++---- .../download_sedac_population_data.R | 14 +++++++++---- 8 files changed, 90 insertions(+), 40 deletions(-) diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index 14d1b3f7..2ea6420c 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -19,6 +19,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very #' large and use lots of machine storage and memory. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @importFrom stringr str_sub @@ -29,7 +32,8 @@ download_geos_cf_data <- function( date_end = "2023-09-01", collection = NULL, directory_to_save = "../../data/covariates/geos_cf/", - data_download_acknowledgement = FALSE + data_download_acknowledgement = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_save <- nchar(directory_to_save) @@ -122,7 +126,10 @@ download_geos_cf_data <- function( commands_txt, "\n") #### 11. download data - system(command = system_command) - #### 12. remove "..._wget_commands.txt" file - file.remove(commands_txt) + if (download == TRUE) { + system(command = system_command) + file.remove(commands_txt) + } else if (download == FALSE) { + return(cat(paste0("Data not data download.\n"))) + } } diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index e7d154d1..906d746f 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -25,6 +25,7 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip file from directory_to_download. #' Default = `FALSE`. +#' @param download logical(1). #' @author Mitchell Manware #' @return NULL; #' @export @@ -35,7 +36,8 @@ download_gmted_data <- function( directory_to_save = "../../data/covariates/gmted/", data_download_acknowledgement = FALSE, unzip = TRUE, - remove_zip = FALSE + remove_zip = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -117,10 +119,14 @@ download_gmted_data <- function( " --url ", download_url) #### 13. download data - cat(paste0("Downloading requested file...\n")) - system(command = system_command) - Sys.sleep(5L) - cat(paste0("Requested file downloaded.\n")) + if (download == TRUE) { + cat(paste0("Downloading requested file...\n")) + system(command = system_command) + Sys.sleep(5L) + cat(paste0("Requested file downloaded.\n")) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } #### 14. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) diff --git a/input/Rinput/download_functions/download_merra2_data.R b/input/Rinput/download_functions/download_merra2_data.R index f3274e85..1a543402 100644 --- a/input/Rinput/download_functions/download_merra2_data.R +++ b/input/Rinput/download_functions/download_merra2_data.R @@ -21,6 +21,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the #' user acknowledge that the data downloaded using this function may be very #' large and use lots of machine storage and memory. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -29,7 +32,8 @@ download_merra2_data <- function( date_end = "2023-09-01", collection = NULL, directory_to_save = "../../data/covariates/merra2/", - data_download_acknowledgement = FALSE + data_download_acknowledgement = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_save <- nchar(directory_to_save) @@ -213,7 +217,10 @@ download_merra2_data <- function( commands_txt, "\n") #### 11. download data - system(command = system_command) - #### 12. remove "..._wget_commands.txt" - file.remove(commands_txt) + if (download == TRUE) { + system(command = system_command) + file.remove(commands_txt) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } } diff --git a/input/Rinput/download_functions/download_narr_monolevel_data.R b/input/Rinput/download_functions/download_narr_monolevel_data.R index 88e68c0b..36ec4e91 100644 --- a/input/Rinput/download_functions/download_narr_monolevel_data.R +++ b/input/Rinput/download_functions/download_narr_monolevel_data.R @@ -20,6 +20,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the user #' acknowledge that the data downloaded using this function may be very large #' and use lots of machine storage and memory. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -28,7 +31,8 @@ download_narr_monolevel_data <- function( year_end = 2022, variables = NULL, directory_to_save = "../../data/covariates/narr/", - data_download_acknowledgement = FALSE + data_download_acknowledgement = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_save <- nchar(directory_to_save) @@ -91,15 +95,17 @@ download_narr_monolevel_data <- function( } #### 9. finish "..._curl_commands.txt" sink() - cat(paste0("Downloading requested files...\n")) #### 10. build system command system_command <- paste0(". ", commands_txt, "\n") #### 11. download data - system(command = system_command) - cat(paste0("Requested files have been downloaded.\n")) - #### 12. remove "..._curl_commands.txt" - Sys.sleep(5L) - file.remove(commands_txt) + if (download == TRUE) { + cat(paste0("Downloading requested files...\n")) + system(command = system_command) + cat(paste0("Requested files have been downloaded.\n")) + file.remove(commands_txt) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } } diff --git a/input/Rinput/download_functions/download_narr_p_levels_data.R b/input/Rinput/download_functions/download_narr_p_levels_data.R index dd7120f5..7396e76d 100644 --- a/input/Rinput/download_functions/download_narr_p_levels_data.R +++ b/input/Rinput/download_functions/download_narr_p_levels_data.R @@ -20,6 +20,9 @@ #' @param data_download_acknowledgement logical(1). By setting `= TRUE` the user #' acknowledge that the data downloaded using this function may be very large #' and use lots of machine storage and memory. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @importFrom stringr str_pad @@ -29,7 +32,8 @@ download_narr_p_levels_data <- function( year_end = 2022, variables = NULL, directory_to_save = "../../data/covariates/narr/", - data_download_acknowledgement = FALSE + data_download_acknowledgement = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_save <- nchar(directory_to_save) @@ -100,15 +104,17 @@ download_narr_p_levels_data <- function( } #### 10. finish "..._curl_commands.txt" sink() - cat(paste0("Downloading requested files...\n")) #### 11. build system command system_command <- paste0(". ", commands_txt, "\n") #### 12. download data - system(command = system_command) - cat(paste0("Requested files have been downloaded.\n")) - #### 13. remove "..._curl_commands.txt" - Sys.sleep(10L) - file.remove(commands_txt) + if (download == TRUE) { + cat(paste0("Downloading requested files...\n")) + system(command = system_command) + cat(paste0("Requested files have been downloaded.\n")) + file.remove(commands_txt) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } } diff --git a/input/Rinput/download_functions/download_nlcd_data.R b/input/Rinput/download_functions/download_nlcd_data.R index 1105d390..35d8aefd 100644 --- a/input/Rinput/download_functions/download_nlcd_data.R +++ b/input/Rinput/download_functions/download_nlcd_data.R @@ -23,6 +23,7 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. +#' @param download logical(1). #' @author Mitchell Manware #' @return NULL; #' @export @@ -33,7 +34,8 @@ download_nlcd_data <- function( directory_to_save = "../../data/covariates/nlcd/", data_download_acknowledgement = FALSE, unzip = TRUE, - remove_zip = FALSE + remove_zip = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -105,10 +107,14 @@ download_nlcd_data <- function( download_url, "\n") #### 9. download data - cat(paste0("Downloading requested file...\n")) - system(command = system_command) - Sys.sleep(5L) - cat(paste0("Requested file downloaded.\n")) + if (download == TRUE) { + cat(paste0("Downloading requested file...\n")) + system(command = system_command) + Sys.sleep(5L) + cat(paste0("Requested file downloaded.\n")) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } #### 10. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index 60716c3f..1c51b6c8 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -26,6 +26,7 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. +#' @param download logical(1). #' @author Mitchell Manware #' @return NULL; #' @export @@ -36,7 +37,8 @@ download_sedac_groads_data <- function( directory_to_save = "../../data/covariates/sedac_groads/", data_download_acknowledgement = FALSE, unzip = TRUE, - remove_zip = FALSE + remove_zip = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -112,9 +114,13 @@ download_sedac_groads_data <- function( download_url, "\n") #### 12. download data - cat(paste0("Downloading requested file...\n")) - system(command = system_command) - cat(paste0("Requested file downloaded.\n")) + if (download == TRUE) { + cat(paste0("Downloading requested file...\n")) + system(command = system_command) + cat(paste0("Requested file downloaded.\n")) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } #### 13. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index 32ace53b..7c42b817 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -28,6 +28,7 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. +#' @param download logical(1). #' @author Mitchell Manware #' @return NULL; #' @export @@ -39,7 +40,8 @@ download_sedac_population_data <- function( directory_to_save = "../../data/covariates/sedac_population/", data_download_acknowledgement = FALSE, unzip = TRUE, - remove_zip = FALSE + remove_zip = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -140,9 +142,13 @@ download_sedac_population_data <- function( download_url, "\n") #### 12. download data - cat(paste0("Downloading requested file...\n")) - system(command = download_command) - cat(paste0("Requested file downloaded.\n")) + if (download == TRUE) { + cat(paste0("Downloading requested file...\n")) + system(command = download_command) + cat(paste0("Requested file downloaded.\n")) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } #### 13. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) From 235ac0bc82c2f78c4fd273a2ac958fc8ae02c4c3 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Wed, 29 Nov 2023 15:19:43 -0500 Subject: [PATCH 09/17] apply sink() -> system_command sequence to all download functions --- .../download_functions/download_gmted_data.R | 38 +++++++++++++----- .../download_koppen_geiger_data.R | 40 ++++++++++++++----- .../download_functions/download_nlcd_data.R | 33 ++++++++++----- .../download_sedac_groads_data.R | 40 +++++++++++++------ .../download_sedac_population_data.R | 30 +++++++++++--- 5 files changed, 134 insertions(+), 47 deletions(-) diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 906d746f..361e0224 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -25,7 +25,9 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip file from directory_to_download. #' Default = `FALSE`. -#' @param download logical(1). +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -113,12 +115,28 @@ download_gmted_data <- function( statistic_code, resolution_code, "_grd.zip") - #### 12. build system command - system_command <- paste0("curl -s -o ", - download_name, - " --url ", - download_url) - #### 13. download data + #### 12. build download command + download_command <- paste0("curl -s -o ", + download_name, + " --url ", + download_url) + #### 13. initiate "..._curl_commands.txt" + commands_txt <- paste0(directory_to_download, + "gmted_", + statistic_code, + "_", + resolution_code, + "_curl_command.txt") + sink(commands_txt) + #### 14. concatenate and print download command to "..._curl_commands.txt" + cat(download_command) + #### 15. finish "..._curl_commands.txt" file + sink() + #### 16. build system command + system_command <- paste0(". ", + commands_txt, + "\n") + #### 17 download data if (download == TRUE) { cat(paste0("Downloading requested file...\n")) system(command = system_command) @@ -127,18 +145,18 @@ download_gmted_data <- function( } else if (download == FALSE) { return(cat(paste0("Skipping data download.\n"))) } - #### 14. end if unzip == FALSE + #### 18. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) } - #### 15. unzip downloaded data + #### 19 unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 16. remove zip files + #### 20. remove zip files if (remove_zip == TRUE) { cat(paste0("Removing download files...\n")) file.remove(download_name) diff --git a/input/Rinput/download_functions/download_koppen_geiger_data.R b/input/Rinput/download_functions/download_koppen_geiger_data.R index 68473e6b..454198ef 100644 --- a/input/Rinput/download_functions/download_koppen_geiger_data.R +++ b/input/Rinput/download_functions/download_koppen_geiger_data.R @@ -28,6 +28,9 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -38,7 +41,8 @@ download_koppen_geiger_data <- function( directory_to_save = "../../data/covariates/koppen_geiger/", data_download_acknowledgement = FALSE, unzip = TRUE, - remove_zip = FALSE + remove_zip = FALSE, + download = FALSE ) { #### 1. directory setup chars_dir_download <- nchar(directory_to_download) @@ -103,23 +107,39 @@ download_koppen_geiger_data <- function( " -O ", download_name, "\n") - #### 11. download data - cat(paste0("Downloading requested file...\n")) - system(command = download_command) - Sys.sleep(2L) - cat(paste0("Requested file downloaded.\n")) - #### 12. unzip downloaded data + #### 11. initiate "..._wget_commands.txt" + commands_txt <- paste0(directory_to_download, + "koppen_geiger_wget_command.txt") + sink(commands_txt) + #### 12. concatenate and print download command to "..._wget_commands.txt" + cat(download_command) + #### 13. finish "..._wget_commands.txt" file + sink() + #### 14. build system command + system_command <- paste0(". ", + commands_txt, + "\n") + #### 15. download data + if (download == TRUE) { + cat(paste0("Downloading requested file...\n")) + system(command = download_command) + Sys.sleep(2L) + cat(paste0("Requested file downloaded.\n")) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } + #### 16. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 13. end if unzip == FALSE + #### 17. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downlaoded files will not be unzipped.\n"))) } - #### 14. remove unwanted files + #### 18. remove unwanted files unwanted_names <- list.files(path = directory_to_save, pattern = "Beck_KG", full.names = TRUE) @@ -136,7 +156,7 @@ download_koppen_geiger_data <- function( unwanted_names, invert = TRUE)] file.remove(unwanted_names) - #### 15. remove zip files + #### 19. remove zip files if (remove_zip == TRUE) { cat(paste0("Removing download files...\n")) file.remove(download_name) diff --git a/input/Rinput/download_functions/download_nlcd_data.R b/input/Rinput/download_functions/download_nlcd_data.R index 35d8aefd..fa468747 100644 --- a/input/Rinput/download_functions/download_nlcd_data.R +++ b/input/Rinput/download_functions/download_nlcd_data.R @@ -23,7 +23,9 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. -#' @param download logical(1). +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -101,12 +103,25 @@ download_nlcd_data <- function( release_date, ".zip") #### 8. build system command - system_command <- paste0("curl -o ", - download_name, - " --url ", - download_url, + download_command <- paste0("curl -o ", + download_name, + " --url ", + download_url, + "\n") + #### 9. initiate "..._curl_command.txt" + commands_txt <- paste0(directory_to_download, + collection_code, + "curl_command.txt") + sink(commands_txt) + #### 10. concatenate and print download command to "..._curl_commands.txt" + cat(download_command) + #### 11. finish "..._curl_command.txt" + sink() + #### 12. build system command + system_command <- paste0(". ", + commands_txt, "\n") - #### 9. download data + #### 13. download data if (download == TRUE) { cat(paste0("Downloading requested file...\n")) system(command = system_command) @@ -115,18 +130,18 @@ download_nlcd_data <- function( } else if (download == FALSE) { return(cat(paste0("Skipping data download.\n"))) } - #### 10. end if unzip == FALSE + #### 14. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) } - #### 11. unzip downloaded data + #### 15. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 12. remove zip files + #### 16. remove zip files if (remove_zip == TRUE) { cat(paste0("Removing download files...\n")) file.remove(download_name) diff --git a/input/Rinput/download_functions/download_sedac_groads_data.R b/input/Rinput/download_functions/download_sedac_groads_data.R index 1c51b6c8..943ac3ea 100644 --- a/input/Rinput/download_functions/download_sedac_groads_data.R +++ b/input/Rinput/download_functions/download_sedac_groads_data.R @@ -26,7 +26,9 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. -#' @param download logical(1). +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -95,25 +97,39 @@ download_sedac_groads_data <- function( region <- tolower(data_region) #### 9. build download URL download_url <- paste0(base, - region, + gsub(" ", "-", region), "-", format, ".zip") #### 10. build download file name download_name <- paste0(directory_to_download, "groads_v1_", - region, + gsub(" ", "-", region), "_", format, ".zip") #### 11. build system command - system_command <- paste0("curl -n -c ~/.urs_cookies -b ~/.urs_cookies -LJ", - " -o ", - download_name, - " --url ", - download_url, + download_command <- paste0("curl -n -c ~/.urs_cookies -b ~/.urs_cookies -LJ", + " -o ", + download_name, + " --url ", + download_url, + "\n") + #### 12. initiate "..._curl_commands.txt" + commands_txt <- paste0(directory_to_download, + "sedac_groads_", + gsub(" ", "_", region), + "_curl_command.txt") + sink(commands_txt) + #### 13. concatenate and print download command to "..._curl_commands.txt" + cat(download_command) + #### 14. finish "..._curl_commands.txt" file + sink() + #### 15. build system command + system_command <- paste0(". ", + commands_txt, "\n") - #### 12. download data + #### 16. download data if (download == TRUE) { cat(paste0("Downloading requested file...\n")) system(command = system_command) @@ -121,17 +137,17 @@ download_sedac_groads_data <- function( } else if (download == FALSE) { return(cat(paste0("Skipping data download.\n"))) } - #### 13. end if unzip == FALSE + #### 17. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) } - #### 14. unzip downloaded data + #### 18. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 14. remove zip file + #### 19. remove zip file if (remove_zip == TRUE) { cat(paste0("Removing downloaded zip file...\n")) file.remove(download_name) diff --git a/input/Rinput/download_functions/download_sedac_population_data.R b/input/Rinput/download_functions/download_sedac_population_data.R index 7c42b817..8591d128 100644 --- a/input/Rinput/download_functions/download_sedac_population_data.R +++ b/input/Rinput/download_functions/download_sedac_population_data.R @@ -28,7 +28,9 @@ #' @param unzip logical(1). Unzip zip files. Default = `TRUE`. #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default = `FALSE`. -#' @param download logical(1). +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. #' @author Mitchell Manware #' @return NULL; #' @export @@ -141,25 +143,41 @@ download_sedac_population_data <- function( " --url ", download_url, "\n") - #### 12. download data + #### 12. initiate "..._curl_command.txt" + commands_txt <- paste0(directory_to_download, + "sedac_population_", + year, + "_", + resolution, + "_curl_commands.txt") + sink(commands_txt) + #### 13. concatenate and print download command to "..._curl_commands.txt" + cat(download_command) + #### 14. finish "..._curl_commands.txt" file + sink() + #### 15. build system command + system_command <- paste0(". ", + commands_txt, + "\n") + #### 16. download data if (download == TRUE) { cat(paste0("Downloading requested file...\n")) - system(command = download_command) + system(command = system_command) cat(paste0("Requested file downloaded.\n")) } else if (download == FALSE) { return(cat(paste0("Skipping data download.\n"))) } - #### 13. end if unzip == FALSE + #### 17. end if unzip == FALSE if (unzip == FALSE) { return(cat(paste0("Downloaded files will not be unzipped.\n"))) } - #### 14. unzip downloaded data + #### 18. unzip downloaded data cat(paste0("Unzipping files...\n")) unzip(download_name, exdir = directory_to_save) cat(paste0("Files unzipped and saved in ", directory_to_save, ".\n")) - #### 14. remove zip file from download directory + #### 19. remove zip file from download directory if (remove_zip == TRUE) { cat(paste0("Deleting downloaded zip files...\n")) file.remove(download_name) From ef8ec40a71b77fa471dbb18ca6babdfa547ad006 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 08:50:10 -0500 Subject: [PATCH 10/17] create check_url_file_exist.R --- R/check_url_file_exist.R | 16 ++++++++ tests/testthat/test-download_functions.R | 49 ++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 R/check_url_file_exist.R create mode 100644 tests/testthat/test-download_functions.R diff --git a/R/check_url_file_exist.R b/R/check_url_file_exist.R new file mode 100644 index 00000000..272a06f7 --- /dev/null +++ b/R/check_url_file_exist.R @@ -0,0 +1,16 @@ +# Function to test status of download function URLs +# Date created: 2023-11-30 +# Insang Song (based on comment #164 Location of download functions) +# Mitchell Manware + +#' Check if sample of download URLs exist +#' +#' @param url Download URL to be checked. +#' @author Insang Song; Mitchell Manware +#' @export +check_url_file_exist <- function(url){ + http_status_ok <- 200 + hd <- httr::HEAD(url) + status <- hd$all_headers[[1]]$status + return(status == http_status_ok) +} \ No newline at end of file diff --git a/tests/testthat/test-download_functions.R b/tests/testthat/test-download_functions.R new file mode 100644 index 00000000..ce8a97ae --- /dev/null +++ b/tests/testthat/test-download_functions.R @@ -0,0 +1,49 @@ +#' @author Mitchell Manware +#' @description Unit test for for checking data download functions. +#' +testthat::test_that("GEOS-CF download URLs exist.", { + withr::local_package("httr") + # function parameters + date_start <- "2018-01-01" + date_end <- "2022-12-31" + collections <- c("chm_tavg_1hr_g1440x721_v1", + "aqc_tavg_1hr_g1440x721_v1") + directory_to_save <- "../../input/data/covariates/geos_cf/" + for (c in seq_along(collections)) { + # run download function + download_geos_cf_data(date_start = date_start, + date_end = date_end, + collection = collections[c], + directory_to_save = directory_to_save, + data_download_acknowledgement = TRUE, + download = FALSE) + # TEST that directory_to_save exists + testthat::expect_true(dir.exists(directory_to_save)) + # path with commands + commands_path <- paste0(directory_to_save, + collections[c], + "_wget_commands.txt") + # TEST that path with commands exists + testthat::expect_true(file.exists(commands_path)) + # import wget commands + wget_commands <- read.csv(commands_path, + header = FALSE) + # convert to character + wget_commands <- wget_commands[1:nrow(wget_commands),] + # extract URLs from `wget_commands` + url_list <- NULL + for (w in seq_along(wget_commands)) { + command <- wget_commands[w] + url <- stringr::str_split_i(command, " ", 2) + url_list <- c(url_list, url) + } + # sample URLs + url_sample <- sample(url_list, 30L, replace = FALSE) + # apply urlFileExist to sample of urls + url_status <- sapply(url_sample, urlFileExist) + # TEST that URLs are character + testthat::expect_true(is.character(url_list)) + # TEST that URLs exist + testthat::expect_true(all(url_status)) + } +}) From ac3c19e52f77a1e5ce787310b01a7af27ce5dbc7 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 09:05:10 -0500 Subject: [PATCH 11/17] sink to testdata/ folder --- tests/testthat/test-download_functions.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-download_functions.R b/tests/testthat/test-download_functions.R index ce8a97ae..95be1e7f 100644 --- a/tests/testthat/test-download_functions.R +++ b/tests/testthat/test-download_functions.R @@ -1,6 +1,10 @@ #' @author Mitchell Manware #' @description Unit test for for checking data download functions. -#' +#' + +getwd() +setwd("/Volumes/manwareme/NRT-AP-Model/tests/testthat/") + testthat::test_that("GEOS-CF download URLs exist.", { withr::local_package("httr") # function parameters @@ -8,7 +12,7 @@ testthat::test_that("GEOS-CF download URLs exist.", { date_end <- "2022-12-31" collections <- c("chm_tavg_1hr_g1440x721_v1", "aqc_tavg_1hr_g1440x721_v1") - directory_to_save <- "../../input/data/covariates/geos_cf/" + directory_to_save <- "../testdata/" for (c in seq_along(collections)) { # run download function download_geos_cf_data(date_start = date_start, @@ -29,7 +33,7 @@ testthat::test_that("GEOS-CF download URLs exist.", { wget_commands <- read.csv(commands_path, header = FALSE) # convert to character - wget_commands <- wget_commands[1:nrow(wget_commands),] + wget_commands <- wget_commands[seq_len(nrow(wget_commands)), ] # extract URLs from `wget_commands` url_list <- NULL for (w in seq_along(wget_commands)) { @@ -39,8 +43,8 @@ testthat::test_that("GEOS-CF download URLs exist.", { } # sample URLs url_sample <- sample(url_list, 30L, replace = FALSE) - # apply urlFileExist to sample of urls - url_status <- sapply(url_sample, urlFileExist) + # apply check_url_file_exist to sample of urls + url_status <- sapply(url_sample, check_url_file_exist) # TEST that URLs are character testthat::expect_true(is.character(url_list)) # TEST that URLs exist From a7d8bd1da362cac2c638f884cd789d44970defba Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 09:25:34 -0500 Subject: [PATCH 12/17] lint check_url_file_exist.R --- R/check_url_file_exist.R | 4 ++-- input/Rinput/download_functions/download_geos_cf_data.R | 2 +- tests/testthat/test-download_functions.R | 4 ---- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/R/check_url_file_exist.R b/R/check_url_file_exist.R index 272a06f7..51a3d641 100644 --- a/R/check_url_file_exist.R +++ b/R/check_url_file_exist.R @@ -8,9 +8,9 @@ #' @param url Download URL to be checked. #' @author Insang Song; Mitchell Manware #' @export -check_url_file_exist <- function(url){ +check_url_file_exist <- function(url) { http_status_ok <- 200 hd <- httr::HEAD(url) status <- hd$all_headers[[1]]$status return(status == http_status_ok) -} \ No newline at end of file +} diff --git a/input/Rinput/download_functions/download_geos_cf_data.R b/input/Rinput/download_functions/download_geos_cf_data.R index 2ea6420c..fab4212d 100644 --- a/input/Rinput/download_functions/download_geos_cf_data.R +++ b/input/Rinput/download_functions/download_geos_cf_data.R @@ -130,6 +130,6 @@ download_geos_cf_data <- function( system(command = system_command) file.remove(commands_txt) } else if (download == FALSE) { - return(cat(paste0("Data not data download.\n"))) + return(cat(paste0("Skipping data download.\n"))) } } diff --git a/tests/testthat/test-download_functions.R b/tests/testthat/test-download_functions.R index 95be1e7f..3f680bf9 100644 --- a/tests/testthat/test-download_functions.R +++ b/tests/testthat/test-download_functions.R @@ -1,10 +1,6 @@ #' @author Mitchell Manware #' @description Unit test for for checking data download functions. #' - -getwd() -setwd("/Volumes/manwareme/NRT-AP-Model/tests/testthat/") - testthat::test_that("GEOS-CF download URLs exist.", { withr::local_package("httr") # function parameters From 3680244d4e59eda9e310909a5cae1e2951f8b30f Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 09:28:43 -0500 Subject: [PATCH 13/17] add file.remove(..._wget_commands.txt) --- tests/testthat/test-download_functions.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test-download_functions.R b/tests/testthat/test-download_functions.R index 3f680bf9..f001043d 100644 --- a/tests/testthat/test-download_functions.R +++ b/tests/testthat/test-download_functions.R @@ -45,5 +45,7 @@ testthat::test_that("GEOS-CF download URLs exist.", { testthat::expect_true(is.character(url_list)) # TEST that URLs exist testthat::expect_true(all(url_status)) + # remove path with commands after test + file.remove(commands_path) } }) From cc60ca7bdb028e0d2c5e952404a2030187e72696 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 09:49:52 -0500 Subject: [PATCH 14/17] _curl_command.txt naming for GMTED2010 data --- input/Rinput/download_functions/download_gmted_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 361e0224..639c32f5 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -123,9 +123,9 @@ download_gmted_data <- function( #### 13. initiate "..._curl_commands.txt" commands_txt <- paste0(directory_to_download, "gmted_", - statistic_code, + gsub(" ", "", statistic), "_", - resolution_code, + gsub(" ", "", resolution), "_curl_command.txt") sink(commands_txt) #### 14. concatenate and print download command to "..._curl_commands.txt" From ab26bf729a5a4e392b48c4dbcfca6c2d92205646 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 10:02:24 -0500 Subject: [PATCH 15/17] \n line break in download command --- input/Rinput/download_functions/download_gmted_data.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/input/Rinput/download_functions/download_gmted_data.R b/input/Rinput/download_functions/download_gmted_data.R index 639c32f5..e2a399d6 100644 --- a/input/Rinput/download_functions/download_gmted_data.R +++ b/input/Rinput/download_functions/download_gmted_data.R @@ -119,7 +119,8 @@ download_gmted_data <- function( download_command <- paste0("curl -s -o ", download_name, " --url ", - download_url) + download_url, + "\n") #### 13. initiate "..._curl_commands.txt" commands_txt <- paste0(directory_to_download, "gmted_", From f9e219e8549af9a871cbd1aade7c1d08c3fcd11f Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 11:59:43 -0500 Subject: [PATCH 16/17] add test for download_gmted_data() --- tests/testthat/test-download_functions.R | 59 ++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-download_functions.R b/tests/testthat/test-download_functions.R index f001043d..a229dbbc 100644 --- a/tests/testthat/test-download_functions.R +++ b/tests/testthat/test-download_functions.R @@ -18,13 +18,13 @@ testthat::test_that("GEOS-CF download URLs exist.", { data_download_acknowledgement = TRUE, download = FALSE) # TEST that directory_to_save exists - testthat::expect_true(dir.exists(directory_to_save)) + expect_true(dir.exists(directory_to_save)) # path with commands commands_path <- paste0(directory_to_save, collections[c], "_wget_commands.txt") # TEST that path with commands exists - testthat::expect_true(file.exists(commands_path)) + expect_true(file.exists(commands_path)) # import wget commands wget_commands <- read.csv(commands_path, header = FALSE) @@ -42,9 +42,60 @@ testthat::test_that("GEOS-CF download URLs exist.", { # apply check_url_file_exist to sample of urls url_status <- sapply(url_sample, check_url_file_exist) # TEST that URLs are character - testthat::expect_true(is.character(url_list)) + expect_true(is.character(url_list)) # TEST that URLs exist - testthat::expect_true(all(url_status)) + expect_true(all(url_status)) + # remove path with commands after test + file.remove(commands_path) + } +}) + +testthat::test_that("GMTED download URLs exist.", { + withr::local_package("httr") + # function parameters + statistics <- c("Breakline Emphasis", "Systematic Subsample", + "Median Statistic", "Minimum Statistic", + "Mean Statistic", "Maximum Statistic", + "Standard Deviation Statistic") + resolution <- "7.5 arc-seconds" + directory_to_download <- "../testdata/" + directory_to_save <- "../testdata/" + for (s in seq_along(statistics)) { + # run download function + download_gmted_data(statistic = statistics[s], + resolution = resolution, + directory_to_download = directory_to_download, + directory_to_save = directory_to_save, + data_download_acknowledgement = TRUE, + unzip = FALSE, + remove_zip = FALSE, + download = FALSE) + # TEST that directory_to_download exists + expect_true(dir.exists(directory_to_download)) + # TEST that directory_to_save exists + expect_true(dir.exists(directory_to_save)) + # path with commands + commands_path <- paste0(directory_to_download, + "gmted_", + gsub(" ", "", statistics[s]), + "_", + gsub(" ", "", resolution), + "_curl_command.txt") + # TEST that that path with command exists + expect_true(file.exists(commands_path)) + # import curl command + curl_command <- read.csv(commands_path, + header = FALSE) + # convert to characer + curl_command <- curl_command[seq_len(nrow(curl_command)), ] + # extract URL from `curl_command` + url <- stringr::str_split_i(curl_command, " ", 6) + # apply check_url_file_exist to URL + url_status <- check_url_file_exist(url) + # TEST that URLs are character + expect_true(is.character(url)) + # TEST that URLs exist + expect_true(all(url_status)) # remove path with commands after test file.remove(commands_path) } From b2be2655a2958c8056186c127f35bb82e74fffae Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Thu, 30 Nov 2023 12:08:25 -0500 Subject: [PATCH 17/17] explore download functions in R/ folder --- R/download_geos_cf_data.R | 135 +++++++++++++++++++++++++++++++ R/download_gmted_data.R | 166 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 R/download_geos_cf_data.R create mode 100644 R/download_gmted_data.R diff --git a/R/download_geos_cf_data.R b/R/download_geos_cf_data.R new file mode 100644 index 00000000..fab4212d --- /dev/null +++ b/R/download_geos_cf_data.R @@ -0,0 +1,135 @@ +################################################################################ +# Date created: 2023-10-16 +# Packages required: stringr +################################################################################ + +################################################################################ +#' download_geos_cf_data: download atmospheric composition data from the NASA +#' Global Earth Observing System (GEOS) model. +#' @description +#' The `download_goes_cf_data()` function accesses and downloads various +#' atmospheric composition collections from the [NASA Global Earth Observing] +#' [System (GEOS) model](https://gmao.gsfc.nasa.gov/GEOS_systems/). +#' @param date_start character(1). length of 10. Start date for downloading +#' data. Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). +#' @param date_end character(1). length of 10. End date for downloading data. +#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). +#' @param collection character(1). GEOS-CF data collection file name. +#' @param directory_to_save character(1). Directory to save data. +#' @param data_download_acknowledgement logical(1). By setting `= TRUE` the +#' user acknowledge that the data downloaded using this function may be very +#' large and use lots of machine storage and memory. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. +#' @author Mitchell Manware +#' @return NULL; +#' @importFrom stringr str_sub +#' @importFrom stringr str_pad +#' @export +download_geos_cf_data <- function( + date_start = "2023-09-01", + date_end = "2023-09-01", + collection = NULL, + directory_to_save = "../../data/covariates/geos_cf/", + data_download_acknowledgement = FALSE, + download = FALSE +) { + #### 1. directory setup + chars_dir_save <- nchar(directory_to_save) + if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { + directory_to_save <- paste0(directory_to_save, "/", sep = "") + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } + #### 2. check for data download acknowledgement + if (data_download_acknowledgement == FALSE) { + stop(paste0("Data download acknowledgement is set to FALSE. ", + "Please acknowledge that the data downloaded using this ", + "function may be very large and use lots of machine storage ", + "and memory.\n")) + } + #### 2. check for collection + if (is.null(collection) == TRUE) { + stop(paste0("Please select a GEOS-CF collection.\n")) + } + #### 3. check if collection is valid + collections <- c("htf_inst_15mn_g1440x721_x1", "aqc_tavg_1hr_g1440x721_v1", + "chm_tavg_1hr_g1440x721_v1", "met_tavg_1hr_g1440x721_x1", + "xgc_tavg_1hr_g1440x721_x1", "chm_inst_1hr_g1440x721_p23", + "met_inst_1hr_g1440x721_p23") + if (!(collection %in% collections)) { + stop(paste0("Requested collection is not recognized.\n")) + } + #### 4. define date sequence + date_start_date_format <- as.Date(date_start, format = "%Y-%m-%d") + date_end_date_format <- as.Date(date_end, format = "%Y-%m-%d") + date_sequence <- seq(date_start_date_format, date_end_date_format, "day") + date_sequence <- gsub("-", "", as.character(date_sequence)) + #### 5. define time sequence + if (stringr::str_sub(collection, -1, -1) == "1") { + time_sequence <- as.character(seq(from = 30, to = 2330, by = 100)) + time_sequence <- stringr::str_pad(time_sequence, + pad = "0", + width = 4, + side = "left") + } else if (stringr::str_sub(collection, -1, -1) == "3") { + time_sequence <- as.character(seq(from = 100, to = 2400, by = 100)) + time_sequence <- stringr::str_pad(time_sequence, + pad = "0", + width = 4, + side = "left") + } + #### 6. define URL base + base <- "https://portal.nccs.nasa.gov/datashare/gmao/geos-cf/v1/ana/" + #### 7. initiate "..._wget_commands.txt" file + commands_txt <- paste0(directory_to_save, + collection, + "_wget_commands.txt") + sink(commands_txt) + #### 8. concatenate and print download commands to "..._wget_commands.txt" + for (d in seq_along(date_sequence)){ + date <- date_sequence[d] + year <- stringr::str_sub(date, 1, 4) + month <- stringr::str_sub(date, 5, 6) + day <- stringr::str_sub(date, 7, 8) + for (t in seq_along(time_sequence)){ + download_url <- paste0(base, + "Y", + year, + "/M", + month, + "/D", + day, + "/GEOS-CF.v01.rpl.", + collection, + ".", + date, + "_", + time_sequence[t], + "z.nc4") + download_folder <- paste0(directory_to_save, + collection) + download_command <- paste0("wget ", + download_url, + " -P ", + download_folder, + "\n") + cat(download_command) + } + } + #### 9. finish "..._wget_commands.txt" file + sink() + #### 10. build system command + system_command <- paste0(". ", + commands_txt, + "\n") + #### 11. download data + if (download == TRUE) { + system(command = system_command) + file.remove(commands_txt) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } +} diff --git a/R/download_gmted_data.R b/R/download_gmted_data.R new file mode 100644 index 00000000..e2a399d6 --- /dev/null +++ b/R/download_gmted_data.R @@ -0,0 +1,166 @@ +################################################################################ +# Date created: 2023-10-24 +# Packages required: none +################################################################################ + +################################################################################ +#' download_gmted_data: download global elevation data from the Global Multi- +#' resolution Terrain Elevation Data (GMTED2010). +#' @description +#' The `download_gmted_data()` function acesses and downloads Global +#' Multi-resolution Terrain Elevation Data (GMTED2010) from +#' [U.S. Geological Survey and National Geospatial-Intelligence Agency] +#' (https://www.usgs.gov/coastal-changes-and-impacts/gmted2010). +#' @param statistic character(1). Available statistics include "Breakline +#' Emphasis", "Systematic Subsample", "Median Statistic", "Minimum Statistic", +#' "Mean Statistic", "Maximum Statistic", and "Standard Deviation Statistic". +#' @param resolution character(1). Available resolutions include "7.5 arc- +#' seconds", "15 arc-seconds", and "30 arc-seconds". +#' @param directory_to_download character(1). Directory to download zip files +#' from Global Multi-resolution Terrain Elevation Data (GMTED2010). +#' @param directory_to_save character(1). Directory to decompress zip files. +#' @param data_download_acknowledgement logical(1). By setting `= TRUE` the +#' user acknowledge that the data downloaded using this function may be very +#' large and use lots of machine storage and memory. +#' @param unzip logical(1). Unzip zip files. Default = `TRUE`. +#' @param remove_zip logical(1). Remove zip file from directory_to_download. +#' Default = `FALSE`. +#' @param download logical(1). `= FALSE` will generate a `.txt` file containing +#' all download commands. By setting `= TRUE` the function will download all of +#' the requested data files. +#' @author Mitchell Manware +#' @return NULL; +#' @export +download_gmted_data <- function( + statistic = NULL, + resolution = NULL, + directory_to_download = "../../data/covariates/gmted/", + directory_to_save = "../../data/covariates/gmted/", + data_download_acknowledgement = FALSE, + unzip = TRUE, + remove_zip = FALSE, + download = FALSE +) { + #### 1. directory setup + chars_dir_download <- nchar(directory_to_download) + chars_dir_save <- nchar(directory_to_save) + if (substr(directory_to_download, + chars_dir_download, + chars_dir_download) != "/") { + directory_to_download <- paste(directory_to_download, + "/", + sep = "") + } + if (substr(directory_to_save, chars_dir_save, chars_dir_save) != "/") { + directory_to_save <- paste(directory_to_save, "/", sep = "") + } + if (dir.exists(directory_to_download) == FALSE) { + dir.create(directory_to_download) + } + if (dir.exists(directory_to_save) == FALSE) { + dir.create(directory_to_save) + } + #### 2. check for data download acknowledgement + if (data_download_acknowledgement == FALSE) { + stop(paste0("Data download acknowledgement is set to FALSE.", + "Please acknowledge that the data downloaded using this", + "function may be very large and use lots of machine storage", + "and memory.")) + } + #### 3. check for statistic + if (is.null(statistic) == TRUE) { + stop(paste0("Please select a GMTED2010 statistic.\n")) + } + #### 4. check for valid statistic + valid_statistics <- c("Breakline Emphasis", "Systematic Subsample", + "Median Statistic", "Minimum Statistic", + "Mean Statistic", "Maximum Statistic", + "Standard Deviation Statistic") + if (!(statistic %in% valid_statistics)) { + stop(paste0("Requested statistic is not recognized.\n")) + } + #### 5. check for resolution + if (is.null(resolution) == TRUE) { + stop(paste0("Please select a data resolution.\n")) + } + #### 6. check for valid resolution + valid_resolutions <- c("7.5 arc-seconds", "15 arc-seconds", "30 arc-seconds") + if (!(resolution %in% valid_resolutions)) { + stop(paste0("Requested resolution is not recognized.\n")) + } + #### 7. define URL base + base <- paste0("https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/topo", + "/downloads/GMTED/Grid_ZipFiles/") + #### 8. define URL statistic code + statistics <- c("Breakline Emphasis", "Systematic Subsample", + "Median Statistic", "Minimum Statistic", + "Mean Statistic", "Maximum Statistic", + "Standard Deviation Statistic") + statistic_codes <- c("be", "ds", "md", "mi", "mn", "mx", "sd") + statistic_codes <- cbind(statistics, statistic_codes) + statistic_code <- subset(statistic_codes, statistics == statistic)[2] + #### 9. define URL resolution code + resolutions <- c("7.5 arc-seconds", "15 arc-seconds", "30 arc-seconds") + resolution_codes <- c("75", "15", "30") + resolution_codes <- cbind(resolutions, resolution_codes) + resolution_code <- subset(resolution_codes, resolutions == resolution)[2] + #### 10. build url + download_url <- paste0(base, + statistic_code, + resolution_code, + "_grd.zip") + #### 11. build download file name + download_name <- paste0(directory_to_download, + "gmted2010_", + statistic_code, + resolution_code, + "_grd.zip") + #### 12. build download command + download_command <- paste0("curl -s -o ", + download_name, + " --url ", + download_url, + "\n") + #### 13. initiate "..._curl_commands.txt" + commands_txt <- paste0(directory_to_download, + "gmted_", + gsub(" ", "", statistic), + "_", + gsub(" ", "", resolution), + "_curl_command.txt") + sink(commands_txt) + #### 14. concatenate and print download command to "..._curl_commands.txt" + cat(download_command) + #### 15. finish "..._curl_commands.txt" file + sink() + #### 16. build system command + system_command <- paste0(". ", + commands_txt, + "\n") + #### 17 download data + if (download == TRUE) { + cat(paste0("Downloading requested file...\n")) + system(command = system_command) + Sys.sleep(5L) + cat(paste0("Requested file downloaded.\n")) + } else if (download == FALSE) { + return(cat(paste0("Skipping data download.\n"))) + } + #### 18. end if unzip == FALSE + if (unzip == FALSE) { + return(cat(paste0("Downloaded files will not be unzipped.\n"))) + } + #### 19 unzip downloaded data + cat(paste0("Unzipping files...\n")) + unzip(download_name, + exdir = directory_to_save) + cat(paste0("Files unzipped and saved in ", + directory_to_save, + ".\n")) + #### 20. remove zip files + if (remove_zip == TRUE) { + cat(paste0("Removing download files...\n")) + file.remove(download_name) + cat(paste0("Download files removed.\n")) + } +}