From df33216adb4d921a69cb03a659a243045e26e476 Mon Sep 17 00:00:00 2001 From: mitchellmanware Date: Wed, 17 Jan 2024 14:06:43 -0500 Subject: [PATCH] calc_* and process_hms functions; .gitignore --- .gitignore | 13 +- input/Rinput/processing_functions/calc_geos.R | 177 ++++++++++++++++ input/Rinput/processing_functions/calc_hms.R | 105 ++++++++++ input/Rinput/processing_functions/calc_narr.R | 193 ++++++++++++++++++ .../Rinput/processing_functions/process_hms.R | 134 ++++++++++++ 5 files changed, 621 insertions(+), 1 deletion(-) create mode 100644 input/Rinput/processing_functions/calc_geos.R create mode 100644 input/Rinput/processing_functions/calc_hms.R create mode 100644 input/Rinput/processing_functions/calc_narr.R create mode 100644 input/Rinput/processing_functions/process_hms.R diff --git a/.gitignore b/.gitignore index 618d387f..a95823d8 100644 --- a/.gitignore +++ b/.gitignore @@ -83,5 +83,16 @@ tools/metalearner_test .dodsrc # raw data -input/ +input/aqs +input/ecoregions +input/geos +input/gmted +input/HMS_Smoke +input/koppen_geiger +input/merra2 +input/modis +input/narr +input/nlcd +input/tri +input/NCEP-NCAR-Reanalysis-1 diff --git a/input/Rinput/processing_functions/calc_geos.R b/input/Rinput/processing_functions/calc_geos.R new file mode 100644 index 00000000..7c75e3ca --- /dev/null +++ b/input/Rinput/processing_functions/calc_geos.R @@ -0,0 +1,177 @@ +################################################################################ +# Date created: 2023-11-27 +# Packages required: stringr +################################################################################ + +################################################################################ +#' calc_geos: Calculate atmospheric chemistry covariates from GEOS-CF. +#' @param date_start character(1). length of 10. Format YYYY-MM-DD +#' (ex. September 1, 2023 = "2023-09-01"). +#' @param date_end character(1). length of 10. Format YYYY-MM-DD +#' (ex. September 1, 2023 = "2023-09-01"). +#' @param collection character(1). GEOS-CF data collection file name. +#' @param variable character(1). +#' @param sites +#' @param identifiier character(1). Column within `locations` CSV file +#' containing identifier for each unique coordinate location. +#' @param crs integer(1). Coordinate reference system. +#' @param fun character(1). Function used to summarize hourly or three hourly +#' observations into daily statistic. (Default = `mean`). +#' @param directory_with_data character(1). Directory with downloaded GEOS-CF +#' netCDF files. +#' @param directory_to_save character(1). Directory to save processed data. +#' @author Mitchell Manware +#' @return a data.frame object; +#' @importFrom stringr str_sub +#' @importFrom stringr str_pad +#' @importFrom terra vect +#' @importFrom terra rast +#' @importFrom terra as.data.frame +#' @importFrom terra project +#' @importFrom terra extract +#' @export +calc_geos <- function( + date_start = "2023-09-01", + date_end = "2023-09-01", + collection = NULL, + variable = NULL, + sites, + identifier = NULL, + crs = 4326, + fun = "mean", + directory_with_data = "../../data/covariates/geos_cf/") { + #### 1. directory setup + directory_with_data <- download_sanitize_path(directory_with_data) + #### 2. check for variable + check_for_null_parameters(mget(ls())) + #### 6. check if collection is valid + collections <- c( + "htf_inst_15mn_g1440x721_x1", "aqc_tavg_1hr_g1440x721_v1", + "chm_tavg_1hr_g1440x721_v1", "met_tavg_1hr_g1440x721_x1", + "xgc_tavg_1hr_g1440x721_x1", "chm_inst_1hr_g1440x721_p23", + "met_inst_1hr_g1440x721_p23" + ) + if (!(collection %in% collections)) { + stop(paste0("Requested collection is not recognized.\n")) + } + #### 8. assign variable code + codes <- cbind( + unlist(strsplit(paste0( + "ACET ALD2 ALK4 BCPI BCPO BENZ C2H6 C3H8 CH4 CO ", + "DST1 DST2 DST3 DST4 EOH H2O2 HCHO HNO3 HNO4 ISOP ", + "MACR MEK MVK N2O5 NH3 NH4 NIT NO NO2 NOy OCPI ", + "OCPO PAN PM25_RH35_GCC PM25_RH35_GOCART ", + "PM25bc_RH35_GCC PM25du_RH35_GCC PM25ni_RH35_GCC ", + "PM25oc_RH35_GCC PM25soa_RH35_GCC PM25ss_RH35_GCC ", + "PM25su_RH35_GCC PRPE RCHO SALA SALC SO2 SOAP SOAS ", + "TOLU XYLE CO NO2 O3 SO2" + ), " ")), + unlist(strsplit(paste0( + "ACETO ACETA CALKA HIBCA HOBCA BENZE ETHTE PROPA ", + "METHA CMONO DUST1 DUST2 DUST3 DUST4 ETHOL HYPER ", + "FORMA NITAC PERAC ISOPR METHC MEKET MVKET DIPEN ", + "AMNIA AMNUM INNIT NIOXI NIDIO NITRO HIORG HOORG ", + "PERNI PM25X PM25R BLCPM DUSPM NITPM ORCPM SORPM ", + "SEAPM SULPM CALKE CALDH FSEAS CSEAS SULDI SOAPR ", + "SOASI TOLUE XYLEN COVMR NOVMR OZVMR SOVMR" + ), " ")), + c(rep("chm_tavg_1hr_g1440x721_v1", 51), rep("aqc_tavg_1hr_g1440x721_v1", 4)) + ) + collection_codes <- subset(codes, subset = codes[, 3] == collection) + code <- collection_codes[collection_codes[, 1] == variable, ][2] + #### 7. define date sequence + date_sequence <- generate_date_sequence( + date_start, + date_end, + sub_hyphen = TRUE + ) + #### 8. define time sequence + collection_end <- substr(collection, nchar(collection), nchar(collection)) + if (collection_end == "1") { + time_sequence <- seq(from = 30, to = 2330, by = 100) + } else if (collection_end == "3") { + time_sequence <- seq(from = 0, to = 2300, by = 100) + } + time_sequence <- sprintf("%04d", time_sequence) + #### 10. location SpatVector + sites_v <- terra::vect(sites, + geom = c("lon", "lat"), + crs = paste0( + "EPSG:", + crs + ) + ) + #### 11. site identifiers + sites_id <- terra::as.data.frame(sites_v) + #### 12. empty location data.frame + sites_extracted_df <- NULL + for (d in seq_along(date_sequence)) { + date <- date_sequence[d] + data_date <- terra::rast() + for (t in seq_along(time_sequence)) { + #### 13. define file paths + path <- paste0( + directory_with_data, + "/GEOS-CF.v01.rpl.", + collection, + ".", + date, + "_", + time_sequence[t], + "z.nc4" + ) + #### 14. import SpatRaster data + data <- terra::rast(path) + data_time <- terra::subset( + data, + subset = grep( + variable, + names(data) + ) + ) + #### 15. combine SpatRaster with same date + data_date <- c(data_date, data_time, warn = FALSE) + } + #### 16. calculate daily mean + data_fun <- do.call(fun, list(data_date, na.rm = TRUE)) + #### 17. define varname based on variable and date + names(data_fun) <- paste0(variable) + #### 18. set coordinate reference system + terra::crs(data_fun) <- paste0("EPSG:", crs) + #### 19. project to coordinate reference system + data_fun <- terra::project( + data_fun, + paste0("EPSG:", crs) + ) + #### 20. extract raster values at locations + sites_extracted_date <- terra::extract(data_fun, + sites_v, + method = "simple", + ID = FALSE, + bind = FALSE + ) + #### 21. combine with locations data.frame base + sites_extracted_date <- data.frame(cbind( + sites_id, + as.Date(date, + format = "%Y%m%d" + ), + sites_extracted_date + )) + #### 22. define column names + colnames(sites_extracted_date) <- c( + identifier, + "date", + paste0( + "GEO_", + code, + "_0_00000" + ) + ) + #### 23. combine with locations data.frame + sites_extracted_df <- rbind(sites_extracted_df, sites_extracted_date) + cat(paste0("Calculated ", variable, " for date ", date, "\n")) + } + #### 24. export data + return(sites_extracted_df) +} diff --git a/input/Rinput/processing_functions/calc_hms.R b/input/Rinput/processing_functions/calc_hms.R new file mode 100644 index 00000000..f03626e4 --- /dev/null +++ b/input/Rinput/processing_functions/calc_hms.R @@ -0,0 +1,105 @@ +################################################################################ +# Date created: 2024-01-09 +# Packages required: stringr +################################################################################ + +################################################################################ +#' calc_hms: Calculate wildfire smoke plume binary covariates from NOAA HMS +#' Smoke Product. +#' @param date_start character(1). length of 10. Start date of downloaded data. +#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). +#' @param date_end character(1). length of 10. End date of downloaded data. +#' Format YYYY-MM-DD (ex. September 10, 2023 = "2023-09-10"). +#' @param data SpatVector(1). SpatVector object produced by `process_hms` +#' containing density level-specific polygons. +#' @param density_level character(1). "Light", "Medium", or "Heavy". +#' @param sites +#' @param identifiier character(1). Column within `locations` CSV file +#' containing identifier for each unique coordinate location. +#' @author Mitchell Manware. +#' @return a data.frame object; +#' @importFrom terra vect +#' @importFrom terra project +#' @importFrom terra extract +#' @importFrom terra as.data.frame +#' @export +calc_hms <- function( + date_start = "2023-09-01", + date_end = "2023-09-02", + data, + density_level = c("Light", "Medium", "Heavy"), + sites, + identifier = "site_id") { + #### 2. check for null parameters + check_for_null_parameters(mget(ls())) + #### 3. sites as SpatVector + sites_v <- terra::vect( + sites, + geom = c("lon", "lat"), + crs = "EPSG:4326" + ) + sites_v <- terra::project(sites_v, "EPSG:4326") + #### 4. site identifiers + sites_id <- terra::as.data.frame(sites_v) + #### 5. define date sequence + date_sequence <- generate_date_sequence( + date_start, + date_end, + sub_hyphen = TRUE + ) + #### 6. empty data.frame for extracted values + sites_extracted <- NULL + for (d in seq_along(date_sequence)) { + cat(paste0( + "Extracting ", + tolower(density_level), + " smoke plume data for date: ", + date_sequence[d], + "...\n" + )) + #### 7. subset to date + data_date <- data[data$Date == date_sequence[d], ] + #### 8. check for DENSITY SPECFIIC polygons + if (nrow(data_date) == 0) { + cat(paste0( + density_level, + " smoke plume polygons absent for date: ", + date_sequence[d], + ". Returning 0 (NO SMOKE PRESENT).\n" + )) + } + #### 9. extract data at sites + sites_extracted_date <- terra::extract( + data_date, + sites_v + ) + #### 10. binary identifier (0 = NO SMOKE PRESENT; 1 = SMOKE PRESENT) + sites_extracted_date$binary12 <- match( + as.character(sites_extracted_date$Density), + c(NA, density_level) + ) + sites_extracted_date$binary01 <- sub(1, 0, sites_extracted_date$binary12) + sites_extracted_date$binary01 <- sub(2, 1, sites_extracted_date$binary01) + #### 11. combine with site identifier + sites_extracted_date_id <- data.frame(cbind( + sites_id, + paste0(as.Date( + date_sequence[d], + format = "%Y%m%d" + )), + as.numeric(sites_extracted_date$binary01) + )) + #### 12. set column names + colnames(sites_extracted_date_id) <- c( + identifier, + "date", + paste0( + "OTH_HMSW", + substr(density_level, 1, 1), + "_0_00000" + ) + ) + sites_extracted <- rbind(sites_extracted, sites_extracted_date_id) + } + return(sites_extracted) +} diff --git a/input/Rinput/processing_functions/calc_narr.R b/input/Rinput/processing_functions/calc_narr.R new file mode 100644 index 00000000..e22859d8 --- /dev/null +++ b/input/Rinput/processing_functions/calc_narr.R @@ -0,0 +1,193 @@ +################################################################################ +# Date created: 2023-11-28 +# Packages required: stringr +################################################################################ + +################################################################################ +#' calc_narr: Calculate meteorological covariates from NOAA NCEP North American +#' Regional Reanalysis. +#' @param year_start integer(1). length of 4. Start of year range for +#' downloading data. +#' @param year_end integer(1). length of 4. End of year range for downloading +#' data. +#' @param month_start integer(1). 1 = January; 12 = December +#' @param month_end integer(1). 1 = January; 12 = December +#' @param variable character(1). +#' @param sites +#' @param identifiier character(1). Column within `locations` CSV file +#' containing identifier for each unique coordinate location. +#' @param level integer(1). Pressure level of data. +#' @param directory_with_data character(1). Directory with downloaded NARR +#' monolevel netCDF files. +#' @author Mitchell Manware +#' @return a data.frame object +#' @export +calc_narr <- function( + year_start = 2018, + year_end = 2022, + month_start = NULL, + month_end = NULL, + variable = NULL, + sites, + identifier = "site_id", + level = NULL, + directory_with_data = "../../data/covariates/narr/") { + #### 1. directory setup + chars_dir_data <- nchar(directory_with_data) + if (substr(directory_with_data, chars_dir_data, chars_dir_data) != "/") { + directory_with_data <- paste0(directory_with_data, "/", sep = "") + } + #### 2. assign variable code + code <- gsub("\\.", "", variable) + code <- gsub("_", "", code) + code <- substr(toupper(paste0(code, code)), 1, 5) + #### 3. NARR-specific coordinate reference system + crs_narr <- paste0("PROJCRS[\"unnamed\",\n + BASEGEOGCRS[\"WGS 84\",\n + DATUM[\"World Geodetic System 1984\",\n + ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n + LENGTHUNIT[\"metre\",1]]],\n + PRIMEM[\"Greenwich\",0,\n + ANGLEUNIT[\"degree\",0.0174532925199433]],\n + ID[\"EPSG\",4326]],\n + CONVERSION[\"unnamed\",\n + METHOD[\"Lambert Conic Conformal (2SP)\",\n + ID[\"EPSG\",9802]],\n + PARAMETER[\"Latitude of false origin\",50,\n + ANGLEUNIT[\"degree\",0.0174532925199433],\n + ID[\"EPSG\",8821]],\n + PARAMETER[\"Longitude of false origin\",-107,\n + ANGLEUNIT[\"degree\",0.0174532925199433],\n + ID[\"EPSG\",8822]],\n + PARAMETER[\"Latitude of 1st standard parallel\",50,\n + ANGLEUNIT[\"degree\",0.0174532925199433],\n + ID[\"EPSG\",8823]],\n + PARAMETER[\"Latitude of 2nd standard parallel\",50,\n + ANGLEUNIT[\"degree\",0.0174532925199433],\n + ID[\"EPSG\",8824]],\n + PARAMETER[\"Easting at false origin\",5632642.22547,\n + LENGTHUNIT[\"metre\",1],\n + ID[\"EPSG\",8826]],\n + PARAMETER[\"Northing at false origin\",4612545.65137,\n + LENGTHUNIT[\"metre\",1],\n + ID[\"EPSG\",8827]]],\n + CS[Cartesian,2],\n + AXIS[\"easting\",east,\n + ORDER[1],\n + LENGTHUNIT[\"metre\",1,\n + ID[\"EPSG\",9001]]],\n + AXIS[\"northing\",north,\n + ORDER[2],\n + LENGTHUNIT[\"metre\",1,\n + ID[\"EPSG\",9001]]]]") + #### 4. sites as SpatVector + sites_v <- terra::vect(sites, + geom = c("lon", "lat"), + crs = "EPSG:4326" + ) + sites_v <- terra::project(sites_v, crs_narr) + #### 5. site identifiers + sites_id <- terra::as.data.frame(sites_v) + #### 6. define year or year+month sequence + sequence <- NULL + years <- seq(year_start, year_end, 1) + if (!is.null(c(month_start, month_end))) { + months <- seq(month_start, month_end, 1) + for (y in seq_along(years)) { + year <- years[y] + for (m in seq_along(months)) { + year_month <- paste0( + year, + stringr::str_pad(months[m], + width = 2, + pad = "0", + side = "left" + ) + ) + sequence <- c(sequence, year_month) + } + } + } else { + sequence <- c(sequence, years) + } + #### 7. define file paths + paths <- paste0( + directory_with_data, + variable, + "/", + variable, + ".", + sequence, + ".nc" + ) + #### 8. empty data.frame for extracted values + sites_extracted_df <- NULL + #### 9. import SpatRaster data + for (p in seq_along(paths)) { + data_ym <- terra::rast(paths[p]) + #### 10. subset to pressure level if not null + if (!is.null(level)) { + data_ym <- terra::subset(data_ym, + subset = grep( + paste0( + "level=", + level + ), + names(data_ym) + ) + ) + } + #### 11. define varname based on variable and date + names(data_ym) <- paste0( + variable, + "_", + gsub( + "-", + "", + as.Date(terra::time(data_ym), + format = "%Y%d%m" + ) + ) + ) + for (l in seq_len(terra::nlyr(data_ym))) { + #### 12. store data date + date <- as.Date(terra::time(data_ym[[l]]), + format = "%Y%m%d" + ) + cat(paste0("Extracting ", variable, " data for date: ", date, "\n")) + #### 13. extract data + sites_extracted_date <- terra::extract(data_ym[[l]], + sites_v, + method = "simple", + ID = FALSE, + bind = FALSE + ) + #### 14. combine with identifier + sites_extracted_date <- data.frame(cbind( + sites_id, + paste0(date), + sites_extracted_date + )) + colnames(sites_extracted_date) <- c( + identifier, + "date", + paste0( + "MET_", + code, + "_0_00000" + ) + ) + #### 15. replace missing data flag with NA + sites_extracted_date[, 3] <- + replace( + sites_extracted_date[, 3], + sites_extracted_date[, 3] == 9.96921e+36, + NA + ) + #### 16. combine with locations data.frame + sites_extracted_df <- rbind(sites_extracted_df, sites_extracted_date) + } + } + #### 17. return data frame with extracted values + return(sites_extracted_df) +} diff --git a/input/Rinput/processing_functions/process_hms.R b/input/Rinput/processing_functions/process_hms.R new file mode 100644 index 00000000..689869a2 --- /dev/null +++ b/input/Rinput/processing_functions/process_hms.R @@ -0,0 +1,134 @@ +################################################################################ +# Date modified: 2024-01-08 +# Packages required: terra +################################################################################ + +################################################################################ +#' process_hms: import and aggregate wildfire smoke plume data based on smoke +#' density level. +#' +#' @param date_start character(1). length of 10. Start date of downloaded data. +#' Format YYYY-MM-DD (ex. September 1, 2023 = "2023-09-01"). +#' @param date_end character(1). length of 10. End date of downloaded data. +#' Format YYYY-MM-DD (ex. September 10, 2023 = "2023-09-10"). +#' @param data_format character(1). "Shapefile" or "KML". Format of downloaded +#' data. +#' @param density_level character(1). "Light", "Medium", or "Heavy". +#' @param directory_with_data character(1). Directory storing +#' downloaded ".shp" or ".kml" files. +#' @author Mitchell Manware. +#' @return a SpatVector object; +#' @importFrom terra vect +#' @importFrom terra aggregate +#' @importFrom terra subset +#' @export +process_hms <- function( + date_start = "2023-09-01", + date_end = "2023-09-02", + data_format = "Shapefile", + density_level = c("Light", "Medium", "Heavy"), + directory_with_data = "./input/noaa_hms/raw/") { + #### 0. + if (data_format == "KML") { + message(paste0("KML processes under construction")) + return(NULL) + } + #### 1. directory setup + directory_with_data <- download_sanitize_path(directory_with_data) + #### 2. check for variable + check_for_null_parameters(mget(ls())) + #### 3. define date sequence + date_sequence <- generate_date_sequence( + date_start, + date_end, + sub_hyphen = TRUE + ) + #### 4. define list of data file names + if (data_format == "Shapefile") { + data_files <- list.files( + path = directory_with_data, + pattern = "hms_smoke", + full.names = TRUE + ) + data_files <- data_files[grep(".shp", data_files)] + data_files <- data_files[grepl( + paste0( + date_sequence, + collapse = "|" + ), + data_files + )] + } else if (data_format == "KML") { + data_files <- list.files( + path = directory_with_data, + pattern = "hms_smoke_KML", + full.names = TRUE + ) + } + #### 5. check for matching dates + data_file_dates <- stringr::str_split_i( + stringr::str_split_i( + data_files, + "hms_smoke", + 2 + ), + ".shp", + 1 + ) + if (!(identical(data_file_dates, date_sequence))) { + message(paste0( + "Data for requested dates does not match available files.\n" + )) + return(NULL) + } + #### 6. process data + data <- terra::vect() + if (data_format == "Shapefile") { + for (d in seq_along(data_files)) { + cat(paste0( + "Processing file ", + data_files[d], + "...\n" + )) + data_date <- terra::vect(data_files[d]) + data_date_p <- terra::project(data_date, "EPSG:4326") + #### 7. absent polygons (ie. December 31, 2018) + if (nrow(data_date) == 0) { + cat(paste0( + "Smoke plume polygons absent for date: ", + date_sequence[d], + ". Returning empty SpatVector.\n" + )) + data_missing <- data_date_p + data_missing$Density <- "" + data_missing$Date <- "" + data_return <- rbind(data, data_missing) + } else { + #### 8. zero buffer to avoid self-intersecting geometry error + data_date_b <- terra::buffer( + data_date_p, + width = 0 + ) + #### 9. aggregate density-specific polygons + data_date_aggregate <- terra::aggregate( + data_date_b, + by = "Density", + dissolve = TRUE + ) + #### 10. factorize + data_date_aggregate$Density <- factor( + data_date_aggregate$Density, + levels = c("Light", "Medium", "Heavy") + ) + data_date_aggregate$Date <- paste0(date_sequence[d]) + data <- rbind(data, data_date_aggregate) + } + } + } + #### 11. select "Density" and "Date" + data <- data[seq_len(nrow(data)), c("Density", "Date")] + #### 12. subset to density level + data_return <- data[data$Density == density_level, ] + #### 13. return SpatVector + return(data_return) +}