Skip to content

Commit

Permalink
adding model selection in config.txt for slurm job submission
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffersonfparil committed Jun 24, 2024
1 parent 1006ee9 commit 7f713cd
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 16 deletions.
8 changes: 4 additions & 4 deletions R/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#' (see ?fn_load_phenotype for details)
#' - $pheno_idx_col_y: column number in the phenotype file corresponding to the numeric phenotype data
#' (see ?fn_load_phenotype for details)
#' - $pheno_na_strings: strings of characters corresponding to missing data in the phenotype file
#' - $pheno_vec_na_strings: strings of characters corresponding to missing data in the phenotype file
#' (see ?fn_load_phenotype for details)
#' - $pheno_bool_remove_outliers: remove outliers from the phenotype file?
#' - $pheno_bool_remove_NA: remove samples missing phenotype data in the phenotype file?
Expand Down Expand Up @@ -236,7 +236,7 @@
#' pheno_idx_col_id=1,
#' pheno_idx_col_pop=2,
#' pheno_idx_col_y=3,
#' pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
#' pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
#' pheno_bool_remove_outliers=TRUE,
#' pheno_bool_remove_NA=FALSE,
#' bool_within=TRUE,
Expand Down Expand Up @@ -286,7 +286,7 @@ gp = function(args) {
# pheno_idx_col_id=1,
# pheno_idx_col_pop=2,
# pheno_idx_col_y=3,
# pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
# pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
# pheno_bool_remove_outliers=FALSE,
# pheno_bool_remove_NA=FALSE,
# bool_within=TRUE,
Expand Down Expand Up @@ -318,7 +318,7 @@ gp = function(args) {
idx_col_id=args$pheno_idx_col_id,
idx_col_pop=args$pheno_idx_col_pop,
idx_col_y=args$pheno_idx_col_y,
na_strings=args$pheno_na_strings,
na_strings=args$pheno_vec_na_strings,
verbose=args$verbose
)
if (methods::is(list_pheno, "gpError")) {return(list_pheno)}
Expand Down
12 changes: 6 additions & 6 deletions inst/exec_Rscript/0-submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ CONFIG_NREPS=$(sed "s/\"/'/g" config.txt | sed -n '4p')
CONFIG_DIR_OUT=$(sed "s/\"/'/g" config.txt | sed -n '5p')
CONFIG_JOB_NAME=$(sed "s/\"/'/g" config.txt | sed -n '6p')
CONFIG_ACCOUNT_NAME=$(sed "s/\"/'/g" config.txt | sed -n '7p')
CONFIG_NTASKS=$(sed "s/\"/'/g" config.txt | sed -n '8p')
CONFIG_NCPUS=$(sed "s/\"/'/g" config.txt | sed -n '9p')
CONFIG_MEM=$(sed "s/\"/'/g" config.txt | sed -n '10p')
CONFIG_TIME_LIMIT=$(sed "s/\"/'/g" config.txt | sed -n '11p')
CONFIG_NCPUS=$(sed "s/\"/'/g" config.txt | sed -n '8p')
CONFIG_MEM=$(sed "s/\"/'/g" config.txt | sed -n '9p')
CONFIG_TIME_LIMIT=$(sed "s/\"/'/g" config.txt | sed -n '10p')
CONFIG_MODELS=$(sed "s/\"/'/g" config.txt | sed -n '11p')
### Create the checks and submission scripts using the config variables
sed "s|GENOTYPE_DATA_RDS=\${DIR_SRC}/input/test_geno.Rds|$CONFIG_GENO|g" 1-checks_and_submision.sh | \
sed "s|PHENOTYPE_DATA_TSV=\${DIR_SRC}/input/test_pheno.tsv|$CONFIG_PHENO|g" | \
Expand All @@ -24,10 +24,10 @@ sed "s|GENOTYPE_DATA_RDS=\${DIR_SRC}/input/test_geno.Rds|$CONFIG_GENO|g" 1-check
### Create the slurm job scripts using the config variables
sed "s|SBATCH --job-name='GS'|$CONFIG_JOB_NAME|g" 2-gp_slurm_job.sh | \
sed "s|SBATCH --account='dbiopast1'|$CONFIG_ACCOUNT_NAME|g" | \
sed "s|SBATCH --ntasks=1|$CONFIG_NTASKS|g" | \
sed "s|SBATCH --cpus-per-task=16|$CONFIG_NCPUS|g" | \
sed "s|SBATCH --mem=100G|$CONFIG_MEM|g" | \
sed "s|SBATCH --time=1-0:0:00|$CONFIG_TIME_LIMIT|g" \
sed "s|SBATCH --time=1-0:0:00|$CONFIG_TIME_LIMIT|g" | \
sed "s|--vec-models-to-test ridge,lasso,elastic_net,Bayes_A,Bayes_B,Bayes_C,gBLUP|--vec-models-to-test $CONFIG_MODELS|g" \
> 2-gp_slurm_job-${RUN_NAME}.sh
### Check input and submit the slurm job
chmod +x 1-checks_and_submision-${RUN_NAME}.sh
Expand Down
1 change: 1 addition & 0 deletions inst/exec_Rscript/2-gp_slurm_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ Rscript ${DIR_SRC}/gp.R \
--pheno-idx-col-y $COLUMN_ID \
--bool-within TRUE \
--bool-across $BOOL_ACROSS \
--vec-models-to-test ridge,lasso,elastic_net,Bayes_A,Bayes_B,Bayes_C,gBLUP \
--n-folds $KFOLDS \
--n-reps $NREPS \
--bool-parallel TRUE \
Expand Down
4 changes: 2 additions & 2 deletions inst/exec_Rscript/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ NREPS=2
DIR_OUT=${DIR_SRC}
SBATCH --job-name="test"
SBATCH --account="dbiopast2"
SBATCH --ntasks=1
SBATCH --cpus-per-task=4
SBATCH --mem=10G
SBATCH --time=0-0:10:00
SBATCH --time=0-0:10:00
ridge,Bayes_A,Bayes_B,Bayes_C,gBLUP
6 changes: 5 additions & 1 deletion inst/exec_Rscript/gp.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ parser$add_argument("--pheno-header", dest="p
parser$add_argument("--pheno-idx-col-id", dest="pheno_idx_col_id", type="integer", default=1, help="Column number in the phenotype file corresponding to the sample names [default=1].")
parser$add_argument("--pheno-idx-col-pop", dest="pheno_idx_col_pop", type="integer", default=2, help="Column number in the phenotype file corresponding to the population/grouping names [default=2].")
parser$add_argument("--pheno-idx-col-y", dest="pheno_idx_col_y", type="integer", default=3, help="Column number in the phenotype file corresponding to the numeric phenotype data [default=3].")
parser$add_argument("--pheno-na-strings", dest="pheno_na_strings", type="character", default=c("", "-", "NA", "na", "NaN", "missing", "MISSING"), help="Strings of characters corresponding to missing data in the phenotype file [default=c('', '-', 'NA', 'na', 'NaN', 'missing', 'MISSING')].")
parser$add_argument("--pheno-na-strings", dest="pheno_vec_na_strings", type="character", default=c("", "-", "NA", "na", "NaN", "missing", "MISSING"), help="Strings of characters corresponding to missing data in the phenotype file [default=c('', '-', 'NA', 'na', 'NaN', 'missing', 'MISSING')].")
parser$add_argument("--pheno-bool-remove-outliers", dest="pheno_bool_remove_outliers", type="logical", default=FALSE, help="Remove outliers from the phenotype file [default=FALSE]?")
parser$add_argument("--pheno-bool-remove-NA", dest="pheno_bool_remove_NA", type="logical", default=FALSE, help="Remove samples missing phenotype data in the phenotype file? [default=FALSE].")
parser$add_argument("--bool-within", dest="bool_within", type="logical", default=TRUE, help="Perform within population k-fold cross-validation? [default=TRUE].")
Expand Down Expand Up @@ -63,7 +63,11 @@ print(paste0(" - with a total of ", args$n_threads, " threads available and
print(paste0(" a total memory of ", args$max_mem_Gb, " Gb."))
print(paste0("Start time: ", time_ini))
print("Input parameters:")
### Parse input vectors
### Comma-separated command-line strings are converted into character vectors (spaces are stripped first).
args$vec_models_to_test = unlist(strsplit(gsub(" ", "", args$vec_models_to_test), ","))
### The parser stores --pheno-na-strings under dest="pheno_vec_na_strings", which is also the field gp::gp() reads.
### The previous field name (pheno_na_strings) no longer exists in args, so parsing it yielded NULL and
### the assignment was a silent no-op, leaving the user-supplied string unsplit.
### Empty-string entries are kept as-is because strsplit("", ",") returns character(0), which would
### otherwise drop "" from the vector of missing-data strings.
args$pheno_vec_na_strings = unlist(lapply(
    gsub(" ", "", args$pheno_vec_na_strings),
    function(x) {
        if (identical(x, "")) {
            return("")
        }
        strsplit(x, ",")[[1]]
    }
))
print(args)
### Run
fname_out_Rds = gp::gp(args=args)
time_fin = Sys.time()
time_duration_minutes = as.numeric(difftime(time_fin, time_ini, units="min"))
Expand Down
4 changes: 2 additions & 2 deletions man/gp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-main.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ test_that("gp", {
pheno_idx_col_id=1,
pheno_idx_col_pop=2,
pheno_idx_col_y=3,
pheno_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
pheno_vec_na_strings=c("", "-", "NA", "na", "NaN", "missing", "MISSING"),
pheno_bool_remove_outliers=TRUE,
pheno_bool_remove_NA=FALSE,
bool_within=TRUE,
Expand Down

0 comments on commit 7f713cd

Please sign in to comment.