Skip to content

Commit

Permalink
skipping allele append to column names if there are already allele ID…
Browse files Browse the repository at this point in the history
…s in non-numeric to numeric G conversion
  • Loading branch information
jeffersonfparil committed Oct 14, 2024
1 parent 473cb95 commit 3e9c2ca
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 15 deletions.
20 changes: 13 additions & 7 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ fn_G_numeric_to_non_numeric = function(G, ploidy=2, verbose=FALSE) {
#' @param retain_minus_one_alleles_per_locus omit the alternative or trailing allele per locus? (Default=TRUE)
#' @param verbose show non-numeric to numeric genotype data conversion messages? (Default=FALSE)
#' @returns
#' - Ok: n samples x p loci-alleles matrix of genotype classes (numeric ranging from 0 to 1)
#' - Ok: n samples x p loci-alleles matrix of genotype classes (numeric ranging from 0 to 1). If the column names do not have
#' allele IDs then the alleles will be extracted from the non-numeric genotype encodings.
#' - Err: grError
#' @examples
#' ploidy = 42
Expand All @@ -356,7 +357,7 @@ fn_G_non_numeric_to_numeric = function(G_non_numeric, retain_minus_one_alleles_p
###################################################
### TEST
# G = simquantgen::fn_simulate_genotypes(ploidy=42, n_alleles=52, verbose=TRUE)
# G_non_numeric = fn_G_numeric_to_non_numeric(G=G, ploidy=42, verbose=TRUE)
# G_non_numeric = gp::fn_G_numeric_to_non_numeric(G=G, ploidy=42, verbose=TRUE)
# verbose = TRUE
###################################################
### Input sanity check
Expand Down Expand Up @@ -421,7 +422,12 @@ fn_G_non_numeric_to_numeric = function(G_non_numeric, retain_minus_one_alleles_p
if (verbose) {utils::setTxtProgressBar(pb, j)}
}
if (verbose) {close(pb)}
colnames(G) = vec_colnames
### Keep the original column names if they already carry allele IDs, i.e. the first column name has exactly 3 elements after splitting by tabs; otherwise use the column names in vec_colnames
if (length(unlist(strsplit(vec_loci_names[1], "\t"))) == 3) {
colnames(G) = vec_loci_names
} else {
colnames(G) = vec_colnames
}
### Return numeric allele frequency genotype matrix with or without all the alleles
if (retain_minus_one_alleles_per_locus) {
list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose)
Expand Down Expand Up @@ -1731,17 +1737,17 @@ fn_filter_genotype = function(G, maf=0.01, sdev_min=0.0001,
return(G)
}

#' Merge two genotypes matrices where if there are conflict:
#' - data on the first matrix will be used,
#' Merge two genotype matrices where, if there are conflicts, either:
#' - data on the first matrix will be used, or
#' - data on the second matrix will be used, or
#' - arithmetic mean between the two matrices will be used.
#'
#' @param G1 numeric n samples x p loci-alleles matrix of allele frequencies with non-null row and column names.
#' @param G1 numeric n1 samples x p1 loci-alleles matrix of allele frequencies with non-null row and column names.
#' Row names can be any string of characters which identify the sample or entry or pool names.
#' Column names need to be tab-delimited, where first element refers to the chromosome or scaffold name,
#' the second should be numeric which refers to the position in the chromosome/scaffold, and
#' subsequent elements are optional which may refer to the allele identifier and other identifiers.
#' @param G2 numeric n samples x p loci-alleles matrix of allele frequencies with non-null row and column names.
#' @param G2 numeric n2 samples x p2 loci-alleles matrix of allele frequencies with non-null row and column names.
#' Row names can be any string of characters which identify the sample or entry or pool names.
#' Column names need to be tab-delimited, where first element refers to the chromosome or scaffold name,
#' the second should be numeric which refers to the position in the chromosome/scaffold, and
Expand Down
3 changes: 2 additions & 1 deletion man/fn_G_non_numeric_to_numeric.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/fn_merge_genotype_genotype.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-io.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ test_that("fn_G_non_numeric_to_numeric", {
n_alleles = 2
G_numeric = simquantgen::fn_simulate_genotypes(ploidy=ploidy, n_alleles=n_alleles, verbose=TRUE)
G_non_numeric = fn_G_numeric_to_non_numeric(G=G_numeric, ploidy=ploidy, verbose=TRUE)
G_numeric_back = fn_G_non_numeric_to_numeric(G=G_non_numeric, verbose=TRUE)
G_numeric_back = fn_G_non_numeric_to_numeric(G_non_numeric=G_non_numeric, verbose=TRUE)
expect_equal(sum(abs(G_numeric - G_numeric_back) < 1e-4), prod(dim(G_numeric)))
### The converted non-numeric to numeric matrix can have fewer loci-alleles than the original numeric matrix because fixed loci will be omitted
expect_equal(ncol(G_numeric_back) <= ncol(G_numeric), TRUE)
Expand Down

0 comments on commit 3e9c2ca

Please sign in to comment.