diff --git a/R/io.R b/R/io.R index 4318a9b..0dc71ba 100644 --- a/R/io.R +++ b/R/io.R @@ -344,7 +344,8 @@ fn_G_numeric_to_non_numeric = function(G, ploidy=2, verbose=FALSE) { #' @param retain_minus_one_alleles_per_locus omit the alternative or trailing allele per locus? (Default=TRUE) #' @param verbose show non-numeric to numeric genotype data conversion messages? (Default=FALSE) #' @returns -#' - Ok: n samples x p loci-alleles matrix of genotype classes (numeric ranging from 0 to 1) +#' - Ok: n samples x p loci-alleles matrix of genotype classes (numeric ranging from 0 to 1). If the column names do not have +#' allele IDs then the alleles will be extracted from the non-numeric genotype encodings. #' - Err: grError #' @examples #' ploidy = 42 @@ -356,7 +357,7 @@ fn_G_non_numeric_to_numeric = function(G_non_numeric, retain_minus_one_alleles_p ################################################### ### TEST # G = simquantgen::fn_simulate_genotypes(ploidy=42, n_alleles=52, verbose=TRUE) - # G_non_numeric = fn_G_numeric_to_non_numeric(G=G, ploidy=42, verbose=TRUE) + # G_non_numeric = gp::fn_G_numeric_to_non_numeric(G=G, ploidy=42, verbose=TRUE) # verbose = TRUE ################################################### ### Input sanity check @@ -421,7 +422,12 @@ fn_G_non_numeric_to_numeric = function(G_non_numeric, retain_minus_one_alleles_p if (verbose) {utils::setTxtProgressBar(pb, j)} } if (verbose) {close(pb)} - colnames(G) = vec_colnames + ### Do not update the allele IDs in the column names if there are already allele names, i.e. 
there are 3 elements per column name after splitting by tabs + if (length(unlist(strsplit(vec_loci_names[1], "\t"))) == 3) { + colnames(G) = vec_loci_names + } else { + colnames(G) = vec_colnames + } ### Return numeric allele frequency genotype matrix with or without all the alleles if (retain_minus_one_alleles_per_locus) { list_G_G_alt = fn_G_split_off_alternative_allele(G=G, verbose=verbose) @@ -1731,17 +1737,17 @@ fn_filter_genotype = function(G, maf=0.01, sdev_min=0.0001, return(G) } -#' Merge two genotypes matrices where if there are conflict: -#' - data on the first matrix will be used, +#' Merge two genotype matrices where, if there are conflicts: +#' - data on the first matrix will be used, or #' - data on the second matrix will be used, or #' - arithmetic mean between the two matrices will be used. #' -#' @param G1 numeric n samples x p loci-alleles matrix of allele frequencies with non-null row and column names. +#' @param G1 numeric n1 samples x p1 loci-alleles matrix of allele frequencies with non-null row and column names. #' Row names can be any string of characters which identify the sample or entry or pool names. #' Column names need to be tab-delimited, where first element refers to the chromosome or scaffold name, #' the second should be numeric which refers to the position in the chromosome/scaffold, and #' subsequent elements are optional which may refer to the allele identifier and other identifiers. -#' @param G2 numeric n samples x p loci-alleles matrix of allele frequencies with non-null row and column names. +#' @param G2 numeric n2 samples x p2 loci-alleles matrix of allele frequencies with non-null row and column names. #' Row names can be any string of characters which identify the sample or entry or pool names. 
#' Column names need to be tab-delimited, where first element refers to the chromosome or scaffold name, #' the second should be numeric which refers to the position in the chromosome/scaffold, and diff --git a/man/fn_G_non_numeric_to_numeric.Rd b/man/fn_G_non_numeric_to_numeric.Rd index 9d3fbec..1440689 100644 --- a/man/fn_G_non_numeric_to_numeric.Rd +++ b/man/fn_G_non_numeric_to_numeric.Rd @@ -25,7 +25,8 @@ and the second should be numeric which refers to the position in the chromosome/ } \value{ \itemize{ -\item Ok: n samples x p loci-alleles matrix of genotype classes (numeric ranging from 0 to 1) +\item Ok: n samples x p loci-alleles matrix of genotype classes (numeric ranging from 0 to 1). If the column names do not have +allele IDs then the alleles will be extracted from the non-numeric genotype encodings. \item Err: grError } } diff --git a/man/fn_merge_genotype_genotype.Rd b/man/fn_merge_genotype_genotype.Rd index 728dd26..bedb0d8 100644 --- a/man/fn_merge_genotype_genotype.Rd +++ b/man/fn_merge_genotype_genotype.Rd @@ -2,9 +2,9 @@ % Please edit documentation in R/io.R \name{fn_merge_genotype_genotype} \alias{fn_merge_genotype_genotype} -\title{Merge two genotypes matrices where if there are conflict: +\title{Merge two genotypes matrices where if there are conflicts: \itemize{ -\item data on the first matrix will be used, +\item data on the first matrix will be used, or \item data on the second matrix will be used, or \item arithmetic mean between the two matrices will be used. }} @@ -17,13 +17,13 @@ fn_merge_genotype_genotype( ) } \arguments{ -\item{G1}{numeric n samples x p loci-alleles matrix of allele frequencies with non-null row and column names. +\item{G1}{numeric n1 samples x p1 loci-alleles matrix of allele frequencies with non-null row and column names. Row names can be any string of characters which identify the sample or entry or pool names. 
Column names need to be tab-delimited, where first element refers to the chromosome or scaffold name, the second should be numeric which refers to the position in the chromosome/scaffold, and subsequent elements are optional which may refer to the allele identifier and other identifiers.} -\item{G2}{numeric n samples x p loci-alleles matrix of allele frequencies with non-null row and column names. +\item{G2}{numeric n2 samples x p2 loci-alleles matrix of allele frequencies with non-null row and column names. Row names can be any string of characters which identify the sample or entry or pool names. Column names need to be tab-delimited, where first element refers to the chromosome or scaffold name, the second should be numeric which refers to the position in the chromosome/scaffold, and @@ -46,9 +46,9 @@ subsequent elements are optional which may refer to the allele identifier and ot } } \description{ -Merge two genotypes matrices where if there are conflict: +Merge two genotypes matrices where if there are conflicts: \itemize{ -\item data on the first matrix will be used, +\item data on the first matrix will be used, or \item data on the second matrix will be used, or \item arithmetic mean between the two matrices will be used. 
} diff --git a/tests/testthat/test-io.R b/tests/testthat/test-io.R index c0be599..13f1a81 100644 --- a/tests/testthat/test-io.R +++ b/tests/testthat/test-io.R @@ -43,7 +43,7 @@ test_that("fn_G_non_numeric_to_numeric", { n_alleles = 2 G_numeric = simquantgen::fn_simulate_genotypes(ploidy=ploidy, n_alleles=n_alleles, verbose=TRUE) G_non_numeric = fn_G_numeric_to_non_numeric(G=G_numeric, ploidy=ploidy, verbose=TRUE) - G_numeric_back = fn_G_non_numeric_to_numeric(G=G_non_numeric, verbose=TRUE) + G_numeric_back = fn_G_non_numeric_to_numeric(G_non_numeric=G_non_numeric, verbose=TRUE) expect_equal(sum(abs(G_numeric - G_numeric_back) < 1e-4), prod(dim(G_numeric))) ### The converted non-numeric to numeric matrix can have less loci-alleles than the original numeric matrix as fixed loci will be omitted expect_equal(ncol(G_numeric_back) <= ncol(G_numeric), TRUE)