Skip to content

Commit

Permalink
Don't throw an error in the Z-score calculation when just one feature…
Browse files Browse the repository at this point in the history
… has variation. Update tests accordingly. Partially addresses #83
  • Loading branch information
Nick-Eagles committed Oct 31, 2024
1 parent 05e7886 commit 6d3ba57
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
6 changes: 3 additions & 3 deletions R/multi_gene_z_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
#' @family functions for summarizing expression of multiple continuous variables simultaneously
#' @keywords internal
multi_gene_z_score <- function(cont_mat) {
# Z-score calculation requires at least 2 features with nonzero variance.
# Z-score calculation requires at least 1 feature with nonzero variance.
# Verify this and drop any zero-variance features
good_indices <- which(colSds(cont_mat, na.rm = TRUE) != 0)
if (length(good_indices) < 2) {
stop("After dropping features with no expression variation, less than 2 features were left. This error can occur when using data from only 1 spot.", call. = FALSE)
if (length(good_indices) < 1) {
stop("After dropping features with no expression variation, no features were left. This error can occur when using data from only 1 spot.", call. = FALSE)
}
if (ncol(cont_mat) - length(good_indices) > 0) {
warning(
Expand Down
21 changes: 18 additions & 3 deletions tests/testthat/test-multi_gene_z_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,32 @@ test_that(
)

# NAs should be correctly removed from columns (as long as 2 non-NAs remain
# in at least 2 columns), and the result should have no NAs
# in at least 1 column), and the result should have no NAs
cont_mat <- matrix(c(1, NA, 3, NA, 2, 0), ncol = 2)
colnames(cont_mat) <- c("good1", "good2")
expect_equal(any(is.na(multi_gene_z_score(cont_mat))), FALSE)

# With only one good column, an error should be thrown
# With only one good column, the result should simply be the
# Z-score-normalized good column. A warning should indicate which
# columns were dropped
cont_mat <- matrix(c(1, NA, 3, 4, 2, 2), ncol = 3)
colnames(cont_mat) <- c("bad1", "good", "bad2")

temp = c(3, 4)
expected_result = (temp - mean(temp)) / sd(temp)

expect_warning(
{ actual_result = multi_gene_z_score(cont_mat) },
"Dropping features\\(s\\) 'bad1', 'bad2' which have no expression variation"
)
expect_equal(actual_result, expected_result)

# An error should be thrown if no columns have variation
cont_mat <- matrix(c(1, 1, 0, 0, 2, 2), ncol = 3)
colnames(cont_mat) <- c("bad1", "bad2", "bad3")
expect_error(
multi_gene_z_score(cont_mat),
"After dropping features with no expression variation, less than 2 features were left"
"^After dropping features with no expression variation"
)
}
)

0 comments on commit 6d3ba57

Please sign in to comment.