Skip to content

Commit

Permalink
make strandSpecificity by sample
Browse files Browse the repository at this point in the history
  • Loading branch information
AtaJadidAhari committed Apr 16, 2024
1 parent a97d4da commit 63397df
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 37 deletions.
27 changes: 17 additions & 10 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,13 @@ setReplaceMethod("workingDir", "FraserDataSet", function(object, value) {
#' @export
#' @rdname fds-methods
setMethod("strandSpecific", "FraserDataSet", function(object) {
return(slot(object, "strandSpecific"))
if(!"strand" %in% colnames(colData(object))){
warning("Strand is not specified. Please set the used RNA-seq",
" protocol by using 'strandSpecific(object) <- c(...)'.",
"\n\nWe assume as default a non stranded protocol.")
return(0)
}
return(colData(object)$strand)
})

#' @export
Expand All @@ -161,15 +167,17 @@ setReplaceMethod("strandSpecific", "FraserDataSet", function(object, value) {
value <- as.integer(value)
}
if(is.character(value)){
value <- switch(tolower(value),
'no' = 0L,
'unstranded' = 0L,
'yes' = 1L,
'stranded' = 1L,
'reverse' = 2L,
-1L)
val_chr <- tolower(value)
value <- sapply(val_chr, switch, 'no' = 0L, 'unstranded' = 0L,
'yes' = 1L, 'stranded' = 1L, 'reverse' = 2L, -1L)
if(any(value < 0)){
stop("Please specify correct strandness of the samples.",
" It needs to be one of: 'no', 'unstranded', 'yes',",
" 'stranded' or 'reverse'.", "\n\nIt was specified: ",
paste(collapse = ", ", val_chr[value < 0]))
}
}
slot(object, "strandSpecific") <- value
colData(object)$strand <- as.integer(value)
validObject(object)
return(object)
})
Expand Down Expand Up @@ -309,7 +317,6 @@ subset.FRASER <- function(x, i, j, by=c("j", "ss"), ..., drop=FALSE){
subX,
name = name(x),
bamParam = scanBamParam(x),
strandSpecific = strandSpecific(x),
workingDir = workingDir(x),
nonSplicedReads = nsrObj
)
Expand Down
38 changes: 22 additions & 16 deletions R/FraserDataSet-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@ setClass("FraserDataSet",
slots = list(
name = "character",
bamParam = "ScanBamParam",
strandSpecific = "integer",
workingDir = "character",
nonSplicedReads = "RangedSummarizedExperiment"
),
prototype = list(
name = "Data Analysis",
bamParam = ScanBamParam(mapqFilter=0),
strandSpecific = 0L,
workingDir = "FRASER_output",
nonSplicedReads = SummarizedExperiment(rowRanges=GRanges())
)
Expand Down Expand Up @@ -73,9 +71,13 @@ validateBamParam <- function(object) {
}

validateStrandSpecific <- function(object) {
if(!isScalarInteger(object@strandSpecific)) {
return(paste("The 'strandSpecific' option must be 0L (unstranded),",
"1L (stranded) or 2L (reverse)."))
sampleData <- as.data.table(colData(object))
if("strand" %in% colnames(sampleData) &&
(!is.integer(sampleData[,strand])
|| any(!sampleData[,strand] %in% 0:2))){
return(paste("The 'strand' column in the sample annotation in",
"'colData(fds)' must be empty or contain only integers:",
"0L == 'no', 1L == 'yes', 2L == 'reverse'."))
}
NULL
}
Expand All @@ -85,8 +87,8 @@ validatePairedEnd <- function(object) {
if("pairedEnd" %in% colnames(sampleData) &&
any(!is.logical(sampleData[,pairedEnd]))){
return(paste("The 'pairedEnd' column in the sample annotation in",
"'colData(fds)' must only contain logical values ",
"(TRUE or FALSE)."))
"'colData(fds)' must only contain logical values ",
"(TRUE or FALSE)."))
}
NULL
}
Expand Down Expand Up @@ -114,7 +116,8 @@ validateNonSplicedReadsType <- function(object) {
return("'nonSplicedReads' must be a RangedSummarizedExperiment object")
}
if(length(object) != 0 && dim(object@nonSplicedReads)[2] != dim(object)[2]){
return("The nonSplicedReads dimensions are not correct. This is a internal error!")
return(paste("The nonSplicedReads dimensions are not correct.",
"This is a internal error!"))
}
ans <- validObject(object@nonSplicedReads)
if(!isScalarLogical(ans) || ans == FALSE){
Expand All @@ -134,21 +137,24 @@ validateAssays <- function(object){
NULL
}

# for non-empty fds objects check if non-spliced reads are overlapping with at least 1 donor/acceptor site
# for non-empty fds objects check if non-spliced reads are overlapping
# with at least 1 donor/acceptor site
validateNonSplicedReadsSanity <- function(object){
# fds object must have samples and junctions
if(all(dim(object) > c(0,0))){

# fds object must be annotated with start/end/spliceSite indexes
if("startID" %in% names(mcols(object)) && "endID" %in% names(mcols(object)) &&
"spliceSiteID" %in% names(mcols(object, type="theta"))){
if(all(c("startID", "endID") %in% names(mcols(object))) &&
"spliceSiteID" %in% names(mcols(object, type="theta"))){

# check that every spliceSiteID matches either a start or end index
if(length( intersect( mcols(object, type="theta")$spliceSiteID, unlist(mcols(object)[, c("startID", "endID")] ) ) )
!= nrow(nonSplicedReads(object))){
return("The nonSplicedReads do not have corresponding splitReads. This is probably the result of merging")
}
# check that every spliceSiteID matches either a start or end index
if(nrow(nonSplicedReads(object)) != length(intersect(
mcols(object, type="theta")$spliceSiteID,
unlist(mcols(object)[, c("startID", "endID")])))){
return(paste("The nonSplicedReads do not have corresponding",
"splitReads. This is probably the result of merging"))
}
}
}
NULL
}
Expand Down
11 changes: 9 additions & 2 deletions R/annotationOfRanges.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,15 @@ findAnnotatedJunction <- function(fds, annotation, annotateNames=TRUE,
# check if strandspecific data is used
gr <- rowRanges(fds, type="psi5")

if(isFALSE(as.logical(stranded))){
strand(gr) <- "*"
#
if(0 %in% stranded){
if(length(unique(stranded)) > 1){
warning("You use a mixture of stranded and unstranded data.",
" This is highly discouraged and ",
" can lead to unwanted results. We treat all as stranded.")
} else {
strand(gr) <- "*"
}
}

# set correct seq level names
Expand Down
20 changes: 17 additions & 3 deletions R/countRNAseqData.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,13 @@ countRNAData <- function(fds, NcpuPerSample=1, minAnchor=5, recount=FALSE,
stopifnot(is.numeric(minAnchor) & minAnchor >= 1)
minAnchor <- as.integer(minAnchor)

# Check mixed strand type
ss <- strandSpecific(fds)
if ((any(ss == 0) && any(ss == 1)) || (any(ss == 0) && any(ss == 2))){
stop(paste("Data contains a mix of stranded and unstranded samples.\n ",
"Please consider analyzing them separately."))
}

# load needed genomes if provided
if(!(is.null(genome) | any(is.na(unique(genome))))){
for(i in unique(genome)){
Expand Down Expand Up @@ -400,13 +407,19 @@ addCountsToFraserDataSet <- function(fds, splitCounts, nonSplitCounts){

# check for valid fds
validObject(fds)

# Check mixed strand type
ss <- strandSpecific(fds)
if ((any(ss == 0) && any(ss == 1)) || (any(ss == 0) && any(ss == 2))){
stop(paste("Data contains a mix of stranded and unstranded samples.\n ",
"Please consider analyzing them separately."))
}

# create final FRASER dataset
fds <- new("FraserDataSet",
splitCounts,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitCounts,
metadata = metadata(fds)
Expand Down Expand Up @@ -461,7 +474,7 @@ countSplitReads <- function(sampleID, fds, NcpuPerSample=1, genome=NULL,
recount=FALSE, keepNonStandardChromosomes=TRUE,
bamfile=bamFile(fds[,sampleID]),
pairedend=pairedEnd(fds[,sampleID]),
strandmode=strandSpecific(fds),
strandmode=strandSpecific(fds[,sampleID]),
cacheFile=getSplitCountCacheFile(sampleID, fds),
scanbamparam=scanBamParam(fds),
coldata=colData(fds)){
Expand Down Expand Up @@ -865,6 +878,7 @@ countNonSplicedReads <- function(sampleID, splitCountRanges, fds,
# unstranded case: for counting only non spliced reads we
# skip this information
isPairedEnd <- pairedEnd(fds[,samples(fds) == sampleID])[[1]]
strand <- strandSpecific(fds[,samples(fds) == sampleID])[[1]]
doAutosort <- isPairedEnd

# check cache if available
Expand Down Expand Up @@ -906,7 +920,7 @@ countNonSplicedReads <- function(sampleID, splitCountRanges, fds,
allowMultiOverlap=TRUE,
checkFragLength=FALSE,
minMQS=bamMapqFilter(scanBamParam(fds)),
strandSpecific=as.integer(strandSpecific(fds)),
strandSpecific=strand,

# activating long read mode
isLongRead=longRead,
Expand Down
3 changes: 2 additions & 1 deletion R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,8 @@ getBPParam <- function(worker, tasks=0, ...){
}

getStrandString <- function(fds){
strand <- switch(strandSpecific(fds)+1L, "no", "yes", "reverse")
strand <- sapply(strandSpecific(fds)+1L, switch, "no", "yes", "reverse")
strand <- paste(collapse=", ", strand)
return(strand)
}

Expand Down
2 changes: 0 additions & 2 deletions R/makeSimulatedDataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ makeSimulatedFraserDataSet_BetaBinomial <- function(m=200, j=10000, q=10,
junctionData,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitData)

Expand Down Expand Up @@ -364,7 +363,6 @@ makeSimulatedFraserDataSet_Multinomial <- function(m=200, j=1000, q=10,
junctionData,
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
nonSplicedReads = nonSplitData)

Expand Down
1 change: 0 additions & 1 deletion R/mergeExternalData.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ mergeExternalData <- function(fds, countFiles, sampleIDs, annotation=NULL){
ans <- new("FraserDataSet",
name = name(fds),
bamParam = scanBamParam(fds),
strandSpecific = strandSpecific(fds),
workingDir = workingDir(fds),
colData = newColData,
assays = Assays(SimpleList(
Expand Down
2 changes: 1 addition & 1 deletion man/countRNA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test_counting.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ test_that("Strand spcific counting", {
fds <- createTestFraserSettings()
strandSpecific(fds) <- TRUE
ans <- countSplitReadsPerChromosome("chrUn_gl000218", bamFile(fds)[1],
pairedEnd=TRUE, strandMode=strandSpecific(fds), genome=NULL,
pairedEnd=TRUE, strandMode=strandSpecific(fds)[1], genome=NULL,
scanBamParam=scanBamParam(fds))
expect_equivalent(ans, GRanges())

Expand Down

0 comments on commit 63397df

Please sign in to comment.