Skip to content

Commit

Permalink
feat(derive/junction-annotation): better results reporting
Browse files Browse the repository at this point in the history
  • Loading branch information
a-frantz committed Dec 26, 2023
1 parent bf31648 commit 390dc1f
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 14 deletions.
74 changes: 66 additions & 8 deletions src/derive/junction_annotation/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,15 +264,24 @@ pub fn summarize(
params: &JunctionAnnotationParameters,
) -> anyhow::Result<()> {
// Filter out junctions that are too short or don't have enough read support.
let mut num_rejected: usize = 0;
let mut num_junctions_too_short: usize = 0;
let mut num_not_enough_support: usize = 0;
for (_, v) in results.junction_annotations.known.iter_mut() {
v.retain(|(start, end), count| {
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
if *count < params.min_read_support as usize {
num_not_enough_support += 1;
}
num_rejected += 1;
false
} else if *count < params.min_read_support as usize {
num_not_enough_support += 1;
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
}
num_rejected += 1;
false
} else {
true
Expand All @@ -283,9 +292,17 @@ pub fn summarize(
v.retain(|(start, end), count| {
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
if *count < params.min_read_support as usize {
num_not_enough_support += 1;
}
num_rejected += 1;
false
} else if *count < params.min_read_support as usize {
num_not_enough_support += 1;
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
}
num_rejected += 1;
false
} else {
true
Expand All @@ -296,9 +313,17 @@ pub fn summarize(
v.retain(|(start, end), count| {
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
if *count < params.min_read_support as usize {
num_not_enough_support += 1;
}
num_rejected += 1;
false
} else if *count < params.min_read_support as usize {
num_not_enough_support += 1;
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
}
num_rejected += 1;
false
} else {
true
Expand All @@ -313,15 +338,24 @@ pub fn summarize(
v.retain(|(start, end), count| {
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
if *count < params.min_read_support as usize {
num_not_enough_support += 1;
}
num_rejected += 1;
false
} else if *count < params.min_read_support as usize {
num_not_enough_support += 1;
if end.get() - start.get() < params.min_intron_length {
num_junctions_too_short += 1;
}
num_rejected += 1;
false
} else {
true
}
});
}
results.summary.total_rejected_junctions = num_rejected;
results.summary.intron_too_short = num_junctions_too_short;
results.summary.junctions_with_not_enough_read_support = num_not_enough_support;

Expand All @@ -332,7 +366,7 @@ pub fn summarize(
.values()
.map(|v| v.len())
.sum();
results.summary.known_spliced_reads = results
results.summary.known_junctions_read_support = results
.junction_annotations
.known
.values()
Expand All @@ -344,7 +378,7 @@ pub fn summarize(
.values()
.map(|v| v.len())
.sum();
results.summary.partial_novel_spliced_reads = results
results.summary.partial_novel_junctions_read_support = results
.junction_annotations
.partial_novel
.values()
Expand All @@ -356,7 +390,7 @@ pub fn summarize(
.values()
.map(|v| v.len())
.sum();
results.summary.complete_novel_spliced_reads = results
results.summary.complete_novel_junctions_read_support = results
.junction_annotations
.complete_novel
.values()
Expand All @@ -368,7 +402,7 @@ pub fn summarize(
.values()
.map(|v| v.len())
.sum();
results.summary.unannotated_reference_spliced_reads = results
results.summary.unannotated_reference_junctions_read_support = results
.junction_annotations
.unannotated_reference
.values()
Expand All @@ -380,10 +414,34 @@ pub fn summarize(
+ results.summary.partial_novel_junctions
+ results.summary.complete_novel_junctions
+ results.summary.unannotated_reference_junctions;
results.summary.total_spliced_reads = results.summary.known_spliced_reads
+ results.summary.partial_novel_spliced_reads
+ results.summary.complete_novel_spliced_reads
+ results.summary.unannotated_reference_spliced_reads;
results.summary.total_junctions_read_support = results.summary.known_junctions_read_support
+ results.summary.partial_novel_junctions_read_support
+ results.summary.complete_novel_junctions_read_support
+ results.summary.unannotated_reference_junctions_read_support;

// Calculate percentages.
let total_junctions = results.summary.total_junctions as f64
- results.summary.unannotated_reference_junctions as f64;
results.summary.known_junctions_percent =
results.summary.known_junctions as f64 / total_junctions * 100.0;
results.summary.partial_novel_junctions_percent =
results.summary.partial_novel_junctions as f64 / total_junctions * 100.0;
results.summary.complete_novel_junctions_percent =
results.summary.complete_novel_junctions as f64 / total_junctions * 100.0;

// Calculate average read support.
results.summary.average_junction_read_support = results.summary.total_junctions_read_support
as f64
/ results.summary.total_junctions as f64;
results.summary.average_known_junction_read_support =
results.summary.known_junctions_read_support as f64
/ results.summary.known_junctions as f64;
results.summary.average_partial_novel_junction_read_support =
results.summary.partial_novel_junctions_read_support as f64
/ results.summary.partial_novel_junctions as f64;
results.summary.average_complete_novel_junction_read_support =
results.summary.complete_novel_junctions_read_support as f64
/ results.summary.complete_novel_junctions as f64;

Ok(())
}
48 changes: 42 additions & 6 deletions src/derive/junction_annotation/results.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ impl Serialize for JunctionAnnotations {
#[derive(Clone, Default, Serialize)]
pub struct RecordMetrics {
/// The number of records that have been fully processed.
/// This is the number of spliced records that have been considered.
pub processed: usize,

/// The number of records that have been ignored because of their flags.
Expand All @@ -101,7 +102,10 @@ pub struct SummaryResults {
pub total_junctions: usize,

/// The total number of spliced reads observed in the file.
pub total_spliced_reads: usize,
pub total_junctions_read_support: usize,

/// The average number of spliced reads supporting a junction.
pub average_junction_read_support: f64,

/// The total number of known junctions observed in the file.
pub known_junctions: usize,
Expand All @@ -117,24 +121,56 @@ pub struct SummaryResults {
pub unannotated_reference_junctions: usize,

/// The total number of known spliced reads observed in the file.
pub known_spliced_reads: usize,
pub known_junctions_read_support: usize,

/// The total number of partially novel spliced reads observed in the file.
pub partial_novel_spliced_reads: usize,
pub partial_novel_junctions_read_support: usize,

/// The total number of complete novel spliced reads observed in the file.
pub complete_novel_spliced_reads: usize,
pub complete_novel_junctions_read_support: usize,

/// The total number of spliced reads on reference sequences for which
/// junction annotations were not found.
pub unannotated_reference_spliced_reads: usize,
pub unannotated_reference_junctions_read_support: usize,

/// The percentage of junctions that are known.
/// This percentage excludes junctions on reference sequences for which
/// junction annotations were not found.
pub known_junctions_percent: f64,

/// The percentage of junctions that are partially novel.
/// This percentage excludes junctions on reference sequences for which
/// junction annotations were not found.
pub partial_novel_junctions_percent: f64,

/// The percentage of junctions that are completely novel.
/// This percentage excludes junctions on reference sequences for which
/// junction annotations were not found.
pub complete_novel_junctions_percent: f64,

/// Average number of reads supporting known junctions.
pub average_known_junction_read_support: f64,

/// Average number of reads supporting partially novel junctions.
pub average_partial_novel_junction_read_support: f64,

/// Average number of reads supporting completely novel junctions.
pub average_complete_novel_junction_read_support: f64,

/// The total number of junctions that have been rejected because
/// they failed the min_read_support or the min_intron_length filter.
/// A junction can be rejected for both reasons, so do not expect this
/// number to be equal to the sum of `junctions_with_not_enough_read_support`
/// and `intron_too_short`.
pub total_rejected_junctions: usize,

/// The total number of junctions which were discarded due to lack of
/// read support.
/// read support. This is not mutually exclusive with `intron_too_short`.
pub junctions_with_not_enough_read_support: usize,

/// The number of junctions that have been ignored because
/// they failed the min_intron_length filter.
/// This is not mutually exclusive with `junctions_with_not_enough_read_support`.
pub intron_too_short: usize,
}

Expand Down

0 comments on commit 390dc1f

Please sign in to comment.