From 390dc1f57683c7e32f03ab312dcfa877c7aabb00 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Tue, 26 Dec 2023 15:47:02 -0500 Subject: [PATCH] feat(derive/junction-annotation): better results reporting --- src/derive/junction_annotation/compute.rs | 74 ++++++++++++++++++++--- src/derive/junction_annotation/results.rs | 48 +++++++++++++-- 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/src/derive/junction_annotation/compute.rs b/src/derive/junction_annotation/compute.rs index a76a072..7252bc9 100644 --- a/src/derive/junction_annotation/compute.rs +++ b/src/derive/junction_annotation/compute.rs @@ -264,15 +264,24 @@ pub fn summarize( params: &JunctionAnnotationParameters, ) -> anyhow::Result<()> { // Filter out junctions that are too short or don't have enough read support. + let mut num_rejected: usize = 0; let mut num_junctions_too_short: usize = 0; let mut num_not_enough_support: usize = 0; for (_, v) in results.junction_annotations.known.iter_mut() { v.retain(|(start, end), count| { if end.get() - start.get() < params.min_intron_length { num_junctions_too_short += 1; + if *count < params.min_read_support as usize { + num_not_enough_support += 1; + } + num_rejected += 1; false } else if *count < params.min_read_support as usize { num_not_enough_support += 1; + if end.get() - start.get() < params.min_intron_length { + num_junctions_too_short += 1; + } + num_rejected += 1; false } else { true @@ -283,9 +292,17 @@ pub fn summarize( v.retain(|(start, end), count| { if end.get() - start.get() < params.min_intron_length { num_junctions_too_short += 1; + if *count < params.min_read_support as usize { + num_not_enough_support += 1; + } + num_rejected += 1; false } else if *count < params.min_read_support as usize { num_not_enough_support += 1; + if end.get() - start.get() < params.min_intron_length { + num_junctions_too_short += 1; + } + num_rejected += 1; false } else { true @@ -296,9 +313,17 @@ pub fn summarize( v.retain(|(start, end), count| { if end.get() - start.get() < params.min_intron_length { num_junctions_too_short += 1; + if *count < params.min_read_support as usize { + num_not_enough_support += 1; + } + num_rejected += 1; false } else if *count < params.min_read_support as usize { num_not_enough_support += 1; + if end.get() - start.get() < params.min_intron_length { + num_junctions_too_short += 1; + } + num_rejected += 1; false } else { true @@ -313,15 +338,24 @@ pub fn summarize( v.retain(|(start, end), count| { if end.get() - start.get() < params.min_intron_length { num_junctions_too_short += 1; + if *count < params.min_read_support as usize { + num_not_enough_support += 1; + } + num_rejected += 1; false } else if *count < params.min_read_support as usize { num_not_enough_support += 1; + if end.get() - start.get() < params.min_intron_length { + num_junctions_too_short += 1; + } + num_rejected += 1; false } else { true } }); } + results.summary.total_rejected_junctions = num_rejected; results.summary.intron_too_short = num_junctions_too_short; results.summary.junctions_with_not_enough_read_support = num_not_enough_support; @@ -332,7 +366,7 @@ pub fn summarize( .values() .map(|v| v.len()) .sum(); - results.summary.known_spliced_reads = results + results.summary.known_junctions_read_support = results .junction_annotations .known .values() @@ -344,7 +378,7 @@ pub fn summarize( .values() .map(|v| v.len()) .sum(); - results.summary.partial_novel_spliced_reads = results + results.summary.partial_novel_junctions_read_support = results .junction_annotations .partial_novel .values() @@ -356,7 +390,7 @@ pub fn summarize( .values() .map(|v| v.len()) .sum(); - results.summary.complete_novel_spliced_reads = results + results.summary.complete_novel_junctions_read_support = results .junction_annotations .complete_novel .values() @@ -368,7 +402,7 @@ pub fn summarize( .values() .map(|v| v.len()) .sum(); - results.summary.unannotated_reference_spliced_reads = results + results.summary.unannotated_reference_junctions_read_support = results .junction_annotations .unannotated_reference .values() @@ -380,10 +414,34 @@ pub fn summarize( + results.summary.partial_novel_junctions + results.summary.complete_novel_junctions + results.summary.unannotated_reference_junctions; - results.summary.total_spliced_reads = results.summary.known_spliced_reads - + results.summary.partial_novel_spliced_reads - + results.summary.complete_novel_spliced_reads - + results.summary.unannotated_reference_spliced_reads; + results.summary.total_junctions_read_support = results.summary.known_junctions_read_support + + results.summary.partial_novel_junctions_read_support + + results.summary.complete_novel_junctions_read_support + + results.summary.unannotated_reference_junctions_read_support; + + // Calculate percentages. + let total_junctions = results.summary.total_junctions as f64 + - results.summary.unannotated_reference_junctions as f64; + results.summary.known_junctions_percent = + results.summary.known_junctions as f64 / total_junctions * 100.0; + results.summary.partial_novel_junctions_percent = + results.summary.partial_novel_junctions as f64 / total_junctions * 100.0; + results.summary.complete_novel_junctions_percent = + results.summary.complete_novel_junctions as f64 / total_junctions * 100.0; + + // Calculate average read support. + results.summary.average_junction_read_support = results.summary.total_junctions_read_support + as f64 + / results.summary.total_junctions as f64; + results.summary.average_known_junction_read_support = + results.summary.known_junctions_read_support as f64 + / results.summary.known_junctions as f64; + results.summary.average_partial_novel_junction_read_support = + results.summary.partial_novel_junctions_read_support as f64 + / results.summary.partial_novel_junctions as f64; + results.summary.average_complete_novel_junction_read_support = + results.summary.complete_novel_junctions_read_support as f64 + / results.summary.complete_novel_junctions as f64; Ok(()) } diff --git a/src/derive/junction_annotation/results.rs b/src/derive/junction_annotation/results.rs index 9c63a03..6f7330b 100644 --- a/src/derive/junction_annotation/results.rs +++ b/src/derive/junction_annotation/results.rs @@ -75,6 +75,7 @@ impl Serialize for JunctionAnnotations { #[derive(Clone, Default, Serialize)] pub struct RecordMetrics { /// The number of records that have been fully processed. + /// This is the number of spliced records that have been considered. pub processed: usize, /// The number of records that have been ignored because of their flags. @@ -101,7 +102,10 @@ pub struct SummaryResults { pub total_junctions: usize, /// The total number of spliced reads observed in the file. - pub total_spliced_reads: usize, + pub total_junctions_read_support: usize, + + /// The average number of spliced reads supporting a junction. + pub average_junction_read_support: f64, /// The total number of known junctions observed in the file. pub known_junctions: usize, @@ -117,24 +121,56 @@ pub struct SummaryResults { pub unannotated_reference_junctions: usize, /// The total number of known spliced reads observed in the file. - pub known_spliced_reads: usize, + pub known_junctions_read_support: usize, /// The total number of partially novel spliced reads observed in the file. - pub partial_novel_spliced_reads: usize, + pub partial_novel_junctions_read_support: usize, /// The total number of complete novel spliced reads observed in the file. - pub complete_novel_spliced_reads: usize, + pub complete_novel_junctions_read_support: usize, /// The total number of spliced reads on reference sequences for which /// junction annotations were not found. - pub unannotated_reference_spliced_reads: usize, + pub unannotated_reference_junctions_read_support: usize, + + /// The percentage of junctions that are known. + /// This percentage excludes junctions on reference sequences for which + /// junction annotations were not found. + pub known_junctions_percent: f64, + + /// The percentage of junctions that are partially novel. + /// This percentage excludes junctions on reference sequences for which + /// junction annotations were not found. + pub partial_novel_junctions_percent: f64, + + /// The percentage of junctions that are completely novel. + /// This percentage excludes junctions on reference sequences for which + /// junction annotations were not found. + pub complete_novel_junctions_percent: f64, + + /// Average number of reads supporting known junctions. + pub average_known_junction_read_support: f64, + + /// Average number of reads supporting partially novel junctions. + pub average_partial_novel_junction_read_support: f64, + + /// Average number of reads supporting completely novel junctions. + pub average_complete_novel_junction_read_support: f64, + + /// The total number of junctions that have been rejected because + /// they failed the min_read_support or the min_intron_length filter. + /// A junction can be rejected for both reasons, so do not expect this + /// number to be equal to the sum of `junctions_with_not_enough_read_support` + /// and `intron_too_short`. + pub total_rejected_junctions: usize, /// The total number of junctions which were discarded due to lack of - /// read support. + /// read support. This is not mutually exclusive with `intron_too_short`. pub junctions_with_not_enough_read_support: usize, /// The number of junctions that have been ignored because /// they failed the min_intron_length filter. + /// This is not mutually exclusive with `junctions_with_not_enough_read_support`. pub intron_too_short: usize, }