From 29ea59e0da8beb8478f630fa1a71dcdde0f1fd63 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 23 Aug 2024 14:11:39 +0200 Subject: [PATCH 01/53] Compar: DEV-4061: Add variant copy numbers to variant comparisons --- .../compar/mutation/GermlineVariantComparer.java | 2 ++ .../compar/mutation/GermlineVariantData.java | 3 +++ .../compar/mutation/SomaticVariantComparer.java | 4 +++- .../compar/mutation/SomaticVariantData.java | 15 +++++++++++---- .../hmftools/compar/mutation/VariantCommon.java | 3 ++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 0339b705a7..14966a1c0f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -5,6 +5,7 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_QUAL; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.GERMLINEVARIANT; import java.util.List; @@ -51,6 +52,7 @@ public void registerThresholds(final DiffThresholds thresholds) { // same as somatic thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); + thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java index 5d3a68517b..86fdb2dc85 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java @@ -15,6 +15,7 @@ import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_HOTSPOT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_OTHER_REPORTED; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TIER; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import java.util.Arrays; import java.util.List; @@ -121,6 +122,7 @@ private static List findVariantDiffs( checkDiff(diffs, FLD_OTHER_REPORTED, refVar.otherReportedEffects(), otherVar.otherReportedEffects()); checkDiff(diffs, FLD_QUAL, (int) refVar.qual(), (int) otherVar.qual(), thresholds); + checkDiff(diffs, FLD_VARIANT_COPY_NUMBER, refVar.variantCopyNumber(), otherVar.variantCopyNumber(), thresholds); return diffs; } @@ -138,5 +140,6 @@ protected static void addDisplayValues(final GermlineVariant variant, final List values.add(String.format("%s", variant.canonicalHgvsProteinImpact())); values.add(String.format("%s", variant.otherReportedEffects())); values.add(String.format("%.0f", variant.qual())); + values.add(String.format("%.3f", variant.variantCopyNumber())); } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index f1036c6b51..5ea08bba71 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -16,6 +16,7 @@ import static com.hartwig.hmftools.compar.common.MismatchType.REF_ONLY; import static com.hartwig.hmftools.compar.mutation.SomaticVariantData.FLD_LPS; import static com.hartwig.hmftools.compar.mutation.SomaticVariantData.FLD_SUBCLONAL_LIKELIHOOD; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static 
com.hartwig.hmftools.patientdb.database.hmfpatients.tables.Somaticvariant.SOMATICVARIANT; import java.util.List; @@ -251,7 +252,7 @@ protected SomaticVariantData findUnfilteredVariant(final SomaticVariantData test "", false, Hotspot.fromVariant(context), VariantTier.fromContext(context), false, "", "", "", "", "", context.hasAttribute(LOCAL_PHASE_SET), (int)context.getPhredScaledQual(), - 0, context.getFilters()); + 0, context.getFilters(), 0); } return null; @@ -289,6 +290,7 @@ public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_SUBCLONAL_LIKELIHOOD, 0.6, 0); + thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java index 4a073d86f5..16906eb99b 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java @@ -1,5 +1,6 @@ package com.hartwig.hmftools.compar.mutation; +import static com.hartwig.hmftools.common.variant.PurpleVcfTags.PURPLE_VARIANT_CN; import static com.hartwig.hmftools.compar.common.MismatchType.NEW_ONLY; import static com.hartwig.hmftools.compar.common.MismatchType.REF_ONLY; import static java.lang.String.format; @@ -27,6 +28,7 @@ import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_HOTSPOT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_OTHER_REPORTED; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TIER; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.SOMATICVARIANT; import java.util.Arrays; @@ -75,6 +77,7 @@ public class SomaticVariantData implements ComparableItem 
public final int Qual; public final double SubclonalLikelihood; public final Set Filters; + public final double VariantCopyNumber; private String mComparisonChromosome; private int mComparisonPosition; @@ -89,7 +92,7 @@ public SomaticVariantData( final String gene, final boolean reported, final Hotspot hotspotStatus, final VariantTier tier, final boolean biallelic, final String canonicalEffect, final String canonicalCodingEffect, final String canonicalHgvsCodingImpact, final String canonicalHgvsProteinImpact, final String otherReportedEffects, final boolean hasLPS, final int qual, - final double subclonalLikelihood, final Set filters) + final double subclonalLikelihood, final Set filters, final double variantCopyNumber) { Chromosome = chromosome; Position = position; @@ -110,6 +113,7 @@ public SomaticVariantData( Qual = qual; SubclonalLikelihood = subclonalLikelihood; Filters = filters; + VariantCopyNumber = variantCopyNumber; mComparisonChromosome = chromosome; mComparisonPosition = position; @@ -145,6 +149,7 @@ public List displayValues() values.add(format("%.2f", SubclonalLikelihood)); values.add(format("%s", HasLPS)); + values.add(format("%.3f", VariantCopyNumber)); return values; } @@ -213,6 +218,7 @@ protected Mismatch findDiffs( checkDiff(diffs, FLD_BIALLELIC, Biallelic, otherVar.Biallelic); checkDiff(diffs, FLD_OTHER_REPORTED, OtherReportedEffects, otherVar.OtherReportedEffects); checkDiff(diffs, FLD_SUBCLONAL_LIKELIHOOD, SubclonalLikelihood, otherVar.SubclonalLikelihood, thresholds); + checkDiff(diffs, FLD_VARIANT_COPY_NUMBER, VariantCopyNumber, otherVar.VariantCopyNumber, thresholds); } checkDiff(diffs, FLD_LPS, HasLPS, otherVar.HasLPS); @@ -249,7 +255,6 @@ public static SomaticVariantData fromContext(final VariantContext context) String alt = !context.getAlternateAlleles().isEmpty() ? 
context.getAlternateAlleles().get(0).toString() : ref; VariantImpact variantImpact; - if(context.hasAttribute(VAR_IMPACT)) variantImpact = VariantImpactSerialiser.fromVariantContext(context); else @@ -270,7 +275,8 @@ public static SomaticVariantData fromContext(final VariantContext context) context.hasAttribute(LOCAL_PHASE_SET), (int)context.getPhredScaledQual(), context.getAttributeAsDouble(SUBCLONAL_LIKELIHOOD_FLAG, 0), - context.getFilters()); + context.getFilters(), + context.getAttributeAsDouble(PURPLE_VARIANT_CN, 0)); } public static SomaticVariantData fromRecord(final Record record) @@ -297,7 +303,8 @@ public static SomaticVariantData fromRecord(final Record record) record.getValue(SOMATICVARIANT.OTHERTRANSCRIPTEFFECTS), localPhaseSets != null && !localPhaseSets.isEmpty(), (int)qual, record.getValue(SOMATICVARIANT.SUBCLONALLIKELIHOOD), - filters); + filters, + record.getValue(SOMATICVARIANT.VARIANTCOPYNUMBER)); } private static final String SNPEFF_WORST = "SEW"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java index db44e3bcba..26055544b3 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java @@ -19,11 +19,12 @@ public final class VariantCommon protected static final String FLD_HGVS_CODING = "CanonicalHgvsCoding"; protected static final String FLD_HGVS_PROTEIN = "CanonicalHgvsProtein"; protected static final String FLD_OTHER_REPORTED = "OtherReportedEffects"; + protected static final String FLD_VARIANT_COPY_NUMBER = "VariantCopyNumber"; protected static List comparedFieldNames() { return Lists.newArrayList( FLD_REPORTED, FLD_HOTSPOT, FLD_TIER, FLD_BIALLELIC, FLD_GENE, FLD_CANON_EFFECT, FLD_CODING_EFFECT, - FLD_HGVS_CODING, FLD_HGVS_PROTEIN, FLD_OTHER_REPORTED, FLD_QUAL); + FLD_HGVS_CODING, FLD_HGVS_PROTEIN, 
FLD_OTHER_REPORTED, FLD_QUAL, FLD_VARIANT_COPY_NUMBER); } } From b03503b93445fdd5a9974e706717994a386b758d Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 23 Aug 2024 14:21:42 +0200 Subject: [PATCH 02/53] Compar: DEV-4061: Reduce number of printed decimals --- .../hartwig/hmftools/compar/mutation/GermlineVariantData.java | 2 +- .../hartwig/hmftools/compar/mutation/SomaticVariantData.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java index 86fdb2dc85..dfbdf667be 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java @@ -140,6 +140,6 @@ protected static void addDisplayValues(final GermlineVariant variant, final List values.add(String.format("%s", variant.canonicalHgvsProteinImpact())); values.add(String.format("%s", variant.otherReportedEffects())); values.add(String.format("%.0f", variant.qual())); - values.add(String.format("%.3f", variant.variantCopyNumber())); + values.add(String.format("%.2f", variant.variantCopyNumber())); } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java index 16906eb99b..530d56d6db 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java @@ -149,7 +149,7 @@ public List displayValues() values.add(format("%.2f", SubclonalLikelihood)); values.add(format("%s", HasLPS)); - values.add(format("%.3f", VariantCopyNumber)); + values.add(format("%.2f", VariantCopyNumber)); return values; } From 2f3cca071653c7340f1bb0c3c619da4a060c36c2 Mon Sep 17 00:00:00 2001 From: David 
Koetsier Date: Fri, 23 Aug 2024 14:33:59 +0200 Subject: [PATCH 03/53] Compar: DEV-4061: Move copy numbers to correct output column --- .../hartwig/hmftools/compar/mutation/SomaticVariantData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java index 530d56d6db..8c45a5b942 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java @@ -146,10 +146,10 @@ public List displayValues() values.add(format("%s", CanonicalHgvsProteinImpact)); values.add(format("%s", OtherReportedEffects)); values.add(format("%d", Qual)); + values.add(format("%.2f", VariantCopyNumber)); values.add(format("%.2f", SubclonalLikelihood)); values.add(format("%s", HasLPS)); - values.add(format("%.2f", VariantCopyNumber)); return values; } From e328a72c07a70fd2b6f02ffb3aba78852c301517 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 23 Aug 2024 14:47:06 +0200 Subject: [PATCH 04/53] Compar: DEV-4061: Add TVAF comparison --- .../compar/mutation/GermlineVariantComparer.java | 2 ++ .../compar/mutation/GermlineVariantData.java | 4 +++- .../compar/mutation/SomaticVariantComparer.java | 4 +++- .../compar/mutation/SomaticVariantData.java | 14 +++++++++++--- .../hmftools/compar/mutation/VariantCommon.java | 3 ++- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 14966a1c0f..e938b7384e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -5,6 +5,7 @@ import static 
com.hartwig.hmftools.compar.common.CommonUtils.FLD_QUAL; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.GERMLINEVARIANT; @@ -53,6 +54,7 @@ public void registerThresholds(final DiffThresholds thresholds) // same as somatic thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); + thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java index dfbdf667be..e7c1549d9f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java @@ -14,6 +14,7 @@ import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_HGVS_PROTEIN; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_HOTSPOT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_OTHER_REPORTED; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TIER; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; @@ -123,7 +124,7 @@ private static List findVariantDiffs( checkDiff(diffs, FLD_QUAL, (int) refVar.qual(), (int) otherVar.qual(), thresholds); checkDiff(diffs, FLD_VARIANT_COPY_NUMBER, refVar.variantCopyNumber(), otherVar.variantCopyNumber(), thresholds); - + checkDiff(diffs, 
FLD_PURITY_ADJUSTED_VAF, refVar.adjustedVAF(), otherVar.adjustedVAF(), thresholds); return diffs; } @@ -141,5 +142,6 @@ protected static void addDisplayValues(final GermlineVariant variant, final List values.add(String.format("%s", variant.otherReportedEffects())); values.add(String.format("%.0f", variant.qual())); values.add(String.format("%.2f", variant.variantCopyNumber())); + values.add(String.format("%.2f", variant.adjustedVAF())); } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 5ea08bba71..87f065c1e9 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -16,6 +16,7 @@ import static com.hartwig.hmftools.compar.common.MismatchType.REF_ONLY; import static com.hartwig.hmftools.compar.mutation.SomaticVariantData.FLD_LPS; import static com.hartwig.hmftools.compar.mutation.SomaticVariantData.FLD_SUBCLONAL_LIKELIHOOD; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.tables.Somaticvariant.SOMATICVARIANT; @@ -252,7 +253,7 @@ protected SomaticVariantData findUnfilteredVariant(final SomaticVariantData test "", false, Hotspot.fromVariant(context), VariantTier.fromContext(context), false, "", "", "", "", "", context.hasAttribute(LOCAL_PHASE_SET), (int)context.getPhredScaledQual(), - 0, context.getFilters(), 0); + 0, context.getFilters(), 0, 0); } return null; @@ -291,6 +292,7 @@ public void registerThresholds(final DiffThresholds thresholds) thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_SUBCLONAL_LIKELIHOOD, 0.6, 0); 
thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); + thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java index 8c45a5b942..0016d4958e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java @@ -1,5 +1,6 @@ package com.hartwig.hmftools.compar.mutation; +import static com.hartwig.hmftools.common.variant.PurpleVcfTags.PURPLE_AF; import static com.hartwig.hmftools.common.variant.PurpleVcfTags.PURPLE_VARIANT_CN; import static com.hartwig.hmftools.compar.common.MismatchType.NEW_ONLY; import static com.hartwig.hmftools.compar.common.MismatchType.REF_ONLY; @@ -27,6 +28,7 @@ import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_HGVS_PROTEIN; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_HOTSPOT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_OTHER_REPORTED; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TIER; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.SOMATICVARIANT; @@ -78,6 +80,7 @@ public class SomaticVariantData implements ComparableItem public final double SubclonalLikelihood; public final Set Filters; public final double VariantCopyNumber; + public final double PurityAdjustedVaf; private String mComparisonChromosome; private int mComparisonPosition; @@ -92,7 +95,7 @@ public SomaticVariantData( final String gene, final boolean reported, final Hotspot hotspotStatus, final VariantTier tier, final boolean biallelic, final String canonicalEffect, 
final String canonicalCodingEffect, final String canonicalHgvsCodingImpact, final String canonicalHgvsProteinImpact, final String otherReportedEffects, final boolean hasLPS, final int qual, - final double subclonalLikelihood, final Set filters, final double variantCopyNumber) + final double subclonalLikelihood, final Set filters, final double variantCopyNumber, final double purityAdjustedVaf) { Chromosome = chromosome; Position = position; @@ -114,6 +117,7 @@ public SomaticVariantData( SubclonalLikelihood = subclonalLikelihood; Filters = filters; VariantCopyNumber = variantCopyNumber; + PurityAdjustedVaf = purityAdjustedVaf; mComparisonChromosome = chromosome; mComparisonPosition = position; @@ -147,6 +151,7 @@ public List displayValues() values.add(format("%s", OtherReportedEffects)); values.add(format("%d", Qual)); values.add(format("%.2f", VariantCopyNumber)); + values.add(format("%.2f", PurityAdjustedVaf)); values.add(format("%.2f", SubclonalLikelihood)); values.add(format("%s", HasLPS)); @@ -219,6 +224,7 @@ protected Mismatch findDiffs( checkDiff(diffs, FLD_OTHER_REPORTED, OtherReportedEffects, otherVar.OtherReportedEffects); checkDiff(diffs, FLD_SUBCLONAL_LIKELIHOOD, SubclonalLikelihood, otherVar.SubclonalLikelihood, thresholds); checkDiff(diffs, FLD_VARIANT_COPY_NUMBER, VariantCopyNumber, otherVar.VariantCopyNumber, thresholds); + checkDiff(diffs, FLD_PURITY_ADJUSTED_VAF, PurityAdjustedVaf, otherVar.PurityAdjustedVaf, thresholds); } checkDiff(diffs, FLD_LPS, HasLPS, otherVar.HasLPS); @@ -276,7 +282,8 @@ public static SomaticVariantData fromContext(final VariantContext context) (int)context.getPhredScaledQual(), context.getAttributeAsDouble(SUBCLONAL_LIKELIHOOD_FLAG, 0), context.getFilters(), - context.getAttributeAsDouble(PURPLE_VARIANT_CN, 0)); + context.getAttributeAsDouble(PURPLE_VARIANT_CN, 0), + context.getAttributeAsDouble(PURPLE_AF, 0)); } public static SomaticVariantData fromRecord(final Record record) @@ -304,7 +311,8 @@ public static 
SomaticVariantData fromRecord(final Record record) localPhaseSets != null && !localPhaseSets.isEmpty(), (int)qual, record.getValue(SOMATICVARIANT.SUBCLONALLIKELIHOOD), filters, - record.getValue(SOMATICVARIANT.VARIANTCOPYNUMBER)); + record.getValue(SOMATICVARIANT.VARIANTCOPYNUMBER), + record.getValue(SOMATICVARIANT.ADJUSTEDVAF)); } private static final String SNPEFF_WORST = "SEW"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java index 26055544b3..679c54d239 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java @@ -20,11 +20,12 @@ public final class VariantCommon protected static final String FLD_HGVS_PROTEIN = "CanonicalHgvsProtein"; protected static final String FLD_OTHER_REPORTED = "OtherReportedEffects"; protected static final String FLD_VARIANT_COPY_NUMBER = "VariantCopyNumber"; + protected static final String FLD_PURITY_ADJUSTED_VAF = "PurityAdjustedVaf"; protected static List comparedFieldNames() { return Lists.newArrayList( FLD_REPORTED, FLD_HOTSPOT, FLD_TIER, FLD_BIALLELIC, FLD_GENE, FLD_CANON_EFFECT, FLD_CODING_EFFECT, - FLD_HGVS_CODING, FLD_HGVS_PROTEIN, FLD_OTHER_REPORTED, FLD_QUAL, FLD_VARIANT_COPY_NUMBER); + FLD_HGVS_CODING, FLD_HGVS_PROTEIN, FLD_OTHER_REPORTED, FLD_QUAL, FLD_VARIANT_COPY_NUMBER, FLD_PURITY_ADJUSTED_VAF); } } From 9fa60ede2b07c773617250e79e93b5cffa99f8e9 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 23 Aug 2024 16:24:50 +0200 Subject: [PATCH 05/53] Compar: DEV-4061: Correct allelic depth in germline variant DAO to tumor depth --- .../com/hartwig/hmftools/patientdb/dao/GermlineVariantDAO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/GermlineVariantDAO.java 
b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/GermlineVariantDAO.java index b47c5f8e9f..804262d2ad 100644 --- a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/GermlineVariantDAO.java +++ b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/GermlineVariantDAO.java @@ -399,7 +399,7 @@ public static GermlineVariant buildFromRecord(final Record record) .spliceRegion(DatabaseUtil.byteToBoolean(record.getValue(GERMLINEVARIANT.SPLICEREGION))) .otherReportedEffects(DatabaseUtil.valueNotNull(record.getValue(GERMLINEVARIANT.OTHERTRANSCRIPTEFFECTS))) .allelicDepth(new AllelicDepth( - record.getValue(GERMLINEVARIANT.GERMLINETOTALREADCOUNT), record.getValue(GERMLINEVARIANT.GERMLINEALLELEREADCOUNT))) + record.getValue(GERMLINEVARIANT.TUMORTOTALREADCOUNT), record.getValue(GERMLINEVARIANT.TUMORALLELEREADCOUNT))) .adjustedCopyNumber(record.getValue(GERMLINEVARIANT.COPYNUMBER)) .adjustedVAF(record.getValue(GERMLINEVARIANT.ADJUSTEDVAF)) .variantCopyNumber(record.getValue(GERMLINEVARIANT.VARIANTCOPYNUMBER)) From da445eb8098ac3578d20967dbbdb766de698143d Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 23 Aug 2024 16:26:01 +0200 Subject: [PATCH 06/53] Compar: DEV-4026: Add tumor read count comparison --- .../mutation/GermlineVariantComparer.java | 4 ++++ .../compar/mutation/GermlineVariantData.java | 7 ++++++ .../mutation/SomaticVariantComparer.java | 16 +++++++++----- .../compar/mutation/SomaticVariantData.java | 22 +++++++++++++++---- .../compar/mutation/VariantCommon.java | 5 ++++- 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index e938b7384e..74d40d557c 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ 
-6,6 +6,8 @@ import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_SUPPORTING_READ_COUNT; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_TOTAL_READ_COUNT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.GERMLINEVARIANT; @@ -55,6 +57,8 @@ public void registerThresholds(final DiffThresholds thresholds) thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java index e7c1549d9f..e2972c9ba4 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java @@ -16,6 +16,8 @@ import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_OTHER_REPORTED; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TIER; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_SUPPORTING_READ_COUNT; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_TOTAL_READ_COUNT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; 
import java.util.Arrays; @@ -125,6 +127,8 @@ private static List findVariantDiffs( checkDiff(diffs, FLD_QUAL, (int) refVar.qual(), (int) otherVar.qual(), thresholds); checkDiff(diffs, FLD_VARIANT_COPY_NUMBER, refVar.variantCopyNumber(), otherVar.variantCopyNumber(), thresholds); checkDiff(diffs, FLD_PURITY_ADJUSTED_VAF, refVar.adjustedVAF(), otherVar.adjustedVAF(), thresholds); + checkDiff(diffs, FLD_TUMOR_SUPPORTING_READ_COUNT, refVar.allelicDepth().AlleleReadCount, otherVar.allelicDepth().AlleleReadCount, thresholds); + checkDiff(diffs, FLD_TUMOR_TOTAL_READ_COUNT, refVar.allelicDepth().TotalReadCount, otherVar.allelicDepth().TotalReadCount, thresholds); return diffs; } @@ -143,5 +147,8 @@ protected static void addDisplayValues(final GermlineVariant variant, final List values.add(String.format("%.0f", variant.qual())); values.add(String.format("%.2f", variant.variantCopyNumber())); values.add(String.format("%.2f", variant.adjustedVAF())); + values.add(String.format("%d", variant.allelicDepth().AlleleReadCount)); + values.add(String.format("%d", variant.allelicDepth().TotalReadCount)); } + } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 87f065c1e9..81496b5c13 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -17,6 +17,8 @@ import static com.hartwig.hmftools.compar.mutation.SomaticVariantData.FLD_LPS; import static com.hartwig.hmftools.compar.mutation.SomaticVariantData.FLD_SUBCLONAL_LIKELIHOOD; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_SUPPORTING_READ_COUNT; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_TOTAL_READ_COUNT; import 
static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static com.hartwig.hmftools.patientdb.database.hmfpatients.tables.Somaticvariant.SOMATICVARIANT; @@ -29,6 +31,7 @@ import com.hartwig.hmftools.common.genome.refgenome.RefGenomeFunctions; import com.hartwig.hmftools.common.purple.PurpleCommon; import com.hartwig.hmftools.common.region.BasePosition; +import com.hartwig.hmftools.common.variant.AllelicDepth; import com.hartwig.hmftools.common.variant.Hotspot; import com.hartwig.hmftools.common.variant.VariantTier; import com.hartwig.hmftools.common.variant.VariantType; @@ -177,7 +180,7 @@ else if(newVariant.Position > refVariant.comparisonPosition()) if(matchedVariant == null) { - final SomaticVariantData unfilteredVariant = findUnfilteredVariant(refVariant, NEW_SOURCE); + final SomaticVariantData unfilteredVariant = findUnfilteredVariant(refVariant, NEW_SOURCE, sampleId); if(unfilteredVariant != null) { @@ -213,7 +216,7 @@ else if(newVariant.Position > refVariant.comparisonPosition()) if(!includeMismatchWithVariant(newVariant, matchLevel)) continue; - SomaticVariantData unfilteredVariant = findUnfilteredVariant(newVariant, REF_SOURCE); + SomaticVariantData unfilteredVariant = findUnfilteredVariant(newVariant, REF_SOURCE, sampleId); if(unfilteredVariant != null) { @@ -230,7 +233,7 @@ else if(newVariant.Position > refVariant.comparisonPosition()) return true; } - protected SomaticVariantData findUnfilteredVariant(final SomaticVariantData testVariant, final String otherSource) + protected SomaticVariantData findUnfilteredVariant(final SomaticVariantData testVariant, final String otherSource, final String sampleId) { VcfFileReader unfilteredVcfReader = mUnfilteredVcfReaders.get(otherSource); @@ -253,7 +256,8 @@ protected SomaticVariantData findUnfilteredVariant(final SomaticVariantData test "", false, Hotspot.fromVariant(context), VariantTier.fromContext(context), false, "", "", "", "", "", context.hasAttribute(LOCAL_PHASE_SET), 
(int)context.getPhredScaledQual(), - 0, context.getFilters(), 0, 0); + 0, context.getFilters(), 0, 0, + AllelicDepth.fromGenotype(context.getGenotype(sampleId))); } return null; @@ -293,6 +297,8 @@ public void registerThresholds(final DiffThresholds thresholds) thresholds.addFieldThreshold(FLD_SUBCLONAL_LIKELIHOOD, 0.6, 0); thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); } @Override @@ -364,7 +370,7 @@ private List loadVariants(final String sampleId, final FileS if(variantContext.isFiltered()) continue; - SomaticVariantData variant = SomaticVariantData.fromContext(variantContext); + SomaticVariantData variant = SomaticVariantData.fromContext(variantContext, sampleId); if(mConfig.RestrictToDrivers && !mConfig.DriverGenes.contains(variant.Gene)) continue; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java index 0016d4958e..e2d3a28266 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantData.java @@ -30,6 +30,8 @@ import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_OTHER_REPORTED; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_PURITY_ADJUSTED_VAF; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TIER; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_SUPPORTING_READ_COUNT; +import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_TUMOR_TOTAL_READ_COUNT; import static com.hartwig.hmftools.compar.mutation.VariantCommon.FLD_VARIANT_COPY_NUMBER; import static 
com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.SOMATICVARIANT; @@ -39,6 +41,7 @@ import java.util.stream.Collectors; import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.variant.AllelicDepth; import com.hartwig.hmftools.common.variant.CodingEffect; import com.hartwig.hmftools.common.variant.Hotspot; import com.hartwig.hmftools.common.variant.VariantTier; @@ -81,6 +84,7 @@ public class SomaticVariantData implements ComparableItem public final Set Filters; public final double VariantCopyNumber; public final double PurityAdjustedVaf; + public final AllelicDepth TumorDepth; private String mComparisonChromosome; private int mComparisonPosition; @@ -95,7 +99,8 @@ public SomaticVariantData( final String gene, final boolean reported, final Hotspot hotspotStatus, final VariantTier tier, final boolean biallelic, final String canonicalEffect, final String canonicalCodingEffect, final String canonicalHgvsCodingImpact, final String canonicalHgvsProteinImpact, final String otherReportedEffects, final boolean hasLPS, final int qual, - final double subclonalLikelihood, final Set filters, final double variantCopyNumber, final double purityAdjustedVaf) + final double subclonalLikelihood, final Set filters, final double variantCopyNumber, final double purityAdjustedVaf, + final AllelicDepth tumorDepth) { Chromosome = chromosome; Position = position; @@ -118,6 +123,7 @@ public SomaticVariantData( Filters = filters; VariantCopyNumber = variantCopyNumber; PurityAdjustedVaf = purityAdjustedVaf; + TumorDepth = tumorDepth; mComparisonChromosome = chromosome; mComparisonPosition = position; @@ -152,6 +158,8 @@ public List displayValues() values.add(format("%d", Qual)); values.add(format("%.2f", VariantCopyNumber)); values.add(format("%.2f", PurityAdjustedVaf)); + values.add(String.format("%d", TumorDepth.AlleleReadCount)); + values.add(String.format("%d", TumorDepth.TotalReadCount)); values.add(format("%.2f", SubclonalLikelihood)); values.add(format("%s", 
HasLPS)); @@ -206,6 +214,8 @@ protected Mismatch findDiffs( checkDiff(diffs, FLD_REPORTED, Reported, otherVar.Reported); checkDiff(diffs, FLD_TIER, Tier.toString(), otherVar.Tier.toString()); + checkDiff(diffs, FLD_TUMOR_SUPPORTING_READ_COUNT, TumorDepth.AlleleReadCount, otherVar.TumorDepth.AlleleReadCount, thresholds); + checkDiff(diffs, FLD_TUMOR_TOTAL_READ_COUNT, TumorDepth.TotalReadCount, otherVar.TumorDepth.TotalReadCount, thresholds); if(matchFilterStatus.canComparePurpleFields()) { @@ -253,7 +263,7 @@ else if(matchFilterStatus == MatchFilterStatus.NEW_FILTERED) throw new RuntimeException(String.format("Unrecognized value for MatchFilterStatus: %s", matchFilterStatus)); } - public static SomaticVariantData fromContext(final VariantContext context) + public static SomaticVariantData fromContext(final VariantContext context, final String sampleId) { int position = context.getStart(); String chromosome = context.getContig(); @@ -283,7 +293,8 @@ public static SomaticVariantData fromContext(final VariantContext context) context.getAttributeAsDouble(SUBCLONAL_LIKELIHOOD_FLAG, 0), context.getFilters(), context.getAttributeAsDouble(PURPLE_VARIANT_CN, 0), - context.getAttributeAsDouble(PURPLE_AF, 0)); + context.getAttributeAsDouble(PURPLE_AF, 0), + AllelicDepth.fromGenotype(context.getGenotype(sampleId))); } public static SomaticVariantData fromRecord(final Record record) @@ -291,6 +302,8 @@ public static SomaticVariantData fromRecord(final Record record) Set filters = Arrays.stream(record.getValue(SOMATICVARIANT.FILTER).split(";", -1)).collect(Collectors.toSet()); String localPhaseSets = record.get(SOMATICVARIANT.LOCALPHASESET); double qual = record.getValue(Tables.SOMATICVARIANT.QUAL); + final AllelicDepth tumorDepth = + new AllelicDepth(record.getValue(SOMATICVARIANT.TOTALREADCOUNT), record.getValue(SOMATICVARIANT.ALLELEREADCOUNT)); return new SomaticVariantData( record.getValue(Tables.SOMATICVARIANT.CHROMOSOME), @@ -312,7 +325,8 @@ public static SomaticVariantData 
fromRecord(final Record record) (int)qual, record.getValue(SOMATICVARIANT.SUBCLONALLIKELIHOOD), filters, record.getValue(SOMATICVARIANT.VARIANTCOPYNUMBER), - record.getValue(SOMATICVARIANT.ADJUSTEDVAF)); + record.getValue(SOMATICVARIANT.ADJUSTEDVAF), + tumorDepth); } private static final String SNPEFF_WORST = "SEW"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java index 679c54d239..f3e7392dd0 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java @@ -21,11 +21,14 @@ public final class VariantCommon protected static final String FLD_OTHER_REPORTED = "OtherReportedEffects"; protected static final String FLD_VARIANT_COPY_NUMBER = "VariantCopyNumber"; protected static final String FLD_PURITY_ADJUSTED_VAF = "PurityAdjustedVaf"; + protected static final String FLD_TUMOR_SUPPORTING_READ_COUNT = "TumorSupportingReadCount"; + protected static final String FLD_TUMOR_TOTAL_READ_COUNT = "TumorTotalReadCount"; protected static List comparedFieldNames() { return Lists.newArrayList( FLD_REPORTED, FLD_HOTSPOT, FLD_TIER, FLD_BIALLELIC, FLD_GENE, FLD_CANON_EFFECT, FLD_CODING_EFFECT, - FLD_HGVS_CODING, FLD_HGVS_PROTEIN, FLD_OTHER_REPORTED, FLD_QUAL, FLD_VARIANT_COPY_NUMBER, FLD_PURITY_ADJUSTED_VAF); + FLD_HGVS_CODING, FLD_HGVS_PROTEIN, FLD_OTHER_REPORTED, FLD_QUAL, FLD_VARIANT_COPY_NUMBER, FLD_PURITY_ADJUSTED_VAF, + FLD_TUMOR_SUPPORTING_READ_COUNT, FLD_TUMOR_TOTAL_READ_COUNT); } } From 414abe76ff8a0f7d086f26acdf643ed736b84c96 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 26 Aug 2024 13:52:29 +0200 Subject: [PATCH 07/53] Compar: DEV-4061: Compare chromosome and chromosomeBand for germline deletions --- .../purple/GermlineDeletionComparer.java | 31 +++++++++++++++++-- .../compar/purple/GermlineDeletionData.java | 13 +++++++- 2 files changed, 
40 insertions(+), 4 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java index a7f35dce88..b0d561b282 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java @@ -3,6 +3,8 @@ import static com.hartwig.hmftools.compar.common.Category.GERMLINE_DELETION; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_CHROMOSOME; +import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_GERMLINE_CN; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_GERMLINE_STATUS; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_TUMOR_CN; @@ -12,6 +14,7 @@ import java.util.List; import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome; import com.hartwig.hmftools.common.purple.GermlineDeletion; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.CommonUtils; @@ -23,6 +26,8 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; +import org.jetbrains.annotations.NotNull; + public class GermlineDeletionComparer implements ItemComparer { private final ComparConfig mConfig; @@ -52,7 +57,7 @@ public boolean processSample(final String sampleId, final List mismatc public List comparedFieldNames() { return Lists.newArrayList( - FLD_REPORTED, FLD_GERMLINE_STATUS, FLD_TUMOR_STATUS, FLD_GERMLINE_CN, FLD_TUMOR_CN); + FLD_REPORTED, FLD_GERMLINE_STATUS, 
FLD_TUMOR_STATUS, FLD_GERMLINE_CN, FLD_TUMOR_CN, FLD_CHROMOSOME, FLD_CHROMOSOME_BAND); } @Override @@ -60,7 +65,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce { final List germlineDeletions = dbAccess.readGermlineDeletions(sampleId); List items = Lists.newArrayList(); - germlineDeletions.forEach(x -> items.add(new GermlineDeletionData(x))); + germlineDeletions.forEach(x -> items.add(createGermlineDeletionData(x))); return items; } @@ -72,7 +77,7 @@ public List loadFromFile(final String sampleId, final FileSource try { List germlineDeletions = GermlineDeletion.read(GermlineDeletion.generateFilename(fileSources.Purple, sampleId)); - germlineDeletions.forEach(x -> comparableItems.add(new GermlineDeletionData(x))); + germlineDeletions.forEach(x -> comparableItems.add(createGermlineDeletionData(x))); } catch(IOException e) { @@ -82,4 +87,24 @@ public List loadFromFile(final String sampleId, final FileSource return comparableItems; } + + @NotNull + private GermlineDeletionData createGermlineDeletionData(final GermlineDeletion deletion) + { + String comparisonChromosome = determineComparisonChromosome(deletion.Chromosome, mConfig.RequiresLiftover); + return new GermlineDeletionData(deletion, comparisonChromosome); + } + + @NotNull + private static String determineComparisonChromosome(final String chromosome, final boolean requiresLiftover) + { + if(requiresLiftover) + { + return HumanChromosome.fromString(chromosome).name().substring(1); + } + else + { + return chromosome; + } + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java index 2a333fdc9b..ff190064ef 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java @@ -20,15 +20,19 @@ public class GermlineDeletionData implements ComparableItem { 
public final GermlineDeletion Deletion; + public final String mComparisonChromosome; protected static final String FLD_GERMLINE_STATUS = "GermlineStatus"; protected static final String FLD_TUMOR_STATUS = "TumorStatus"; protected static final String FLD_GERMLINE_CN = "GermlineCopyNumber"; protected static final String FLD_TUMOR_CN = "TumorCopyNumber"; + protected static final String FLD_CHROMOSOME = "Chromosome"; + protected static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; - public GermlineDeletionData(final GermlineDeletion germlineDeletion) + public GermlineDeletionData(final GermlineDeletion germlineDeletion, final String comparisonChromosome) { Deletion = germlineDeletion; + mComparisonChromosome = comparisonChromosome; } public Category category() { @@ -44,12 +48,17 @@ public String key() @Override public List displayValues() { + String chromosomeDisplay = Deletion.Chromosome.equals(mComparisonChromosome) + ? Deletion.Chromosome + : format("%s compared(%s)", Deletion.Chromosome, mComparisonChromosome); List values = Lists.newArrayList(); values.add(format("%s", Deletion.Reported)); values.add(format("%s", Deletion.NormalStatus)); values.add(format("%s", Deletion.TumorStatus)); values.add(format("%s", Deletion.GermlineCopyNumber)); values.add(format("%s", Deletion.TumorCopyNumber)); + values.add(format("%s", chromosomeDisplay)); + values.add(format("%s", Deletion.ChromosomeBand)); return values; } @@ -77,6 +86,8 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL checkDiff(diffs, FLD_TUMOR_STATUS, Deletion.TumorStatus.toString(), otherDeletion.Deletion.TumorStatus.toString()); checkDiff(diffs, FLD_GERMLINE_CN, Deletion.GermlineCopyNumber, otherDeletion.Deletion.GermlineCopyNumber, thresholds); checkDiff(diffs, FLD_TUMOR_CN, Deletion.TumorCopyNumber, otherDeletion.Deletion.TumorCopyNumber, thresholds); + checkDiff(diffs, FLD_CHROMOSOME, mComparisonChromosome, otherDeletion.mComparisonChromosome); + checkDiff(diffs, 
FLD_CHROMOSOME_BAND, Deletion.ChromosomeBand, otherDeletion.Deletion.ChromosomeBand); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } From ae3a41decf21271098070360d7bd839dee76f001 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 26 Aug 2024 14:27:36 +0200 Subject: [PATCH 08/53] Compar: DEV-4061: Compare copy numbers and chromosome arms for drivers --- .../hmftools/compar/common/CommonUtils.java | 13 +++++++++ .../compar/driver/DriverComparer.java | 29 +++++++++++-------- .../hmftools/compar/driver/DriverData.java | 19 +++++++++++- .../purple/GermlineDeletionComparer.java | 18 +----------- .../hartwig/hmftools/compar/DriverTest.java | 8 ++--- 5 files changed, 53 insertions(+), 34 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index e552c2bde5..5ba4665e6e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -19,6 +19,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome; import com.hartwig.hmftools.common.genome.refgenome.GenomeLiftoverCache; import com.hartwig.hmftools.common.genome.refgenome.RefGenomeVersion; import com.hartwig.hmftools.common.region.BasePosition; @@ -247,4 +248,16 @@ public static BasePosition determineComparisonGenomePosition( return new BasePosition(chromosome, position); } + + public static String determineComparisonChromosome(final String chromosome, final boolean requiresLiftover) + { + if(requiresLiftover) + { + return HumanChromosome.fromString(chromosome).name().substring(1); + } + else + { + return chromosome; + } + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java index 4cbe3595ac..239d448bf8 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java @@ -4,8 +4,13 @@ import static com.hartwig.hmftools.common.drivercatalog.DriverType.DRIVERS_LINX_SOMATIC; import static com.hartwig.hmftools.compar.common.Category.DRIVER; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonChromosome; +import static com.hartwig.hmftools.compar.driver.DriverData.FLD_CHROMOSOME; +import static com.hartwig.hmftools.compar.driver.DriverData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.driver.DriverData.FLD_LIKELIHOOD; import static com.hartwig.hmftools.compar.driver.DriverData.FLD_LIKE_METHOD; +import static com.hartwig.hmftools.compar.driver.DriverData.FLD_MAX_COPY_NUMBER; +import static com.hartwig.hmftools.compar.driver.DriverData.FLD_MIN_COPY_NUMBER; import java.io.IOException; import java.nio.file.Files; @@ -43,6 +48,8 @@ public DriverComparer(final ComparConfig config) public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_LIKELIHOOD, 0.1, 0); + thresholds.addFieldThreshold(FLD_MIN_COPY_NUMBER, 0.5, 0.15); + thresholds.addFieldThreshold(FLD_MAX_COPY_NUMBER, 0.5, 0.15); } @Override @@ -54,7 +61,7 @@ public boolean processSample(final String sampleId, final List mismatc @Override public List comparedFieldNames() { - return Lists.newArrayList(FLD_LIKE_METHOD, FLD_LIKELIHOOD); + return Lists.newArrayList(FLD_LIKE_METHOD, FLD_LIKELIHOOD, FLD_MIN_COPY_NUMBER, FLD_MAX_COPY_NUMBER, FLD_CHROMOSOME, FLD_CHROMOSOME_BAND); } @Override @@ -62,15 +69,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce { final List drivers = dbAccess.readDriverCatalog(sampleId); - final List driverDataList = 
Lists.newArrayList(); - - for(DriverCatalog driver : drivers) - { - boolean checkTranscript = mConfig.AlternateTranscriptDriverGenes.contains(driver.gene()); - driverDataList.add(new DriverData(driver, checkTranscript)); - } - - return driverDataList; + return drivers.stream().map(this::createDriverData).collect(Collectors.toList()); } @Override @@ -118,8 +117,7 @@ public List loadFromFile(final String sampleId, final FileSource for(DriverCatalog driver : drivers) { - boolean checkTranscript = mConfig.AlternateTranscriptDriverGenes.contains(driver.gene()); - comparableItems.add(new DriverData(driver, checkTranscript)); + comparableItems.add(createDriverData(driver)); } } catch(IOException e) @@ -130,4 +128,11 @@ public List loadFromFile(final String sampleId, final FileSource return comparableItems; } + + private DriverData createDriverData(final DriverCatalog driver) + { + boolean checkTranscript = mConfig.AlternateTranscriptDriverGenes.contains(driver.gene()); + String comparisonChromosome = determineComparisonChromosome(driver.chromosome(), mConfig.RequiresLiftover); + return new DriverData(driver, comparisonChromosome, checkTranscript); + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java index 13e3379cf0..f8d680f207 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java @@ -19,15 +19,21 @@ public class DriverData implements ComparableItem { public final DriverCatalog DriverCatalog; + public final String mComparisonChromosome; private final String mKey; private final boolean mCheckTranscript; protected static final String FLD_LIKELIHOOD = "Likelihood"; protected static final String FLD_LIKE_METHOD = "LikelihoodMethod"; + protected static final String FLD_CHROMOSOME = "Chromosome"; + protected static final String FLD_CHROMOSOME_BAND = 
"ChromosomeBand"; + protected static final String FLD_MIN_COPY_NUMBER = "MinCopyNumber"; + protected static final String FLD_MAX_COPY_NUMBER = "MaxCopyNumber"; - public DriverData(final DriverCatalog driverCatalog, boolean checkTranscript) + public DriverData(final DriverCatalog driverCatalog, final String comparisonChromosome, boolean checkTranscript) { DriverCatalog = driverCatalog; + mComparisonChromosome = comparisonChromosome; mCheckTranscript = checkTranscript; String key = format("%s_%s", driverCatalog.driver(), driverCatalog.gene()); @@ -46,9 +52,16 @@ public String key() @Override public List displayValues() { + String chromosomeDisplay = DriverCatalog.chromosome().equals(mComparisonChromosome) + ? DriverCatalog.chromosome() + : format("%s compared(%s)", DriverCatalog.chromosome(), mComparisonChromosome); List values = Lists.newArrayList(); values.add(format("%s", DriverCatalog.likelihoodMethod())); values.add(format("%.2f", DriverCatalog.driverLikelihood())); + values.add(format("%.2f", DriverCatalog.minCopyNumber())); + values.add(format("%.2f", DriverCatalog.maxCopyNumber())); + values.add(format("%s", chromosomeDisplay)); + values.add(format("%s", DriverCatalog.chromosomeBand())); return values; } @@ -90,6 +103,10 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL DriverCatalog.likelihoodMethod().toString(), otherDriver.DriverCatalog.likelihoodMethod().toString()); checkDiff(diffs, FLD_LIKELIHOOD, DriverCatalog.driverLikelihood(), otherDriver.DriverCatalog.driverLikelihood(), thresholds); + checkDiff(diffs, FLD_MIN_COPY_NUMBER, DriverCatalog.minCopyNumber(), otherDriver.DriverCatalog.minCopyNumber(), thresholds); + checkDiff(diffs, FLD_MAX_COPY_NUMBER, DriverCatalog.maxCopyNumber(), otherDriver.DriverCatalog.maxCopyNumber(), thresholds); + checkDiff(diffs, FLD_CHROMOSOME, mComparisonChromosome, otherDriver.mComparisonChromosome); + checkDiff(diffs, FLD_CHROMOSOME_BAND, DriverCatalog.chromosomeBand(), 
otherDriver.DriverCatalog.chromosomeBand()); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java index b0d561b282..b58871ac00 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java @@ -3,6 +3,7 @@ import static com.hartwig.hmftools.compar.common.Category.GERMLINE_DELETION; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonChromosome; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_CHROMOSOME; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_GERMLINE_CN; @@ -14,7 +15,6 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome; import com.hartwig.hmftools.common.purple.GermlineDeletion; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.CommonUtils; @@ -26,8 +26,6 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; -import org.jetbrains.annotations.NotNull; - public class GermlineDeletionComparer implements ItemComparer { private final ComparConfig mConfig; @@ -88,23 +86,9 @@ public List loadFromFile(final String sampleId, final FileSource return comparableItems; } - @NotNull private GermlineDeletionData createGermlineDeletionData(final GermlineDeletion deletion) { String comparisonChromosome = 
determineComparisonChromosome(deletion.Chromosome, mConfig.RequiresLiftover); return new GermlineDeletionData(deletion, comparisonChromosome); } - - @NotNull - private static String determineComparisonChromosome(final String chromosome, final boolean requiresLiftover) - { - if(requiresLiftover) - { - return HumanChromosome.fromString(chromosome).name().substring(1); - } - else - { - return chromosome; - } - } } diff --git a/compar/src/test/java/com/hartwig/hmftools/compar/DriverTest.java b/compar/src/test/java/com/hartwig/hmftools/compar/DriverTest.java index dbce3b8981..8550e05817 100644 --- a/compar/src/test/java/com/hartwig/hmftools/compar/DriverTest.java +++ b/compar/src/test/java/com/hartwig/hmftools/compar/DriverTest.java @@ -36,12 +36,12 @@ public void testDriverDiffs() List refItems = Lists.newArrayList(); List newItems = Lists.newArrayList(); - refItems.add(new DriverData(createDriverCatalog("AR", DriverType.AMP, 1.0, 6), false)); + refItems.add(new DriverData(createDriverCatalog("AR", DriverType.AMP, 1.0, 6), "1", false)); - newItems.add(new DriverData(createDriverCatalog("TP53", DriverType.DEL, 1.0, 0.2), false)); + newItems.add(new DriverData(createDriverCatalog("TP53", DriverType.DEL, 1.0, 0.2), "2", false)); - refItems.add(new DriverData(createDriverCatalog("KRAS", DriverType.MUTATION, 0.7, 2), false)); - newItems.add(new DriverData(createDriverCatalog("KRAS", DriverType.MUTATION, 0.5, 2), false)); + refItems.add(new DriverData(createDriverCatalog("KRAS", DriverType.MUTATION, 0.7, 2), "3", false)); + newItems.add(new DriverData(createDriverCatalog("KRAS", DriverType.MUTATION, 0.5, 2), "3", false)); CommonUtils.compareItems(mismatches, MatchLevel.REPORTABLE, config.Thresholds, refItems, newItems); From 017ff844d96ad140a4ba95f9230843af03347180 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 26 Aug 2024 14:43:49 +0200 Subject: [PATCH 09/53] Compar: DEV-4061: Set default threshold variant CN to default for CN --- 
.../hmftools/compar/mutation/GermlineVariantComparer.java | 2 +- .../hmftools/compar/mutation/SomaticVariantComparer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 74d40d557c..7158ece84a 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -55,7 +55,7 @@ public void registerThresholds(final DiffThresholds thresholds) { // same as somatic thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); - thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); + thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.5, 0.2); thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 81496b5c13..5175885b46 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -295,7 +295,7 @@ public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_SUBCLONAL_LIKELIHOOD, 0.6, 0); - thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.2); + thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.5, 0.2); thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); 
thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); From b64577a7b3e66300233de5b0974c2d928e920fb3 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 26 Aug 2024 15:20:16 +0200 Subject: [PATCH 10/53] Compar: DEV-4061: Compare transcripts and JCN for fusions --- .../hmftools/compar/linx/FusionComparer.java | 14 ++++++++++++-- .../hartwig/hmftools/compar/linx/FusionData.java | 9 +++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java index 97a2c2a062..78564f8d28 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java @@ -9,9 +9,12 @@ import static com.hartwig.hmftools.compar.linx.FusionData.FLD_DOMAINS_LOST; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_EXON_DOWN; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_EXON_UP; +import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_LIKELIHOOD; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_PHASED; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_REPORTED_TYPE; +import static com.hartwig.hmftools.compar.linx.FusionData.FLD_TRANSCRIPT_DOWN; +import static com.hartwig.hmftools.compar.linx.FusionData.FLD_TRANSCRIPT_UP; import java.io.IOException; import java.util.List; @@ -23,6 +26,7 @@ import com.hartwig.hmftools.compar.common.CommonUtils; import com.hartwig.hmftools.compar.ComparConfig; import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.DiffThresholds; import com.hartwig.hmftools.compar.common.FileSources; import com.hartwig.hmftools.compar.ItemComparer; import com.hartwig.hmftools.compar.common.Mismatch; @@ -40,6 +44,12 @@ public 
FusionComparer(final ComparConfig config) @Override public Category category() { return FUSION; } + @Override + public void registerThresholds(final DiffThresholds thresholds) + { + thresholds.addFieldThreshold(FLD_JUNCTION_COPY_NUMBER, 0.5, 0.2); + } + @Override public boolean processSample(final String sampleId, final List mismatches) { @@ -50,8 +60,8 @@ public boolean processSample(final String sampleId, final List mismatc public List comparedFieldNames() { return Lists.newArrayList( - FLD_REPORTED, FLD_REPORTED_TYPE, FLD_PHASED, FLD_LIKELIHOOD, FLD_EXON_UP, FLD_EXON_DOWN, FLD_CHAIN_LINKS, FLD_CHAIN_TERM, - FLD_DOMAINS_KEPT, FLD_DOMAINS_LOST); + FLD_REPORTED, FLD_REPORTED_TYPE, FLD_PHASED, FLD_LIKELIHOOD, FLD_TRANSCRIPT_UP, FLD_EXON_UP, FLD_TRANSCRIPT_DOWN, + FLD_EXON_DOWN, FLD_CHAIN_LINKS, FLD_CHAIN_TERM, FLD_DOMAINS_KEPT, FLD_DOMAINS_LOST, FLD_JUNCTION_COPY_NUMBER); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java index 7b3eb512d5..dbad164a29 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java @@ -23,12 +23,15 @@ public class FusionData implements ComparableItem protected static final String FLD_REPORTED_TYPE = "ReportedType"; protected static final String FLD_PHASED = "Phased"; protected static final String FLD_LIKELIHOOD = "likelihood"; + protected static final String FLD_TRANSCRIPT_UP = "fusedTranscriptUp"; protected static final String FLD_EXON_UP = "fusedExonUp"; + protected static final String FLD_TRANSCRIPT_DOWN = "fusedTranscriptDown"; protected static final String FLD_EXON_DOWN = "fusedExonDown"; protected static final String FLD_CHAIN_LINKS = "chainLinks"; protected static final String FLD_CHAIN_TERM = "chainTerminated"; protected static final String FLD_DOMAINS_KEPT = "domainsKept"; protected static final String FLD_DOMAINS_LOST 
= "domainsLost"; + protected static final String FLD_JUNCTION_COPY_NUMBER = "junctionCopyNumber"; public FusionData(final LinxFusion fusion, final String geneMappedName) { @@ -53,12 +56,15 @@ public List displayValues() values.add(String.format("%s", Fusion.reportedType())); values.add(String.format("%s", Fusion.phased())); values.add(String.format("%s", Fusion.likelihood())); + values.add(String.format("%s", Fusion.geneTranscriptStart())); values.add(String.format("%d", Fusion.fusedExonUp())); + values.add(String.format("%s", Fusion.geneTranscriptEnd())); values.add(String.format("%d", Fusion.fusedExonDown())); values.add(String.format("%d", Fusion.chainLinks())); values.add(String.format("%s", Fusion.chainTerminated())); values.add(String.format("%s", Fusion.domainsKept())); values.add(String.format("%s", Fusion.domainsLost())); + values.add(String.format("%.2f", Fusion.junctionCopyNumber())); return values; } @@ -84,12 +90,15 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL checkDiff(diffs, FLD_REPORTED_TYPE, Fusion.reportedType(), otherFusion.Fusion.reportedType()); checkDiff(diffs, FLD_PHASED, Fusion.phased().toString(), otherFusion.Fusion.phased().toString()); checkDiff(diffs, FLD_LIKELIHOOD, Fusion.likelihood().toString(), otherFusion.Fusion.likelihood().toString()); + checkDiff(diffs, FLD_TRANSCRIPT_UP, Fusion.geneTranscriptStart(), otherFusion.Fusion.geneTranscriptStart()); checkDiff(diffs, FLD_EXON_UP, Fusion.fusedExonUp(), otherFusion.Fusion.fusedExonUp()); + checkDiff(diffs, FLD_TRANSCRIPT_DOWN, Fusion.geneTranscriptEnd(), otherFusion.Fusion.geneTranscriptEnd()); checkDiff(diffs, FLD_EXON_DOWN, Fusion.fusedExonDown(), otherFusion.Fusion.fusedExonDown()); checkDiff(diffs, FLD_CHAIN_LINKS, Fusion.chainLinks(), otherFusion.Fusion.chainLinks()); checkDiff(diffs, FLD_CHAIN_TERM, Fusion.chainTerminated(), otherFusion.Fusion.chainTerminated()); checkDiff(diffs, FLD_DOMAINS_KEPT, Fusion.domainsKept(), 
otherFusion.Fusion.domainsKept()); checkDiff(diffs, FLD_DOMAINS_LOST, Fusion.domainsLost(), otherFusion.Fusion.domainsLost()); + checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, Fusion.junctionCopyNumber(), otherFusion.Fusion.junctionCopyNumber(), thresholds); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } From 72841a2d3501b58c8d65e1ac20870c3ff70928d4 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 26 Aug 2024 16:03:38 +0200 Subject: [PATCH 11/53] Compar: DEV-4061: Compare junction copy number and chromosome band for disruptions --- .../hartwig/hmftools/compar/linx/DisruptionComparer.java | 7 +++++-- .../com/hartwig/hmftools/compar/linx/DisruptionData.java | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java index 2b6a6f9e62..b41d958040 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java @@ -5,10 +5,12 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; +import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_CODING_CONTEXT; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_GENE_ORIENT; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_NEXT_SPLICE; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_REGION_TYPE; +import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import java.io.IOException; import java.util.List; @@ -49,6 +51,7 @@ public DisruptionComparer(final ComparConfig config) 
@Override public void registerThresholds(final DiffThresholds thresholds) { + thresholds.addFieldThreshold(FLD_JUNCTION_COPY_NUMBER, 0.5, 0.2); } @Override @@ -60,8 +63,8 @@ public boolean processSample(final String sampleId, final List mismatc @Override public List comparedFieldNames() { - return Lists.newArrayList( - FLD_REPORTED, FLD_REGION_TYPE, FLD_CODING_CONTEXT, FLD_GENE_ORIENT, FLD_NEXT_SPLICE); + return Lists.newArrayList(FLD_REPORTED, FLD_REGION_TYPE, FLD_CODING_CONTEXT, FLD_GENE_ORIENT, FLD_NEXT_SPLICE, + FLD_JUNCTION_COPY_NUMBER, FLD_CHROMOSOME_BAND); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java index 3efe22de3f..ee00392843 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java @@ -29,6 +29,8 @@ public class DisruptionData implements ComparableItem protected static final String FLD_CODING_CONTEXT = "CodingContext"; protected static final String FLD_GENE_ORIENT = "GeneOrientation"; protected static final String FLD_NEXT_SPLICE = "NextSpliceExonRank"; + protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; + protected static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; public DisruptionData( final StructuralVariantData svData, final LinxBreakend breakend, final BasePosition comparisonPositionStart, @@ -70,6 +72,8 @@ public List displayValues() values.add(String.format("%s", Breakend.codingType())); values.add(String.format("%s", Breakend.geneOrientation())); values.add(String.format("%d", Breakend.nextSpliceExonRank())); + values.add(String.format("%.2f", Breakend.junctionCopyNumber())); + values.add(String.format("%s", Breakend.chrBand())); return values; } @@ -116,6 +120,8 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL checkDiff(diffs, 
FLD_REPORTED, reportable(), otherBreakend.reportable()); checkDiff(diffs, FLD_GENE_ORIENT, Breakend.geneOrientation(), otherBreakend.Breakend.geneOrientation()); checkDiff(diffs, FLD_NEXT_SPLICE, Breakend.nextSpliceExonRank(), otherBreakend.Breakend.nextSpliceExonRank()); + checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, Breakend.junctionCopyNumber(), otherBreakend.Breakend.junctionCopyNumber(), thresholds); + checkDiff(diffs, FLD_CHROMOSOME_BAND, Breakend.chrBand(), otherBreakend.Breakend.chrBand()); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } From fb0a713a40753b2eb84c2c864f149a2c5374e8d2 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Tue, 27 Aug 2024 11:31:55 +0200 Subject: [PATCH 12/53] Compar: DEV-4061: Compare junction CN and show quals for germline SVs --- .../hartwig/hmftools/compar/linx/GermlineSvComparer.java | 4 +++- .../com/hartwig/hmftools/compar/linx/GermlineSvData.java | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java index f260606d34..5116ec5da8 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java @@ -7,6 +7,7 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; +import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import static com.hartwig.hmftools.compar.linx.GermlineSvData.FLD_GERMLINE_FRAGS; import java.io.IOException; @@ -47,6 +48,7 @@ public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_GERMLINE_FRAGS, 5, 
0.1); + thresholds.addFieldThreshold(FLD_JUNCTION_COPY_NUMBER, 0.5, 0.2); } @Override @@ -58,7 +60,7 @@ public boolean processSample(final String sampleId, final List mismatc @Override public List comparedFieldNames() { - return Lists.newArrayList(FLD_REPORTED, FLD_GERMLINE_FRAGS); + return Lists.newArrayList(FLD_REPORTED, FLD_GERMLINE_FRAGS, FLD_QUAL, FLD_JUNCTION_COPY_NUMBER); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java index d136c2efab..64b32a467e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java @@ -2,6 +2,7 @@ import static com.hartwig.hmftools.compar.common.Category.GERMLINE_SV; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_QUAL; +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; @@ -24,6 +25,7 @@ public class GermlineSvData implements ComparableItem private final BasePosition mComparisonEndPosition; protected static final String FLD_GERMLINE_FRAGS = "GermlineFragments"; + protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; public GermlineSvData( final LinxGermlineSv svData, boolean isReported, final BasePosition comparisonStartPosition, @@ -62,6 +64,8 @@ public List displayValues() List values = Lists.newArrayList(); values.add(String.format("%s", mIsReported)); values.add(String.format("%d", SvData.GermlineFragments)); + values.add(String.format("%d", (int) SvData.QualScore)); + values.add(String.format("%.2f", SvData.JunctionCopyNumber)); return values; } @@ -103,8 +107,10 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL final List diffs = Lists.newArrayList(); - 
checkDiff(diffs, FLD_QUAL, (int) SvData.QualScore, (int) otherSv.SvData.QualScore, thresholds); + checkDiff(diffs, FLD_REPORTED, mIsReported, otherSv.mIsReported); checkDiff(diffs, FLD_GERMLINE_FRAGS, SvData.GermlineFragments, otherSv.SvData.GermlineFragments, thresholds); + checkDiff(diffs, FLD_QUAL, (int) SvData.QualScore, (int) otherSv.SvData.QualScore, thresholds); + checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, SvData.JunctionCopyNumber, otherSv.SvData.JunctionCopyNumber, thresholds); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } From 2056621db706251f1de9132b174fea9415507a15 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 28 Aug 2024 14:02:18 +0200 Subject: [PATCH 13/53] Compar: DEV-4061: Split Lilac somatic variants by type and allele --- .../hmftools/compar/lilac/LilacComparer.java | 14 ++++++- .../hmftools/compar/lilac/LilacData.java | 40 +++++++++++++------ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java index 2de0ba9a9b..2481a2d422 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java @@ -7,9 +7,13 @@ import static com.hartwig.hmftools.common.hla.LilacQcData.FLD_TOTAL_FRAGS; import static com.hartwig.hmftools.compar.common.Category.LILAC; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; -import static com.hartwig.hmftools.compar.common.DiffThresholds.DEFAULT_DIFF_PERC; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_ALLELES; +import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_INFRAME_INDEL; +import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_MISSENSE; +import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_NONSENSE_OR_FRAMESHIFT; import static 
com.hartwig.hmftools.compar.lilac.LilacData.FLD_REF_TOTAL; +import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_SPLICE; +import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_SYNONYMOUS; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_TUMOR_TOTAL; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_VARIANTS; @@ -35,6 +39,8 @@ public class LilacComparer implements ItemComparer private static final double FRAG_DIFF_PERC = 0.01; private static final double FRAG_DIFF_ABS = 10; + private static final double VARIANT_DIFF_PERC = 0.1; + private static final double VARIANT_DIFF_ABS = 0.4; public LilacComparer(final ComparConfig config) { @@ -53,6 +59,12 @@ public void registerThresholds(final DiffThresholds thresholds) thresholds.addFieldThreshold(FLD_FIT_FRAGS, FRAG_DIFF_ABS, FRAG_DIFF_PERC); thresholds.addFieldThreshold(FLD_DISC_ALIGN_FRAGS, FRAG_DIFF_ABS, FRAG_DIFF_PERC); thresholds.addFieldThreshold(FLD_DISC_INDELS, FRAG_DIFF_ABS, FRAG_DIFF_PERC); + + thresholds.addFieldThreshold(FLD_MISSENSE, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); + thresholds.addFieldThreshold(FLD_NONSENSE_OR_FRAMESHIFT, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); + thresholds.addFieldThreshold(FLD_SPLICE, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); + thresholds.addFieldThreshold(FLD_INFRAME_INDEL, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); + thresholds.addFieldThreshold(FLD_SYNONYMOUS, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java index 5554d4f22a..7344ee7384 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java @@ -35,6 +35,12 @@ public class LilacData implements ComparableItem protected static final String FLD_REF_TOTAL = "RefTotal"; protected static final String FLD_TUMOR_TOTAL = "TumorTotal"; + protected static 
final String FLD_MISSENSE = "SomaticMissense"; + protected static final String FLD_NONSENSE_OR_FRAMESHIFT = "SomaticNonsenseOrFrameshift"; + protected static final String FLD_SPLICE = "SomaticSplice"; + protected static final String FLD_INFRAME_INDEL = "SomaticInframeIndel"; + protected static final String FLD_SYNONYMOUS = "SomaticSynonymous"; + private static final String ALLELE_DELIM = ":"; public LilacData(final LilacQcData qcData, final List alleles) @@ -94,7 +100,6 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL checkDiff(diffs, FLD_DISC_ALIGN_FRAGS, QcData.discardedAlignmentFragments(), otherData.QcData.discardedAlignmentFragments(), thresholds); checkDiff(diffs, FLD_DISC_INDELS, QcData.discardedIndels(), otherData.QcData.discardedIndels(), thresholds); checkDiff(diffs, FLD_HLA_Y, QcData.hlaYAllele(), otherData.QcData.hlaYAllele()); - checkDiff(diffs, FLD_VARIANTS, somaticVariantCount(), otherData.somaticVariantCount()); List origDiffs = Alleles.stream().filter(x -> !hasAllele(x, otherData.Alleles)).collect(Collectors.toList()); List newDiffs = otherData.Alleles.stream().filter(x -> !hasAllele(x, Alleles)).collect(Collectors.toList()); @@ -109,19 +114,30 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL diffs.add(String.format("%s(%s/%s)", FLD_ALLELES, origDiffsSj, newDiffsSj.toString())); } - if(matchLevel == MatchLevel.DETAILED) + // matches alleles in order when an allele is homozygous + List newAllelesToMatch = Lists.newArrayList(otherData.Alleles); + for(LilacAllele refAllele : Alleles) { - checkDiff(diffs, FLD_HLA_Y, QcData.hlaYAllele(), otherData.QcData.hlaYAllele()); - - for(LilacAllele refAllele : Alleles) + LilacAllele matchingNewAllele = + newAllelesToMatch.stream().filter(x -> x.allele().equals(refAllele.allele())).findFirst().orElse(null); + if(matchingNewAllele != null) { - LilacAllele newAllele = otherData.Alleles.stream().filter(x -> 
x.allele().equals(refAllele.allele())).findFirst().orElse(null); - - if(newAllele == null) - continue; - - checkDiff(diffs, FLD_REF_TOTAL, refAllele.refFragments(), newAllele.refFragments(), thresholds); - checkDiff(diffs, FLD_TUMOR_TOTAL, refAllele.tumorFragments(), newAllele.tumorFragments(), thresholds); + List temporaryDiffs = Lists.newArrayList(); + checkDiff(temporaryDiffs, FLD_MISSENSE, refAllele.somaticMissense(), matchingNewAllele.somaticMissense(), thresholds); + checkDiff(temporaryDiffs, FLD_NONSENSE_OR_FRAMESHIFT, refAllele.somaticNonsenseOrFrameshift(), + matchingNewAllele.somaticNonsenseOrFrameshift(), thresholds); + checkDiff(temporaryDiffs, FLD_SPLICE, refAllele.somaticSplice(), matchingNewAllele.somaticSplice(), thresholds); + checkDiff(temporaryDiffs, FLD_INFRAME_INDEL, refAllele.somaticInframeIndel(), matchingNewAllele.somaticInframeIndel(), thresholds); + if(matchLevel == MatchLevel.DETAILED) + { + checkDiff(temporaryDiffs, FLD_REF_TOTAL, refAllele.refFragments(), matchingNewAllele.refFragments(), thresholds); + checkDiff(temporaryDiffs, FLD_TUMOR_TOTAL, refAllele.tumorFragments(), matchingNewAllele.tumorFragments(), thresholds); + checkDiff(temporaryDiffs, FLD_SYNONYMOUS, refAllele.somaticSynonymous(), matchingNewAllele.somaticSynonymous(), thresholds); + + } + + temporaryDiffs.stream().map(d -> refAllele.allele() + ALLELE_DELIM + d).forEach(d -> diffs.add(d)); + newAllelesToMatch.remove(matchingNewAllele); } } From 634813c159f2d6122cad9be88a449d48eda248a0 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 28 Aug 2024 14:29:02 +0200 Subject: [PATCH 14/53] Compar: DEV-4061: Compare tumor copy numbers of HLA alleles --- .../java/com/hartwig/hmftools/compar/lilac/LilacComparer.java | 3 +++ .../java/com/hartwig/hmftools/compar/lilac/LilacData.java | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java index 2481a2d422..2b91544758 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java @@ -14,6 +14,7 @@ import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_REF_TOTAL; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_SPLICE; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_SYNONYMOUS; +import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_TUMOR_COPY_NUMBER; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_TUMOR_TOTAL; import static com.hartwig.hmftools.compar.lilac.LilacData.FLD_VARIANTS; @@ -65,6 +66,7 @@ public void registerThresholds(final DiffThresholds thresholds) thresholds.addFieldThreshold(FLD_SPLICE, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); thresholds.addFieldThreshold(FLD_INFRAME_INDEL, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); thresholds.addFieldThreshold(FLD_SYNONYMOUS, VARIANT_DIFF_ABS, VARIANT_DIFF_PERC); + thresholds.addFieldThreshold(FLD_TUMOR_COPY_NUMBER, 0.5, 0.15); } @Override @@ -82,6 +84,7 @@ public List comparedFieldNames() @Override public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) { + // Not currently supported return Lists.newArrayList(); } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java index 7344ee7384..45990001d6 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java @@ -32,14 +32,15 @@ public class LilacData implements ComparableItem protected static final String FLD_ALLELES = "Alleles"; protected static final String FLD_VARIANTS = "SomaticVariants"; + protected static final String FLD_REF_TOTAL = "RefTotal"; protected static final String 
FLD_TUMOR_TOTAL = "TumorTotal"; - protected static final String FLD_MISSENSE = "SomaticMissense"; protected static final String FLD_NONSENSE_OR_FRAMESHIFT = "SomaticNonsenseOrFrameshift"; protected static final String FLD_SPLICE = "SomaticSplice"; protected static final String FLD_INFRAME_INDEL = "SomaticInframeIndel"; protected static final String FLD_SYNONYMOUS = "SomaticSynonymous"; + protected static final String FLD_TUMOR_COPY_NUMBER = "TumorCopyNumber"; private static final String ALLELE_DELIM = ":"; @@ -128,6 +129,7 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL matchingNewAllele.somaticNonsenseOrFrameshift(), thresholds); checkDiff(temporaryDiffs, FLD_SPLICE, refAllele.somaticSplice(), matchingNewAllele.somaticSplice(), thresholds); checkDiff(temporaryDiffs, FLD_INFRAME_INDEL, refAllele.somaticInframeIndel(), matchingNewAllele.somaticInframeIndel(), thresholds); + checkDiff(temporaryDiffs, FLD_TUMOR_COPY_NUMBER, refAllele.tumorCopyNumber(), matchingNewAllele.tumorCopyNumber(), thresholds); if(matchLevel == MatchLevel.DETAILED) { checkDiff(temporaryDiffs, FLD_REF_TOTAL, refAllele.refFragments(), matchingNewAllele.refFragments(), thresholds); From c25e07973674d03a0ad00dfbe491a0077001769e Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 28 Aug 2024 15:58:54 +0200 Subject: [PATCH 15/53] Compar: DEV-4061: Set up basic Peach support Deriving file name from sample data directory doesn't work yet because the normal sample names are needed, which Compar doesn't support yet. Also still need to implement SQL DB support. 
--- .../hmftools/compar/common/Category.java | 3 +- .../hmftools/compar/common/FileSources.java | 11 +- .../hmftools/compar/peach/PeachComparer.java | 100 ++++++++++++++++++ .../hmftools/compar/peach/PeachData.java | 82 ++++++++++++++ .../common/peach/PeachGenotypeFile.java | 7 ++ .../hmftools/orange/OrangeWGSRefConfig.java | 3 +- 6 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java index b0be8d8e9e..f0b49f2dfd 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java @@ -18,7 +18,8 @@ public enum Category GERMLINE_SV, CUPPA, LILAC, - CHORD; + CHORD, + PEACH; public static final String ALL_CATEGORIES = "ALL"; public static final String LINX_CATEGORIES = "LINX"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index 994014ba28..1edd181b83 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -14,6 +14,8 @@ import static com.hartwig.hmftools.common.utils.config.CommonConfig.LINX_DIR_DESC; import static com.hartwig.hmftools.common.utils.config.CommonConfig.LINX_GERMLINE_DIR_CFG; import static com.hartwig.hmftools.common.utils.config.CommonConfig.LINX_GERMLINE_DIR_DESC; +import static com.hartwig.hmftools.common.utils.config.CommonConfig.PEACH_DIR_CFG; +import static com.hartwig.hmftools.common.utils.config.CommonConfig.PEACH_DIR_DESC; import static com.hartwig.hmftools.common.utils.config.CommonConfig.PURPLE_DIR_CFG; 
import static com.hartwig.hmftools.common.utils.config.CommonConfig.PURPLE_DIR_DESC; import static com.hartwig.hmftools.common.utils.config.ConfigUtils.convertWildcardSamplePath; @@ -37,6 +39,7 @@ public class FileSources public final String Cuppa; public final String Lilac; public final String Chord; + public final String Peach; public final String SomaticVcf; public final String SomaticUnfilteredVcf; @@ -45,7 +48,7 @@ public class FileSources private static final String SOMATIC_UNFILTERED_VCF = "somatic_unfiltered_vcf"; public FileSources(final String source, final String linx, final String purple, final String linxGermline, final String cuppa, - final String lilac, final String chord, final String somaticVcf, final String somaticUnfilteredVcf) + final String lilac, final String chord, final String peach, final String somaticVcf, final String somaticUnfilteredVcf) { Source = source; Linx = linx; @@ -54,6 +57,7 @@ public FileSources(final String source, final String linx, final String purple, Cuppa = cuppa; Lilac = lilac; Chord = chord; + Peach = peach; SomaticVcf = somaticVcf; SomaticUnfilteredVcf = somaticUnfilteredVcf; } @@ -68,6 +72,7 @@ public static FileSources sampleInstance(final FileSources fileSources, final St convertWildcardSamplePath(fileSources.Cuppa, sampleId), convertWildcardSamplePath(fileSources.Lilac, sampleId), convertWildcardSamplePath(fileSources.Chord, sampleId), + convertWildcardSamplePath(fileSources.Peach, sampleId), convertWildcardSamplePath(fileSources.SomaticVcf, sampleId), convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId)); } @@ -100,6 +105,7 @@ public static void registerConfig(final ConfigBuilder configBuilder) addPathConfig(configBuilder, LILAC_DIR_CFG, LILAC_DIR_DESC, sourceName); addPathConfig(configBuilder, CHORD_DIR_CFG, CHORD_DIR_DESC, sourceName); addPathConfig(configBuilder, CUPPA_DIR_CFG, CUPPA_DIR_DESC, sourceName); + addPathConfig(configBuilder, PEACH_DIR_CFG, PEACH_DIR_DESC, sourceName); 
configBuilder.addPath( formSourceConfig(SOMATIC_VCF, sourceName), false, @@ -139,12 +145,13 @@ public static FileSources fromConfig(final String sourceName, final ConfigBuilde String cuppaDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.CUPPA_DIR, CUPPA_DIR_CFG, sourceName); String lilacDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.LILAC_DIR, LILAC_DIR_CFG, sourceName); String chordDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.CHORD_DIR, CHORD_DIR_CFG, sourceName); + String peachDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.PEACH_DIR, PEACH_DIR_CFG, sourceName); String somaticVcf = getConfigValue(configBuilder, SOMATIC_VCF, sourceName); String somaticUnfilteredVcf = getConfigValue(configBuilder, SOMATIC_UNFILTERED_VCF, sourceName); return new FileSources( - sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, somaticVcf, somaticUnfilteredVcf); + sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, somaticVcf, somaticUnfilteredVcf); } private static String getDirectory( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java new file mode 100644 index 0000000000..3b48cd3b55 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -0,0 +1,100 @@ +package com.hartwig.hmftools.compar.peach; + +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.PEACH; +import static com.hartwig.hmftools.compar.peach.PeachData.FLD_ALLELE_COUNT; +import static com.hartwig.hmftools.compar.peach.PeachData.FLD_DRUGS; +import static com.hartwig.hmftools.compar.peach.PeachData.FLD_FUNCTION; +import static com.hartwig.hmftools.compar.peach.PeachData.FLD_PRESCRIPTION_URLS; + +import java.io.File; +import java.io.IOException; 
+import java.nio.file.Files; +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.peach.PeachGenotypeFile; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.FileSources; +import com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +import org.jetbrains.annotations.NotNull; + +public class PeachComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + public PeachComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return PEACH; + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_ALLELE_COUNT, FLD_FUNCTION, FLD_DRUGS, FLD_PRESCRIPTION_URLS); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) + { + // TODO + return Lists.newArrayList(); + } + + public List loadFromFile(final String sampleId, final FileSources fileSources) + { + final List comparableItems = Lists.newArrayList(); + + String fileName = determineFileName(sampleId, fileSources); + try + { + PeachGenotypeFile.read(fileName).forEach(g -> comparableItems.add(new PeachData(g))); + } + catch(IOException e) + { + CMP_LOGGER.warn("sample({}) failed to load Peach data: {}", sampleId, e.toString()); + return null; + } + return comparableItems; + } + + @NotNull + private static String determineFileName(final String sampleId, final FileSources 
fileSources) + { + // TODO: this needs to be the normal sample ID... + final String currentFileName = PeachGenotypeFile.generateFileName(fileSources.Peach, sampleId); + final String oldFileName = PeachGenotypeFile.generateOldPythonFileName(fileSources.Peach, sampleId); + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else + { + return currentFileName; + } + } + + private static boolean fileExists(final String filename) + { + return Files.exists(new File(filename).toPath()); + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java new file mode 100644 index 0000000000..b4dea4c166 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java @@ -0,0 +1,82 @@ +package com.hartwig.hmftools.compar.peach; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.compar.common.Category.PEACH; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; + +import java.util.ArrayList; +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.peach.PeachGenotype; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import com.hartwig.hmftools.compar.common.Mismatch; + +public class PeachData implements ComparableItem +{ + public final PeachGenotype Genotype; + + protected static final String FLD_ALLELE_COUNT = "AlleleCount"; + protected static final String FLD_FUNCTION = "Function"; + protected static final String FLD_DRUGS = "Drugs"; + protected static final String FLD_PRESCRIPTION_URLS = "PrescriptionUrls"; + + public PeachData(final PeachGenotype genotype) + { + Genotype = genotype; + 
} + + @Override + public Category category() { return PEACH; } + + @Override + public String key() + { + return Genotype.gene() + " " + Genotype.allele(); + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%d", Genotype.alleleCount())); + values.add(format("%s", Genotype.function())); + values.add(format("%s", Genotype.linkedDrugs())); + values.add(format("%s", Genotype.urlPrescriptionInfo())); + return values; + } + + @Override + public boolean reportable() { return true; } + + @Override + public boolean matches(final ComparableItem other) + { + final PeachData otherData = (PeachData) other; + if(!Genotype.gene().equals(otherData.Genotype.gene())) + { + return false; + } + return Genotype.allele().equals(otherData.Genotype.allele()); + } + + @Override + public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final PeachData otherData = (PeachData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_ALLELE_COUNT, Genotype.alleleCount(), otherData.Genotype.alleleCount()); + checkDiff(diffs, FLD_FUNCTION, Genotype.function(), otherData.Genotype.function()); + checkDiff(diffs, FLD_DRUGS, Genotype.linkedDrugs(), otherData.Genotype.linkedDrugs()); + checkDiff(diffs, FLD_PRESCRIPTION_URLS, Genotype.urlPrescriptionInfo(), otherData.Genotype.urlPrescriptionInfo()); + + return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; + } +} diff --git a/hmf-common/src/main/java/com/hartwig/hmftools/common/peach/PeachGenotypeFile.java b/hmf-common/src/main/java/com/hartwig/hmftools/common/peach/PeachGenotypeFile.java index e7f420d3a4..3fac4d1591 100644 --- a/hmf-common/src/main/java/com/hartwig/hmftools/common/peach/PeachGenotypeFile.java +++ b/hmf-common/src/main/java/com/hartwig/hmftools/common/peach/PeachGenotypeFile.java @@ -41,6 +41,7 @@ public final class PeachGenotypeFile .toString(); private static final String FILE_EXTENSION = ".peach.haplotypes.best.tsv"; + private static final String OLD_PYTHON_FILE_EXTENSION = ".peach.genotype.tsv"; @NotNull public static String generateFileName(@NotNull String outputDir, @NotNull String sampleId) @@ -48,6 +49,12 @@ public static String generateFileName(@NotNull String outputDir, @NotNull String return checkAddDirSeparator(outputDir) + sampleId + FILE_EXTENSION; } + @NotNull + public static String generateOldPythonFileName(@NotNull String outputDir, @NotNull String sampleId) + { + return checkAddDirSeparator(outputDir) + sampleId + OLD_PYTHON_FILE_EXTENSION; + } + @NotNull public static List read(@NotNull final String filename) throws IOException { diff --git a/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java b/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java index 23d329f970..fe945e1242 100644 --- a/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java +++ b/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java @@ -1,5 +1,6 @@ package com.hartwig.hmftools.orange; +import static com.hartwig.hmftools.common.peach.PeachGenotypeFile.generateOldPythonFileName; import static com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.CHORD_DIR; import static com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.CUPPA_DIR; import static com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.FLAGSTAT_DIR; @@ 
-148,7 +149,7 @@ static OrangeWGSRefConfig createConfig(@NotNull ConfigBuilder configBuilder, @No String peachGenotypeTsv = optionalPath(PeachGenotypeFile.generateFileName(peachDir, refSampleId)); if (peachGenotypeTsv == null) { - peachGenotypeTsv = mandatoryPath(peachDir + File.separator + tumorSampleId + ".peach.genotype.tsv"); + peachGenotypeTsv = mandatoryPath(generateOldPythonFileName(peachDir, tumorSampleId)); } builder.peachGenotypeTsv(peachGenotypeTsv); From 428940287af71aa339e715988eeb0e4f72660d5b Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 29 Aug 2024 11:31:26 +0200 Subject: [PATCH 16/53] Compar: DEV-4061: Add support for normal sample IDs --- .../hartwig/hmftools/compar/ComparConfig.java | 82 +++++++++++++++---- 1 file changed, 65 insertions(+), 17 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java index 15a5760099..4e23712350 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java @@ -86,6 +86,7 @@ public class ComparConfig private boolean mIsValid; // config strings + public static final String NORMAL = "normal"; public static final String CATEGORIES = "categories"; public static final String MATCH_LEVEL = "match_level"; @@ -220,31 +221,60 @@ public String sourceSampleId(final String source, final String sampleId) return mapping.SourceMapping.get(source); } + public String sourceNormalSampleId(final String source, final String sampleId) + { + if(mSampleIdMappings.isEmpty()) + return sampleId; + + SampleIdMapping mapping = mSampleIdMappings.get(sampleId); + + if(mapping != null && mapping.NormalSourceMapping.containsKey(source)) + { + return mapping.NormalSourceMapping.get(source); + } + else if(mapping != null && mapping.NormalSampleId != null) + { + return mapping.NormalSampleId; + } + else + { + CMP_LOGGER.warn("sample({}) source({}) 
missed normal sample ID", sampleId, source); + return sourceSampleId(source, sampleId); + } + } + public boolean isValid() { return mIsValid; } public boolean singleSample() { return SampleIds.size() == 1; } public boolean multiSample() { return SampleIds.size() > 1; } - private class SampleIdMapping + private static class SampleIdMapping { public final String SampleId; + public final String NormalSampleId; public Map SourceMapping; + public Map NormalSourceMapping; - public SampleIdMapping(final String sampleId) + public SampleIdMapping(final String sampleId, final String normalSampleId) { SampleId = sampleId; + NormalSampleId = normalSampleId; SourceMapping = Maps.newHashMap(); + NormalSourceMapping = Maps.newHashMap(); } } private static final String COL_SAMPLE_ID = "SampleId"; + private static final String COL_NORMAL_SAMPLE_ID = "NormalSampleId"; private static final String COL_REF_SAMPLE_ID = "RefSampleId"; + private static final String COL_REF_NORMAL_SAMPLE_ID = "RefNormalSampleId"; private static final String COL_NEW_SAMPLE_ID = "NewSampleId"; + private static final String COL_NEW_NORMAL_SAMPLE_ID = "NewNormalSampleId"; private void loadSampleIds(final ConfigBuilder configBuilder) { if(configBuilder.hasValue(SAMPLE)) { - SampleIds.add(configBuilder.getValue(SAMPLE)); + registerSampleIds(configBuilder.getValue(SAMPLE), configBuilder.getValue(NORMAL, null)); return; } @@ -264,8 +294,11 @@ private void loadSampleIds(final ConfigBuilder configBuilder) Map fieldsIndexMap = createFieldsIndexMap(header, CSV_DELIM); int sampleIndex = fieldsIndexMap.get(COL_SAMPLE_ID); + Integer normalSampleIndex = fieldsIndexMap.get(COL_NORMAL_SAMPLE_ID); Integer refSampleIndex = fieldsIndexMap.get(COL_REF_SAMPLE_ID); + Integer refNormalSampleIndex = fieldsIndexMap.get(COL_REF_NORMAL_SAMPLE_ID); Integer newSampleIndex = fieldsIndexMap.get(COL_NEW_SAMPLE_ID); + Integer newNormalSampleIndex = fieldsIndexMap.get(COL_NEW_NORMAL_SAMPLE_ID); for(String line : lines) { @@ -275,23 +308,13 
@@ private void loadSampleIds(final ConfigBuilder configBuilder) String[] values = line.split(CSV_DELIM, -1); String sampleId = values[sampleIndex]; - - SampleIds.add(sampleId); - + String normalSampleId = normalSampleIndex != null ? values[normalSampleIndex] : null; String refSampleId = refSampleIndex != null ? values[refSampleIndex] : null; + String refNormalSampleId = refNormalSampleIndex != null ? values[refNormalSampleIndex] : null; String newSampleId = newSampleIndex != null ? values[newSampleIndex] : null; + String newNormalSampleId = newNormalSampleIndex != null ? values[newNormalSampleIndex] : null; - if(refSampleId != null || newSampleId != null) - { - SampleIdMapping mapping = new SampleIdMapping(sampleId); - mSampleIdMappings.put(sampleId, mapping); - - if(refSampleId != null && SourceNames.size() >= 1); - mapping.SourceMapping.put(SourceNames.get(0), refSampleId); - - if(newSampleId != null && SourceNames.size() >= 2) - mapping.SourceMapping.put(SourceNames.get(1), newSampleId); - } + registerSampleIds(sampleId, normalSampleId, refSampleId, refNormalSampleId, newSampleId, newNormalSampleId); } CMP_LOGGER.info("loaded {} samples from file", SampleIds.size()); @@ -302,6 +325,30 @@ private void loadSampleIds(final ConfigBuilder configBuilder) } } + private void registerSampleIds(final String sampleId, final String normalSampleId) + { + registerSampleIds(sampleId, normalSampleId, null, null, null, null); + } + + private void registerSampleIds(final String sampleId, final String normalSampleId, final String refSampleId, + final String refNormalSampleId, final String newSampleId, final String newNormalSampleId) + { + SampleIds.add(sampleId); + + SampleIdMapping mapping = new SampleIdMapping(sampleId, normalSampleId); + mSampleIdMappings.put(sampleId, mapping); + + if(refSampleId != null && SourceNames.size() >= 1) + mapping.SourceMapping.put(SourceNames.get(0), refSampleId); + if(newSampleId != null && SourceNames.size() >= 2) + 
mapping.SourceMapping.put(SourceNames.get(1), newSampleId); + + if(refNormalSampleId != null && SourceNames.size() >= 1) + mapping.NormalSourceMapping.put(SourceNames.get(0), refNormalSampleId); + if(newNormalSampleId != null && SourceNames.size() >= 2) + mapping.NormalSourceMapping.put(SourceNames.get(1), newNormalSampleId); + } + private static String formConfigSourceStr(final String sourceType, final String sourceName) { return format("%s_%s", sourceType, sourceName); @@ -371,6 +418,7 @@ public static void addConfig(final ConfigBuilder configBuilder) MATCH_LEVEL, false, "Match level from REPORTABLE (default) or DETAILED", REPORTABLE.toString()); configBuilder.addConfigItem(SAMPLE, SAMPLE_DESC); + configBuilder.addConfigItem(NORMAL, false, "Sample ID of normal sample if tumor-normal run."); addSampleIdFile(configBuilder, false); configBuilder.addConfigItem(DRIVER_GENE_PANEL_OPTION, DRIVER_GENE_PANEL_OPTION_DESC); configBuilder.addConfigItem(THRESHOLDS, "In form: Field,AbsoluteDiff,PercentDiff, separated by ';'"); From 3a0d9eed3c93155670e62b535a83401d97a5bdf0 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 29 Aug 2024 14:21:09 +0200 Subject: [PATCH 17/53] Compar: DEV-4061: Fix Peach implementation --- .../java/com/hartwig/hmftools/compar/ComparConfig.java | 4 +--- .../java/com/hartwig/hmftools/compar/ItemComparer.java | 2 +- .../hartwig/hmftools/compar/chord/ChordComparer.java | 2 +- .../hartwig/hmftools/compar/common/CommonUtils.java | 7 ++++++- .../hartwig/hmftools/compar/cuppa/CuppaComparer.java | 2 +- .../hartwig/hmftools/compar/driver/DriverComparer.java | 2 +- .../hartwig/hmftools/compar/lilac/LilacComparer.java | 2 +- .../hmftools/compar/linx/DisruptionComparer.java | 2 +- .../hartwig/hmftools/compar/linx/FusionComparer.java | 2 +- .../hmftools/compar/linx/GermlineSvComparer.java | 2 +- .../compar/mutation/GermlineVariantComparer.java | 2 +- .../compar/mutation/SomaticVariantComparer.java | 2 +- .../hartwig/hmftools/compar/peach/PeachComparer.java 
| 10 ++++------ .../hmftools/compar/purple/CopyNumberComparer.java | 2 +- .../hmftools/compar/purple/GeneCopyNumberComparer.java | 2 +- .../compar/purple/GermlineDeletionComparer.java | 2 +- .../hartwig/hmftools/compar/purple/PurityComparer.java | 2 +- 17 files changed, 25 insertions(+), 24 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java index 4e23712350..2fd1d9d1ec 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java @@ -214,7 +214,6 @@ public String sourceSampleId(final String source, final String sampleId) if(mapping == null || !mapping.SourceMapping.containsKey(source)) { - CMP_LOGGER.warn("sample({}) source({}) missed mapping", sampleId, source); return sampleId; } @@ -238,7 +237,6 @@ else if(mapping != null && mapping.NormalSampleId != null) } else { - CMP_LOGGER.warn("sample({}) source({}) missed normal sample ID", sampleId, source); return sourceSampleId(source, sampleId); } } @@ -418,7 +416,7 @@ public static void addConfig(final ConfigBuilder configBuilder) MATCH_LEVEL, false, "Match level from REPORTABLE (default) or DETAILED", REPORTABLE.toString()); configBuilder.addConfigItem(SAMPLE, SAMPLE_DESC); - configBuilder.addConfigItem(NORMAL, false, "Sample ID of normal sample if tumor-normal run."); + configBuilder.addConfigItem(NORMAL, false, "Sample ID of normal sample if tumor-normal run. 
By default same value as '" + SAMPLE + "' is used"); addSampleIdFile(configBuilder, false); configBuilder.addConfigItem(DRIVER_GENE_PANEL_OPTION, DRIVER_GENE_PANEL_OPTION_DESC); configBuilder.addConfigItem(THRESHOLDS, "In form: Field,AbsoluteDiff,PercentDiff, separated by ';'"); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java index 6c6064641e..6114bfd53f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java @@ -16,7 +16,7 @@ public interface ItemComparer List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName); - List loadFromFile(final String sampleId, final FileSources fileSources); + List loadFromFile(final String sampleId, final String sourceNormalSampleId, final FileSources fileSources); List comparedFieldNames(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java index ea753172ae..c197d1ca74 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java @@ -66,7 +66,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 5ba4665e6e..bdf7d45351 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -33,6 +33,7 @@ import com.hartwig.hmftools.compar.linx.DisruptionComparer; import com.hartwig.hmftools.compar.linx.FusionComparer; import com.hartwig.hmftools.compar.linx.GermlineSvComparer; +import com.hartwig.hmftools.compar.peach.PeachComparer; import com.hartwig.hmftools.compar.purple.CopyNumberComparer; import com.hartwig.hmftools.compar.purple.GeneCopyNumberComparer; import com.hartwig.hmftools.compar.purple.GermlineDeletionComparer; @@ -122,6 +123,9 @@ private static ItemComparer createComparer(final Category category, final Compar case GERMLINE_SV: return new GermlineSvComparer(config); + case PEACH: + return new PeachComparer(config); + default: return null; } @@ -137,6 +141,7 @@ public static boolean processSample( for(String sourceName : config.SourceNames) { String sourceSampleId = config.sourceSampleId(sourceName, sampleId); + String sourceNormalSampleId = config.sourceNormalSampleId(sourceName, sampleId); List items = null; if(!config.DbConnections.isEmpty()) @@ -146,7 +151,7 @@ public static boolean processSample( else { FileSources fileSources = FileSources.sampleInstance(config.FileSources.get(sourceName), sourceSampleId); - items = comparer.loadFromFile(sourceSampleId, fileSources); + items = comparer.loadFromFile(sourceSampleId, sourceNormalSampleId, fileSources); } if(items != null) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java index fb6737525c..443e22252f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java @@ -62,7 +62,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String 
normalSampleId, final FileSources fileSources) { final List comparableItems = new ArrayList<>(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java index 239d448bf8..c4876bf0d6 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java @@ -73,7 +73,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java index 2b91544758..f37c16d3b9 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java @@ -89,7 +89,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java index b41d958040..0f110fa07b 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java @@ -76,7 +76,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List 
loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { try { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java index 78564f8d28..0632f86e51 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java @@ -72,7 +72,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { try { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java index 5116ec5da8..9eb6cab022 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java @@ -72,7 +72,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { List items = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 7158ece84a..26fd469558 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -90,7 +90,7 @@ public List loadFromDb(final 
String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 5175885b46..285f3c807e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -342,7 +342,7 @@ private List loadVariants(final String sampleId, final Datab } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List items = Lists.newArrayList(); loadVariants(sampleId, fileSources).forEach(x -> items.add(x)); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java index 3b48cd3b55..d0930366ac 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -19,7 +19,6 @@ import com.hartwig.hmftools.compar.ItemComparer; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.CommonUtils; -import com.hartwig.hmftools.compar.common.DiffThresholds; import com.hartwig.hmftools.compar.common.FileSources; import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; @@ -60,11 +59,11 @@ public List loadFromDb(final String sampleId, final DatabaseAcce return Lists.newArrayList(); } - public List loadFromFile(final 
String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); - String fileName = determineFileName(sampleId, fileSources); + String fileName = determineFileName(sampleId, normalSampleId, fileSources); try { PeachGenotypeFile.read(fileName).forEach(g -> comparableItems.add(new PeachData(g))); @@ -78,10 +77,9 @@ public List loadFromFile(final String sampleId, final FileSource } @NotNull - private static String determineFileName(final String sampleId, final FileSources fileSources) + private static String determineFileName(final String sampleId, final String normalSampleId, final FileSources fileSources) { - // TODO: this needs to be the normal sample ID... - final String currentFileName = PeachGenotypeFile.generateFileName(fileSources.Peach, sampleId); + final String currentFileName = PeachGenotypeFile.generateFileName(fileSources.Peach, normalSampleId); final String oldFileName = PeachGenotypeFile.generateOldPythonFileName(fileSources.Peach, sampleId); if(!fileExists(currentFileName) && fileExists(oldFileName)) { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java index 09f7909306..b7951786bb 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java @@ -66,7 +66,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java index a28e2b72a5..658c5dd703 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java @@ -82,7 +82,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List items = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java index b58871ac00..99cfabcf3e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java @@ -68,7 +68,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java index 621ca9f72e..57e84fa820 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java @@ -86,7 +86,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String normalSampleId, 
final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); From 9446167b68c00868a178a04b44738823576531e8 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 29 Aug 2024 14:43:52 +0200 Subject: [PATCH 18/53] PatientDb: DEV-4061: Add reading of Peach genotypes from DB --- .../patientdb/dao/DatabaseAccess.java | 6 +++++ .../hmftools/patientdb/dao/PeachDAO.java | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/DatabaseAccess.java b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/DatabaseAccess.java index 9f0e61eed8..243afc3ca0 100644 --- a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/DatabaseAccess.java +++ b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/DatabaseAccess.java @@ -395,6 +395,12 @@ public List readBreakends(@NotNull String sample) return structuralVariantFusionDAO.readBreakends(sample); } + @NotNull + public List readPeachGenotypes(@NotNull String sample) + { + return peachDAO.readPeachGenotypes(sample); + } + public void writeCanonicalTranscripts(final String refGenomeVersion, final List geneDataList, final List transcripts) { diff --git a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java index d77502745a..0d985211d1 100644 --- a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java +++ b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java @@ -4,15 +4,20 @@ import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.PEACHGENOTYPE; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.Date; import java.util.List; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.peach.ImmutablePeachGenotype; import com.hartwig.hmftools.common.peach.PeachGenotype; import 
org.jetbrains.annotations.NotNull; import org.jooq.DSLContext; import org.jooq.InsertValuesStep8; +import org.jooq.Record; +import org.jooq.Result; class PeachDAO { @@ -23,6 +28,25 @@ class PeachDAO this.context = context; } + public List readPeachGenotypes(final String sample) + { + Result result = context.select().from(PEACHGENOTYPE).where(PEACHGENOTYPE.SAMPLEID.eq(sample)).fetch(); + List genotypes = Lists.newArrayList(); + for(Record record : result) + { + PeachGenotype genotype = ImmutablePeachGenotype.builder() + .gene(record.getValue(PEACHGENOTYPE.GENE)) + .allele(record.getValue(PEACHGENOTYPE.HAPLOTYPE)) + .alleleCount(record.getValue(PEACHGENOTYPE.COUNT)) + .function(record.getValue(PEACHGENOTYPE.FUNCTION)) + .linkedDrugs(record.getValue(PEACHGENOTYPE.LINKEDDRUGS)) + .urlPrescriptionInfo(record.getValue(PEACHGENOTYPE.URLPRESCRIPTIONINFO)) + .build(); + genotypes.add(genotype); + } + return genotypes; + } + void writePeach(final String sample, final List peachGenotypes) { deletePeachForSample(sample); From 6bdb272a76ee02b749e02309574ba465cec8df01 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 29 Aug 2024 14:47:44 +0200 Subject: [PATCH 19/53] Compar: DEV-4061: Add SQL DB support for Peach --- .../com/hartwig/hmftools/compar/peach/PeachComparer.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java index d0930366ac..1ecfe4a1c8 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -11,8 +11,10 @@ import java.io.IOException; import java.nio.file.Files; import java.util.List; +import java.util.stream.Collectors; import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.peach.PeachGenotype; import com.hartwig.hmftools.common.peach.PeachGenotypeFile; 
import com.hartwig.hmftools.compar.ComparConfig; import com.hartwig.hmftools.compar.ComparableItem; @@ -55,8 +57,8 @@ public List comparedFieldNames() @Override public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) { - // TODO - return Lists.newArrayList(); + List genotypes = dbAccess.readPeachGenotypes(sampleId); + return genotypes.stream().map(g -> new PeachData(g)).collect(Collectors.toList()); } public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) From b77e484a879930e772514f1df64f0b58ebba8d54 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 29 Aug 2024 16:37:09 +0200 Subject: [PATCH 20/53] Compar: DEV-4061: Fix formatting --- .../com/hartwig/hmftools/compar/peach/PeachData.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java index b4dea4c166..b8f6d37887 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachData.java @@ -6,7 +6,6 @@ import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; -import java.util.ArrayList; import java.util.List; import com.google.common.collect.Lists; @@ -32,7 +31,10 @@ public PeachData(final PeachGenotype genotype) } @Override - public Category category() { return PEACH; } + public Category category() + { + return PEACH; + } @Override public String key() @@ -52,7 +54,10 @@ public List displayValues() } @Override - public boolean reportable() { return true; } + public boolean reportable() + { + return true; + } @Override public boolean matches(final ComparableItem other) From e977d53427c66e6bccfa29d9edb05347200d3379 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 30 Aug 
2024 11:04:51 +0200 Subject: [PATCH 21/53] Common/Orange: DEV-4061: Make virus blacklist status optional Throw error in Orange when unknown blacklist status is encountered. --- .../hartwig/hmftools/common/virus/AnnotatedVirus.java | 4 +++- .../hmftools/common/virus/AnnotatedVirusFile.java | 3 ++- .../hmftools/orange/algo/virus/VirusInterpreter.java | 9 +++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirus.java b/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirus.java index c4f89888b1..236d1027ee 100644 --- a/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirus.java +++ b/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirus.java @@ -31,7 +31,9 @@ public abstract class AnnotatedVirus public abstract boolean reported(); - public abstract boolean blacklisted(); + // Nullable for backwards compatibility + @Nullable + public abstract Boolean blacklisted(); @NotNull public abstract VirusLikelihoodType virusDriverLikelihoodType(); diff --git a/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirusFile.java b/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirusFile.java index ddcfe2a31d..a6504a43d8 100644 --- a/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirusFile.java +++ b/hmf-common/src/main/java/com/hartwig/hmftools/common/virus/AnnotatedVirusFile.java @@ -63,6 +63,7 @@ static List fromLines(@NotNull List lines) Integer meanCoverageIndex = fieldsIndexMap.get("meanCoverage"); Integer expectedClonalCoverageIndex = fieldsIndexMap.get("expectedClonalCoverage"); Integer driverLikelihoodIndex = fieldsIndexMap.get("driverLikelihood"); + Integer blacklistedIndex = fieldsIndexMap.get("blacklisted"); for(String line : lines) { @@ -89,7 +90,7 @@ static List fromLines(@NotNull List lines) .meanCoverage(meanCoverageIndex != null ? 
Double.parseDouble(values[meanCoverageIndex]) : 0) .expectedClonalCoverage(expectedClonalCoverage) .reported(Boolean.parseBoolean(values[fieldsIndexMap.get("reported")])) - .blacklisted(Boolean.parseBoolean(values[fieldsIndexMap.get("blacklisted")])) + .blacklisted(blacklistedIndex != null ? Boolean.parseBoolean(values[blacklistedIndex]) : null) .virusDriverLikelihoodType(driverLikelihoodIndex != null ? VirusLikelihoodType.valueOf(values[driverLikelihoodIndex]) : UNKNOWN) .build()); diff --git a/orange/src/main/java/com/hartwig/hmftools/orange/algo/virus/VirusInterpreter.java b/orange/src/main/java/com/hartwig/hmftools/orange/algo/virus/VirusInterpreter.java index 751be693f8..2122abc461 100644 --- a/orange/src/main/java/com/hartwig/hmftools/orange/algo/virus/VirusInterpreter.java +++ b/orange/src/main/java/com/hartwig/hmftools/orange/algo/virus/VirusInterpreter.java @@ -1,6 +1,9 @@ package com.hartwig.hmftools.orange.algo.virus; +import static java.lang.String.format; + import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import com.hartwig.hmftools.common.virus.AnnotatedVirus; @@ -25,6 +28,12 @@ public static VirusInterpreterData interpret(@NotNull com.hartwig.hmftools.commo @NotNull public static List filterBlacklistedViruses(@NotNull List allViruses) { + Optional virusWithBlacklistStatusUnknown = + allViruses.stream().filter(virus -> virus.blacklisted() == null).findFirst(); + if(virusWithBlacklistStatusUnknown.isPresent()) + { + throw new RuntimeException(format("Encountered virus '%s' with unknown blacklist status", virusWithBlacklistStatusUnknown.get())); + } return allViruses.stream().filter(virus -> !virus.blacklisted()).collect(Collectors.toList()); } } From ea9b8f1be39adbd08f7d7549ce10dd5b85a2f241 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 30 Aug 2024 11:35:43 +0200 Subject: [PATCH 22/53] Compar: DEV-4061: Add VIRUS category --- .../hmftools/compar/common/Category.java | 3 +- 
.../hmftools/compar/common/CommonUtils.java | 4 + .../hmftools/compar/common/FileSources.java | 13 ++- .../hmftools/compar/virus/VirusComparer.java | 81 ++++++++++++++++++ .../hmftools/compar/virus/VirusData.java | 82 +++++++++++++++++++ 5 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java index f0b49f2dfd..be8c03dc7d 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java @@ -19,7 +19,8 @@ public enum Category CUPPA, LILAC, CHORD, - PEACH; + PEACH, + VIRUS; public static final String ALL_CATEGORIES = "ALL"; public static final String LINX_CATEGORIES = "LINX"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index bdf7d45351..66666b8d09 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -40,6 +40,7 @@ import com.hartwig.hmftools.compar.purple.PurityComparer; import com.hartwig.hmftools.compar.mutation.GermlineVariantComparer; import com.hartwig.hmftools.compar.mutation.SomaticVariantComparer; +import com.hartwig.hmftools.compar.virus.VirusComparer; public class CommonUtils { @@ -126,6 +127,9 @@ private static ItemComparer createComparer(final Category category, final Compar case PEACH: return new PeachComparer(config); + case VIRUS: + return new VirusComparer(config); + default: return null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index 1edd181b83..b53ddbdec6 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -18,6 +18,8 @@ import static com.hartwig.hmftools.common.utils.config.CommonConfig.PEACH_DIR_DESC; import static com.hartwig.hmftools.common.utils.config.CommonConfig.PURPLE_DIR_CFG; import static com.hartwig.hmftools.common.utils.config.CommonConfig.PURPLE_DIR_DESC; +import static com.hartwig.hmftools.common.utils.config.CommonConfig.VIRUS_DIR_CFG; +import static com.hartwig.hmftools.common.utils.config.CommonConfig.VIRUS_DIR_DESC; import static com.hartwig.hmftools.common.utils.config.ConfigUtils.convertWildcardSamplePath; import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.NEW_SOURCE; @@ -40,6 +42,7 @@ public class FileSources public final String Lilac; public final String Chord; public final String Peach; + public final String Virus; public final String SomaticVcf; public final String SomaticUnfilteredVcf; @@ -48,7 +51,7 @@ public class FileSources private static final String SOMATIC_UNFILTERED_VCF = "somatic_unfiltered_vcf"; public FileSources(final String source, final String linx, final String purple, final String linxGermline, final String cuppa, - final String lilac, final String chord, final String peach, final String somaticVcf, final String somaticUnfilteredVcf) + final String lilac, final String chord, final String peach, final String virus, final String somaticVcf, final String somaticUnfilteredVcf) { Source = source; Linx = linx; @@ -58,6 +61,7 @@ public FileSources(final String source, final String linx, final String purple, Lilac = lilac; Chord = chord; Peach = peach; + Virus = virus; SomaticVcf = somaticVcf; SomaticUnfilteredVcf = somaticUnfilteredVcf; } @@ -73,6 +77,7 @@ public static 
FileSources sampleInstance(final FileSources fileSources, final St convertWildcardSamplePath(fileSources.Lilac, sampleId), convertWildcardSamplePath(fileSources.Chord, sampleId), convertWildcardSamplePath(fileSources.Peach, sampleId), + convertWildcardSamplePath(fileSources.Virus, sampleId), convertWildcardSamplePath(fileSources.SomaticVcf, sampleId), convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId)); } @@ -106,6 +111,7 @@ public static void registerConfig(final ConfigBuilder configBuilder) addPathConfig(configBuilder, CHORD_DIR_CFG, CHORD_DIR_DESC, sourceName); addPathConfig(configBuilder, CUPPA_DIR_CFG, CUPPA_DIR_DESC, sourceName); addPathConfig(configBuilder, PEACH_DIR_CFG, PEACH_DIR_DESC, sourceName); + addPathConfig(configBuilder, VIRUS_DIR_CFG, VIRUS_DIR_DESC, sourceName); configBuilder.addPath( formSourceConfig(SOMATIC_VCF, sourceName), false, @@ -146,12 +152,13 @@ public static FileSources fromConfig(final String sourceName, final ConfigBuilde String lilacDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.LILAC_DIR, LILAC_DIR_CFG, sourceName); String chordDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.CHORD_DIR, CHORD_DIR_CFG, sourceName); String peachDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.PEACH_DIR, PEACH_DIR_CFG, sourceName); + String virusDir = getDirectory(configBuilder, sampleDir, PipelineToolDirectories.VIRUS_INTERPRETER_DIR, VIRUS_DIR_CFG, sourceName); String somaticVcf = getConfigValue(configBuilder, SOMATIC_VCF, sourceName); String somaticUnfilteredVcf = getConfigValue(configBuilder, SOMATIC_UNFILTERED_VCF, sourceName); - return new FileSources( - sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, somaticVcf, somaticUnfilteredVcf); + return new FileSources(sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, virusDir, + somaticVcf, somaticUnfilteredVcf); } private static String 
getDirectory( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java new file mode 100644 index 0000000000..cd20ba806f --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java @@ -0,0 +1,81 @@ +package com.hartwig.hmftools.compar.virus; + +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.VIRUS; +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; +import static com.hartwig.hmftools.compar.virus.VirusData.FLD_DRIVER_LIKELIHOOD; +import static com.hartwig.hmftools.compar.virus.VirusData.FLD_INTEGRATIONS; +import static com.hartwig.hmftools.compar.virus.VirusData.FLD_MEAN_COVERAGE; + +import java.io.IOException; +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.virus.AnnotatedVirusFile; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.FileSources; +import com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +public class VirusComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + public VirusComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return VIRUS; + } + + @Override + public void registerThresholds(final DiffThresholds thresholds) + { + thresholds.addFieldThreshold(FLD_MEAN_COVERAGE, 0, 0.15); + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return 
CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_REPORTED, FLD_INTEGRATIONS, FLD_MEAN_COVERAGE, FLD_DRIVER_LIKELIHOOD); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) + { + // currently unsupported + return Lists.newArrayList(); + } + + @Override + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + { + final List comparableItems = Lists.newArrayList(); + try + { + AnnotatedVirusFile.read(AnnotatedVirusFile.generateFileName(fileSources.Virus, sampleId)) + .forEach(v -> comparableItems.add(new VirusData(v))); + } + catch(IOException e) + { + CMP_LOGGER.warn("sample({}) failed to load Virus interpreter data: {}", sampleId, e.toString()); + return null; + } + return comparableItems; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java new file mode 100644 index 0000000000..385380fe6d --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java @@ -0,0 +1,82 @@ +package com.hartwig.hmftools.compar.virus; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.compar.common.Category.VIRUS; +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.virus.AnnotatedVirus; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import 
com.hartwig.hmftools.compar.common.Mismatch; + +public class VirusData implements ComparableItem +{ + public final AnnotatedVirus Virus; + + protected static final String FLD_INTEGRATIONS = "Integrations"; + protected static final String FLD_MEAN_COVERAGE = "MeanCoverage"; + protected static final String FLD_DRIVER_LIKELIHOOD = "DriverLikelihood"; + + VirusData(final AnnotatedVirus virus) + { + Virus = virus; + } + + @Override + public Category category() { + return VIRUS; + } + + @Override + public String key() + { + return Virus.name(); + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%s", Virus.reported())); + values.add(format("%d", Virus.integrations())); + values.add(format("%.2f", Virus.meanCoverage())); + values.add(format("%s", Virus.virusDriverLikelihoodType())); + return values; + } + + @Override + public boolean reportable() + { + return Virus.reported(); + } + + @Override + public boolean matches(final ComparableItem other) + { + final VirusData otherData = (VirusData) other; + return Virus.name().equals(otherData.Virus.name()); + } + + @Override + public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final VirusData otherData = (VirusData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_REPORTED, Virus.reported(), otherData.Virus.reported()); + checkDiff(diffs, FLD_INTEGRATIONS, Virus.integrations(), otherData.Virus.integrations()); + checkDiff(diffs, FLD_MEAN_COVERAGE, Virus.meanCoverage(), otherData.Virus.meanCoverage(), thresholds); + checkDiff(diffs, FLD_DRIVER_LIKELIHOOD, String.valueOf(Virus.virusDriverLikelihoodType()), String.valueOf(otherData.Virus.virusDriverLikelihoodType())); + + return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; + } +} From 5ef36a5dd798722600845714c886a61979e8f25d Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 30 Aug 2024 13:01:36 +0200 Subject: [PATCH 23/53] Compar: DEV-4061: Fix formatting --- .../java/com/hartwig/hmftools/compar/peach/PeachComparer.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java index 1ecfe4a1c8..269055b192 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -25,8 +25,6 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; -import org.jetbrains.annotations.NotNull; - public class PeachComparer implements ItemComparer { private final ComparConfig mConfig; @@ -78,7 +76,6 @@ public List loadFromFile(final String sampleId, final String nor return comparableItems; } - @NotNull private static String determineFileName(final String sampleId, final String normalSampleId, final FileSources fileSources) { final String currentFileName = PeachGenotypeFile.generateFileName(fileSources.Peach, normalSampleId); From 6eab7dfb11c7525897b52e908da161ced6271f1e Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 30 Aug 2024 15:46:36 +0200 Subject: [PATCH 24/53] Common: DEV-4061: Can replace '$' by a normal/ref sample ID in wildcard paths --- .../hmftools/common/utils/config/ConfigUtils.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hmf-common/src/main/java/com/hartwig/hmftools/common/utils/config/ConfigUtils.java b/hmf-common/src/main/java/com/hartwig/hmftools/common/utils/config/ConfigUtils.java index afaa192846..a2d5b01556 100644 --- a/hmf-common/src/main/java/com/hartwig/hmftools/common/utils/config/ConfigUtils.java +++ 
b/hmf-common/src/main/java/com/hartwig/hmftools/common/utils/config/ConfigUtils.java @@ -139,11 +139,18 @@ public static List loadDelimitedIdFile(final List fileContents, } public static String convertWildcardSamplePath(final String samplePath, final String sampleId) + { + return convertWildcardSamplePath(samplePath, sampleId, null); + } + + public static String convertWildcardSamplePath(final String samplePath, final String sampleId, final String normalSampleId) { if(samplePath == null) - return samplePath; + return null; - return samplePath.replaceAll("\\*", sampleId); + if(normalSampleId == null) + return samplePath.replaceAll("\\*", sampleId); + else + return samplePath.replaceAll("\\$", normalSampleId).replaceAll("\\*", sampleId); } - } From 00393f67a5140e33d7ace02eb4ba94129f9f66fc Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 30 Aug 2024 15:47:13 +0200 Subject: [PATCH 25/53] Compar: DEV-4061: Support flagstat --- .../hartwig/hmftools/compar/ComparTask.java | 3 +- .../hmftools/compar/common/Category.java | 9 ++- .../hmftools/compar/common/CommonUtils.java | 11 ++- .../hmftools/compar/common/FileSources.java | 45 ++++++++--- .../compar/metrics/FlagstatCommon.java | 9 +++ .../metrics/NormalFlagstatComparer.java | 81 +++++++++++++++++++ .../compar/metrics/NormalFlagstatData.java | 73 +++++++++++++++++ .../compar/metrics/TumorFlagstatComparer.java | 81 +++++++++++++++++++ .../compar/metrics/TumorFlagstatData.java | 73 +++++++++++++++++ .../mutation/SomaticVariantComparer.java | 4 +- 10 files changed, 371 insertions(+), 18 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java create mode 100644 
compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java index 4f5bf05404..627e07c101 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java @@ -113,8 +113,9 @@ private Set loadCombinedCopyNumberDriverGenes(final String sampleId) for(String sourceName : mConfig.SourceNames) { String sourceSampleId = mConfig.sourceSampleId(sourceName, sampleId); + String sourceNormalSampleId = mConfig.sourceNormalSampleId(sourceName, sampleId); - FileSources fileSources = FileSources.sampleInstance(mConfig.FileSources.get(sourceName), sourceSampleId); + FileSources fileSources = FileSources.sampleInstance(mConfig.FileSources.get(sourceName), sourceSampleId, sourceNormalSampleId); try { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java index be8c03dc7d..0483a928a1 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java @@ -20,7 +20,9 @@ public enum Category LILAC, CHORD, PEACH, - VIRUS; + VIRUS, + TUMOR_FLAGSTAT, + NORMAL_FLAGSTAT; public static final String ALL_CATEGORIES = "ALL"; public static final String LINX_CATEGORIES = "LINX"; @@ -34,5 +36,8 @@ public static List purpleCategories() public static List linxCategories() { return Lists.newArrayList(DRIVER, FUSION, DISRUPTION, GERMLINE_SV); } - public static List panelCategories() { return Lists.newArrayList(PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION); } + public static List panelCategories() + { + return Lists.newArrayList(PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT); + } } diff --git 
a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 66666b8d09..6a11170c5f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -33,6 +33,8 @@ import com.hartwig.hmftools.compar.linx.DisruptionComparer; import com.hartwig.hmftools.compar.linx.FusionComparer; import com.hartwig.hmftools.compar.linx.GermlineSvComparer; +import com.hartwig.hmftools.compar.metrics.NormalFlagstatComparer; +import com.hartwig.hmftools.compar.metrics.TumorFlagstatComparer; import com.hartwig.hmftools.compar.peach.PeachComparer; import com.hartwig.hmftools.compar.purple.CopyNumberComparer; import com.hartwig.hmftools.compar.purple.GeneCopyNumberComparer; @@ -130,6 +132,12 @@ private static ItemComparer createComparer(final Category category, final Compar case VIRUS: return new VirusComparer(config); + case TUMOR_FLAGSTAT: + return new TumorFlagstatComparer(config); + + case NORMAL_FLAGSTAT: + return new NormalFlagstatComparer(config); + default: return null; } @@ -154,7 +162,8 @@ public static boolean processSample( } else { - FileSources fileSources = FileSources.sampleInstance(config.FileSources.get(sourceName), sourceSampleId); + FileSources fileSources = + FileSources.sampleInstance(config.FileSources.get(sourceName), sourceSampleId, sourceNormalSampleId); items = comparer.loadFromFile(sourceSampleId, sourceNormalSampleId, fileSources); } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index b53ddbdec6..3d21359b17 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -45,13 +45,18 @@ public class FileSources public final String Virus; public final 
String SomaticVcf; public final String SomaticUnfilteredVcf; + public final String TumorFlagstat; + public final String NormalFlagstat; private static final String SAMPLE_DIR = "sample_dir"; private static final String SOMATIC_VCF = "somatic_vcf"; private static final String SOMATIC_UNFILTERED_VCF = "somatic_unfiltered_vcf"; + private static final String TUMOR_FLAGSTAT = "tumor_flagstat_dir"; + private static final String NORMAL_FLAGSTAT = "normal_flagstat_dir"; public FileSources(final String source, final String linx, final String purple, final String linxGermline, final String cuppa, - final String lilac, final String chord, final String peach, final String virus, final String somaticVcf, final String somaticUnfilteredVcf) + final String lilac, final String chord, final String peach, final String virus, final String somaticVcf, + final String somaticUnfilteredVcf, final String tumorFlagstat, final String normalFlagstat) { Source = source; Linx = linx; @@ -64,22 +69,26 @@ public FileSources(final String source, final String linx, final String purple, Virus = virus; SomaticVcf = somaticVcf; SomaticUnfilteredVcf = somaticUnfilteredVcf; + TumorFlagstat = tumorFlagstat; + NormalFlagstat = normalFlagstat; } - public static FileSources sampleInstance(final FileSources fileSources, final String sampleId) + public static FileSources sampleInstance(final FileSources fileSources, final String sampleId, final String normalSampleId) { return new FileSources( fileSources.Source, - convertWildcardSamplePath(fileSources.Linx, sampleId), - convertWildcardSamplePath(fileSources.Purple, sampleId), - convertWildcardSamplePath(fileSources.LinxGermline, sampleId), - convertWildcardSamplePath(fileSources.Cuppa, sampleId), - convertWildcardSamplePath(fileSources.Lilac, sampleId), - convertWildcardSamplePath(fileSources.Chord, sampleId), - convertWildcardSamplePath(fileSources.Peach, sampleId), - convertWildcardSamplePath(fileSources.Virus, sampleId), - 
convertWildcardSamplePath(fileSources.SomaticVcf, sampleId), - convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId)); + convertWildcardSamplePath(fileSources.Linx, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.Purple, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.LinxGermline, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.Cuppa, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.Lilac, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.Chord, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.Peach, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.Virus, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.SomaticVcf, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.TumorFlagstat, sampleId, normalSampleId), + convertWildcardSamplePath(fileSources.NormalFlagstat, sampleId, normalSampleId)); } public static RefGenomeVersion liftoverSourceGenomeVersion(final String source) @@ -120,6 +129,13 @@ public static void registerConfig(final ConfigBuilder configBuilder) configBuilder.addPath( formSourceConfig(SOMATIC_UNFILTERED_VCF, sourceName), false, formSourceDescription("VCF to search for filtered variants", sourceName)); + + configBuilder.addPath( + formSourceConfig(TUMOR_FLAGSTAT, sourceName), false, + formSourceDescription("Tumor flagstat", sourceName)); + configBuilder.addPath( + formSourceConfig(NORMAL_FLAGSTAT, sourceName), false, + formSourceDescription("Normal flagstat", sourceName)); } } @@ -157,8 +173,11 @@ public static FileSources fromConfig(final String sourceName, final ConfigBuilde String somaticVcf = getConfigValue(configBuilder, SOMATIC_VCF, sourceName); String somaticUnfilteredVcf = getConfigValue(configBuilder, SOMATIC_UNFILTERED_VCF, sourceName); + String tumorFlagstat = 
getDirectory(configBuilder, sampleDir, "*/flagstat", TUMOR_FLAGSTAT, sourceName); + String normalFlagstat = getDirectory(configBuilder, sampleDir, "$/flagstat", NORMAL_FLAGSTAT, sourceName); + return new FileSources(sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, virusDir, - somaticVcf, somaticUnfilteredVcf); + somaticVcf, somaticUnfilteredVcf, tumorFlagstat, normalFlagstat); } private static String getDirectory( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java new file mode 100644 index 0000000000..bd02c4dc92 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java @@ -0,0 +1,9 @@ +package com.hartwig.hmftools.compar.metrics; + +public class FlagstatCommon +{ + protected static final String FLD_MAPPED_PROPORTION = "MappedProportion"; + + protected static final double MAPPED_PROPORTION_ABS_THRESHOLD = 0.01; + protected static final double MAPPED_PROPORTION_PCT_THRESHOLD = 0; +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java new file mode 100644 index 0000000000..9ac672015f --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java @@ -0,0 +1,81 @@ +package com.hartwig.hmftools.compar.metrics; + +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.NORMAL_FLAGSTAT; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_ABS_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_PCT_THRESHOLD; + +import java.io.IOException; +import java.util.List; + +import 
com.google.common.collect.Lists; +import com.hartwig.hmftools.common.flagstat.Flagstat; +import com.hartwig.hmftools.common.flagstat.FlagstatFile; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.FileSources; +import com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +public class NormalFlagstatComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + public NormalFlagstatComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return NORMAL_FLAGSTAT; + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public void registerThresholds(final DiffThresholds thresholds) + { + thresholds.addFieldThreshold(FLD_MAPPED_PROPORTION, MAPPED_PROPORTION_ABS_THRESHOLD, MAPPED_PROPORTION_PCT_THRESHOLD); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_MAPPED_PROPORTION); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) + { + // currently unsupported + return Lists.newArrayList(); + } + + @Override + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + { + final List comparableItems = Lists.newArrayList(); + try + { + Flagstat flagstat = FlagstatFile.read(FlagstatFile.generateFilename(fileSources.NormalFlagstat, normalSampleId)); + comparableItems.add(new NormalFlagstatData(flagstat)); + } + catch(IOException e) + { + 
CMP_LOGGER.warn("sample({}) failed to load normal flagstat data: {}", sampleId, e.toString()); + return null; + } + return comparableItems; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java new file mode 100644 index 0000000000..b542c1003b --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java @@ -0,0 +1,73 @@ +package com.hartwig.hmftools.compar.metrics; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.compar.common.Category.NORMAL_FLAGSTAT; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.flagstat.Flagstat; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import com.hartwig.hmftools.compar.common.Mismatch; + +public class NormalFlagstatData implements ComparableItem +{ + public final Flagstat mFlagstat; + + public NormalFlagstatData(final Flagstat flagstat) + { + mFlagstat = flagstat; + } + + @Override + public Category category() + { + return NORMAL_FLAGSTAT; + } + + @Override + public String key() + { + return ""; + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%.2f", mFlagstat.mappedProportion())); + return values; + } + + @Override + public boolean reportable() + { + return true; + } + + @Override + public boolean matches(final ComparableItem other) + { + // a single record for each sample + return true; + } + + @Override + public 
Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final NormalFlagstatData otherData = (NormalFlagstatData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_MAPPED_PROPORTION, mFlagstat.mappedProportion(), otherData.mFlagstat.mappedProportion(), thresholds); + + return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java new file mode 100644 index 0000000000..f7c435de7e --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java @@ -0,0 +1,81 @@ +package com.hartwig.hmftools.compar.metrics; + +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.TUMOR_FLAGSTAT; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_ABS_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_PCT_THRESHOLD; + +import java.io.IOException; +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.flagstat.Flagstat; +import com.hartwig.hmftools.common.flagstat.FlagstatFile; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.FileSources; +import com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +public class 
TumorFlagstatComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + public TumorFlagstatComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return TUMOR_FLAGSTAT; + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public void registerThresholds(final DiffThresholds thresholds) + { + thresholds.addFieldThreshold(FLD_MAPPED_PROPORTION, MAPPED_PROPORTION_ABS_THRESHOLD, MAPPED_PROPORTION_PCT_THRESHOLD); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_MAPPED_PROPORTION); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) + { + // currently unsupported + return Lists.newArrayList(); + } + + @Override + public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + { + final List comparableItems = Lists.newArrayList(); + try + { + Flagstat flagstat = FlagstatFile.read(FlagstatFile.generateFilename(fileSources.TumorFlagstat, sampleId)); + comparableItems.add(new TumorFlagstatData(flagstat)); + } + catch(IOException e) + { + CMP_LOGGER.warn("sample({}) failed to load tumor flagstat data: {}", sampleId, e.toString()); + return null; + } + return comparableItems; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java new file mode 100644 index 0000000000..b6480d32fd --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java @@ -0,0 +1,73 @@ +package com.hartwig.hmftools.compar.metrics; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.compar.common.Category.TUMOR_FLAGSTAT; +import static 
com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; +import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.flagstat.Flagstat; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import com.hartwig.hmftools.compar.common.Mismatch; + +public class TumorFlagstatData implements ComparableItem +{ + public final Flagstat mFlagstat; + + public TumorFlagstatData(final Flagstat flagstat) + { + mFlagstat = flagstat; + } + + @Override + public Category category() + { + return TUMOR_FLAGSTAT; + } + + @Override + public String key() + { + return ""; + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%.2f", mFlagstat.mappedProportion())); + return values; + } + + @Override + public boolean reportable() + { + return true; + } + + @Override + public boolean matches(final ComparableItem other) + { + // a single record for each sample + return true; + } + + @Override + public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final TumorFlagstatData otherData = (TumorFlagstatData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_MAPPED_PROPORTION, mFlagstat.mappedProportion(), otherData.mFlagstat.mappedProportion(), thresholds); + + return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 285f3c807e..83d5bdde9a 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -86,6 +86,7 @@ public boolean processSample(final String sampleId, final List mismatc final String sourceName = mConfig.SourceNames.get(i); String sourceSampleId = mConfig.sourceSampleId(sourceName, sampleId); + String sourceNormalSampleId = mConfig.sourceNormalSampleId(sourceName, sampleId); if(!mConfig.DbConnections.isEmpty()) { @@ -94,7 +95,8 @@ public boolean processSample(final String sampleId, final List mismatc else { FileSources fileSources = mConfig.FileSources.get(sourceName); - List fileVariants = loadVariants(sourceSampleId, FileSources.sampleInstance(fileSources, sourceSampleId)); + List fileVariants = + loadVariants(sourceSampleId, FileSources.sampleInstance(fileSources, sourceSampleId, sourceNormalSampleId)); if(fileVariants == null) continue; From 486f117edd6757e1c8c3e88e73936ce8e7b5e2e1 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 30 Aug 2024 16:12:21 +0200 Subject: [PATCH 26/53] Compar: DEV-4061: Change normal sample default Prevents tumor flagstat data being used for normal flagstat comparison when no normal sample ID is provided. 
--- .../hartwig/hmftools/compar/ComparConfig.java | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java index 508c47e667..42e98b928c 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java @@ -224,23 +224,20 @@ public String sourceSampleId(final String source, final String sampleId) public String sourceNormalSampleId(final String source, final String sampleId) { - if(mSampleIdMappings.isEmpty()) - return sampleId; - - SampleIdMapping mapping = mSampleIdMappings.get(sampleId); - - if(mapping != null && mapping.NormalSourceMapping.containsKey(source)) - { - return mapping.NormalSourceMapping.get(source); - } - else if(mapping != null && mapping.NormalSampleId != null) + if(!mSampleIdMappings.isEmpty()) { - return mapping.NormalSampleId; - } - else - { - return sourceSampleId(source, sampleId); + SampleIdMapping mapping = mSampleIdMappings.get(sampleId); + + if(mapping != null && mapping.NormalSourceMapping.containsKey(source)) + { + return mapping.NormalSourceMapping.get(source); + } + else if(mapping != null && mapping.NormalSampleId != null) + { + return mapping.NormalSampleId; + } } + return sourceSampleId(source, sampleId) + "-ref"; } public boolean isValid() { return mIsValid; } @@ -418,7 +415,7 @@ public static void addConfig(final ConfigBuilder configBuilder) MATCH_LEVEL, false, "Match level from REPORTABLE (default) or DETAILED", REPORTABLE.toString()); configBuilder.addConfigItem(SAMPLE, SAMPLE_DESC); - configBuilder.addConfigItem(NORMAL, false, "Sample ID of normal sample if tumor-normal run. 
By default same value as '" + SAMPLE + "' is used"); + configBuilder.addConfigItem(NORMAL, false, "Sample ID of normal sample if tumor-normal run"); addSampleIdFile(configBuilder, false); configBuilder.addConfigItem(DRIVER_GENE_PANEL_OPTION, DRIVER_GENE_PANEL_OPTION_DESC); configBuilder.addConfigItem(THRESHOLDS, "In form: Field,AbsoluteDiff,PercentDiff, separated by ';'"); From fd591623038661037a918349c43e223e1c5cca7d Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Tue, 3 Sep 2024 11:04:01 +0200 Subject: [PATCH 27/53] Compar: DEV-4061: Change naming from 'normal' to 'germline' --- .../hartwig/hmftools/compar/ComparConfig.java | 64 +++++++++---------- .../hartwig/hmftools/compar/ComparTask.java | 4 +- .../hartwig/hmftools/compar/ItemComparer.java | 2 +- .../hmftools/compar/chord/ChordComparer.java | 2 +- .../hmftools/compar/common/Category.java | 2 +- .../hmftools/compar/common/CommonUtils.java | 12 ++-- .../hmftools/compar/common/FileSources.java | 42 ++++++------ .../hmftools/compar/cuppa/CuppaComparer.java | 2 +- .../compar/driver/DriverComparer.java | 2 +- .../hmftools/compar/lilac/LilacComparer.java | 2 +- .../compar/linx/DisruptionComparer.java | 2 +- .../hmftools/compar/linx/FusionComparer.java | 2 +- .../compar/linx/GermlineSvComparer.java | 2 +- ...rer.java => GermlineFlagstatComparer.java} | 16 ++--- ...tatData.java => GermlineFlagstatData.java} | 10 +-- .../compar/metrics/TumorFlagstatComparer.java | 2 +- .../mutation/GermlineVariantComparer.java | 2 +- .../mutation/SomaticVariantComparer.java | 6 +- .../hmftools/compar/peach/PeachComparer.java | 8 +-- .../compar/purple/CopyNumberComparer.java | 2 +- .../compar/purple/GeneCopyNumberComparer.java | 2 +- .../purple/GermlineDeletionComparer.java | 2 +- .../compar/purple/PurityComparer.java | 2 +- .../hmftools/compar/virus/VirusComparer.java | 2 +- 24 files changed, 97 insertions(+), 97 deletions(-) rename compar/src/main/java/com/hartwig/hmftools/compar/metrics/{NormalFlagstatComparer.java => 
GermlineFlagstatComparer.java} (81%) rename compar/src/main/java/com/hartwig/hmftools/compar/metrics/{NormalFlagstatData.java => GermlineFlagstatData.java} (85%) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java index 42e98b928c..b5451ce8e4 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java @@ -87,7 +87,7 @@ public class ComparConfig private boolean mIsValid; // config strings - public static final String NORMAL = "normal"; + public static final String GERMLINE_SAMPLE = "germline_sample"; public static final String CATEGORIES = "categories"; public static final String MATCH_LEVEL = "match_level"; @@ -222,19 +222,19 @@ public String sourceSampleId(final String source, final String sampleId) return mapping.SourceMapping.get(source); } - public String sourceNormalSampleId(final String source, final String sampleId) + public String sourceGermlineSampleId(final String source, final String sampleId) { if(!mSampleIdMappings.isEmpty()) { SampleIdMapping mapping = mSampleIdMappings.get(sampleId); - if(mapping != null && mapping.NormalSourceMapping.containsKey(source)) + if(mapping != null && mapping.GermlineSourceMapping.containsKey(source)) { - return mapping.NormalSourceMapping.get(source); + return mapping.GermlineSourceMapping.get(source); } - else if(mapping != null && mapping.NormalSampleId != null) + else if(mapping != null && mapping.GermlineSampleId != null) { - return mapping.NormalSampleId; + return mapping.GermlineSampleId; } } return sourceSampleId(source, sampleId) + "-ref"; @@ -247,31 +247,31 @@ else if(mapping != null && mapping.NormalSampleId != null) private static class SampleIdMapping { public final String SampleId; - public final String NormalSampleId; + public final String GermlineSampleId; public Map SourceMapping; - public Map NormalSourceMapping; + 
public Map GermlineSourceMapping; - public SampleIdMapping(final String sampleId, final String normalSampleId) + public SampleIdMapping(final String sampleId, final String germlineSampleId) { SampleId = sampleId; - NormalSampleId = normalSampleId; + GermlineSampleId = germlineSampleId; SourceMapping = Maps.newHashMap(); - NormalSourceMapping = Maps.newHashMap(); + GermlineSourceMapping = Maps.newHashMap(); } } private static final String COL_SAMPLE_ID = "SampleId"; - private static final String COL_NORMAL_SAMPLE_ID = "NormalSampleId"; + private static final String COL_GERMLINE_SAMPLE_ID = "GermlineSampleId"; private static final String COL_REF_SAMPLE_ID = "RefSampleId"; - private static final String COL_REF_NORMAL_SAMPLE_ID = "RefNormalSampleId"; + private static final String COL_REF_GERMLINE_SAMPLE_ID = "RefGermlineSampleId"; private static final String COL_NEW_SAMPLE_ID = "NewSampleId"; - private static final String COL_NEW_NORMAL_SAMPLE_ID = "NewNormalSampleId"; + private static final String COL_NEW_GERMLINE_SAMPLE_ID = "NewGermlineSampleId"; private void loadSampleIds(final ConfigBuilder configBuilder) { if(configBuilder.hasValue(SAMPLE)) { - registerSampleIds(configBuilder.getValue(SAMPLE), configBuilder.getValue(NORMAL, null)); + registerSampleIds(configBuilder.getValue(SAMPLE), configBuilder.getValue(GERMLINE_SAMPLE, null)); return; } @@ -291,11 +291,11 @@ private void loadSampleIds(final ConfigBuilder configBuilder) Map fieldsIndexMap = createFieldsIndexMap(header, CSV_DELIM); int sampleIndex = fieldsIndexMap.get(COL_SAMPLE_ID); - Integer normalSampleIndex = fieldsIndexMap.get(COL_NORMAL_SAMPLE_ID); + Integer germlineSampleIndex = fieldsIndexMap.get(COL_GERMLINE_SAMPLE_ID); Integer refSampleIndex = fieldsIndexMap.get(COL_REF_SAMPLE_ID); - Integer refNormalSampleIndex = fieldsIndexMap.get(COL_REF_NORMAL_SAMPLE_ID); + Integer refGermlineSampleIndex = fieldsIndexMap.get(COL_REF_GERMLINE_SAMPLE_ID); Integer newSampleIndex = 
fieldsIndexMap.get(COL_NEW_SAMPLE_ID); - Integer newNormalSampleIndex = fieldsIndexMap.get(COL_NEW_NORMAL_SAMPLE_ID); + Integer newGermlineSampleIndex = fieldsIndexMap.get(COL_NEW_GERMLINE_SAMPLE_ID); for(String line : lines) { @@ -305,13 +305,13 @@ private void loadSampleIds(final ConfigBuilder configBuilder) String[] values = line.split(CSV_DELIM, -1); String sampleId = values[sampleIndex]; - String normalSampleId = normalSampleIndex != null ? values[normalSampleIndex] : null; + String germlineSampleId = germlineSampleIndex != null ? values[germlineSampleIndex] : null; String refSampleId = refSampleIndex != null ? values[refSampleIndex] : null; - String refNormalSampleId = refNormalSampleIndex != null ? values[refNormalSampleIndex] : null; + String refGermlineSampleId = refGermlineSampleIndex != null ? values[refGermlineSampleIndex] : null; String newSampleId = newSampleIndex != null ? values[newSampleIndex] : null; - String newNormalSampleId = newNormalSampleIndex != null ? values[newNormalSampleIndex] : null; + String newGermlineSampleId = newGermlineSampleIndex != null ? 
values[newGermlineSampleIndex] : null; - registerSampleIds(sampleId, normalSampleId, refSampleId, refNormalSampleId, newSampleId, newNormalSampleId); + registerSampleIds(sampleId, germlineSampleId, refSampleId, refGermlineSampleId, newSampleId, newGermlineSampleId); } CMP_LOGGER.info("loaded {} samples from file", SampleIds.size()); @@ -322,17 +322,17 @@ private void loadSampleIds(final ConfigBuilder configBuilder) } } - private void registerSampleIds(final String sampleId, final String normalSampleId) + private void registerSampleIds(final String sampleId, final String germlineSampleId) { - registerSampleIds(sampleId, normalSampleId, null, null, null, null); + registerSampleIds(sampleId, germlineSampleId, null, null, null, null); } - private void registerSampleIds(final String sampleId, final String normalSampleId, final String refSampleId, - final String refNormalSampleId, final String newSampleId, final String newNormalSampleId) + private void registerSampleIds(final String sampleId, final String germlineSampleId, final String refSampleId, + final String refGermlineSampleId, final String newSampleId, final String newGermlineSampleId) { SampleIds.add(sampleId); - SampleIdMapping mapping = new SampleIdMapping(sampleId, normalSampleId); + SampleIdMapping mapping = new SampleIdMapping(sampleId, germlineSampleId); mSampleIdMappings.put(sampleId, mapping); if(refSampleId != null && SourceNames.size() >= 1) @@ -340,10 +340,10 @@ private void registerSampleIds(final String sampleId, final String normalSampleI if(newSampleId != null && SourceNames.size() >= 2) mapping.SourceMapping.put(SourceNames.get(1), newSampleId); - if(refNormalSampleId != null && SourceNames.size() >= 1) - mapping.NormalSourceMapping.put(SourceNames.get(0), refNormalSampleId); - if(newNormalSampleId != null && SourceNames.size() >= 2) - mapping.NormalSourceMapping.put(SourceNames.get(1), newNormalSampleId); + if(refGermlineSampleId != null && SourceNames.size() >= 1) + 
mapping.GermlineSourceMapping.put(SourceNames.get(0), refGermlineSampleId); + if(newGermlineSampleId != null && SourceNames.size() >= 2) + mapping.GermlineSourceMapping.put(SourceNames.get(1), newGermlineSampleId); } private static String formConfigSourceStr(final String sourceType, final String sourceName) @@ -415,7 +415,7 @@ public static void addConfig(final ConfigBuilder configBuilder) MATCH_LEVEL, false, "Match level from REPORTABLE (default) or DETAILED", REPORTABLE.toString()); configBuilder.addConfigItem(SAMPLE, SAMPLE_DESC); - configBuilder.addConfigItem(NORMAL, false, "Sample ID of normal sample if tumor-normal run"); + configBuilder.addConfigItem(GERMLINE_SAMPLE, false, "Sample ID of germline sample if tumor-normal run"); addSampleIdFile(configBuilder, false); configBuilder.addConfigItem(DRIVER_GENE_PANEL_OPTION, DRIVER_GENE_PANEL_OPTION_DESC); configBuilder.addConfigItem(THRESHOLDS, "In form: Field,AbsoluteDiff,PercentDiff, separated by ';'"); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java index 627e07c101..60f4efa7cd 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java @@ -113,9 +113,9 @@ private Set loadCombinedCopyNumberDriverGenes(final String sampleId) for(String sourceName : mConfig.SourceNames) { String sourceSampleId = mConfig.sourceSampleId(sourceName, sampleId); - String sourceNormalSampleId = mConfig.sourceNormalSampleId(sourceName, sampleId); + String sourceGermlineSampleId = mConfig.sourceGermlineSampleId(sourceName, sampleId); - FileSources fileSources = FileSources.sampleInstance(mConfig.FileSources.get(sourceName), sourceSampleId, sourceNormalSampleId); + FileSources fileSources = FileSources.sampleInstance(mConfig.FileSources.get(sourceName), sourceSampleId, sourceGermlineSampleId); try { diff --git 
a/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java index 6114bfd53f..1d22cd7eac 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ItemComparer.java @@ -16,7 +16,7 @@ public interface ItemComparer List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName); - List loadFromFile(final String sampleId, final String sourceNormalSampleId, final FileSources fileSources); + List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources); List comparedFieldNames(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java index c197d1ca74..28ba174573 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java @@ -66,7 +66,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java index 0483a928a1..c676438f06 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java @@ -22,7 +22,7 @@ public enum Category PEACH, VIRUS, TUMOR_FLAGSTAT, - NORMAL_FLAGSTAT; + GERMLINE_FLAGSTAT; public static final String ALL_CATEGORIES = "ALL"; public static final String LINX_CATEGORIES = "LINX"; diff 
--git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 6a11170c5f..5b09516aac 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -33,7 +33,7 @@ import com.hartwig.hmftools.compar.linx.DisruptionComparer; import com.hartwig.hmftools.compar.linx.FusionComparer; import com.hartwig.hmftools.compar.linx.GermlineSvComparer; -import com.hartwig.hmftools.compar.metrics.NormalFlagstatComparer; +import com.hartwig.hmftools.compar.metrics.GermlineFlagstatComparer; import com.hartwig.hmftools.compar.metrics.TumorFlagstatComparer; import com.hartwig.hmftools.compar.peach.PeachComparer; import com.hartwig.hmftools.compar.purple.CopyNumberComparer; @@ -135,8 +135,8 @@ private static ItemComparer createComparer(final Category category, final Compar case TUMOR_FLAGSTAT: return new TumorFlagstatComparer(config); - case NORMAL_FLAGSTAT: - return new NormalFlagstatComparer(config); + case GERMLINE_FLAGSTAT: + return new GermlineFlagstatComparer(config); default: return null; @@ -153,7 +153,7 @@ public static boolean processSample( for(String sourceName : config.SourceNames) { String sourceSampleId = config.sourceSampleId(sourceName, sampleId); - String sourceNormalSampleId = config.sourceNormalSampleId(sourceName, sampleId); + String sourceGermlineSampleId = config.sourceGermlineSampleId(sourceName, sampleId); List items = null; if(!config.DbConnections.isEmpty()) @@ -163,8 +163,8 @@ public static boolean processSample( else { FileSources fileSources = - FileSources.sampleInstance(config.FileSources.get(sourceName), sourceSampleId, sourceNormalSampleId); - items = comparer.loadFromFile(sourceSampleId, sourceNormalSampleId, fileSources); + FileSources.sampleInstance(config.FileSources.get(sourceName), sourceSampleId, sourceGermlineSampleId); + items = 
comparer.loadFromFile(sourceSampleId, sourceGermlineSampleId, fileSources); } if(items != null) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index 3d21359b17..5bf2553358 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -46,17 +46,17 @@ public class FileSources public final String SomaticVcf; public final String SomaticUnfilteredVcf; public final String TumorFlagstat; - public final String NormalFlagstat; + public final String GermlineFlagstat; private static final String SAMPLE_DIR = "sample_dir"; private static final String SOMATIC_VCF = "somatic_vcf"; private static final String SOMATIC_UNFILTERED_VCF = "somatic_unfiltered_vcf"; private static final String TUMOR_FLAGSTAT = "tumor_flagstat_dir"; - private static final String NORMAL_FLAGSTAT = "normal_flagstat_dir"; + private static final String GERMLINE_FLAGSTAT = "germline_flagstat_dir"; public FileSources(final String source, final String linx, final String purple, final String linxGermline, final String cuppa, final String lilac, final String chord, final String peach, final String virus, final String somaticVcf, - final String somaticUnfilteredVcf, final String tumorFlagstat, final String normalFlagstat) + final String somaticUnfilteredVcf, final String tumorFlagstat, final String germlineFlagstat) { Source = source; Linx = linx; @@ -70,25 +70,25 @@ public FileSources(final String source, final String linx, final String purple, SomaticVcf = somaticVcf; SomaticUnfilteredVcf = somaticUnfilteredVcf; TumorFlagstat = tumorFlagstat; - NormalFlagstat = normalFlagstat; + GermlineFlagstat = germlineFlagstat; } - public static FileSources sampleInstance(final FileSources fileSources, final String sampleId, final String normalSampleId) + public static FileSources sampleInstance(final 
FileSources fileSources, final String sampleId, final String germlineSampleId) { return new FileSources( fileSources.Source, - convertWildcardSamplePath(fileSources.Linx, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.Purple, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.LinxGermline, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.Cuppa, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.Lilac, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.Chord, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.Peach, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.Virus, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.SomaticVcf, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.TumorFlagstat, sampleId, normalSampleId), - convertWildcardSamplePath(fileSources.NormalFlagstat, sampleId, normalSampleId)); + convertWildcardSamplePath(fileSources.Linx, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.Purple, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.LinxGermline, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.Cuppa, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.Lilac, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.Chord, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.Peach, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.Virus, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.SomaticVcf, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.TumorFlagstat, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.GermlineFlagstat, sampleId, 
germlineSampleId)); } public static RefGenomeVersion liftoverSourceGenomeVersion(final String source) @@ -134,8 +134,8 @@ public static void registerConfig(final ConfigBuilder configBuilder) formSourceConfig(TUMOR_FLAGSTAT, sourceName), false, formSourceDescription("Tumor flagstat", sourceName)); configBuilder.addPath( - formSourceConfig(NORMAL_FLAGSTAT, sourceName), false, - formSourceDescription("Normal flagstat", sourceName)); + formSourceConfig(GERMLINE_FLAGSTAT, sourceName), false, + formSourceDescription("Germline flagstat", sourceName)); } } @@ -174,10 +174,10 @@ public static FileSources fromConfig(final String sourceName, final ConfigBuilde String somaticUnfilteredVcf = getConfigValue(configBuilder, SOMATIC_UNFILTERED_VCF, sourceName); String tumorFlagstat = getDirectory(configBuilder, sampleDir, "*/flagstat", TUMOR_FLAGSTAT, sourceName); - String normalFlagstat = getDirectory(configBuilder, sampleDir, "$/flagstat", NORMAL_FLAGSTAT, sourceName); + String germlineFlagstat = getDirectory(configBuilder, sampleDir, "$/flagstat", GERMLINE_FLAGSTAT, sourceName); return new FileSources(sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, virusDir, - somaticVcf, somaticUnfilteredVcf, tumorFlagstat, normalFlagstat); + somaticVcf, somaticUnfilteredVcf, tumorFlagstat, germlineFlagstat); } private static String getDirectory( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java index 443e22252f..d9e8775d8b 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java @@ -62,7 +62,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final 
String germlineSampleId, final FileSources fileSources) { final List comparableItems = new ArrayList<>(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java index c4876bf0d6..d01b28c41a 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java @@ -73,7 +73,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java index f37c16d3b9..2bf003a4c8 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacComparer.java @@ -89,7 +89,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java index 0f110fa07b..8de2ab7ee6 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java @@ -76,7 +76,7 @@ public List loadFromDb(final 
String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { try { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java index 0632f86e51..3db0b7b025 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java @@ -72,7 +72,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { try { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java index 9eb6cab022..a3114ffa27 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java @@ -72,7 +72,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { List items = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java similarity index 81% rename from compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java 
rename to compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java index 9ac672015f..0ec54ba425 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java @@ -1,7 +1,7 @@ package com.hartwig.hmftools.compar.metrics; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; -import static com.hartwig.hmftools.compar.common.Category.NORMAL_FLAGSTAT; +import static com.hartwig.hmftools.compar.common.Category.GERMLINE_FLAGSTAT; import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_PCT_THRESHOLD; @@ -22,11 +22,11 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; -public class NormalFlagstatComparer implements ItemComparer +public class GermlineFlagstatComparer implements ItemComparer { private final ComparConfig mConfig; - public NormalFlagstatComparer(final ComparConfig config) + public GermlineFlagstatComparer(final ComparConfig config) { mConfig = config; } @@ -34,7 +34,7 @@ public NormalFlagstatComparer(final ComparConfig config) @Override public Category category() { - return NORMAL_FLAGSTAT; + return GERMLINE_FLAGSTAT; } @Override @@ -63,17 +63,17 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); try { - Flagstat flagstat = FlagstatFile.read(FlagstatFile.generateFilename(fileSources.NormalFlagstat, normalSampleId)); - 
comparableItems.add(new NormalFlagstatData(flagstat)); + Flagstat flagstat = FlagstatFile.read(FlagstatFile.generateFilename(fileSources.GermlineFlagstat, germlineSampleId)); + comparableItems.add(new GermlineFlagstatData(flagstat)); } catch(IOException e) { - CMP_LOGGER.warn("sample({}) failed to load normal flagstat data: {}", sampleId, e.toString()); + CMP_LOGGER.warn("sample({}) failed to load germline flagstat data: {}", sampleId, e.toString()); return null; } return comparableItems; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java similarity index 85% rename from compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java rename to compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java index b542c1003b..71190225c2 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/NormalFlagstatData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java @@ -2,7 +2,7 @@ import static java.lang.String.format; -import static com.hartwig.hmftools.compar.common.Category.NORMAL_FLAGSTAT; +import static com.hartwig.hmftools.compar.common.Category.GERMLINE_FLAGSTAT; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; @@ -17,11 +17,11 @@ import com.hartwig.hmftools.compar.common.MatchLevel; import com.hartwig.hmftools.compar.common.Mismatch; -public class NormalFlagstatData implements ComparableItem +public class GermlineFlagstatData implements ComparableItem { public final Flagstat mFlagstat; - public NormalFlagstatData(final Flagstat flagstat) + public GermlineFlagstatData(final Flagstat flagstat) { mFlagstat = flagstat; } @@ -29,7 +29,7 @@ public NormalFlagstatData(final 
Flagstat flagstat) @Override public Category category() { - return NORMAL_FLAGSTAT; + return GERMLINE_FLAGSTAT; } @Override @@ -62,7 +62,7 @@ public boolean matches(final ComparableItem other) @Override public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) { - final NormalFlagstatData otherData = (NormalFlagstatData) other; + final GermlineFlagstatData otherData = (GermlineFlagstatData) other; final List diffs = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java index f7c435de7e..8437911fd7 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java @@ -63,7 +63,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); try diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 26fd469558..eebc884b3e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -90,7 +90,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources 
fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 83d5bdde9a..fc1ac71dd5 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -86,7 +86,7 @@ public boolean processSample(final String sampleId, final List mismatc final String sourceName = mConfig.SourceNames.get(i); String sourceSampleId = mConfig.sourceSampleId(sourceName, sampleId); - String sourceNormalSampleId = mConfig.sourceNormalSampleId(sourceName, sampleId); + String sourceGermlineSampleId = mConfig.sourceGermlineSampleId(sourceName, sampleId); if(!mConfig.DbConnections.isEmpty()) { @@ -96,7 +96,7 @@ public boolean processSample(final String sampleId, final List mismatc { FileSources fileSources = mConfig.FileSources.get(sourceName); List fileVariants = - loadVariants(sourceSampleId, FileSources.sampleInstance(fileSources, sourceSampleId, sourceNormalSampleId)); + loadVariants(sourceSampleId, FileSources.sampleInstance(fileSources, sourceSampleId, sourceGermlineSampleId)); if(fileVariants == null) continue; @@ -344,7 +344,7 @@ private List loadVariants(final String sampleId, final Datab } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List items = Lists.newArrayList(); loadVariants(sampleId, fileSources).forEach(x -> items.add(x)); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java index 269055b192..94f8cbb1e4 100644 --- 
a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -59,11 +59,11 @@ public List loadFromDb(final String sampleId, final DatabaseAcce return genotypes.stream().map(g -> new PeachData(g)).collect(Collectors.toList()); } - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); - String fileName = determineFileName(sampleId, normalSampleId, fileSources); + String fileName = determineFileName(sampleId, germlineSampleId, fileSources); try { PeachGenotypeFile.read(fileName).forEach(g -> comparableItems.add(new PeachData(g))); @@ -76,9 +76,9 @@ public List loadFromFile(final String sampleId, final String nor return comparableItems; } - private static String determineFileName(final String sampleId, final String normalSampleId, final FileSources fileSources) + private static String determineFileName(final String sampleId, final String germlineSampleId, final FileSources fileSources) { - final String currentFileName = PeachGenotypeFile.generateFileName(fileSources.Peach, normalSampleId); + final String currentFileName = PeachGenotypeFile.generateFileName(fileSources.Peach, germlineSampleId); final String oldFileName = PeachGenotypeFile.generateOldPythonFileName(fileSources.Peach, sampleId); if(!fileExists(currentFileName) && fileExists(oldFileName)) { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java index b7951786bb..5019cebfd2 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java @@ -66,7 +66,7 @@ public List 
loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java index 658c5dd703..aebea88848 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java @@ -82,7 +82,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List items = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java index 99cfabcf3e..238d09e1c2 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java @@ -68,7 +68,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java index 57e84fa820..5e31797fbf 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/PurityComparer.java @@ -86,7 +86,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java index cd20ba806f..24e4ecb382 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java @@ -63,7 +63,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce } @Override - public List loadFromFile(final String sampleId, final String normalSampleId, final FileSources fileSources) + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { final List comparableItems = Lists.newArrayList(); try From eaf685f80ed7880d04826cce6a4f0feeac552e57 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Tue, 3 Sep 2024 15:39:48 +0200 Subject: [PATCH 28/53] Compar: DEV-4061: Add BAM metrics comparison --- .../hmftools/compar/common/Category.java | 6 +- .../hmftools/compar/common/CommonUtils.java | 8 ++ .../hmftools/compar/common/FileSources.java | 23 ++++- .../metrics/GermlineBamMetricsComparer.java | 85 +++++++++++++++++++ .../metrics/GermlineBamMetricsData.java | 80 +++++++++++++++++ .../metrics/GermlineFlagstatComparer.java | 6 +- .../compar/metrics/GermlineFlagstatData.java | 2 +- ...FlagstatCommon.java => 
MetricsCommon.java} | 5 +- .../metrics/TumorBamMetricsComparer.java | 85 +++++++++++++++++++ .../compar/metrics/TumorBamMetricsData.java | 80 +++++++++++++++++ .../compar/metrics/TumorFlagstatComparer.java | 6 +- .../compar/metrics/TumorFlagstatData.java | 2 +- 12 files changed, 374 insertions(+), 14 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java rename compar/src/main/java/com/hartwig/hmftools/compar/metrics/{FlagstatCommon.java => MetricsCommon.java} (51%) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java index c676438f06..3b7e1b01c9 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java @@ -22,7 +22,9 @@ public enum Category PEACH, VIRUS, TUMOR_FLAGSTAT, - GERMLINE_FLAGSTAT; + GERMLINE_FLAGSTAT, + TUMOR_BAM_METRICS, + GERMLINE_BAM_METRICS; public static final String ALL_CATEGORIES = "ALL"; public static final String LINX_CATEGORIES = "LINX"; @@ -38,6 +40,6 @@ public static List purpleCategories() public static List panelCategories() { - return Lists.newArrayList(PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT); + return Lists.newArrayList(PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT, TUMOR_BAM_METRICS); } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 5b09516aac..4f578bdbb7 100644 --- 
a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -33,7 +33,9 @@ import com.hartwig.hmftools.compar.linx.DisruptionComparer; import com.hartwig.hmftools.compar.linx.FusionComparer; import com.hartwig.hmftools.compar.linx.GermlineSvComparer; +import com.hartwig.hmftools.compar.metrics.GermlineBamMetricsComparer; import com.hartwig.hmftools.compar.metrics.GermlineFlagstatComparer; +import com.hartwig.hmftools.compar.metrics.TumorBamMetricsComparer; import com.hartwig.hmftools.compar.metrics.TumorFlagstatComparer; import com.hartwig.hmftools.compar.peach.PeachComparer; import com.hartwig.hmftools.compar.purple.CopyNumberComparer; @@ -138,6 +140,12 @@ private static ItemComparer createComparer(final Category category, final Compar case GERMLINE_FLAGSTAT: return new GermlineFlagstatComparer(config); + case TUMOR_BAM_METRICS: + return new TumorBamMetricsComparer(config); + + case GERMLINE_BAM_METRICS: + return new GermlineBamMetricsComparer(config); + default: return null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index 5bf2553358..8678dd3880 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -47,16 +47,21 @@ public class FileSources public final String SomaticUnfilteredVcf; public final String TumorFlagstat; public final String GermlineFlagstat; + public final String TumorBamMetrics; + public final String GermlineBamMetrics; private static final String SAMPLE_DIR = "sample_dir"; private static final String SOMATIC_VCF = "somatic_vcf"; private static final String SOMATIC_UNFILTERED_VCF = "somatic_unfiltered_vcf"; private static final String TUMOR_FLAGSTAT = "tumor_flagstat_dir"; private static final String GERMLINE_FLAGSTAT = 
"germline_flagstat_dir"; + private static final String TUMOR_BAM_METRICS = "tumor_bam_metrics_dir"; + private static final String GERMLINE_BAM_METRICS = "germline_bam_metrics_dir"; public FileSources(final String source, final String linx, final String purple, final String linxGermline, final String cuppa, final String lilac, final String chord, final String peach, final String virus, final String somaticVcf, - final String somaticUnfilteredVcf, final String tumorFlagstat, final String germlineFlagstat) + final String somaticUnfilteredVcf, final String tumorFlagstat, final String germlineFlagstat, final String tumorBamMetrics, + final String germlineBamMetrics) { Source = source; Linx = linx; @@ -71,6 +76,8 @@ public FileSources(final String source, final String linx, final String purple, SomaticUnfilteredVcf = somaticUnfilteredVcf; TumorFlagstat = tumorFlagstat; GermlineFlagstat = germlineFlagstat; + TumorBamMetrics = tumorBamMetrics; + GermlineBamMetrics = germlineBamMetrics; } public static FileSources sampleInstance(final FileSources fileSources, final String sampleId, final String germlineSampleId) @@ -88,7 +95,9 @@ public static FileSources sampleInstance(final FileSources fileSources, final St convertWildcardSamplePath(fileSources.SomaticVcf, sampleId, germlineSampleId), convertWildcardSamplePath(fileSources.SomaticUnfilteredVcf, sampleId, germlineSampleId), convertWildcardSamplePath(fileSources.TumorFlagstat, sampleId, germlineSampleId), - convertWildcardSamplePath(fileSources.GermlineFlagstat, sampleId, germlineSampleId)); + convertWildcardSamplePath(fileSources.GermlineFlagstat, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.TumorBamMetrics, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.GermlineBamMetrics, sampleId, germlineSampleId)); } public static RefGenomeVersion liftoverSourceGenomeVersion(final String source) @@ -136,6 +145,12 @@ public static void registerConfig(final ConfigBuilder configBuilder) 
configBuilder.addPath( formSourceConfig(GERMLINE_FLAGSTAT, sourceName), false, formSourceDescription("Germline flagstat", sourceName)); + configBuilder.addPath( + formSourceConfig(TUMOR_BAM_METRICS, sourceName), false, + formSourceDescription("Tumor BAM metrics", sourceName)); + configBuilder.addPath( + formSourceConfig(GERMLINE_BAM_METRICS, sourceName), false, + formSourceDescription("Germline BAM metrics", sourceName)); } } @@ -175,9 +190,11 @@ public static FileSources fromConfig(final String sourceName, final ConfigBuilde String tumorFlagstat = getDirectory(configBuilder, sampleDir, "*/flagstat", TUMOR_FLAGSTAT, sourceName); String germlineFlagstat = getDirectory(configBuilder, sampleDir, "$/flagstat", GERMLINE_FLAGSTAT, sourceName); + String tumorBamMetrics = getDirectory(configBuilder, sampleDir, "*/bam_metrics", TUMOR_BAM_METRICS, sourceName); + String germlineBamMetrics = getDirectory(configBuilder, sampleDir, "$/bam_metrics", GERMLINE_BAM_METRICS, sourceName); return new FileSources(sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, virusDir, - somaticVcf, somaticUnfilteredVcf, tumorFlagstat, germlineFlagstat); + somaticVcf, somaticUnfilteredVcf, tumorFlagstat, germlineFlagstat, tumorBamMetrics, germlineBamMetrics); } private static String getDirectory( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java new file mode 100644 index 0000000000..57a062d2c3 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java @@ -0,0 +1,85 @@ +package com.hartwig.hmftools.compar.metrics; + +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.GERMLINE_BAM_METRICS; +import static com.hartwig.hmftools.compar.metrics.GermlineBamMetricsData.FLD_PERCENTAGE_10X; +import static 
com.hartwig.hmftools.compar.metrics.GermlineBamMetricsData.FLD_PERCENTAGE_20X; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_ABS_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_PCT_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; + +import java.io.IOException; +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.metrics.WGSMetrics; +import com.hartwig.hmftools.common.metrics.WGSMetricsFile; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.FileSources; +import com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +public class GermlineBamMetricsComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + public GermlineBamMetricsComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return GERMLINE_BAM_METRICS; + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public void registerThresholds(final DiffThresholds thresholds) + { + thresholds.addFieldThreshold(FLD_DUPLICATE_PERCENTAGE, DUPLICATE_PERCENTAGE_ABS_THRESHOLD, DUPLICATE_PERCENTAGE_PCT_THRESHOLD); + thresholds.addFieldThreshold(FLD_PERCENTAGE_10X, 0.03, 0); + thresholds.addFieldThreshold(FLD_PERCENTAGE_20X, 0.03, 0); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_DUPLICATE_PERCENTAGE, FLD_PERCENTAGE_10X, 
FLD_PERCENTAGE_20X); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) + { + // currently unsupported + return Lists.newArrayList(); + } + + @Override + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) + { + final List comparableItems = Lists.newArrayList(); + try + { + WGSMetrics metrics = WGSMetricsFile.read(WGSMetricsFile.generateFilename(fileSources.GermlineBamMetrics, germlineSampleId)); + comparableItems.add(new GermlineBamMetricsData(metrics)); + } + catch(IOException e) + { + CMP_LOGGER.warn("sample({}) failed to load germline BAM metrics data: {}", sampleId, e.toString()); + return null; + } + return comparableItems; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java new file mode 100644 index 0000000000..8d8340a95c --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java @@ -0,0 +1,80 @@ +package com.hartwig.hmftools.compar.metrics; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.compar.common.Category.GERMLINE_BAM_METRICS; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.metrics.WGSMetrics; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import com.hartwig.hmftools.compar.common.Mismatch; + +public class GermlineBamMetricsData implements ComparableItem 
+{ + public final WGSMetrics Metrics; + + protected static final String FLD_PERCENTAGE_10X = "Percentage10X"; + protected static final String FLD_PERCENTAGE_20X = "Percentage20X"; + + public GermlineBamMetricsData(final WGSMetrics metrics) + { + Metrics = metrics; + } + + @Override + public Category category() + { + return GERMLINE_BAM_METRICS; + } + + @Override + public String key() + { + return ""; + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%.2f", Metrics.pctExcDupe())); + values.add(format("%.2f", Metrics.coverage10xPercentage())); + values.add(format("%.2f", Metrics.coverage20xPercentage())); + return values; + } + + @Override + public boolean reportable() + { + return true; + } + + @Override + public boolean matches(final ComparableItem other) + { + // a single record for each sample + return true; + } + + @Override + public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final GermlineBamMetricsData otherData = (GermlineBamMetricsData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_DUPLICATE_PERCENTAGE, Metrics.pctExcDupe(), otherData.Metrics.pctExcDupe(), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_10X, Metrics.coverage10xPercentage(), otherData.Metrics.coverage10xPercentage(), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_20X, Metrics.coverage20xPercentage(), otherData.Metrics.coverage20xPercentage(), thresholds); + + return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java index 0ec54ba425..1e98522f34 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java @@ -2,9 +2,9 @@ import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_FLAGSTAT; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_ABS_THRESHOLD; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_PCT_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_ABS_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_PCT_THRESHOLD; import java.io.IOException; import java.util.List; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java index 71190225c2..dbc03e0726 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java @@ -5,7 +5,7 @@ import static com.hartwig.hmftools.compar.common.Category.GERMLINE_FLAGSTAT; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; +import static 
com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; import java.util.List; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java similarity index 51% rename from compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java rename to compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java index bd02c4dc92..b30fc9e10f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/FlagstatCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java @@ -1,9 +1,12 @@ package com.hartwig.hmftools.compar.metrics; -public class FlagstatCommon +public class MetricsCommon { protected static final String FLD_MAPPED_PROPORTION = "MappedProportion"; + protected static final String FLD_DUPLICATE_PERCENTAGE = "DuplicatePercentage"; protected static final double MAPPED_PROPORTION_ABS_THRESHOLD = 0.01; protected static final double MAPPED_PROPORTION_PCT_THRESHOLD = 0; + protected static final double DUPLICATE_PERCENTAGE_ABS_THRESHOLD = 0.05; + protected static final double DUPLICATE_PERCENTAGE_PCT_THRESHOLD = 0; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java new file mode 100644 index 0000000000..37e667fd3a --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java @@ -0,0 +1,85 @@ +package com.hartwig.hmftools.compar.metrics; + +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.TUMOR_BAM_METRICS; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_ABS_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_PCT_THRESHOLD; +import static 
com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; +import static com.hartwig.hmftools.compar.metrics.TumorBamMetricsData.FLD_PERCENTAGE_30X; +import static com.hartwig.hmftools.compar.metrics.TumorBamMetricsData.FLD_PERCENTAGE_60X; + +import java.io.IOException; +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.metrics.WGSMetrics; +import com.hartwig.hmftools.common.metrics.WGSMetricsFile; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.FileSources; +import com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +public class TumorBamMetricsComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + public TumorBamMetricsComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return TUMOR_BAM_METRICS; + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public void registerThresholds(final DiffThresholds thresholds) + { + thresholds.addFieldThreshold(FLD_DUPLICATE_PERCENTAGE, DUPLICATE_PERCENTAGE_ABS_THRESHOLD, DUPLICATE_PERCENTAGE_PCT_THRESHOLD); + thresholds.addFieldThreshold(FLD_PERCENTAGE_30X, 0.03, 0); + thresholds.addFieldThreshold(FLD_PERCENTAGE_60X, 0.03, 0); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_DUPLICATE_PERCENTAGE, FLD_PERCENTAGE_30X, FLD_PERCENTAGE_60X); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final 
String sourceName) + { + // currently unsupported + return Lists.newArrayList(); + } + + @Override + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) + { + final List comparableItems = Lists.newArrayList(); + try + { + WGSMetrics metrics = WGSMetricsFile.read(WGSMetricsFile.generateFilename(fileSources.TumorBamMetrics, sampleId)); + comparableItems.add(new TumorBamMetricsData(metrics)); + } + catch(IOException e) + { + CMP_LOGGER.warn("sample({}) failed to load tumor BAM metrics data: {}", sampleId, e.toString()); + return null; + } + return comparableItems; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java new file mode 100644 index 0000000000..de4cf36188 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java @@ -0,0 +1,80 @@ +package com.hartwig.hmftools.compar.metrics; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.compar.common.Category.TUMOR_BAM_METRICS; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.metrics.WGSMetrics; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import com.hartwig.hmftools.compar.common.Mismatch; + +public class TumorBamMetricsData implements ComparableItem +{ + public final WGSMetrics Metrics; + + protected static final String FLD_PERCENTAGE_30X = "Percentage30X"; + protected static final String 
FLD_PERCENTAGE_60X = "Percentage60X"; + + public TumorBamMetricsData(final WGSMetrics metrics) + { + Metrics = metrics; + } + + @Override + public Category category() + { + return TUMOR_BAM_METRICS; + } + + @Override + public String key() + { + return ""; + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%.2f", Metrics.pctExcDupe())); + values.add(format("%.2f", Metrics.coverage30xPercentage())); + values.add(format("%.2f", Metrics.coverage60xPercentage())); + return values; + } + + @Override + public boolean reportable() + { + return true; + } + + @Override + public boolean matches(final ComparableItem other) + { + // a single record for each sample + return true; + } + + @Override + public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final TumorBamMetricsData otherData = (TumorBamMetricsData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_DUPLICATE_PERCENTAGE, Metrics.pctExcDupe(), otherData.Metrics.pctExcDupe(), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_30X, Metrics.coverage30xPercentage(), otherData.Metrics.coverage30xPercentage(), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_60X, Metrics.coverage60xPercentage(), otherData.Metrics.coverage60xPercentage(), thresholds); + + return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java index 8437911fd7..b56e70782c 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java @@ -2,9 +2,9 @@ import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.TUMOR_FLAGSTAT; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_ABS_THRESHOLD; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.MAPPED_PROPORTION_PCT_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_ABS_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_PCT_THRESHOLD; import java.io.IOException; import java.util.List; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java index b6480d32fd..01afb86828 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java @@ -5,7 +5,7 @@ import static com.hartwig.hmftools.compar.common.Category.TUMOR_FLAGSTAT; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; -import static com.hartwig.hmftools.compar.metrics.FlagstatCommon.FLD_MAPPED_PROPORTION; +import static 
com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; import java.util.List; From f1994a4597c659ecf1604c9e1ded4816619e3572 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Tue, 3 Sep 2024 16:07:32 +0200 Subject: [PATCH 29/53] Compar: DEV-4061: Make relative differences work for negative values --- .../java/com/hartwig/hmftools/compar/common/ThresholdData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/ThresholdData.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/ThresholdData.java index 65bea32585..75f1efb32f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/ThresholdData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/ThresholdData.java @@ -25,7 +25,7 @@ public boolean hasDiff(double value1, double value2) double absDiff = abs(value1 - value2); boolean hasAbsDiff = absDiff > AbsoluteDiff; - boolean hasRelDiff = absDiff / max(value1, value2) > PercentDiff; + boolean hasRelDiff = absDiff / max(abs(value1), abs(value2)) > PercentDiff; if(Type == ThresholdType.ABSOLUTE_AND_PERCENT) return hasAbsDiff && hasRelDiff; From 3edcee2854676fbe097ba5c91313585f9bc379ee Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Tue, 3 Sep 2024 16:16:07 +0200 Subject: [PATCH 30/53] Compar: DEV-4061: Simplify thresholds for TVAF --- .../hmftools/compar/mutation/GermlineVariantComparer.java | 2 +- .../hmftools/compar/mutation/SomaticVariantComparer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index eebc884b3e..8603f3f040 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -56,7 +56,7 @@ public 
void registerThresholds(final DiffThresholds thresholds) // same as somatic thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.5, 0.2); - thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); + thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0); thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index fc1ac71dd5..71b2d737ad 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -298,7 +298,7 @@ public void registerThresholds(final DiffThresholds thresholds) thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_SUBCLONAL_LIKELIHOOD, 0.6, 0); thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.5, 0.2); - thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0.2); + thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0); thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); } From f26af6e954319eb029f518475a3291884dfa12ce Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 5 Sep 2024 15:09:13 +0200 Subject: [PATCH 31/53] Compar: DEV-4061: Add undisrupted copy number comparison --- .../com/hartwig/hmftools/compar/linx/DisruptionComparer.java | 4 +++- .../java/com/hartwig/hmftools/compar/linx/DisruptionData.java | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java index 
8de2ab7ee6..7a6d8855f0 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java @@ -10,6 +10,7 @@ import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_GENE_ORIENT; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_NEXT_SPLICE; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_REGION_TYPE; +import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_UNDISRUPTED_COPY_NUMBER; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import java.io.IOException; @@ -52,6 +53,7 @@ public DisruptionComparer(final ComparConfig config) public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_JUNCTION_COPY_NUMBER, 0.5, 0.2); + thresholds.addFieldThreshold(FLD_UNDISRUPTED_COPY_NUMBER, 0.5, 0.2); } @Override @@ -64,7 +66,7 @@ public boolean processSample(final String sampleId, final List mismatc public List comparedFieldNames() { return Lists.newArrayList(FLD_REPORTED, FLD_REGION_TYPE, FLD_CODING_CONTEXT, FLD_GENE_ORIENT, FLD_NEXT_SPLICE, - FLD_JUNCTION_COPY_NUMBER, FLD_CHROMOSOME_BAND); + FLD_JUNCTION_COPY_NUMBER, FLD_UNDISRUPTED_COPY_NUMBER, FLD_CHROMOSOME_BAND); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java index ee00392843..1ec27f0dd5 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java @@ -30,6 +30,7 @@ public class DisruptionData implements ComparableItem protected static final String FLD_GENE_ORIENT = "GeneOrientation"; protected static final String FLD_NEXT_SPLICE = "NextSpliceExonRank"; protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; + protected static final String 
FLD_UNDISRUPTED_COPY_NUMBER = "UndisruptedCopyNumber"; protected static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; public DisruptionData( @@ -73,6 +74,7 @@ public List displayValues() values.add(String.format("%s", Breakend.geneOrientation())); values.add(String.format("%d", Breakend.nextSpliceExonRank())); values.add(String.format("%.2f", Breakend.junctionCopyNumber())); + values.add(String.format("%.2f", Breakend.undisruptedCopyNumber())); values.add(String.format("%s", Breakend.chrBand())); return values; } @@ -121,6 +123,7 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL checkDiff(diffs, FLD_GENE_ORIENT, Breakend.geneOrientation(), otherBreakend.Breakend.geneOrientation()); checkDiff(diffs, FLD_NEXT_SPLICE, Breakend.nextSpliceExonRank(), otherBreakend.Breakend.nextSpliceExonRank()); checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, Breakend.junctionCopyNumber(), otherBreakend.Breakend.junctionCopyNumber(), thresholds); + checkDiff(diffs, FLD_UNDISRUPTED_COPY_NUMBER, Breakend.undisruptedCopyNumber(), otherBreakend.Breakend.undisruptedCopyNumber(), thresholds); checkDiff(diffs, FLD_CHROMOSOME_BAND, Breakend.chrBand(), otherBreakend.Breakend.chrBand()); return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; From 73e2cb47d14155c4ad1b6b82c2bacbf7f93d1c54 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 6 Sep 2024 15:21:30 +0200 Subject: [PATCH 32/53] Compar: DEV-4061: Add liftover for copy number regions --- .../compar/purple/CopyNumberComparer.java | 18 ++++++++++-- .../compar/purple/CopyNumberData.java | 29 +++++++++++++++---- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java index 5019cebfd2..78f156842e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java @@ -2,6 +2,7 @@ import static com.hartwig.hmftools.compar.common.Category.COPY_NUMBER; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; import static com.hartwig.hmftools.compar.purple.CopyNumberData.FLD_COPY_NUMBER; import static com.hartwig.hmftools.compar.purple.CopyNumberData.FLD_MAJOR_ALLELE_CN; import static com.hartwig.hmftools.compar.purple.CopyNumberData.FLD_METHOD; @@ -12,6 +13,7 @@ import com.google.common.collect.Lists; import com.hartwig.hmftools.common.purple.PurpleCopyNumber; import com.hartwig.hmftools.common.purple.PurpleCopyNumberFile; +import com.hartwig.hmftools.common.region.BasePosition; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.CommonUtils; import com.hartwig.hmftools.compar.ComparConfig; @@ -22,6 +24,8 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; +import org.jetbrains.annotations.NotNull; + public class CopyNumberComparer implements ItemComparer { private final ComparConfig mConfig; @@ -61,7 +65,7 @@ public List 
loadFromDb(final String sampleId, final DatabaseAcce { final List copyNumbers = dbAccess.readCopynumbers(sampleId); List items = Lists.newArrayList(); - copyNumbers.forEach(x -> items.add(new CopyNumberData(x))); + copyNumbers.forEach(x -> items.add(createCopyNumberData(x, sourceName))); return items; } @@ -75,7 +79,7 @@ public List loadFromFile(final String sampleId, final String ger List copyNumbers = PurpleCopyNumberFile.read(PurpleCopyNumberFile.generateFilenameForReading( fileSources.Purple, sampleId)); - copyNumbers.forEach(x -> comparableItems.add(new CopyNumberData(x))); + copyNumbers.forEach(x -> comparableItems.add(createCopyNumberData(x, fileSources.Source))); } catch(IOException e) { @@ -85,4 +89,14 @@ public List loadFromFile(final String sampleId, final String ger return comparableItems; } + + @NotNull + private CopyNumberData createCopyNumberData(final PurpleCopyNumber copyNumber, final String fileSource) + { + BasePosition comparisonPositionStart = determineComparisonGenomePosition( + copyNumber.chromosome(), copyNumber.start(), fileSource, mConfig.RequiresLiftover, mConfig.LiftoverCache); + BasePosition comparisonPositionEnd = determineComparisonGenomePosition( + copyNumber.chromosome(), copyNumber.end(), fileSource, mConfig.RequiresLiftover, mConfig.LiftoverCache); + return new CopyNumberData(copyNumber, comparisonPositionStart, comparisonPositionEnd); + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberData.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberData.java index 8aa852965a..ae20464923 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberData.java @@ -10,6 +10,7 @@ import com.google.common.collect.Lists; import com.hartwig.hmftools.common.purple.PurpleCopyNumber; +import com.hartwig.hmftools.common.region.BasePosition; import com.hartwig.hmftools.compar.common.Category; import 
com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.common.DiffThresholds; @@ -19,14 +20,19 @@ public class CopyNumberData implements ComparableItem { public final PurpleCopyNumber CopyNumber; + public final BasePosition mComparisonPositionStart; + public final BasePosition mComparisonPositionEnd; protected static final String FLD_COPY_NUMBER = "CopyNumber"; protected static final String FLD_MAJOR_ALLELE_CN = "MajorAlleleCopyNumber"; protected static final String FLD_METHOD = "Method"; - public CopyNumberData(final PurpleCopyNumber copyNumber) + public CopyNumberData(final PurpleCopyNumber copyNumber, final BasePosition comparisonPositionStart, + final BasePosition comparisonPositionEnd) { CopyNumber = copyNumber; + mComparisonPositionStart = comparisonPositionStart; + mComparisonPositionEnd = comparisonPositionEnd; } public Category category() { @@ -36,7 +42,16 @@ public Category category() { @Override public String key() { - return format("%s:%d_%d", CopyNumber.chromosome(), CopyNumber.start(), CopyNumber.end()); + if(mComparisonPositionStart.equals(new BasePosition(CopyNumber.chromosome(), CopyNumber.start())) + && mComparisonPositionEnd.equals(new BasePosition(CopyNumber.chromosome(), CopyNumber.end()))) + { + return format("%s:%d_%d", CopyNumber.chromosome(), CopyNumber.start(), CopyNumber.end()); + } + else + { + return format("%s:%d_%d liftover(%s_%s)", CopyNumber.chromosome(), CopyNumber.start(), CopyNumber.end(), + mComparisonPositionStart, mComparisonPositionEnd); + } } @Override @@ -57,10 +72,14 @@ public boolean reportable() { @Override public boolean matches(final ComparableItem other) { - final CopyNumberData otherCn = (CopyNumberData)other; + final CopyNumberData otherCn = (CopyNumberData) other; + + if(!mComparisonPositionStart.Chromosome.equals(otherCn.CopyNumber.chromosome()) + || !mComparisonPositionEnd.Chromosome.equals(otherCn.CopyNumber.chromosome())) + return false; - return 
CopyNumber.chromosome().equals(otherCn.CopyNumber.chromosome()) - && CopyNumber.start() == otherCn.CopyNumber.start() && CopyNumber.end() == otherCn.CopyNumber.end(); + return mComparisonPositionStart.Position == otherCn.CopyNumber.start() + && mComparisonPositionEnd.Position == otherCn.CopyNumber.end(); } @Override From 4fdb4a07b5b50590def1efb4a5259fee2b1b3252 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 6 Sep 2024 16:10:46 +0200 Subject: [PATCH 33/53] Compar: DEV-4061: Add support for old virus directory --- .../hmftools/compar/common/CommonUtils.java | 7 ++++++ .../hmftools/compar/peach/PeachComparer.java | 6 +---- .../hmftools/compar/virus/VirusComparer.java | 25 ++++++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 4f578bdbb7..ab4f4d228d 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -13,6 +13,8 @@ import static com.hartwig.hmftools.compar.common.MismatchType.NEW_ONLY; import static com.hartwig.hmftools.compar.common.MismatchType.REF_ONLY; +import java.io.File; +import java.nio.file.Files; import java.util.Collections; import java.util.List; import java.util.Map; @@ -286,4 +288,9 @@ public static String determineComparisonChromosome(final String chromosome, fina return chromosome; } } + + public static boolean fileExists(final String filename) + { + return Files.exists(new File(filename).toPath()); + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java index 94f8cbb1e4..eb1a0cd65e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -2,6 +2,7 @@ import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.PEACH; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.peach.PeachData.FLD_ALLELE_COUNT; import static com.hartwig.hmftools.compar.peach.PeachData.FLD_DRUGS; import static com.hartwig.hmftools.compar.peach.PeachData.FLD_FUNCTION; @@ -89,9 +90,4 @@ private static String determineFileName(final String sampleId, final String germ return currentFileName; } } - - private static boolean fileExists(final String filename) - { - return Files.exists(new File(filename).toPath()); - } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java index 24e4ecb382..4e9490b939 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java @@ -1,12 +1,15 @@ package com.hartwig.hmftools.compar.virus; +import static com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.VIRUS_INTERPRETER_DIR; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.VIRUS; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.virus.VirusData.FLD_DRIVER_LIKELIHOOD; import static com.hartwig.hmftools.compar.virus.VirusData.FLD_INTEGRATIONS; import static com.hartwig.hmftools.compar.virus.VirusData.FLD_MEAN_COVERAGE; +import java.io.File; import java.io.IOException; import java.util.List; @@ -22,6 +25,8 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; 
+import org.jetbrains.annotations.NotNull; + public class VirusComparer implements ItemComparer { private final ComparConfig mConfig; @@ -68,7 +73,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - AnnotatedVirusFile.read(AnnotatedVirusFile.generateFileName(fileSources.Virus, sampleId)) + AnnotatedVirusFile.read(determineFileName(sampleId, fileSources)) .forEach(v -> comparableItems.add(new VirusData(v))); } catch(IOException e) @@ -78,4 +83,22 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } + + @NotNull + private static String determineFileName(final String sampleId, final FileSources fileSources) + { + // dirty hack to get old virus directory working automatically most of the time + final String currentFileName = AnnotatedVirusFile.generateFileName(fileSources.Virus, sampleId); + final String oldFileName = + AnnotatedVirusFile.generateFileName(fileSources.Virus.replaceAll(VIRUS_INTERPRETER_DIR, "virus_interpreter"), sampleId); + ; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else + { + return currentFileName; + } + } } From 61c8e144ca24e6d49ba2176c82cb757f5faba767 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 6 Sep 2024 16:39:34 +0200 Subject: [PATCH 34/53] Compar: DEV-4061: Support old flagstat and BAM metrics file names --- .../metrics/GermlineBamMetricsComparer.java | 19 ++++++++++++++++++- .../metrics/GermlineFlagstatComparer.java | 19 ++++++++++++++++++- .../compar/metrics/MetricsCommon.java | 3 +++ .../metrics/TumorBamMetricsComparer.java | 19 ++++++++++++++++++- .../compar/metrics/TumorFlagstatComparer.java | 19 ++++++++++++++++++- 5 files changed, 75 insertions(+), 4 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java 
index 57a062d2c3..d4b3daf80f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java @@ -1,12 +1,15 @@ package com.hartwig.hmftools.compar.metrics; +import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_BAM_METRICS; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.GermlineBamMetricsData.FLD_PERCENTAGE_10X; import static com.hartwig.hmftools.compar.metrics.GermlineBamMetricsData.FLD_PERCENTAGE_20X; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_PCT_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_BAM_METRICS_FILE_EXTENSION; import java.io.IOException; import java.util.List; @@ -72,7 +75,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - WGSMetrics metrics = WGSMetricsFile.read(WGSMetricsFile.generateFilename(fileSources.GermlineBamMetrics, germlineSampleId)); + WGSMetrics metrics = WGSMetricsFile.read(determineFilePath(germlineSampleId, fileSources)); comparableItems.add(new GermlineBamMetricsData(metrics)); } catch(IOException e) @@ -82,4 +85,18 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } + + private static String determineFilePath(final String germlineSampleId, final FileSources fileSources) + { + String currentFileName = WGSMetricsFile.generateFilename(fileSources.GermlineBamMetrics, germlineSampleId); 
+ String oldFileName = checkAddDirSeparator(fileSources.GermlineBamMetrics) + germlineSampleId + OLD_BAM_METRICS_FILE_EXTENSION; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else + { + return currentFileName; + } + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java index 1e98522f34..0fe406d96b 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java @@ -1,10 +1,13 @@ package com.hartwig.hmftools.compar.metrics; +import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_FLAGSTAT; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_PCT_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_FLAGSTAT_FILE_EXTENSION; import java.io.IOException; import java.util.List; @@ -68,7 +71,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - Flagstat flagstat = FlagstatFile.read(FlagstatFile.generateFilename(fileSources.GermlineFlagstat, germlineSampleId)); + Flagstat flagstat = FlagstatFile.read(determineFilePath(germlineSampleId, fileSources)); comparableItems.add(new GermlineFlagstatData(flagstat)); } catch(IOException e) @@ -78,4 +81,18 @@ public List loadFromFile(final String sampleId, final String ger } 
return comparableItems; } + + private static String determineFilePath(final String germlineSampleId, final FileSources fileSources) + { + String currentFileName = FlagstatFile.generateFilename(fileSources.GermlineFlagstat, germlineSampleId); + String oldFileName = checkAddDirSeparator(fileSources.GermlineFlagstat) + germlineSampleId + OLD_FLAGSTAT_FILE_EXTENSION; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else + { + return currentFileName; + } + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java index b30fc9e10f..0ba317249e 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java @@ -9,4 +9,7 @@ public class MetricsCommon protected static final double MAPPED_PROPORTION_PCT_THRESHOLD = 0; protected static final double DUPLICATE_PERCENTAGE_ABS_THRESHOLD = 0.05; protected static final double DUPLICATE_PERCENTAGE_PCT_THRESHOLD = 0; + + protected static final String OLD_FLAGSTAT_FILE_EXTENSION = "_dedup.realigned.flagstat"; + protected static final String OLD_BAM_METRICS_FILE_EXTENSION = "_dedup_WGSMetrics.txt"; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java index 37e667fd3a..63207a86f8 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java @@ -1,10 +1,13 @@ package com.hartwig.hmftools.compar.metrics; +import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static 
com.hartwig.hmftools.compar.common.Category.TUMOR_BAM_METRICS; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_PCT_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_BAM_METRICS_FILE_EXTENSION; import static com.hartwig.hmftools.compar.metrics.TumorBamMetricsData.FLD_PERCENTAGE_30X; import static com.hartwig.hmftools.compar.metrics.TumorBamMetricsData.FLD_PERCENTAGE_60X; @@ -72,7 +75,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - WGSMetrics metrics = WGSMetricsFile.read(WGSMetricsFile.generateFilename(fileSources.TumorBamMetrics, sampleId)); + WGSMetrics metrics = WGSMetricsFile.read(determineFilePath(sampleId, fileSources)); comparableItems.add(new TumorBamMetricsData(metrics)); } catch(IOException e) @@ -82,4 +85,18 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } + + private static String determineFilePath(final String sampleId, final FileSources fileSources) + { + String currentFileName = WGSMetricsFile.generateFilename(fileSources.TumorBamMetrics, sampleId); + String oldFileName = checkAddDirSeparator(fileSources.TumorBamMetrics) + sampleId + OLD_BAM_METRICS_FILE_EXTENSION; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else + { + return currentFileName; + } + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java index b56e70782c..83a2c52c3e 100644 --- 
a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java @@ -1,10 +1,13 @@ package com.hartwig.hmftools.compar.metrics; +import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.TUMOR_FLAGSTAT; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_PCT_THRESHOLD; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_FLAGSTAT_FILE_EXTENSION; import java.io.IOException; import java.util.List; @@ -68,7 +71,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - Flagstat flagstat = FlagstatFile.read(FlagstatFile.generateFilename(fileSources.TumorFlagstat, sampleId)); + Flagstat flagstat = FlagstatFile.read(determineFilePath(sampleId, fileSources)); comparableItems.add(new TumorFlagstatData(flagstat)); } catch(IOException e) @@ -78,4 +81,18 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } + + private static String determineFilePath(final String sampleId, final FileSources fileSources) + { + String currentFileName = FlagstatFile.generateFilename(fileSources.TumorFlagstat, sampleId); + String oldFileName = checkAddDirSeparator(fileSources.TumorFlagstat) + sampleId + OLD_FLAGSTAT_FILE_EXTENSION; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else + { + return currentFileName; + } + } } From 93bf1bd0534bbb37f9687723dcc094fa5303da57 Mon Sep 17 
00:00:00 2001 From: David Koetsier Date: Mon, 9 Sep 2024 15:31:43 +0200 Subject: [PATCH 35/53] Compar: DEV-4061: Add SnpGenotype/SnpCheck comparison --- .../hmftools/compar/common/Category.java | 5 +- .../hmftools/compar/common/CommonUtils.java | 4 + .../hmftools/compar/common/FileSources.java | 14 ++- .../snpgenotype/SnpGenotypeComparer.java | 101 +++++++++++++++++ .../compar/snpgenotype/SnpGenotypeData.java | 103 ++++++++++++++++++ 5 files changed, 222 insertions(+), 5 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeComparer.java create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java index 3b7e1b01c9..5bc6b84042 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/Category.java @@ -24,7 +24,8 @@ public enum Category TUMOR_FLAGSTAT, GERMLINE_FLAGSTAT, TUMOR_BAM_METRICS, - GERMLINE_BAM_METRICS; + GERMLINE_BAM_METRICS, + SNP_GENOTYPE; public static final String ALL_CATEGORIES = "ALL"; public static final String LINX_CATEGORIES = "LINX"; @@ -40,6 +41,6 @@ public static List purpleCategories() public static List panelCategories() { - return Lists.newArrayList(PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT, TUMOR_BAM_METRICS); + return Lists.newArrayList(PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT, TUMOR_BAM_METRICS, SNP_GENOTYPE); } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index ab4f4d228d..8841ef37f3 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -46,6 +46,7 @@ import com.hartwig.hmftools.compar.purple.PurityComparer; import com.hartwig.hmftools.compar.mutation.GermlineVariantComparer; import com.hartwig.hmftools.compar.mutation.SomaticVariantComparer; +import com.hartwig.hmftools.compar.snpgenotype.SnpGenotypeComparer; import com.hartwig.hmftools.compar.virus.VirusComparer; public class CommonUtils @@ -148,6 +149,9 @@ private static ItemComparer createComparer(final Category category, final Compar case GERMLINE_BAM_METRICS: return new GermlineBamMetricsComparer(config); + case SNP_GENOTYPE: + return new SnpGenotypeComparer(config); + default: return null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index 8678dd3880..4ecdcbe7df 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -49,6 +49,7 @@ public class FileSources public final String GermlineFlagstat; public final String TumorBamMetrics; public final String GermlineBamMetrics; + public final String SnpGenotype; private static final String SAMPLE_DIR = "sample_dir"; private static final String SOMATIC_VCF = "somatic_vcf"; @@ -57,11 +58,12 @@ public class FileSources private static final String GERMLINE_FLAGSTAT = "germline_flagstat_dir"; private static final String TUMOR_BAM_METRICS = "tumor_bam_metrics_dir"; private static final String GERMLINE_BAM_METRICS = "germline_bam_metrics_dir"; + private static final String SNP_GENOTYPE = "snp_genotype_dir"; public FileSources(final String source, final String linx, final String purple, final String linxGermline, final String cuppa, final String lilac, final String chord, final String peach, final String virus, final String somaticVcf, final String somaticUnfilteredVcf, final String tumorFlagstat, final 
String germlineFlagstat, final String tumorBamMetrics, - final String germlineBamMetrics) + final String germlineBamMetrics, final String snpGenotype) { Source = source; Linx = linx; @@ -78,6 +80,7 @@ public FileSources(final String source, final String linx, final String purple, GermlineFlagstat = germlineFlagstat; TumorBamMetrics = tumorBamMetrics; GermlineBamMetrics = germlineBamMetrics; + SnpGenotype = snpGenotype; } public static FileSources sampleInstance(final FileSources fileSources, final String sampleId, final String germlineSampleId) @@ -97,7 +100,8 @@ public static FileSources sampleInstance(final FileSources fileSources, final St convertWildcardSamplePath(fileSources.TumorFlagstat, sampleId, germlineSampleId), convertWildcardSamplePath(fileSources.GermlineFlagstat, sampleId, germlineSampleId), convertWildcardSamplePath(fileSources.TumorBamMetrics, sampleId, germlineSampleId), - convertWildcardSamplePath(fileSources.GermlineBamMetrics, sampleId, germlineSampleId)); + convertWildcardSamplePath(fileSources.GermlineBamMetrics, sampleId, germlineSampleId), + convertWildcardSamplePath(fileSources.SnpGenotype, sampleId, germlineSampleId)); } public static RefGenomeVersion liftoverSourceGenomeVersion(final String source) @@ -151,6 +155,9 @@ public static void registerConfig(final ConfigBuilder configBuilder) configBuilder.addPath( formSourceConfig(GERMLINE_BAM_METRICS, sourceName), false, formSourceDescription("Germline BAM metrics", sourceName)); + configBuilder.addPath( + formSourceConfig(SNP_GENOTYPE, sourceName), false, + formSourceDescription("SNP genotype", sourceName)); } } @@ -192,9 +199,10 @@ public static FileSources fromConfig(final String sourceName, final ConfigBuilde String germlineFlagstat = getDirectory(configBuilder, sampleDir, "$/flagstat", GERMLINE_FLAGSTAT, sourceName); String tumorBamMetrics = getDirectory(configBuilder, sampleDir, "*/bam_metrics", TUMOR_BAM_METRICS, sourceName); String germlineBamMetrics = getDirectory(configBuilder, 
sampleDir, "$/bam_metrics", GERMLINE_BAM_METRICS, sourceName); + String snpGenotype = getDirectory(configBuilder, sampleDir, "$/snp_genotype", SNP_GENOTYPE, sourceName); return new FileSources(sourceName, linxDir, purpleDir, linxGermlineDir, cuppaDir, lilacDir, chordDir, peachDir, virusDir, - somaticVcf, somaticUnfilteredVcf, tumorFlagstat, germlineFlagstat, tumorBamMetrics, germlineBamMetrics); + somaticVcf, somaticUnfilteredVcf, tumorFlagstat, germlineFlagstat, tumorBamMetrics, germlineBamMetrics, snpGenotype); } private static String getDirectory( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeComparer.java new file mode 100644 index 0000000000..029b1f1cb2 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeComparer.java @@ -0,0 +1,101 @@ +package com.hartwig.hmftools.compar.snpgenotype; + +import static com.hartwig.hmftools.common.utils.file.CommonFields.FLD_ALT; +import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; +import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import static com.hartwig.hmftools.compar.common.Category.SNP_GENOTYPE; +import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; +import static com.hartwig.hmftools.compar.snpgenotype.SnpGenotypeData.FLD_GENOTYPE; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.region.BasePosition; +import com.hartwig.hmftools.common.variant.VcfFileReader; +import com.hartwig.hmftools.compar.ComparConfig; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.ItemComparer; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.CommonUtils; +import com.hartwig.hmftools.compar.common.FileSources; +import 
com.hartwig.hmftools.compar.common.Mismatch; +import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; + +import htsjdk.tribble.CloseableTribbleIterator; +import htsjdk.variant.variantcontext.VariantContext; + +public class SnpGenotypeComparer implements ItemComparer +{ + private final ComparConfig mConfig; + + private static final String FILE_NAME = "snp_genotype_output.vcf"; + + public SnpGenotypeComparer(final ComparConfig config) + { + mConfig = config; + } + + @Override + public Category category() + { + return SNP_GENOTYPE; + } + + @Override + public boolean processSample(final String sampleId, final List mismatches) + { + return CommonUtils.processSample(this, mConfig, sampleId, mismatches); + } + + @Override + public List comparedFieldNames() + { + return Lists.newArrayList(FLD_ALT, FLD_GENOTYPE); + } + + @Override + public List loadFromDb(final String sampleId, final DatabaseAccess dbAccess, final String sourceName) + { + // currently unsupported + return Lists.newArrayList(); + } + + @Override + public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) + { + String vcfFile = checkAddDirSeparator(fileSources.SnpGenotype) + FILE_NAME; + + VcfFileReader vcfFileReader = new VcfFileReader(vcfFile); + + if(!vcfFileReader.fileValid()) + { + CMP_LOGGER.error("failed to read SNP genotype VCF file({})", vcfFile); + return null; + } + + final List items = Lists.newArrayList(); + try (CloseableTribbleIterator variantReader = vcfFileReader.iterator()) + { + for(VariantContext variantContext : variantReader) + { + String chromosome = variantContext.getContig(); + int position = variantContext.getStart(); + String ref = variantContext.getReference().getBaseString(); + String alt = !variantContext.getAlternateAlleles().isEmpty() ? 
variantContext.getAlternateAlleles().get(0).toString() : "."; + String genotype = variantContext.getGenotype(germlineSampleId).getType().name(); + + BasePosition comparisonPosition = determineComparisonGenomePosition( + chromosome, position, fileSources.Source, mConfig.RequiresLiftover, mConfig.LiftoverCache); + + items.add(new SnpGenotypeData(chromosome, position, ref, alt, genotype, comparisonPosition)); + } + } + catch(Exception e) + { + CMP_LOGGER.warn("sample({}) failed to load SNP genotype data: {}", sampleId, e.toString()); + return null; + } + + return items; + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java b/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java new file mode 100644 index 0000000000..dbfd8b0554 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java @@ -0,0 +1,103 @@ +package com.hartwig.hmftools.compar.snpgenotype; + +import static java.lang.String.format; + +import static com.hartwig.hmftools.common.utils.file.CommonFields.FLD_ALT; +import static com.hartwig.hmftools.compar.common.Category.SNP_GENOTYPE; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; +import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.region.BasePosition; +import com.hartwig.hmftools.compar.ComparableItem; +import com.hartwig.hmftools.compar.common.Category; +import com.hartwig.hmftools.compar.common.DiffThresholds; +import com.hartwig.hmftools.compar.common.MatchLevel; +import com.hartwig.hmftools.compar.common.Mismatch; + +public class SnpGenotypeData implements ComparableItem +{ + public final String Chromosome; + public final int Position; + public final String Ref; + public final String Alt; + public final String Genotype; + public final BasePosition mComparisonPosition; + + protected static 
final String FLD_GENOTYPE = "Genotype"; + + public SnpGenotypeData(final String chromosome, final int position, final String ref, final String alt, final String genotype, + final BasePosition comparisonPosition) + { + Chromosome = chromosome; + Position = position; + Ref = ref; + Alt = alt; + Genotype = genotype; + mComparisonPosition = comparisonPosition; + } + + @Override + public Category category() + { + return SNP_GENOTYPE; + } + + @Override + public String key() + { + if(mComparisonPosition.Position != Position) + { + return String.format("%s:%d %s liftover(%s)", + Chromosome, Position, Ref, mComparisonPosition); + } + else + { + return String.format("%s:%d %s", Chromosome, Position, Ref); + } + } + + @Override + public List displayValues() + { + List values = Lists.newArrayList(); + values.add(format("%s", Alt)); + values.add(format("%s", Genotype)); + return values; + } + + @Override + public boolean reportable() + { + return true; + } + + @Override + public boolean matches(final ComparableItem other) + { + final SnpGenotypeData otherVar = (SnpGenotypeData) other; + + if(!mComparisonPosition.Chromosome.equals(otherVar.Chromosome) || mComparisonPosition.Position != otherVar.Position) + return false; + + if(!Ref.equals(otherVar.Ref)) + return false; + + return true; + } + + @Override + public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) + { + final SnpGenotypeData otherData = (SnpGenotypeData) other; + + final List diffs = Lists.newArrayList(); + + checkDiff(diffs, FLD_ALT, Alt, otherData.Alt); + checkDiff(diffs, FLD_GENOTYPE, Genotype, otherData.Genotype); + + return !diffs.isEmpty() ? 
new Mismatch(this, other, VALUE, diffs) : null; + } +} From 181d4d391d6e183676a301066aaadc82338f55d1 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 13 Sep 2024 11:29:03 +0200 Subject: [PATCH 36/53] Compar: DEV-4061: Remove chromosome and band from detailed display --- .../com/hartwig/hmftools/compar/driver/DriverComparer.java | 2 +- .../java/com/hartwig/hmftools/compar/driver/DriverData.java | 2 -- .../com/hartwig/hmftools/compar/linx/DisruptionComparer.java | 2 +- .../com/hartwig/hmftools/compar/linx/DisruptionData.java | 1 - .../hmftools/compar/purple/GermlineDeletionComparer.java | 2 +- .../hartwig/hmftools/compar/purple/GermlineDeletionData.java | 5 ----- 6 files changed, 3 insertions(+), 11 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java index d01b28c41a..e6789d0c8b 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java @@ -61,7 +61,7 @@ public boolean processSample(final String sampleId, final List mismatc @Override public List comparedFieldNames() { - return Lists.newArrayList(FLD_LIKE_METHOD, FLD_LIKELIHOOD, FLD_MIN_COPY_NUMBER, FLD_MAX_COPY_NUMBER, FLD_CHROMOSOME, FLD_CHROMOSOME_BAND); + return Lists.newArrayList(FLD_LIKE_METHOD, FLD_LIKELIHOOD, FLD_MIN_COPY_NUMBER, FLD_MAX_COPY_NUMBER); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java index f8d680f207..4547c95862 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java @@ -60,8 +60,6 @@ public List displayValues() values.add(format("%.2f", DriverCatalog.driverLikelihood())); values.add(format("%.2f", 
DriverCatalog.minCopyNumber())); values.add(format("%.2f", DriverCatalog.maxCopyNumber())); - values.add(format("%s", chromosomeDisplay)); - values.add(format("%s", DriverCatalog.chromosomeBand())); return values; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java index 7a6d8855f0..690996d4a5 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java @@ -66,7 +66,7 @@ public boolean processSample(final String sampleId, final List mismatc public List comparedFieldNames() { return Lists.newArrayList(FLD_REPORTED, FLD_REGION_TYPE, FLD_CODING_CONTEXT, FLD_GENE_ORIENT, FLD_NEXT_SPLICE, - FLD_JUNCTION_COPY_NUMBER, FLD_UNDISRUPTED_COPY_NUMBER, FLD_CHROMOSOME_BAND); + FLD_JUNCTION_COPY_NUMBER, FLD_UNDISRUPTED_COPY_NUMBER); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java index 1ec27f0dd5..4e4ce35a81 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java @@ -75,7 +75,6 @@ public List displayValues() values.add(String.format("%d", Breakend.nextSpliceExonRank())); values.add(String.format("%.2f", Breakend.junctionCopyNumber())); values.add(String.format("%.2f", Breakend.undisruptedCopyNumber())); - values.add(String.format("%s", Breakend.chrBand())); return values; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java index 238d09e1c2..f183205e8d 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java @@ -55,7 +55,7 @@ public boolean processSample(final String sampleId, final List mismatc public List comparedFieldNames() { return Lists.newArrayList( - FLD_REPORTED, FLD_GERMLINE_STATUS, FLD_TUMOR_STATUS, FLD_GERMLINE_CN, FLD_TUMOR_CN, FLD_CHROMOSOME, FLD_CHROMOSOME_BAND); + FLD_REPORTED, FLD_GERMLINE_STATUS, FLD_TUMOR_STATUS, FLD_GERMLINE_CN, FLD_TUMOR_CN); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java index ff190064ef..05cee236e1 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java @@ -48,17 +48,12 @@ public String key() @Override public List displayValues() { - String chromosomeDisplay = Deletion.Chromosome.equals(mComparisonChromosome) - ? 
Deletion.Chromosome - : format("%s compared(%s)", Deletion.Chromosome, mComparisonChromosome); List values = Lists.newArrayList(); values.add(format("%s", Deletion.Reported)); values.add(format("%s", Deletion.NormalStatus)); values.add(format("%s", Deletion.TumorStatus)); values.add(format("%s", Deletion.GermlineCopyNumber)); values.add(format("%s", Deletion.TumorCopyNumber)); - values.add(format("%s", chromosomeDisplay)); - values.add(format("%s", Deletion.ChromosomeBand)); return values; } From 5fb8cd7586f3e15c770fbe2ddd852c74d48d04f9 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 13 Sep 2024 13:58:56 +0200 Subject: [PATCH 37/53] Compar: DEV-4061: Fix incorrect field comparison --- .../java/com/hartwig/hmftools/compar/linx/DisruptionData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java index 4e4ce35a81..0e92058b13 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java @@ -122,7 +122,7 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL checkDiff(diffs, FLD_GENE_ORIENT, Breakend.geneOrientation(), otherBreakend.Breakend.geneOrientation()); checkDiff(diffs, FLD_NEXT_SPLICE, Breakend.nextSpliceExonRank(), otherBreakend.Breakend.nextSpliceExonRank()); checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, Breakend.junctionCopyNumber(), otherBreakend.Breakend.junctionCopyNumber(), thresholds); - checkDiff(diffs, FLD_UNDISRUPTED_COPY_NUMBER, Breakend.junctionCopyNumber(), otherBreakend.Breakend.junctionCopyNumber(), thresholds); + checkDiff(diffs, FLD_UNDISRUPTED_COPY_NUMBER, Breakend.undisruptedCopyNumber(), otherBreakend.Breakend.undisruptedCopyNumber(), thresholds); checkDiff(diffs, FLD_CHROMOSOME_BAND, Breakend.chrBand(), otherBreakend.Breakend.chrBand()); 
return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; From 28e39b753b0fdda92500d88b5d7cceea2cfb2d6a Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 13 Sep 2024 14:43:18 +0200 Subject: [PATCH 38/53] Compar: DEV-4061: Change germline to a breakend level comparison Add breakend specific fields. Move common fields and functions between LINX categories to a LINX common class. Also move chromosome band field to CommonUtils. --- .../hmftools/compar/common/CommonUtils.java | 1 + .../compar/driver/DriverComparer.java | 2 - .../hmftools/compar/driver/DriverData.java | 7 +-- .../compar/linx/DisruptionComparer.java | 14 ++--- .../hmftools/compar/linx/DisruptionData.java | 20 +----- .../hmftools/compar/linx/FusionComparer.java | 2 +- .../hmftools/compar/linx/FusionData.java | 2 +- .../compar/linx/GermlineSvComparer.java | 62 +++++-------------- .../hmftools/compar/linx/GermlineSvData.java | 31 +++++----- .../hmftools/compar/linx/LinxCommon.java | 46 ++++++++++++++ .../purple/GermlineDeletionComparer.java | 2 - .../compar/purple/GermlineDeletionData.java | 4 +- 12 files changed, 91 insertions(+), 102 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 8841ef37f3..0ebc5b2951 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -55,6 +55,7 @@ public class CommonUtils public static final String FLD_REPORTED = "Reported"; public static final String FLD_QUAL = "Qual"; + public static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; public static List buildComparers(final ComparConfig config) { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java
b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java index e6789d0c8b..87f56abd01 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java @@ -5,8 +5,6 @@ import static com.hartwig.hmftools.compar.common.Category.DRIVER; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonChromosome; -import static com.hartwig.hmftools.compar.driver.DriverData.FLD_CHROMOSOME; -import static com.hartwig.hmftools.compar.driver.DriverData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.driver.DriverData.FLD_LIKELIHOOD; import static com.hartwig.hmftools.compar.driver.DriverData.FLD_LIKE_METHOD; import static com.hartwig.hmftools.compar.driver.DriverData.FLD_MAX_COPY_NUMBER; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java index 4547c95862..388030bba5 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverData.java @@ -2,7 +2,9 @@ import static java.lang.String.format; +import static com.hartwig.hmftools.common.utils.file.CommonFields.FLD_CHROMOSOME; import static com.hartwig.hmftools.compar.common.Category.DRIVER; +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; @@ -25,8 +27,6 @@ public class DriverData implements ComparableItem protected static final String FLD_LIKELIHOOD = "Likelihood"; protected static final String FLD_LIKE_METHOD = "LikelihoodMethod"; - protected static final String FLD_CHROMOSOME = "Chromosome"; - protected static final String 
FLD_CHROMOSOME_BAND = "ChromosomeBand"; protected static final String FLD_MIN_COPY_NUMBER = "MinCopyNumber"; protected static final String FLD_MAX_COPY_NUMBER = "MaxCopyNumber"; @@ -52,9 +52,6 @@ public String key() @Override public List displayValues() { - String chromosomeDisplay = DriverCatalog.chromosome().equals(mComparisonChromosome) - ? DriverCatalog.chromosome() - : format("%s compared(%s)", DriverCatalog.chromosome(), mComparisonChromosome); List values = Lists.newArrayList(); values.add(format("%s", DriverCatalog.likelihoodMethod())); values.add(format("%.2f", DriverCatalog.driverLikelihood())); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java index 690996d4a5..51355599c7 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionComparer.java @@ -2,16 +2,12 @@ import static com.hartwig.hmftools.common.sv.StructuralVariantData.convertSvData; import static com.hartwig.hmftools.compar.common.Category.DISRUPTION; -import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; -import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_CODING_CONTEXT; import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_GENE_ORIENT; -import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_NEXT_SPLICE; -import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_REGION_TYPE; -import static com.hartwig.hmftools.compar.linx.DisruptionData.FLD_UNDISRUPTED_COPY_NUMBER; -import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; +import static 
com.hartwig.hmftools.compar.linx.LinxCommon.FLD_JUNCTION_COPY_NUMBER; +import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_UNDISRUPTED_COPY_NUMBER; import java.io.IOException; import java.util.List; @@ -65,8 +61,10 @@ public boolean processSample(final String sampleId, final List mismatc @Override public List comparedFieldNames() { - return Lists.newArrayList(FLD_REPORTED, FLD_REGION_TYPE, FLD_CODING_CONTEXT, FLD_GENE_ORIENT, FLD_NEXT_SPLICE, - FLD_JUNCTION_COPY_NUMBER, FLD_UNDISRUPTED_COPY_NUMBER); + List fieldNames = LinxCommon.comparedFieldNamesBreakends(); + fieldNames.add(FLD_CODING_CONTEXT); + fieldNames.add(FLD_GENE_ORIENT); + return fieldNames; } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java index 0e92058b13..4fa236d1a3 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/DisruptionData.java @@ -1,7 +1,6 @@ package com.hartwig.hmftools.compar.linx; import static com.hartwig.hmftools.compar.common.Category.DISRUPTION; -import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; @@ -25,13 +24,8 @@ public class DisruptionData implements ComparableItem private final BasePosition mComparisonPositionEnd; private final boolean mCheckTranscript; - protected static final String FLD_REGION_TYPE = "RegionType"; protected static final String FLD_CODING_CONTEXT = "CodingContext"; protected static final String FLD_GENE_ORIENT = "GeneOrientation"; - protected static final String FLD_NEXT_SPLICE = "NextSpliceExonRank"; - protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; - protected static final String FLD_UNDISRUPTED_COPY_NUMBER = "UndisruptedCopyNumber"; - 
protected static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; public DisruptionData( final StructuralVariantData svData, final LinxBreakend breakend, final BasePosition comparisonPositionStart, @@ -67,14 +61,9 @@ public String key() @Override public List displayValues() { - List values = Lists.newArrayList(); - values.add(String.format("%s", Breakend.reportedDisruption())); - values.add(String.format("%s", Breakend.regionType())); + List values = LinxCommon.displayValuesBreakend(Breakend); values.add(String.format("%s", Breakend.codingType())); values.add(String.format("%s", Breakend.geneOrientation())); - values.add(String.format("%d", Breakend.nextSpliceExonRank())); - values.add(String.format("%.2f", Breakend.junctionCopyNumber())); - values.add(String.format("%.2f", Breakend.undisruptedCopyNumber())); return values; } @@ -116,14 +105,9 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL final List diffs = Lists.newArrayList(); - checkDiff(diffs, FLD_REGION_TYPE, Breakend.regionType().toString(), otherBreakend.Breakend.regionType().toString()); + LinxCommon.checkDiffsBreakends(diffs, Breakend, otherBreakend.Breakend, thresholds); checkDiff(diffs, FLD_CODING_CONTEXT, Breakend.codingType().toString(), otherBreakend.Breakend.codingType().toString()); - checkDiff(diffs, FLD_REPORTED, reportable(), otherBreakend.reportable()); checkDiff(diffs, FLD_GENE_ORIENT, Breakend.geneOrientation(), otherBreakend.Breakend.geneOrientation()); - checkDiff(diffs, FLD_NEXT_SPLICE, Breakend.nextSpliceExonRank(), otherBreakend.Breakend.nextSpliceExonRank()); - checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, Breakend.junctionCopyNumber(), otherBreakend.Breakend.junctionCopyNumber(), thresholds); - checkDiff(diffs, FLD_UNDISRUPTED_COPY_NUMBER, Breakend.undisruptedCopyNumber(), otherBreakend.Breakend.undisruptedCopyNumber(), thresholds); - checkDiff(diffs, FLD_CHROMOSOME_BAND, Breakend.chrBand(), otherBreakend.Breakend.chrBand()); return 
!diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java index 3db0b7b025..be60f80eaa 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java @@ -9,12 +9,12 @@ import static com.hartwig.hmftools.compar.linx.FusionData.FLD_DOMAINS_LOST; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_EXON_DOWN; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_EXON_UP; -import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_LIKELIHOOD; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_PHASED; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_REPORTED_TYPE; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_TRANSCRIPT_DOWN; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_TRANSCRIPT_UP; +import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_JUNCTION_COPY_NUMBER; import java.io.IOException; import java.util.List; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java index dbad164a29..be6e6d4a62 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java @@ -4,6 +4,7 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; +import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_JUNCTION_COPY_NUMBER; import java.util.List; @@ -31,7 +32,6 @@ public class FusionData 
implements ComparableItem protected static final String FLD_CHAIN_TERM = "chainTerminated"; protected static final String FLD_DOMAINS_KEPT = "domainsKept"; protected static final String FLD_DOMAINS_LOST = "domainsLost"; - protected static final String FLD_JUNCTION_COPY_NUMBER = "junctionCopyNumber"; public FusionData(final LinxFusion fusion, final String geneMappedName) { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java index a3114ffa27..3c0dda0dcc 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvComparer.java @@ -1,20 +1,15 @@ package com.hartwig.hmftools.compar.linx; -import static com.hartwig.hmftools.common.utils.file.FileDelimiters.TSV_DELIM; -import static com.hartwig.hmftools.common.utils.file.FileReaderUtils.createFieldsIndexMap; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_SV; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_QUAL; -import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonGenomePosition; -import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import static com.hartwig.hmftools.compar.linx.GermlineSvData.FLD_GERMLINE_FRAGS; +import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_JUNCTION_COPY_NUMBER; +import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_UNDISRUPTED_COPY_NUMBER; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; import java.util.List; -import java.util.Map; import java.util.stream.Collectors; import com.google.common.collect.Lists; @@ -49,6 +44,7 @@ public void registerThresholds(final DiffThresholds thresholds) 
thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_GERMLINE_FRAGS, 5, 0.1); thresholds.addFieldThreshold(FLD_JUNCTION_COPY_NUMBER, 0.5, 0.2); + thresholds.addFieldThreshold(FLD_UNDISRUPTED_COPY_NUMBER, 0.5, 0.2); } @Override @@ -60,7 +56,10 @@ public boolean processSample(final String sampleId, final List mismatc @Override public List comparedFieldNames() { - return Lists.newArrayList(FLD_REPORTED, FLD_GERMLINE_FRAGS, FLD_QUAL, FLD_JUNCTION_COPY_NUMBER); + List fieldNames = LinxCommon.comparedFieldNamesBreakends(); + fieldNames.add(FLD_GERMLINE_FRAGS); + fieldNames.add(FLD_QUAL); + return fieldNames; } @Override @@ -83,55 +82,26 @@ public List loadFromFile(final String sampleId, final String ger String germlineBreakendFile = LinxBreakend.generateFilename(fileSources.LinxGermline, sampleId, true); - // germline breakend file was introduced in v5.32, for old versions extract reported from the SV file - if(Files.exists(Paths.get(germlineBreakendFile))) - { - List germlineBreakends = LinxBreakend.read(germlineBreakendFile).stream() - .filter(x -> x.reportedDisruption()).collect(Collectors.toList()); - - CMP_LOGGER.debug("sample({}) loaded {} germline SVs", sampleId, germlineSvs.size()); + List germlineBreakends = LinxBreakend.read(germlineBreakendFile); + CMP_LOGGER.debug("sample({}) loaded {} germline breakends", sampleId, germlineBreakends.size()); - for(LinxGermlineSv germlineSv : germlineSvs) + for(LinxGermlineSv germlineSv : germlineSvs) + { + List matchingBreakends = + germlineBreakends.stream().filter(x -> x.svId() == germlineSv.SvId).collect(Collectors.toList()); + for(LinxBreakend breakend : matchingBreakends) { - boolean isReported = germlineBreakends.stream().anyMatch(x -> x.svId() == germlineSv.SvId); + germlineBreakends.remove(breakend); BasePosition comparisonStartPosition = determineComparisonGenomePosition( germlineSv.ChromosomeStart, germlineSv.PositionStart, fileSources.Source, mConfig.RequiresLiftover, 
mConfig.LiftoverCache); - BasePosition comparisonEndPosition = determineComparisonGenomePosition( germlineSv.ChromosomeEnd, germlineSv.PositionEnd, fileSources.Source, mConfig.RequiresLiftover, mConfig.LiftoverCache); - items.add(new GermlineSvData(germlineSv, isReported, comparisonStartPosition, comparisonEndPosition)); + items.add(new GermlineSvData(germlineSv, breakend, comparisonStartPosition, comparisonEndPosition)); } } - else - { - List rawGermlineSvs = Files.readAllLines(Paths.get(germlineSvFile)); - Map fieldsIndexMap = createFieldsIndexMap(rawGermlineSvs.get(0), TSV_DELIM); - Integer reportedIndex = fieldsIndexMap.get("reported"); - - if(reportedIndex == null) - return null; - - rawGermlineSvs.remove(0); - - for(int i = 0; i < germlineSvs.size(); ++i) - { - LinxGermlineSv germlineSv = germlineSvs.get(i); - String[] values = rawGermlineSvs.get(i).split(TSV_DELIM, -1); - boolean isReported = Boolean.parseBoolean(values[reportedIndex]); - - BasePosition comparisonPositionStart = determineComparisonGenomePosition( - germlineSv.ChromosomeStart, germlineSv.PositionStart, fileSources.Source, mConfig.RequiresLiftover, mConfig.LiftoverCache); - - BasePosition comparisonPositionEnd = determineComparisonGenomePosition( - germlineSv.ChromosomeEnd, germlineSv.PositionEnd, fileSources.Source, mConfig.RequiresLiftover, mConfig.LiftoverCache); - - items.add(new GermlineSvData(germlineSv, isReported, comparisonPositionStart, comparisonPositionEnd)); - } - } - } catch(IOException e) { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java index 64b32a467e..23521a1bc6 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/GermlineSvData.java @@ -2,13 +2,13 @@ import static com.hartwig.hmftools.compar.common.Category.GERMLINE_SV; import static 
com.hartwig.hmftools.compar.common.CommonUtils.FLD_QUAL; -import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; import java.util.List; import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.linx.LinxBreakend; import com.hartwig.hmftools.common.linx.LinxGermlineSv; import com.hartwig.hmftools.common.region.BasePosition; import com.hartwig.hmftools.compar.common.Category; @@ -20,19 +20,18 @@ public class GermlineSvData implements ComparableItem { public final LinxGermlineSv SvData; - private final boolean mIsReported; + private final LinxBreakend Breakend; private final BasePosition mComparisonStartPosition; private final BasePosition mComparisonEndPosition; protected static final String FLD_GERMLINE_FRAGS = "GermlineFragments"; - protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; public GermlineSvData( - final LinxGermlineSv svData, boolean isReported, final BasePosition comparisonStartPosition, + final LinxGermlineSv svData, final LinxBreakend breakend, final BasePosition comparisonStartPosition, final BasePosition comparisonEndPosition) { SvData = svData; - mIsReported = isReported; + Breakend = breakend; mComparisonStartPosition = comparisonStartPosition; mComparisonEndPosition = comparisonEndPosition; } @@ -47,32 +46,30 @@ public String key() { return String.format("%s:%s %s:%d:%d-%s:%d%d %s liftover(%s-%s)", SvData.EventId, SvData.Type, SvData.ChromosomeStart, SvData.PositionStart, SvData.OrientStart, - SvData.ChromosomeEnd, SvData.PositionEnd, SvData.OrientEnd, SvData.GeneName, + SvData.ChromosomeEnd, SvData.PositionEnd, SvData.OrientEnd, Breakend.gene(), mComparisonStartPosition, mComparisonEndPosition); } else { return String.format("%s:%s %s:%d:%d-%s:%d%d %s", SvData.EventId, SvData.Type, SvData.ChromosomeStart, SvData.PositionStart, 
SvData.OrientStart, - SvData.ChromosomeEnd, SvData.PositionEnd, SvData.OrientEnd, SvData.GeneName); + SvData.ChromosomeEnd, SvData.PositionEnd, SvData.OrientEnd, Breakend.gene()); } } @Override public List displayValues() { - List values = Lists.newArrayList(); - values.add(String.format("%s", mIsReported)); + List values = LinxCommon.displayValuesBreakend(Breakend); values.add(String.format("%d", SvData.GermlineFragments)); values.add(String.format("%d", (int) SvData.QualScore)); - values.add(String.format("%.2f", SvData.JunctionCopyNumber)); return values; } @Override public boolean reportable() { - return mIsReported; + return Breakend.reportedDisruption(); } @Override @@ -94,7 +91,7 @@ public boolean matches(final ComparableItem other) if(otherSv.SvData.OrientStart != SvData.OrientStart || otherSv.SvData.OrientEnd != SvData.OrientEnd) return false; - if(!otherSv.SvData.GeneName.equals(SvData.GeneName)) + if(!otherSv.Breakend.gene().equals(Breakend.gene())) return false; return true; @@ -103,14 +100,14 @@ public boolean matches(final ComparableItem other) @Override public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchLevel, final DiffThresholds thresholds) { - final GermlineSvData otherSv = (GermlineSvData)other; + final GermlineSvData otherBreakend = (GermlineSvData)other; final List diffs = Lists.newArrayList(); - checkDiff(diffs, FLD_REPORTED, mIsReported, otherSv.mIsReported); - checkDiff(diffs, FLD_GERMLINE_FRAGS, SvData.GermlineFragments, otherSv.SvData.GermlineFragments, thresholds); - checkDiff(diffs, FLD_QUAL, (int) SvData.QualScore, (int) otherSv.SvData.QualScore, thresholds); - checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, SvData.JunctionCopyNumber, otherSv.SvData.JunctionCopyNumber, thresholds); + LinxCommon.checkDiffsBreakends(diffs, Breakend, otherBreakend.Breakend, thresholds); + + checkDiff(diffs, FLD_GERMLINE_FRAGS, SvData.GermlineFragments, otherBreakend.SvData.GermlineFragments, thresholds); + checkDiff(diffs, FLD_QUAL, 
(int) SvData.QualScore, (int) otherBreakend.SvData.QualScore, thresholds); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java new file mode 100644 index 0000000000..e346796b71 --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java @@ -0,0 +1,46 @@ +package com.hartwig.hmftools.compar.linx; + +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_CHROMOSOME_BAND; +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; +import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; + +import java.util.List; + +import com.google.common.collect.Lists; +import com.hartwig.hmftools.common.linx.LinxBreakend; +import com.hartwig.hmftools.compar.common.DiffThresholds; + +public class LinxCommon +{ + protected static final String FLD_REGION_TYPE = "RegionType"; + protected static final String FLD_NEXT_SPLICE = "NextSpliceExonRank"; + protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; + protected static final String FLD_UNDISRUPTED_COPY_NUMBER = "UndisruptedCopyNumber"; + + protected static List comparedFieldNamesBreakends() + { + return Lists.newArrayList( + FLD_REPORTED, FLD_REGION_TYPE, FLD_NEXT_SPLICE, FLD_JUNCTION_COPY_NUMBER, FLD_UNDISRUPTED_COPY_NUMBER); + } + + protected static List displayValuesBreakend(LinxBreakend breakend) + { + List values = Lists.newArrayList(); + values.add(String.format("%s", breakend.reportedDisruption())); + values.add(String.format("%s", breakend.regionType())); + values.add(String.format("%d", breakend.nextSpliceExonRank())); + values.add(String.format("%.2f", breakend.junctionCopyNumber())); + values.add(String.format("%.2f", breakend.undisruptedCopyNumber())); + return values; + } + + protected static void checkDiffsBreakends(List diffs, final LinxBreakend 
breakend, final LinxBreakend otherBreakend, final DiffThresholds thresholds) + { + checkDiff(diffs, FLD_REPORTED, breakend.reportedDisruption(), otherBreakend.reportedDisruption()); + checkDiff(diffs, FLD_REGION_TYPE, breakend.regionType().toString(), otherBreakend.regionType().toString()); + checkDiff(diffs, FLD_NEXT_SPLICE, breakend.nextSpliceExonRank(), otherBreakend.nextSpliceExonRank()); + checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, breakend.junctionCopyNumber(), otherBreakend.junctionCopyNumber(), thresholds); + checkDiff(diffs, FLD_UNDISRUPTED_COPY_NUMBER, breakend.undisruptedCopyNumber(), otherBreakend.undisruptedCopyNumber(), thresholds); + checkDiff(diffs, FLD_CHROMOSOME_BAND, breakend.chrBand(), otherBreakend.chrBand()); + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java index f183205e8d..361b81d14c 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionComparer.java @@ -4,8 +4,6 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.CommonUtils.determineComparisonChromosome; -import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_CHROMOSOME; -import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_GERMLINE_CN; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_GERMLINE_STATUS; import static com.hartwig.hmftools.compar.purple.GermlineDeletionData.FLD_TUMOR_CN; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java index 05cee236e1..045b813783 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GermlineDeletionData.java @@ -2,7 +2,9 @@ import static java.lang.String.format; +import static com.hartwig.hmftools.common.utils.file.CommonFields.FLD_CHROMOSOME; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_DELETION; +import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_CHROMOSOME_BAND; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; @@ -26,8 +28,6 @@ public class GermlineDeletionData implements ComparableItem protected static final String FLD_TUMOR_STATUS = "TumorStatus"; protected static final String FLD_GERMLINE_CN = "GermlineCopyNumber"; protected static final String FLD_TUMOR_CN = "TumorCopyNumber"; - protected static final String FLD_CHROMOSOME = "Chromosome"; - protected static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; public GermlineDeletionData(final GermlineDeletion germlineDeletion, final String comparisonChromosome) { From afa9cd0835db91d11e25bf10ee972bd537fc9907 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 13 Sep 2024 15:12:02 +0200 Subject: [PATCH 39/53] Compar: DEV-4061: Minor cleanups --- .../com/hartwig/hmftools/compar/Compar.java | 5 ++-- .../hartwig/hmftools/compar/ComparTask.java | 2 -- .../hmftools/compar/chord/ChordComparer.java | 2 +- .../hmftools/compar/common/CommonUtils.java | 2 -- .../compar/common/DiffThresholds.java | 2 -- .../hmftools/compar/common/FileSources.java | 8 ----- .../hmftools/compar/cuppa/CuppaComparer.java | 1 - .../hmftools/compar/cuppa/CuppaData.java | 2 -- .../hmftools/compar/lilac/LilacData.java | 2 +- 
.../hmftools/compar/linx/LinxCommon.java | 2 +- .../mutation/GermlineVariantComparer.java | 3 +- .../compar/mutation/VariantCommon.java | 29 +++++++++---------- .../hmftools/compar/peach/PeachComparer.java | 2 -- .../compar/purple/GeneCopyNumberComparer.java | 5 ++-- .../hmftools/compar/virus/VirusComparer.java | 3 +- 15 files changed, 23 insertions(+), 47 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/Compar.java b/compar/src/main/java/com/hartwig/hmftools/compar/Compar.java index 41b8e7ed90..35bf81f1d4 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/Compar.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/Compar.java @@ -1,14 +1,13 @@ package com.hartwig.hmftools.compar; import static java.lang.Math.min; -import static java.lang.String.format; import static com.hartwig.hmftools.common.utils.PerformanceCounter.runTimeMinsStr; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; -import java.util.stream.Collectors; import com.google.common.collect.Lists; import com.hartwig.hmftools.common.utils.TaskExecutor; @@ -73,7 +72,7 @@ public void run() ++taskIndex; } - final List callableList = sampleTasks.stream().collect(Collectors.toList()); + final List callableList = new ArrayList<>(sampleTasks); TaskExecutor.executeTasks(callableList, mConfig.Threads); } else diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java index 60f4efa7cd..79c1d994bc 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparTask.java @@ -1,7 +1,5 @@ package com.hartwig.hmftools.compar; -import static java.lang.String.format; - import static com.hartwig.hmftools.common.drivercatalog.DriverType.AMP; import static com.hartwig.hmftools.common.drivercatalog.DriverType.DEL; 
import static com.hartwig.hmftools.common.drivercatalog.DriverType.PARTIAL_AMP; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java index 28ba174573..6e500504c7 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/chord/ChordComparer.java @@ -62,7 +62,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce ChordData chordData = dbAccess.readChord(sampleId); final List comparableItems = Lists.newArrayList(); comparableItems.add(new ChordComparData(chordData)); - return Lists.newArrayList(); + return comparableItems; } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java index 0ebc5b2951..5a2566c6d1 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/CommonUtils.java @@ -51,8 +51,6 @@ public class CommonUtils { - public static final String SUB_ITEM_DELIM = "="; - public static final String FLD_REPORTED = "Reported"; public static final String FLD_QUAL = "Qual"; public static final String FLD_CHROMOSOME_BAND = "ChromosomeBand"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/common/DiffThresholds.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/DiffThresholds.java index 9303a53abf..2f5d19ca97 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/DiffThresholds.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/DiffThresholds.java @@ -6,8 +6,6 @@ import java.util.Map; import com.google.common.collect.Maps; -import com.hartwig.hmftools.compar.common.ThresholdData; -import com.hartwig.hmftools.compar.common.ThresholdType; public class DiffThresholds { diff --git 
a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java index 4ecdcbe7df..0d89e7a255 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/common/FileSources.java @@ -2,8 +2,6 @@ import static java.lang.String.format; -import static com.hartwig.hmftools.common.genome.refgenome.RefGenomeVersion.V37; -import static com.hartwig.hmftools.common.genome.refgenome.RefGenomeVersion.V38; import static com.hartwig.hmftools.common.utils.config.CommonConfig.CHORD_DIR_CFG; import static com.hartwig.hmftools.common.utils.config.CommonConfig.CHORD_DIR_DESC; import static com.hartwig.hmftools.common.utils.config.CommonConfig.CUPPA_DIR_CFG; @@ -28,7 +26,6 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.genome.refgenome.RefGenomeVersion; import com.hartwig.hmftools.common.pipeline.PipelineToolDirectories; import com.hartwig.hmftools.common.utils.config.ConfigBuilder; @@ -104,11 +101,6 @@ public static FileSources sampleInstance(final FileSources fileSources, final St convertWildcardSamplePath(fileSources.SnpGenotype, sampleId, germlineSampleId)); } - public static RefGenomeVersion liftoverSourceGenomeVersion(final String source) - { - return source.equals(REF_SOURCE) ? 
V37 : V38; - } - private static void addPathConfig(final ConfigBuilder configBuilder, final String toolDir, final String toolDesc, final String sourceName) { configBuilder.addPrefixedPath( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java index d9e8775d8b..befc396457 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaComparer.java @@ -3,7 +3,6 @@ import static com.hartwig.hmftools.common.cuppa.DataType.PROB; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.CUPPA; -import static com.hartwig.hmftools.compar.cuppa.CuppaData.FLD_CLASSIFIER_NAME; import static com.hartwig.hmftools.compar.cuppa.CuppaData.FLD_PROBABILITY; import static com.hartwig.hmftools.compar.cuppa.CuppaData.FLD_TOP_CANCER_TYPE; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaData.java b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaData.java index 36dc4c9c87..8d34ac5a1f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/cuppa/CuppaData.java @@ -6,7 +6,6 @@ import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; -import java.util.ArrayList; import java.util.List; import com.google.common.collect.Lists; @@ -21,7 +20,6 @@ public class CuppaData implements ComparableItem { public final CuppaPredictionEntry PredictionEntry; - protected static final String FLD_CLASSIFIER_NAME = "classifier_name"; protected static final String FLD_TOP_CANCER_TYPE = "top_cancer_type"; protected static final String FLD_PROBABILITY = "probability"; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java 
b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java index 45990001d6..555eb400cd 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/lilac/LilacData.java @@ -112,7 +112,7 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL StringJoiner newDiffsSj = new StringJoiner(ALLELE_DELIM); newDiffs.forEach(x -> newDiffsSj.add(x.allele())); - diffs.add(String.format("%s(%s/%s)", FLD_ALLELES, origDiffsSj, newDiffsSj.toString())); + diffs.add(String.format("%s(%s/%s)", FLD_ALLELES, origDiffsSj, newDiffsSj)); } // matches alleles in order when an allele is homozygous diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java index e346796b71..6a89bb949a 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java @@ -33,7 +33,7 @@ protected static List displayValuesBreakend(LinxBreakend breakend) values.add(String.format("%.2f", breakend.undisruptedCopyNumber())); return values; } - + protected static void checkDiffsBreakends(List diffs, final LinxBreakend breakend, final LinxBreakend otherBreakend, final DiffThresholds thresholds) { checkDiff(diffs, FLD_REPORTED, breakend.reportedDisruption(), otherBreakend.reportedDisruption()); diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 8603f3f040..7605ff1f69 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -64,8 +64,7 @@ public void registerThresholds(final DiffThresholds thresholds) @Override public List 
comparedFieldNames() { - List fieldNames = VariantCommon.comparedFieldNames(); - return fieldNames; + return VariantCommon.comparedFieldNames(); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java index f3e7392dd0..9f06abbb5f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/VariantCommon.java @@ -2,7 +2,6 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_QUAL; import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; -import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import java.util.List; @@ -10,21 +9,21 @@ public final class VariantCommon { - protected static final String FLD_HOTSPOT = "Hotspot"; - protected static final String FLD_TIER = "Tier"; - protected static final String FLD_BIALLELIC = "Biallelic"; - protected static final String FLD_GENE = "Gene"; - protected static final String FLD_CANON_EFFECT = "CanonicalEffect"; - protected static final String FLD_CODING_EFFECT = "CanonicalCodingEffect"; - protected static final String FLD_HGVS_CODING = "CanonicalHgvsCoding"; - protected static final String FLD_HGVS_PROTEIN = "CanonicalHgvsProtein"; - protected static final String FLD_OTHER_REPORTED = "OtherReportedEffects"; - protected static final String FLD_VARIANT_COPY_NUMBER = "VariantCopyNumber"; - protected static final String FLD_PURITY_ADJUSTED_VAF = "PurityAdjustedVaf"; - protected static final String FLD_TUMOR_SUPPORTING_READ_COUNT = "TumorSupportingReadCount"; - protected static final String FLD_TUMOR_TOTAL_READ_COUNT = "TumorTotalReadCount"; + static final String FLD_HOTSPOT = "Hotspot"; + static final String FLD_TIER = "Tier"; + static final String FLD_BIALLELIC = "Biallelic"; + static final String FLD_GENE = "Gene"; + static final String FLD_CANON_EFFECT = 
"CanonicalEffect"; + static final String FLD_CODING_EFFECT = "CanonicalCodingEffect"; + static final String FLD_HGVS_CODING = "CanonicalHgvsCoding"; + static final String FLD_HGVS_PROTEIN = "CanonicalHgvsProtein"; + static final String FLD_OTHER_REPORTED = "OtherReportedEffects"; + static final String FLD_VARIANT_COPY_NUMBER = "VariantCopyNumber"; + static final String FLD_PURITY_ADJUSTED_VAF = "PurityAdjustedVaf"; + static final String FLD_TUMOR_SUPPORTING_READ_COUNT = "TumorSupportingReadCount"; + static final String FLD_TUMOR_TOTAL_READ_COUNT = "TumorTotalReadCount"; - protected static List comparedFieldNames() + static List comparedFieldNames() { return Lists.newArrayList( FLD_REPORTED, FLD_HOTSPOT, FLD_TIER, FLD_BIALLELIC, FLD_GENE, FLD_CANON_EFFECT, FLD_CODING_EFFECT, diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java index eb1a0cd65e..7bf4f70251 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/peach/PeachComparer.java @@ -8,9 +8,7 @@ import static com.hartwig.hmftools.compar.peach.PeachData.FLD_FUNCTION; import static com.hartwig.hmftools.compar.peach.PeachData.FLD_PRESCRIPTION_URLS; -import java.io.File; import java.io.IOException; -import java.nio.file.Files; import java.util.List; import java.util.stream.Collectors; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java index aebea88848..beca1c281c 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/GeneCopyNumberComparer.java @@ -8,9 +8,9 @@ import static com.hartwig.hmftools.compar.purple.GeneCopyNumberData.FLD_MIN_REGION_START; import java.io.IOException; +import 
java.util.ArrayList; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -74,8 +74,7 @@ public List loadFromDb(final String sampleId, final DatabaseAcce if(driverGenes.isEmpty()) return items; - final List copyNumbers = dbAccess.readGeneCopynumbers( - sampleId, driverGenes.stream().collect(Collectors.toList())); + final List copyNumbers = dbAccess.readGeneCopynumbers(sampleId, new ArrayList<>(driverGenes)); copyNumbers.forEach(x -> items.add(new GeneCopyNumberData(x))); return items; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java index 4e9490b939..decb2bc998 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java @@ -9,7 +9,6 @@ import static com.hartwig.hmftools.compar.virus.VirusData.FLD_INTEGRATIONS; import static com.hartwig.hmftools.compar.virus.VirusData.FLD_MEAN_COVERAGE; -import java.io.File; import java.io.IOException; import java.util.List; @@ -91,7 +90,7 @@ private static String determineFileName(final String sampleId, final FileSources final String currentFileName = AnnotatedVirusFile.generateFileName(fileSources.Virus, sampleId); final String oldFileName = AnnotatedVirusFile.generateFileName(fileSources.Virus.replaceAll(VIRUS_INTERPRETER_DIR, "virus_interpreter"), sampleId); - ; + if(!fileExists(currentFileName) && fileExists(oldFileName)) { return oldFileName; From 6fe253d0fbf98a6aeaeec2de6216734165579f34 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 13 Sep 2024 16:53:00 +0200 Subject: [PATCH 40/53] Compar: DEV-4061: Fix flag stats and BAM metrics comparisons Switched to the underlying objects of the new formats. Adds support for new file formats. 
--- .../metrics/GermlineBamMetricsComparer.java | 22 +---- .../metrics/GermlineBamMetricsData.java | 18 ++-- .../metrics/GermlineFlagstatComparer.java | 23 +---- .../compar/metrics/GermlineFlagstatData.java | 6 +- .../compar/metrics/MetricsCommon.java | 52 ++++++++++- .../compar/metrics/OldWGSMetricsFile.java | 90 +++++++++++++++++++ .../metrics/TumorBamMetricsComparer.java | 23 +---- .../compar/metrics/TumorBamMetricsData.java | 18 ++-- .../compar/metrics/TumorFlagstatComparer.java | 23 +---- .../compar/metrics/TumorFlagstatData.java | 6 +- 10 files changed, 176 insertions(+), 105 deletions(-) create mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/metrics/OldWGSMetricsFile.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java index d4b3daf80f..1de88391b7 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsComparer.java @@ -1,22 +1,19 @@ package com.hartwig.hmftools.compar.metrics; -import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_BAM_METRICS; -import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.GermlineBamMetricsData.FLD_PERCENTAGE_10X; import static com.hartwig.hmftools.compar.metrics.GermlineBamMetricsData.FLD_PERCENTAGE_20X; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_PCT_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; -import static 
com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_BAM_METRICS_FILE_EXTENSION; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.loadBamMetricsSummary; import java.io.IOException; import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.metrics.WGSMetrics; -import com.hartwig.hmftools.common.metrics.WGSMetricsFile; +import com.hartwig.hmftools.common.metrics.BamMetricsSummary; import com.hartwig.hmftools.compar.ComparConfig; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.ItemComparer; @@ -75,7 +72,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - WGSMetrics metrics = WGSMetricsFile.read(determineFilePath(germlineSampleId, fileSources)); + BamMetricsSummary metrics = loadBamMetricsSummary(germlineSampleId, fileSources.GermlineBamMetrics); comparableItems.add(new GermlineBamMetricsData(metrics)); } catch(IOException e) @@ -86,17 +83,4 @@ public List loadFromFile(final String sampleId, final String ger return comparableItems; } - private static String determineFilePath(final String germlineSampleId, final FileSources fileSources) - { - String currentFileName = WGSMetricsFile.generateFilename(fileSources.GermlineBamMetrics, germlineSampleId); - String oldFileName = checkAddDirSeparator(fileSources.GermlineBamMetrics) + germlineSampleId + OLD_BAM_METRICS_FILE_EXTENSION; - if(!fileExists(currentFileName) && fileExists(oldFileName)) - { - return oldFileName; - } - else - { - return currentFileName; - } - } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java index 8d8340a95c..ae13273a88 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineBamMetricsData.java @@ -10,7 +10,7 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.metrics.WGSMetrics; +import com.hartwig.hmftools.common.metrics.BamMetricsSummary; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.DiffThresholds; @@ -19,12 +19,12 @@ public class GermlineBamMetricsData implements ComparableItem { - public final WGSMetrics Metrics; + public final BamMetricsSummary Metrics; protected static final String FLD_PERCENTAGE_10X = "Percentage10X"; protected static final String FLD_PERCENTAGE_20X = "Percentage20X"; - public GermlineBamMetricsData(final WGSMetrics metrics) + public GermlineBamMetricsData(final BamMetricsSummary metrics) { Metrics = metrics; } @@ -45,9 +45,9 @@ public String key() public List displayValues() { List values = Lists.newArrayList(); - values.add(format("%.2f", Metrics.pctExcDupe())); - values.add(format("%.2f", Metrics.coverage10xPercentage())); - values.add(format("%.2f", Metrics.coverage20xPercentage())); + values.add(format("%.2f", Metrics.duplicatePercent())); + values.add(format("%.2f", Metrics.coveragePercent(10))); + values.add(format("%.2f", Metrics.coveragePercent(20))); return values; } @@ -71,9 +71,9 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL final List diffs = Lists.newArrayList(); - checkDiff(diffs, FLD_DUPLICATE_PERCENTAGE, Metrics.pctExcDupe(), otherData.Metrics.pctExcDupe(), thresholds); - checkDiff(diffs, FLD_PERCENTAGE_10X, Metrics.coverage10xPercentage(), otherData.Metrics.coverage10xPercentage(), thresholds); - checkDiff(diffs, FLD_PERCENTAGE_20X, Metrics.coverage20xPercentage(), otherData.Metrics.coverage20xPercentage(), thresholds); + checkDiff(diffs, FLD_DUPLICATE_PERCENTAGE, Metrics.duplicatePercent(), otherData.Metrics.duplicatePercent(), thresholds); + 
checkDiff(diffs, FLD_PERCENTAGE_10X, Metrics.coveragePercent(10), otherData.Metrics.coveragePercent(10), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_20X, Metrics.coveragePercent(20), otherData.Metrics.coveragePercent(20), thresholds); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java index 0fe406d96b..37394cc8a2 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatComparer.java @@ -1,20 +1,17 @@ package com.hartwig.hmftools.compar.metrics; -import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.GERMLINE_FLAGSTAT; -import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_PCT_THRESHOLD; -import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_FLAGSTAT_FILE_EXTENSION; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.determineFlagStatsFilePath; import java.io.IOException; import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.flagstat.Flagstat; -import com.hartwig.hmftools.common.flagstat.FlagstatFile; +import com.hartwig.hmftools.common.metrics.BamFlagStats; import com.hartwig.hmftools.compar.ComparConfig; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.ItemComparer; @@ -71,7 +68,7 @@ 
public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - Flagstat flagstat = FlagstatFile.read(determineFilePath(germlineSampleId, fileSources)); + BamFlagStats flagstat = BamFlagStats.read(determineFlagStatsFilePath(germlineSampleId, fileSources.GermlineFlagstat)); comparableItems.add(new GermlineFlagstatData(flagstat)); } catch(IOException e) @@ -81,18 +78,4 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } - - private static String determineFilePath(final String germlineSampleId, final FileSources fileSources) - { - String currentFileName = FlagstatFile.generateFilename(fileSources.GermlineFlagstat, germlineSampleId); - String oldFileName = checkAddDirSeparator(fileSources.GermlineFlagstat) + germlineSampleId + OLD_FLAGSTAT_FILE_EXTENSION; - if(!fileExists(currentFileName) && fileExists(oldFileName)) - { - return oldFileName; - } - else - { - return currentFileName; - } - } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java index dbc03e0726..3e7783b880 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/GermlineFlagstatData.java @@ -10,7 +10,7 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.flagstat.Flagstat; +import com.hartwig.hmftools.common.metrics.BamFlagStats; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.DiffThresholds; @@ -19,9 +19,9 @@ public class GermlineFlagstatData implements ComparableItem { - public final Flagstat mFlagstat; + public final BamFlagStats mFlagstat; - public GermlineFlagstatData(final Flagstat flagstat) + public GermlineFlagstatData(final 
BamFlagStats flagstat) { mFlagstat = flagstat; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java index 0ba317249e..3aa3050a01 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/MetricsCommon.java @@ -1,5 +1,13 @@ package com.hartwig.hmftools.compar.metrics; +import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; +import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; + +import java.io.IOException; + +import com.hartwig.hmftools.common.metrics.BamFlagStats; +import com.hartwig.hmftools.common.metrics.BamMetricsSummary; + public class MetricsCommon { protected static final String FLD_MAPPED_PROPORTION = "MappedProportion"; @@ -10,6 +18,46 @@ public class MetricsCommon protected static final double DUPLICATE_PERCENTAGE_ABS_THRESHOLD = 0.05; protected static final double DUPLICATE_PERCENTAGE_PCT_THRESHOLD = 0; - protected static final String OLD_FLAGSTAT_FILE_EXTENSION = "_dedup.realigned.flagstat"; - protected static final String OLD_BAM_METRICS_FILE_EXTENSION = "_dedup_WGSMetrics.txt"; + protected static final String OLD_FLAGSTAT_FILE_EXTENSION = ".flagstat"; + protected static final String VERY_OLD_FLAGSTAT_FILE_EXTENSION = "_dedup.realigned.flagstat"; + protected static final String OLD_BAM_METRICS_FILE_EXTENSION = ".wgsmetrics"; + protected static final String VERY_OLD_BAM_METRICS_FILE_EXTENSION = "_dedup_WGSMetrics.txt"; + + public static BamMetricsSummary loadBamMetricsSummary(final String sampleId, String directory) throws IOException + { + String currentFileName = BamMetricsSummary.generateFilename(directory, sampleId); + String oldFileName = checkAddDirSeparator(directory) + sampleId + OLD_BAM_METRICS_FILE_EXTENSION; + String veryOldFileName = checkAddDirSeparator(directory) + sampleId + 
VERY_OLD_BAM_METRICS_FILE_EXTENSION; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return OldWGSMetricsFile.read(oldFileName); + } + else if(!fileExists(currentFileName) && !fileExists(oldFileName) && fileExists(veryOldFileName)) + { + return OldWGSMetricsFile.read(veryOldFileName); + } + else + { + return BamMetricsSummary.read(currentFileName); + } + } + + public static String determineFlagStatsFilePath(final String sampleId, final String directory) + { + String currentFileName = BamFlagStats.generateFilename(directory, sampleId); + String oldFileName = checkAddDirSeparator(directory) + sampleId + OLD_FLAGSTAT_FILE_EXTENSION; + String veryOldFileName = checkAddDirSeparator(directory) + sampleId + VERY_OLD_FLAGSTAT_FILE_EXTENSION; + if(!fileExists(currentFileName) && fileExists(oldFileName)) + { + return oldFileName; + } + else if(!fileExists(currentFileName) && !fileExists(oldFileName) && fileExists(veryOldFileName)) + { + return veryOldFileName; + } + else + { + return currentFileName; + } + } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/OldWGSMetricsFile.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/OldWGSMetricsFile.java new file mode 100644 index 0000000000..99adb55d6e --- /dev/null +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/OldWGSMetricsFile.java @@ -0,0 +1,90 @@ +package com.hartwig.hmftools.compar.metrics; + +// Picard WgsMetrics file output, internally superseded with BamMetricsSummary + +import static com.hartwig.hmftools.common.utils.file.FileDelimiters.TSV_DELIM; +import static com.hartwig.hmftools.common.utils.file.FileReaderUtils.createFieldsIndexMap; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.List; +import java.util.Map; + +import com.hartwig.hmftools.common.metrics.BamMetricsSummary; +import com.hartwig.hmftools.common.metrics.ImmutableBamMetricsSummary; + +import org.jetbrains.annotations.NotNull; + 
+public final class OldWGSMetricsFile +{ + public static final String GENOME_TERRITORY_COLUMN = "GENOME_TERRITORY"; + public static final String MEAN_COVERAGE_COLUMN = "MEAN_COVERAGE"; + public static final String SD_COVERAGE_COLUMN = "SD_COVERAGE"; + public static final String MEDIAN_COVERAGE_COLUMN = "MEDIAN_COVERAGE"; + public static final String MAD_COVERAGE_COLUMN = "MAD_COVERAGE"; + public static final String PCT_EXC_MAPQ_COLUMN = "PCT_EXC_MAPQ"; + public static final String PCT_EXC_DUPE_COLUMN = "PCT_EXC_DUPE"; + public static final String PCT_EXC_UNPAIRED_COLUMN = "PCT_EXC_UNPAIRED"; + public static final String PCT_EXC_BASEQ_COLUMN = "PCT_EXC_BASEQ"; + public static final String PCT_EXC_OVERLAP_COLUMN = "PCT_EXC_OVERLAP"; + public static final String PCT_EXC_CAPPED_COLUMN = "PCT_EXC_CAPPED"; + + private static final String COVERAGE_10X_COLUMN = "PCT_10X"; + private static final String COVERAGE_20X_COLUMN = "PCT_20X"; + private static final String COVERAGE_30X_COLUMN = "PCT_30X"; + private static final String COVERAGE_60X_COLUMN = "PCT_60X"; + + @NotNull + public static BamMetricsSummary read(final String filename) throws IOException + { + List lines = Files.readAllLines(new File(filename).toPath()); + + String headerLine = null; + String valuesLine = null; + + for(int i = 0; i < lines.size() - 1; ++i) + { + if(lines.get(i).startsWith(GENOME_TERRITORY_COLUMN)) + { + headerLine = lines.get(i); + valuesLine = lines.get(i + 1); + break; + } + } + + if(headerLine == null) + { + throw new IOException("invalid WGS metrics file: " + filename); + } + + Map fieldsIndexMap = createFieldsIndexMap(headerLine, TSV_DELIM); + String[] values = valuesLine.split(TSV_DELIM, -1); + + List coverageLevels = List.of(10, 20, 30, 60); + List coveragePercents = List.of( + Double.parseDouble(values[fieldsIndexMap.get(COVERAGE_10X_COLUMN)]), + Double.parseDouble(values[fieldsIndexMap.get(COVERAGE_20X_COLUMN)]), + Double.parseDouble(values[fieldsIndexMap.get(COVERAGE_30X_COLUMN)]), + 
Double.parseDouble(values[fieldsIndexMap.get(COVERAGE_60X_COLUMN)]) + ); + return ImmutableBamMetricsSummary.builder() + .totalRegionBases(-1) + .totalReads(-1) + .duplicateReads(-1) + .dualStrandReads(-1) + .meanCoverage(Double.parseDouble(values[fieldsIndexMap.get(MEAN_COVERAGE_COLUMN)])) + .sdCoverage(Double.parseDouble(values[fieldsIndexMap.get(SD_COVERAGE_COLUMN)])) + .medianCoverage((int) Double.parseDouble(values[fieldsIndexMap.get(MEDIAN_COVERAGE_COLUMN)])) + .madCoverage((int) Double.parseDouble(values[fieldsIndexMap.get(MAD_COVERAGE_COLUMN)])) + .lowMapQualPercent(Double.parseDouble(values[fieldsIndexMap.get(PCT_EXC_MAPQ_COLUMN)])) + .duplicatePercent(Double.parseDouble(values[fieldsIndexMap.get(PCT_EXC_DUPE_COLUMN)])) + .unpairedPercent(Double.parseDouble(values[fieldsIndexMap.get(PCT_EXC_UNPAIRED_COLUMN)])) + .lowBaseQualPercent(Double.parseDouble(values[fieldsIndexMap.get(PCT_EXC_BASEQ_COLUMN)])) + .overlappingReadPercent(Double.parseDouble(values[fieldsIndexMap.get(PCT_EXC_OVERLAP_COLUMN)])) + .cappedCoveragePercent(Double.parseDouble(values[fieldsIndexMap.get(PCT_EXC_CAPPED_COLUMN)])) + .coverageLevels(coverageLevels) + .coveragePercents(coveragePercents) + .build(); + } +} diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java index 63207a86f8..616f600c0f 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsComparer.java @@ -1,13 +1,11 @@ package com.hartwig.hmftools.compar.metrics; -import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.TUMOR_BAM_METRICS; -import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import 
static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.DUPLICATE_PERCENTAGE_PCT_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_DUPLICATE_PERCENTAGE; -import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_BAM_METRICS_FILE_EXTENSION; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.loadBamMetricsSummary; import static com.hartwig.hmftools.compar.metrics.TumorBamMetricsData.FLD_PERCENTAGE_30X; import static com.hartwig.hmftools.compar.metrics.TumorBamMetricsData.FLD_PERCENTAGE_60X; @@ -15,8 +13,7 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.metrics.WGSMetrics; -import com.hartwig.hmftools.common.metrics.WGSMetricsFile; +import com.hartwig.hmftools.common.metrics.BamMetricsSummary; import com.hartwig.hmftools.compar.ComparConfig; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.ItemComparer; @@ -75,7 +72,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - WGSMetrics metrics = WGSMetricsFile.read(determineFilePath(sampleId, fileSources)); + BamMetricsSummary metrics = loadBamMetricsSummary(sampleId, fileSources.TumorBamMetrics); comparableItems.add(new TumorBamMetricsData(metrics)); } catch(IOException e) @@ -85,18 +82,4 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } - - private static String determineFilePath(final String sampleId, final FileSources fileSources) - { - String currentFileName = WGSMetricsFile.generateFilename(fileSources.TumorBamMetrics, sampleId); - String oldFileName = checkAddDirSeparator(fileSources.TumorBamMetrics) + sampleId + OLD_BAM_METRICS_FILE_EXTENSION; - if(!fileExists(currentFileName) && fileExists(oldFileName)) - { - return oldFileName; - } - else - { - return 
currentFileName; - } - } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java index de4cf36188..25390db674 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorBamMetricsData.java @@ -10,7 +10,7 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.metrics.WGSMetrics; +import com.hartwig.hmftools.common.metrics.BamMetricsSummary; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.common.Category; import com.hartwig.hmftools.compar.common.DiffThresholds; @@ -19,12 +19,12 @@ public class TumorBamMetricsData implements ComparableItem { - public final WGSMetrics Metrics; + public final BamMetricsSummary Metrics; protected static final String FLD_PERCENTAGE_30X = "Percentage30X"; protected static final String FLD_PERCENTAGE_60X = "Percentage60X"; - public TumorBamMetricsData(final WGSMetrics metrics) + public TumorBamMetricsData(final BamMetricsSummary metrics) { Metrics = metrics; } @@ -45,9 +45,9 @@ public String key() public List displayValues() { List values = Lists.newArrayList(); - values.add(format("%.2f", Metrics.pctExcDupe())); - values.add(format("%.2f", Metrics.coverage30xPercentage())); - values.add(format("%.2f", Metrics.coverage60xPercentage())); + values.add(format("%.2f", Metrics.duplicatePercent())); + values.add(format("%.2f", Metrics.coveragePercent(30))); + values.add(format("%.2f", Metrics.coveragePercent(60))); return values; } @@ -71,9 +71,9 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL final List diffs = Lists.newArrayList(); - checkDiff(diffs, FLD_DUPLICATE_PERCENTAGE, Metrics.pctExcDupe(), otherData.Metrics.pctExcDupe(), thresholds); - checkDiff(diffs, FLD_PERCENTAGE_30X, 
Metrics.coverage30xPercentage(), otherData.Metrics.coverage30xPercentage(), thresholds); - checkDiff(diffs, FLD_PERCENTAGE_60X, Metrics.coverage60xPercentage(), otherData.Metrics.coverage60xPercentage(), thresholds); + checkDiff(diffs, FLD_DUPLICATE_PERCENTAGE, Metrics.duplicatePercent(), otherData.Metrics.duplicatePercent(), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_30X, Metrics.coveragePercent(30), otherData.Metrics.coveragePercent(30), thresholds); + checkDiff(diffs, FLD_PERCENTAGE_60X, Metrics.coveragePercent(60), otherData.Metrics.coveragePercent(60), thresholds); return !diffs.isEmpty() ? new Mismatch(this, other, VALUE, diffs) : null; } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java index 83a2c52c3e..f6d9fbd4ec 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatComparer.java @@ -1,20 +1,17 @@ package com.hartwig.hmftools.compar.metrics; -import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.checkAddDirSeparator; import static com.hartwig.hmftools.compar.ComparConfig.CMP_LOGGER; import static com.hartwig.hmftools.compar.common.Category.TUMOR_FLAGSTAT; -import static com.hartwig.hmftools.compar.common.CommonUtils.fileExists; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.FLD_MAPPED_PROPORTION; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_ABS_THRESHOLD; import static com.hartwig.hmftools.compar.metrics.MetricsCommon.MAPPED_PROPORTION_PCT_THRESHOLD; -import static com.hartwig.hmftools.compar.metrics.MetricsCommon.OLD_FLAGSTAT_FILE_EXTENSION; +import static com.hartwig.hmftools.compar.metrics.MetricsCommon.determineFlagStatsFilePath; import java.io.IOException; import java.util.List; import com.google.common.collect.Lists; -import 
com.hartwig.hmftools.common.flagstat.Flagstat; -import com.hartwig.hmftools.common.flagstat.FlagstatFile; +import com.hartwig.hmftools.common.metrics.BamFlagStats; import com.hartwig.hmftools.compar.ComparConfig; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.ItemComparer; @@ -71,7 +68,7 @@ public List loadFromFile(final String sampleId, final String ger final List comparableItems = Lists.newArrayList(); try { - Flagstat flagstat = FlagstatFile.read(determineFilePath(sampleId, fileSources)); + BamFlagStats flagstat = BamFlagStats.read(determineFlagStatsFilePath(sampleId, fileSources.TumorFlagstat)); comparableItems.add(new TumorFlagstatData(flagstat)); } catch(IOException e) @@ -81,18 +78,4 @@ public List loadFromFile(final String sampleId, final String ger } return comparableItems; } - - private static String determineFilePath(final String sampleId, final FileSources fileSources) - { - String currentFileName = FlagstatFile.generateFilename(fileSources.TumorFlagstat, sampleId); - String oldFileName = checkAddDirSeparator(fileSources.TumorFlagstat) + sampleId + OLD_FLAGSTAT_FILE_EXTENSION; - if(!fileExists(currentFileName) && fileExists(oldFileName)) - { - return oldFileName; - } - else - { - return currentFileName; - } - } } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java index 01afb86828..a2724f872b 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/metrics/TumorFlagstatData.java @@ -10,7 +10,7 @@ import java.util.List; import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.flagstat.Flagstat; +import com.hartwig.hmftools.common.metrics.BamFlagStats; import com.hartwig.hmftools.compar.ComparableItem; import com.hartwig.hmftools.compar.common.Category; import 
com.hartwig.hmftools.compar.common.DiffThresholds; @@ -19,9 +19,9 @@ public class TumorFlagstatData implements ComparableItem { - public final Flagstat mFlagstat; + public final BamFlagStats mFlagstat; - public TumorFlagstatData(final Flagstat flagstat) + public TumorFlagstatData(final BamFlagStats flagstat) { mFlagstat = flagstat; } From 87d9aa153a4d4b443c58df81fe0df2ef5e490787 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 25 Sep 2024 16:54:15 +0200 Subject: [PATCH 41/53] Compar: DEV-4061: Fix merge issues --- .../hmftools/compar/linx/FusionComparer.java | 2 +- .../hmftools/compar/linx/FusionData.java | 2 +- .../hmftools/compar/linx/LinxCommon.java | 46 ------------------- 3 files changed, 2 insertions(+), 48 deletions(-) delete mode 100644 compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java index be60f80eaa..3db0b7b025 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionComparer.java @@ -9,12 +9,12 @@ import static com.hartwig.hmftools.compar.linx.FusionData.FLD_DOMAINS_LOST; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_EXON_DOWN; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_EXON_UP; +import static com.hartwig.hmftools.compar.linx.FusionData.FLD_JUNCTION_COPY_NUMBER; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_LIKELIHOOD; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_PHASED; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_REPORTED_TYPE; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_TRANSCRIPT_DOWN; import static com.hartwig.hmftools.compar.linx.FusionData.FLD_TRANSCRIPT_UP; -import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_JUNCTION_COPY_NUMBER; import 
java.io.IOException; import java.util.List; diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java index be6e6d4a62..39afb427c5 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java @@ -4,7 +4,6 @@ import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; import static com.hartwig.hmftools.compar.common.MismatchType.VALUE; -import static com.hartwig.hmftools.compar.linx.LinxCommon.FLD_JUNCTION_COPY_NUMBER; import java.util.List; @@ -32,6 +31,7 @@ public class FusionData implements ComparableItem protected static final String FLD_CHAIN_TERM = "chainTerminated"; protected static final String FLD_DOMAINS_KEPT = "domainsKept"; protected static final String FLD_DOMAINS_LOST = "domainsLost"; + protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; public FusionData(final LinxFusion fusion, final String geneMappedName) { diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java deleted file mode 100644 index 6a89bb949a..0000000000 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/LinxCommon.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.hartwig.hmftools.compar.linx; - -import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_CHROMOSOME_BAND; -import static com.hartwig.hmftools.compar.common.CommonUtils.FLD_REPORTED; -import static com.hartwig.hmftools.compar.common.DiffFunctions.checkDiff; - -import java.util.List; - -import com.google.common.collect.Lists; -import com.hartwig.hmftools.common.linx.LinxBreakend; -import com.hartwig.hmftools.compar.common.DiffThresholds; - -public class LinxCommon -{ - protected static final String 
FLD_REGION_TYPE = "RegionType"; - protected static final String FLD_NEXT_SPLICE = "NextSpliceExonRank"; - protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; - protected static final String FLD_UNDISRUPTED_COPY_NUMBER = "UndisruptedCopyNumber"; - - protected static List comparedFieldNamesBreakends() - { - return Lists.newArrayList( - FLD_REPORTED, FLD_REGION_TYPE, FLD_NEXT_SPLICE, FLD_JUNCTION_COPY_NUMBER, FLD_UNDISRUPTED_COPY_NUMBER); - } - - protected static List displayValuesBreakend(LinxBreakend breakend) - { - List values = Lists.newArrayList(); - values.add(String.format("%s", breakend.reportedDisruption())); - values.add(String.format("%s", breakend.regionType())); - values.add(String.format("%d", breakend.nextSpliceExonRank())); - values.add(String.format("%.2f", breakend.junctionCopyNumber())); - values.add(String.format("%.2f", breakend.undisruptedCopyNumber())); - return values; - } - - protected static void checkDiffsBreakends(List diffs, final LinxBreakend breakend, final LinxBreakend otherBreakend, final DiffThresholds thresholds) - { - checkDiff(diffs, FLD_REPORTED, breakend.reportedDisruption(), otherBreakend.reportedDisruption()); - checkDiff(diffs, FLD_REGION_TYPE, breakend.regionType().toString(), otherBreakend.regionType().toString()); - checkDiff(diffs, FLD_NEXT_SPLICE, breakend.nextSpliceExonRank(), otherBreakend.nextSpliceExonRank()); - checkDiff(diffs, FLD_JUNCTION_COPY_NUMBER, breakend.junctionCopyNumber(), otherBreakend.junctionCopyNumber(), thresholds); - checkDiff(diffs, FLD_UNDISRUPTED_COPY_NUMBER, breakend.undisruptedCopyNumber(), otherBreakend.undisruptedCopyNumber(), thresholds); - checkDiff(diffs, FLD_CHROMOSOME_BAND, breakend.chrBand(), otherBreakend.chrBand()); - } -} From 478eb8ffd4d432be4a9d517a4511b0f862d96aa1 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 25 Sep 2024 16:59:54 +0200 Subject: [PATCH 42/53] Compar: DEV-4061: Adjust thresholds --- 
.../com/hartwig/hmftools/compar/driver/DriverComparer.java | 4 ++-- .../hmftools/compar/mutation/GermlineVariantComparer.java | 6 +++--- .../hmftools/compar/mutation/SomaticVariantComparer.java | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java index 87f56abd01..ff4578c138 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/driver/DriverComparer.java @@ -46,8 +46,8 @@ public DriverComparer(final ComparConfig config) public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_LIKELIHOOD, 0.1, 0); - thresholds.addFieldThreshold(FLD_MIN_COPY_NUMBER, 0.5, 0.15); - thresholds.addFieldThreshold(FLD_MAX_COPY_NUMBER, 0.5, 0.15); + thresholds.addFieldThreshold(FLD_MIN_COPY_NUMBER, 0.3, 0.15); + thresholds.addFieldThreshold(FLD_MAX_COPY_NUMBER, 0.3, 0.15); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java index 7605ff1f69..42375cb801 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantComparer.java @@ -55,10 +55,10 @@ public void registerThresholds(final DiffThresholds thresholds) { // same as somatic thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); - thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.5, 0.2); + thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.15); thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0); - thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); - thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); + 
thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 1, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 1, 0.2); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java index 71b2d737ad..b9c42ad164 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/SomaticVariantComparer.java @@ -297,10 +297,10 @@ public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_QUAL, 20, 0.2); thresholds.addFieldThreshold(FLD_SUBCLONAL_LIKELIHOOD, 0.6, 0); - thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.5, 0.2); + thresholds.addFieldThreshold(FLD_VARIANT_COPY_NUMBER, 0.3, 0.15); thresholds.addFieldThreshold(FLD_PURITY_ADJUSTED_VAF, 0.2, 0); - thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 0, 0.2); - thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 0, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_SUPPORTING_READ_COUNT, 1, 0.2); + thresholds.addFieldThreshold(FLD_TUMOR_TOTAL_READ_COUNT, 1, 0.2); } @Override From 3799dac7ebd290a72e6c7861cbe61c6f2f5472da Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Fri, 27 Sep 2024 16:47:50 +0200 Subject: [PATCH 43/53] Compar: DEV-4061: Add thresholds for virus integration count --- .../java/com/hartwig/hmftools/compar/virus/VirusComparer.java | 1 + .../main/java/com/hartwig/hmftools/compar/virus/VirusData.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java index decb2bc998..01a1cad057 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java +++ 
b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusComparer.java @@ -45,6 +45,7 @@ public Category category() public void registerThresholds(final DiffThresholds thresholds) { thresholds.addFieldThreshold(FLD_MEAN_COVERAGE, 0, 0.15); + thresholds.addFieldThreshold(FLD_INTEGRATIONS, 0, 0.20); } @Override diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java index 385380fe6d..88c57e38d6 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/virus/VirusData.java @@ -73,7 +73,7 @@ public Mismatch findMismatch(final ComparableItem other, final MatchLevel matchL final List diffs = Lists.newArrayList(); checkDiff(diffs, FLD_REPORTED, Virus.reported(), otherData.Virus.reported()); - checkDiff(diffs, FLD_INTEGRATIONS, Virus.integrations(), otherData.Virus.integrations()); + checkDiff(diffs, FLD_INTEGRATIONS, Virus.integrations(), otherData.Virus.integrations(), thresholds); checkDiff(diffs, FLD_MEAN_COVERAGE, Virus.meanCoverage(), otherData.Virus.meanCoverage(), thresholds); checkDiff(diffs, FLD_DRIVER_LIKELIHOOD, String.valueOf(Virus.virusDriverLikelihoodType()), String.valueOf(otherData.Virus.virusDriverLikelihoodType())); From 27adb5fa569f522510cf80ea5495ddb7815fe627 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 30 Sep 2024 15:13:55 +0200 Subject: [PATCH 44/53] Compar: DEV-4061: Error when specifying samples with both file and arguments --- .../java/com/hartwig/hmftools/compar/ComparConfig.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java index c4abaf67aa..0d8e4abf98 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/ComparConfig.java @@ 
-269,6 +269,14 @@ public SampleIdMapping(final String sampleId, final String germlineSampleId) private void loadSampleIds(final ConfigBuilder configBuilder) { + if(configBuilder.hasValue(SAMPLE_ID_FILE) && (configBuilder.hasFlag(SAMPLE) || configBuilder.hasFlag(GERMLINE_SAMPLE))) + { + CMP_LOGGER.error("when the argument '{}' is set, the arguments '{}' and '{}' should not be set", + SAMPLE_ID_FILE, SAMPLE, GERMLINE_SAMPLE); + mIsValid = false; + return; + } + if(configBuilder.hasValue(SAMPLE)) { registerSampleIds(configBuilder.getValue(SAMPLE), configBuilder.getValue(GERMLINE_SAMPLE, null)); From bf2ad20ca8ee6aa835c4fc4ccfbf4ee19a059fc7 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 30 Sep 2024 15:40:56 +0200 Subject: [PATCH 45/53] Compar: DEV-4061: Compare breakend transcripts if not canonical --- .../java/com/hartwig/hmftools/compar/linx/BreakendData.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/BreakendData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/BreakendData.java index 61589b9329..91c2b327fa 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/BreakendData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/BreakendData.java @@ -51,6 +51,12 @@ public boolean matches(final BreakendData other) return false; } + boolean checkTranscriptIds = !Breakend.canonical() || !other.Breakend.canonical(); + if(checkTranscriptIds && !other.Breakend.transcriptId().equals(Breakend.transcriptId())) + { + return false; + } + return true; } From 7f43c9bb23ebf9ec876d11b943e2fc7f66cd748d Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 30 Sep 2024 15:49:10 +0200 Subject: [PATCH 46/53] Compar: DEV-4061: Update Readme --- compar/README.md | 403 +++++++++++++++++++++++++++++++---------------- 1 file changed, 263 insertions(+), 140 deletions(-) diff --git a/compar/README.md b/compar/README.md index 1763455c10..ea96451365 100644 --- a/compar/README.md +++ 
b/compar/README.md @@ -7,6 +7,7 @@ A regression testing tool, comparing sample output across pipeline runs. ``` java -jar compar.jar \ -sample SAMPLE_T \ + -germline_sample SAMPLE_R \ -categories ALL \ -match_level REPORTABLE \ -sample_dir_ref /path_to_sample_data/run_01/ @@ -16,33 +17,40 @@ java -jar compar.jar \ ## Configuration The key configuration values to set are: -- the sample(s) or to compare +- the sample(s) to compare - the categories to compare - each of these will map to specific pipeline output files -- the source of data - either the MySQL hmf_patients DB or pipeine output files +- the source of data - either the MySQL hmf_patients DB or pipeline output files ### Required configuration -Filter | Description ----|--- -sample | Tumor sample ID, OR -sample_id_file | File with column header SampleId and then list of sample IDs, optional Ref and New sample mappings (see example) -categories | 'ALL', otherwise specify a comma-separated list from PURITY, DRIVER, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION, GERMLINE_SV, FUSION, DISRUPTION, CUPPA, CHORD, LILAC -match_level | REPORTABLE (default) or DETAILED -sample_data_ref & sample_data_new | Sample root directory for pipeline output -TOOL_dir_ref & TOOL_dir_new ** | Tool path overrides - each pipeline tool directory eg 'linx_dir_ref' - relative path to 'sample_dir' if specified, otherwise absolute path -db_source_ref & db_source_new | DB connection details for ref and new sample data - see format below -output_dir | Path for output file +| Filter | Description | +|-----------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------| +| sample | Tumor sample ID, OR | +| sample_id_file | File with column header SampleId and then list of sample IDs, optional Ref and New sample mappings and germline sample IDs (see examples) | +| categories | 'ALL', 'PANEL', or otherwise specify a comma-separated 
list | +| match_level | REPORTABLE (default) or DETAILED | +| sample_data_ref & sample_data_new | Sample root directory for pipeline output | +| TOOL_dir_ref & TOOL_dir_new ** | Tool path overrides - each pipeline tool directory eg 'linx_dir_ref' - relative path to 'sample_dir' if specified, otherwise absolute path | +| db_source_ref & db_source_new | DB connection details for ref and new sample data - see format below | +| output_dir | Path for output file | + +** set of tools are: linx, linx_germline, purple, chord, cuppa, lilac, peach, virus (i.e. virus-interpreter) + +The available categories are: PURITY, DRIVER, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION, GERMLINE_SV, FUSION, DISRUPTION, CUPPA, +CHORD, LILAC, PEACH, VIRUS, TUMOR_FLAGSTAT, GERMLINE_FLAGSTAT, TUMOR_METRICS, GERMLINE_METRICS, SNP_GENOTYPE, COPY_NUMBER, GENE_COPY_NUMBER. + +The category PANEL is equivalent to PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT, TUMOR_BAM_METRICS and SNP_GENOTYPE. -** set of tools are: linx, linx_germline, purple, chord, cuppa and lilac ### Optional configuration -Filter | Description ----|--- -output_id | Optional: outfile file suffix -driver_gene_panel | Used to check alternate transcript changes and to limit analysis of somatics and gene copy number comparisons -restrict_to_drivers | Limit analysis to genes within the panel -write_detailed | Write a file per compared category +| Filter | Description | +|---------------------|---------------------------------------------------------------------------------------------------------------| +| germline_sample | Germline sample ID. 
Defaults to tumor sample ID with "-ref" appended | +| output_id | Outfile file suffix | +| driver_gene_panel | Used to check alternate transcript changes and to limit analysis of somatics and gene copy number comparisons | +| restrict_to_drivers | Limit analysis to genes within the panel | +| write_detailed | Write a file per compared category | ### Sample ID Mappings If the same patient has different sample IDs for different runs and these are used for all filenames, then specify these mappings in the sample ID file, eg: @@ -51,24 +59,33 @@ sample_id_mappings.csv SampleId,RefSampleId,NewSampleId COLO829T,COLO829_Ref,COLO829T_New ``` +The same can be done for germline sample IDs. +``` +sample_id_mappings.with_germline.csv +SampleId,GermlineSampleId,RefSampleId,RefGermlineSampleId,NewSampleId,NewGermlineSampleId +COLO829T,COLO829R,COLO829T_Ref,COLO829R_Ref,COLO829T_New,COLO829R_New +``` ### File Sourced Data -Typically set the 'sample_dir_ref' and 'sample_dir_new' to the REF and NEW sample root directories, which then contain each tool's -output in a sub-directory as per the standard HMF pipeline. +Typically, set the 'sample_dir_ref' and 'sample_dir_new' to the REF and NEW sample root directories, which then contain each tool's +output in a subdirectory as per the standard HMF pipeline. Specify one or more tool directories to override the pipeline default paths. 
-Category | Config Path Id ----|--- -PURITY, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION | purple_dir -FUSION, DISRUPTION | linx_dir -GERMLINE_SV | linx_germline_dir -PURITY | purple_dir -CUPPA | cuppa_dir -CHORD | chord_dir -LILAC | lilac_dir +| Category | Config Path Id | +|--------------------------------------------------------------|-------------------| +| PURITY, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION | purple_dir | +| FUSION, DISRUPTION | linx_dir | +| GERMLINE_SV | linx_germline_dir | +| PURITY | purple_dir | +| CUPPA | cuppa_dir | +| CHORD | chord_dir | +| LILAC | lilac_dir | +| PEACH | peach_dir | +| VIRUS | virus_dir | Wildcards '*' can be used in place of sampleIds, in which case Compar will replace the wildcard with the sampleId for each path. +Similarly, '$' can be used in place of germline sample IDs. Example 1 ``` @@ -110,150 +127,256 @@ Differences in field values are considered one of the following ways ### Purity Data key: SampleId -Field | Match Type & Thresholds ----|--- -qcStatus | Exact -gender | Exact -germlineAberration | Exact -fitMethod | Exact -msStatus | Exact -tmbStatus | Exact -tmlStatus | Exact -purity | Threshold [0.02] -ploidy | Threshold [0.1] -contamination | Threshold [0.005] -tmbPerMb | Threshold [0.1, 5%] -msIndelsPerMb | Threshold [0.1, 5%] -tml | Threshold [1, 5%] -copyNumberSegments | Threshold [5, 20%] -unsupportedCopyNumberSegments | Threshold [5, 20%] -svTmb | Threshold [2, 5%] +| Field | Match Type & Thresholds | +|-------------------------------|-------------------------| +| qcStatus | Exact | +| gender | Exact | +| germlineAberration | Exact | +| fitMethod | Exact | +| msStatus | Exact | +| tmbStatus | Exact | +| tmlStatus | Exact | +| purity | Threshold [0.02] | +| ploidy | Threshold [0.1] | +| contamination | Threshold [0.005] | +| tmbPerMb | Threshold [0.1, 5%] | +| msIndelsPerMb | Threshold [0.1, 5%] | +| tml | Threshold [1, 5%] | +| copyNumberSegments | Threshold [5, 20%] | +| 
unsupportedCopyNumberSegments | Threshold [5, 20%] | +| svTmb | Threshold [2, 5%] | ### Somatic Variant -Data key: SampleId, Chromosome, Position, Ref and Alt - -Field | Match Type & Thresholds ----|--- -reported | Exact -filter | Exact -gene | Exact -canonicalEffect | Exact -canonicalCodingEffect | Exact -canonicalHgvsCodingImpact | Exact -canonicalHgvsProteinImpact | Exact -otherTranscriptEffects | Exact -tier | Exact -hotspot | Exact -biallelic | Exact -qual | Threshold [20, 20%] -subclonalLikelihood | [0.6] -has LPS | true / false +Data key: SampleId, Chromosome, Position, Ref, Alt and VariantType (SNP/MNP/INDEL/UNDEFINED) + +| Field | Match Type & Thresholds | +|----------------------------|-------------------------| +| reported | Exact | +| filter | Exact | +| gene | Exact | +| canonicalEffect | Exact | +| canonicalCodingEffect | Exact | +| canonicalHgvsCodingImpact | Exact | +| canonicalHgvsProteinImpact | Exact | +| otherTranscriptEffects | Exact | +| tier | Exact | +| hotspot | Exact | +| biallelic | Exact | +| qual | Threshold [20, 20%] | +| subclonalLikelihood | Threshold [0.6] | +| hasLPS | Exact | +| variantCopyNumber | Threshold [0.3, 15%] | +| tumorSupportingReadCount | Threshold [1, 20%] | +| tumorTotalReadCount | Threshold [1, 20%] | +| purityAdjustedVaf | Threshold [0.2] | ### Germline Variant -Data key: SampleId, Chromosome, Position, Ref and Alt - -Field | Match Type & Thresholds ----|--- -reported | Exact -filter | Exact -gene | Exact -canonicalEffect | Exact -canonicalCodingEffect | Exact -canonicalHgvsCodingImpact | Exact -canonicalHgvsProteinImpact | Exact -otherTranscriptEffects | Exact -tier | Exact -hotspot | Exact -biallelic | Exact -pathogenicity | Exact -pathogenic | Exact -qual | Threshold [20, 20%] +Data key: SampleId, Chromosome, Position, Ref, Alt and VariantType (SNP/MNP/INDEL/UNDEFINED) + +| Field | Match Type & Thresholds | +|----------------------------|-------------------------| +| reported | Exact | +| filter | Exact | +| 
gene | Exact | +| canonicalEffect | Exact | +| canonicalCodingEffect | Exact | +| canonicalHgvsCodingImpact | Exact | +| canonicalHgvsProteinImpact | Exact | +| otherTranscriptEffects | Exact | +| tier | Exact | +| hotspot | Exact | +| biallelic | Exact | +| qual | Threshold [20, 20%] | +| variantCopyNumber (tumor) | Threshold [0.3, 15%] | +| tumorSupportingReadCount | Threshold [1, 20%] | +| tumorTotalReadCount | Threshold [1, 20%] | +| purityAdjustedVaf | Threshold [0.2] | ### Germline Deletion Data key: SampleId, Gene -Field | Match Type & Thresholds ----|--- -reported | Exact -germlineStatus | Exact -tumorStatus | Exact -germlineCopyNumber | Threshold [0.2, 10%] -tumorCopyNumber | Threshold [0.2, 10%] +| Field | Match Type & Thresholds | +|--------------------|-------------------------| +| reported | Exact | +| germlineStatus | Exact | +| tumorStatus | Exact | +| germlineCopyNumber | Threshold [0.2, 10%] | +| tumorCopyNumber | Threshold [0.2, 10%] | +| chromosome | Exact | +| chromosomeBand | Exact | ### Drivers (Linx and Purple) Data key: SampleId, GeneId, TranscriptId, Driver-type -Field | Match Type & Thresholds ----|--- -likelihoodMethod | Exact -driverLikelihood | [0.1] -minCopyNumber | max(0.2, 10%) for AMPs and DELs - not currently checked +| Field | Match Type & Thresholds | +|------------------|-------------------------| +| likelihoodMethod | Exact | +| driverLikelihood | Threshold [0.1] | +| minCopyNumber | Threshold [0.3, 15%] | +| maxCopyNumber | Threshold [0.3, 15%] | +| chromosome | Exact | +| chromosomeBand | Exact | ### Fusions Data key: SampleId, Fusion name -Field | Match Type & Thresholds ----|--- -reported | Exact -reportedType | Exact -phased | Exact -likelihood | Exact -fusedExonUp | Exact -fusedExonDown | Exact -chainLinks | Exact -chainTerminated | Exact -domainsKept | Exact -domainsLost | Exact +| Field | Match Type & Thresholds | +|---------------------|-------------------------| +| reported | Exact | +| reportedType | Exact | +| 
phased | Exact | +| likelihood | Exact | +| fusedExonUp | Exact | +| fusedTranscriptUp | Exact | +| fusedExonDown | Exact | +| fusedTranscriptDown | Exact | +| chainLinks | Exact | +| chainTerminated | Exact | +| domainsKept | Exact | +| domainsLost | Exact | +| junctionCopyNumber | Threshold [0.5, 20%] | ### Disruptions -Data key: SampleId, SV coordinates (chromosome, position, orientation), Gene, TranscriptId +Data key: SampleId, Gene + +Per breakend key: SV coordinates (chromosome, position, orientation), TranscriptId (if not canonical) -Field | Match Type & Thresholds ----|--- -reported | Exact -geneOrientation | Exact -regionType | Exact -codingContext | Exact -nextSpliceExonRank | Exact -undisruptedCopyNumber | max(0.2, 10%) - not currently checked +| Field | Match Type & Thresholds | +|--------------------|-------------------------| +| reported | Exact | +| regionType | Exact | +| codingType | Exact | +| nextSpliceExonRank | Exact | ### Germline SV -Data key: SampleId, SV coordinates (chromosome, position, orientation), Gene +Data key: SampleId, Gene + +Per breakend key: SV coordinates (chromosome, position, orientation), TranscriptId (if not canonical) -Field | Match Type & Thresholds ----|--- -reported | Exact -qual | Threshold [20, 20%] -germlineFragments | Threshold [5, 10%] +| Field | Match Type & Thresholds | +|--------------------|-------------------------| +| reported | Exact | +| regionType | Exact | +| codingType | Exact | +| nextSpliceExonRank | Exact | ### Cuppa -Data key: SampleId, ClassifierName +Data key: SampleId, ClassifierName, DataType -Field | Match Type & Thresholds ----|--- -topRefCancerType | Exact -topRefValue | Threshold [10%] +| Field | Match Type & Thresholds | +|---------------|-------------------------| +| topCancerType | Exact | +| probability | Threshold [0.1] | ### Chord Data key: SampleId -Field | Match Type & Thresholds ----|--- -BRCA1 | Threshold [0.1] -BRCA2 | Threshold [0.1] -status | Exact -type | Exact -hrdScore | 
Threshold [0.1] +| Field | Match Type & Thresholds | +|----------|-------------------------| +| BRCA1 | Threshold [0.1] | +| BRCA2 | Threshold [0.1] | +| status | Exact | +| type | Exact | +| hrdScore | Threshold [0.1] | ### Lilac Data key: SampleId -Field | Match Type & Thresholds ----|--- -status | Exact -alleles | Exact - checks all 6 alleles match -somaticVariants | Exact - number of annotated somatic variants match +| Field | Match Type & Thresholds | +|-----------------------------|----------------------------------------------------------| +| status | Exact | +| alleles | Exact - checks all 6 alleles match | +| somaticMissense | Threshold [0.4, 10%] - checks per allele | +| somaticNonsenseOrFrameshift | Threshold [0.4, 10%] - checks per allele | +| somaticSplice | Threshold [0.4, 10%] - checks per allele | +| somaticInframeIndel | Threshold [0.4, 10%] - checks per allele | +| somaticSynonymous | Threshold [0.4, 10%] - checks per allele (DETAILED only) | +| refTotalFragments | Threshold [10, 1%] - checks per allele (DETAILED only) | +| tumorTotalFragments | Threshold [10, 1%] - checks per allele (DETAILED only) | +| tumorCopyNumber | Threshold [0.5, 15%] - checks per allele | +| totalFragments | Threshold [10, 1%] | +| fittedFragments | Threshold [10, 1%] | +| discardedAlignmentFragments | Threshold [10, 1%] | +| discardedIndels | Threshold [10, 1%] | +| hlaYAllele | Exact | + +### Peach +Data key: SampleId, Gene, HaplotypeName + +| Field | Match Type & Thresholds | +|------------------|-------------------------| +| alleleCount | Exact | +| function | Exact | +| drugs | Exact | +| prescriptionUrls | Exact | + +### Virus +Data key: SampleId, Name + +| Field | Match Type & Thresholds | +|------------------|-------------------------| +| reported | Exact | +| integrations | Threshold [20%] | +| meanCoverage | Threshold [15%] | +| driverLikelihood | Exact | + +### Flagstat +For both tumor and germline sample + +Data key: SampleId + +| Field | Match Type & 
Thresholds | +|------------------|-------------------------| +| mappedProportion | Threshold [0.01] | + +### Tumor BAM Metrics +Data key: SampleId + +| Field | Match Type & Thresholds | +|---------------------|-------------------------| +| DuplicatePercentage | Threshold [0.05] | +| Percentage30X | Threshold [0.03] | +| Percentage60X | Threshold [0.03] | + +### Germline BAM Metrics +Data key: SampleId + +| Field | Match Type & Thresholds | +|---------------------|-------------------------| +| DuplicatePercentage | Threshold [0.05] | +| Percentage10X | Threshold [0.03] | +| Percentage20X | Threshold [0.03] | + +### SNP Check +Data key: SampleId, Chromosome, Position, Ref + +| Field | Match Type & Thresholds | +|----------|-------------------------| +| Alt | Exact | +| Genotype | Exact | + +### Copy Number +Only runs in DETAILED mode. + +Data key: SampleId, Chromosome, StartPosition, EndPosition + +| Field | Match Type & Thresholds | +|----------------------|-------------------------| +| CopyNumber | Threshold [0.5, 15%] | +| MajorAlleleCopyNumber | Threshold [0.5, 15%] | +| Method | Exact | + +### Gene Copy Number +Only runs in DETAILED mode. 
+ +Data key: SampleId, Gene + +| Field | Match Type & Thresholds | +|---------------|-------------------------| +| MinCopyNumber | Threshold [0.5, 15%] | +| MaxCopyNumber | Threshold [0.5, 15%] | ## Version History and Download Links +- [1.2](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.2) - [1.1](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.1) - [1.0](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.0) From d7b39f9d7730fe1a3340e434d4c5db9da366acd7 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Mon, 30 Sep 2024 16:03:40 +0200 Subject: [PATCH 47/53] Compar: DEV-4061: Update Readme --- compar/README.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/compar/README.md b/compar/README.md index ea96451365..2502aeb302 100644 --- a/compar/README.md +++ b/compar/README.md @@ -34,23 +34,25 @@ The key configuration values to set are: | db_source_ref & db_source_new | DB connection details for ref and new sample data - see format below | | output_dir | Path for output file | -** set of tools are: linx, linx_germline, purple, chord, cuppa, lilac, peach, virus (i.e. virus-interpreter) +** set of tools are: linx, linx_germline, purple, chord, cuppa, lilac, peach, virus (i.e. virus-interpreter), snp_genotype, tumor_flagstat, germline_flagstat, tumor_bam_metrics and germline_bam_metrics. The available categories are: PURITY, DRIVER, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION, GERMLINE_SV, FUSION, DISRUPTION, CUPPA, -CHORD, LILAC, PEACH, VIRUS, TUMOR_FLAGSTAT, GERMLINE_FLAGSTAT, TUMOR_METRICS, GERMLINE_METRICS, SNP_GENOTYPE, COPY_NUMBER, GENE_COPY_NUMBER. +CHORD, LILAC, PEACH, VIRUS, TUMOR_FLAGSTAT, GERMLINE_FLAGSTAT, TUMOR_BAM_METRICS, GERMLINE_BAM_METRICS, SNP_GENOTYPE, COPY_NUMBER, GENE_COPY_NUMBER. The category PANEL is equivalent to PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT, TUMOR_BAM_METRICS and SNP_GENOTYPE. 
### Optional configuration -| Filter | Description | -|---------------------|---------------------------------------------------------------------------------------------------------------| -| germline_sample | Germline sample ID. Defaults to tumor sample ID with "-ref" appended | -| output_id | Outfile file suffix | -| driver_gene_panel | Used to check alternate transcript changes and to limit analysis of somatics and gene copy number comparisons | -| restrict_to_drivers | Limit analysis to genes within the panel | -| write_detailed | Write a file per compared category | +| Filter | Description | +|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------------| +| germline_sample | Germline sample ID. Defaults to tumor sample ID with "-ref" appended | +| output_id | Outfile file suffix | +| driver_gene_panel | Used to check alternate transcript changes and to limit analysis of somatics and gene copy number comparisons | +| restrict_to_drivers | Limit analysis to genes within the panel | +| write_detailed | Write a file per compared category | +| somatic_unfiltered_vcf_ref & somatic_unfiltered_vcf_new | VCF of unfiltered somatic variants (i.e. 
SAGE) for detecting filtering reason | +| liftover | Apply liftover to relevant fields for pipeline run comparison across reference genome versions (V37/V38) | ### Sample ID Mappings If the same patient has different sample IDs for different runs and these are used for all filenames, then specify these mappings in the sample ID file, eg: @@ -347,7 +349,7 @@ Data key: SampleId | Percentage10X | Threshold [0.03] | | Percentage20X | Threshold [0.03] | -### SNP Check +### SNP Genotype Data key: SampleId, Chromosome, Position, Ref | Field | Match Type & Thresholds | From 790a32e31e4f1cb243acbc0e528dba2f2eaecdd7 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 2 Oct 2024 09:20:48 +0200 Subject: [PATCH 48/53] Compar: DEV-4061: Cleanup small issues --- .../hartwig/hmftools/compar/mutation/GermlineVariantData.java | 1 - .../hartwig/hmftools/compar/purple/CopyNumberComparer.java | 3 --- .../hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java | 3 +-- .../java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java | 4 ---- .../java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java | 1 - 5 files changed, 1 insertion(+), 11 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java index e2972c9ba4..325b135f94 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/mutation/GermlineVariantData.java @@ -150,5 +150,4 @@ protected static void addDisplayValues(final GermlineVariant variant, final List values.add(String.format("%d", variant.allelicDepth().AlleleReadCount)); values.add(String.format("%d", variant.allelicDepth().TotalReadCount)); } - } diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java index 
78f156842e..1b7b014057 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/purple/CopyNumberComparer.java @@ -24,8 +24,6 @@ import com.hartwig.hmftools.compar.common.Mismatch; import com.hartwig.hmftools.patientdb.dao.DatabaseAccess; -import org.jetbrains.annotations.NotNull; - public class CopyNumberComparer implements ItemComparer { private final ComparConfig mConfig; @@ -90,7 +88,6 @@ public List loadFromFile(final String sampleId, final String ger return comparableItems; } - @NotNull private CopyNumberData createCopyNumberData(final PurpleCopyNumber copyNumber, final String fileSource) { BasePosition comparisonPositionStart = determineComparisonGenomePosition( diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java b/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java index dbfd8b0554..5f7513e980 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/snpgenotype/SnpGenotypeData.java @@ -50,8 +50,7 @@ public String key() { if(mComparisonPosition.Position != Position) { - return String.format("%s:%d %s liftover(%s)", - Chromosome, Position, Ref, mComparisonPosition); + return String.format("%s:%d %s liftover(%s)", Chromosome, Position, Ref, mComparisonPosition); } else { diff --git a/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java b/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java index 98d9c7d1e2..0fe05924ee 100644 --- a/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java +++ b/orange/src/main/java/com/hartwig/hmftools/orange/OrangeWGSRefConfig.java @@ -1,6 +1,5 @@ package com.hartwig.hmftools.orange; -import static com.hartwig.hmftools.common.peach.PeachGenotypeFile.generateOldPythonFileName; import static 
com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.CHORD_DIR; import static com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.CUPPA_DIR; import static com.hartwig.hmftools.common.pipeline.PipelineToolDirectories.LINX_GERMLINE_DIR; @@ -30,9 +29,6 @@ import static com.hartwig.hmftools.orange.OrangeApplication.LOGGER; import static com.hartwig.hmftools.orange.OrangeConfig.TUMOR_SAMPLE_ID; import static com.hartwig.hmftools.orange.util.PathUtil.mandatoryPath; -import static com.hartwig.hmftools.orange.util.PathUtil.optionalPath; - -import java.io.File; import com.hartwig.hmftools.common.chord.ChordDataFile; import com.hartwig.hmftools.common.cuppa.CuppaPredictions; diff --git a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java index 0d985211d1..29ea15ef1e 100644 --- a/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java +++ b/patient-db/src/main/java/com/hartwig/hmftools/patientdb/dao/PeachDAO.java @@ -4,7 +4,6 @@ import static com.hartwig.hmftools.patientdb.database.hmfpatients.Tables.PEACHGENOTYPE; import java.sql.Timestamp; -import java.util.ArrayList; import java.util.Date; import java.util.List; From ac4ee8cffed2a7bdb77a16ced2c79584bb70b338 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Wed, 2 Oct 2024 09:21:14 +0200 Subject: [PATCH 49/53] Compar: DEV-4061: Change fusion field names to consistent format --- .../hmftools/compar/linx/FusionData.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java index 39afb427c5..7c97ccc360 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/linx/FusionData.java @@ -22,15 +22,15 @@ public class FusionData implements 
ComparableItem protected static final String FLD_REPORTED_TYPE = "ReportedType"; protected static final String FLD_PHASED = "Phased"; - protected static final String FLD_LIKELIHOOD = "likelihood"; - protected static final String FLD_TRANSCRIPT_UP = "fusedTranscriptUp"; - protected static final String FLD_EXON_UP = "fusedExonUp"; - protected static final String FLD_TRANSCRIPT_DOWN = "fusedTranscriptDown"; - protected static final String FLD_EXON_DOWN = "fusedExonDown"; - protected static final String FLD_CHAIN_LINKS = "chainLinks"; - protected static final String FLD_CHAIN_TERM = "chainTerminated"; - protected static final String FLD_DOMAINS_KEPT = "domainsKept"; - protected static final String FLD_DOMAINS_LOST = "domainsLost"; + protected static final String FLD_LIKELIHOOD = "Likelihood"; + protected static final String FLD_TRANSCRIPT_UP = "FusedTranscriptUp"; + protected static final String FLD_EXON_UP = "FusedExonUp"; + protected static final String FLD_TRANSCRIPT_DOWN = "FusedTranscriptDown"; + protected static final String FLD_EXON_DOWN = "FusedExonDown"; + protected static final String FLD_CHAIN_LINKS = "ChainLinks"; + protected static final String FLD_CHAIN_TERM = "ChainTerminated"; + protected static final String FLD_DOMAINS_KEPT = "DomainsKept"; + protected static final String FLD_DOMAINS_LOST = "DomainsLost"; protected static final String FLD_JUNCTION_COPY_NUMBER = "JunctionCopyNumber"; public FusionData(final LinxFusion fusion, final String geneMappedName) From 2b833c44e1b7e7a775622b43a401642784449278 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 24 Oct 2024 13:30:31 +0200 Subject: [PATCH 50/53] Compar: DEV-4061: Fix crash in Teal comparison --- .../java/com/hartwig/hmftools/compar/teal/TealComparer.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/teal/TealComparer.java b/compar/src/main/java/com/hartwig/hmftools/compar/teal/TealComparer.java index 48ae1fb92d..4738f9e2d5 
100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/teal/TealComparer.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/teal/TealComparer.java @@ -5,6 +5,7 @@ import static com.hartwig.hmftools.compar.teal.TealData.FLD_TELOMERE_LENGTH; import java.io.UncheckedIOException; +import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -60,15 +61,17 @@ public List loadFromDb(final String sampleId, final DatabaseAcce @Override public List loadFromFile(final String sampleId, final String germlineSampleId, final FileSources fileSources) { + final List comparableItems = new ArrayList<>(); try { TelomereLength telomereLength = TelomereLengthFile.read(TelomereLengthFile.generateFilename(fileSources.Teal, sampleId)); - return List.of(new TealData(telomereLength)); + comparableItems.add(new TealData(telomereLength)); } catch(UncheckedIOException e) { CMP_LOGGER.warn("sample({}) failed to load Teal data: {}", sampleId, e.toString()); return null; } + return comparableItems; } } From 875de85208b9a12e3f5298f4c3921097b38589cd Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 24 Oct 2024 14:34:51 +0200 Subject: [PATCH 51/53] Compar: DEV-4061: Update Readme --- compar/README.md | 54 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/compar/README.md b/compar/README.md index 2502aeb302..5aa5bc269c 100644 --- a/compar/README.md +++ b/compar/README.md @@ -37,7 +37,8 @@ The key configuration values to set are: ** set of tools are: linx, linx_germline, purple, chord, cuppa, lilac, peach, virus (i.e. virus-interpreter), snp_genotype, tumor_flagstat, germline_flagstat, tumor_bam_metrics and germline_bam_metrics. 
The available categories are: PURITY, DRIVER, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION, GERMLINE_SV, FUSION, DISRUPTION, CUPPA, -CHORD, LILAC, PEACH, VIRUS, TUMOR_FLAGSTAT, GERMLINE_FLAGSTAT, TUMOR_BAM_METRICS, GERMLINE_BAM_METRICS, SNP_GENOTYPE, COPY_NUMBER, GENE_COPY_NUMBER. +CHORD, LILAC, PEACH, VIRUS, TUMOR_FLAGSTAT, GERMLINE_FLAGSTAT, TUMOR_BAM_METRICS, GERMLINE_BAM_METRICS, SNP_GENOTYPE, COPY_NUMBER, GENE_COPY_NUMBER, +CDR3_SEQUENCE, CDR3_LOCUS_SUMMARY, TELOMERE_LENGTH. The category PANEL is equivalent to PURITY, DRIVER, SOMATIC_VARIANT, FUSION, DISRUPTION, TUMOR_FLAGSTAT, TUMOR_BAM_METRICS and SNP_GENOTYPE. @@ -79,12 +80,13 @@ Specify one or more tool directories to override the pipeline default paths. | PURITY, SOMATIC_VARIANT, GERMLINE_VARIANT, GERMLINE_DELETION | purple_dir | | FUSION, DISRUPTION | linx_dir | | GERMLINE_SV | linx_germline_dir | -| PURITY | purple_dir | | CUPPA | cuppa_dir | | CHORD | chord_dir | | LILAC | lilac_dir | | PEACH | peach_dir | | VIRUS | virus_dir | +| CDR3_SEQUENCE, CDR3_LOCUS_SUMMARY | cider_dir | +| TELOMERE_LENGTH | teal_dir | Wildcards '*' can be used in place of sampleIds, in which case Compar will replace the wildcard with the sampleId for each path. Similarly, '$' can be used in place of germline sample IDs.
@@ -336,26 +338,47 @@ Data key: SampleId | Field | Match Type & Thresholds | |---------------------|-------------------------| -| DuplicatePercentage | Threshold [0.05] | -| Percentage30X | Threshold [0.03] | -| Percentage60X | Threshold [0.03] | +| duplicatePercentage | Threshold [0.05] | +| percentage30X | Threshold [0.03] | +| percentage60X | Threshold [0.03] | ### Germline BAM Metrics Data key: SampleId | Field | Match Type & Thresholds | |---------------------|-------------------------| -| DuplicatePercentage | Threshold [0.05] | -| Percentage10X | Threshold [0.03] | -| Percentage20X | Threshold [0.03] | +| duplicatePercentage | Threshold [0.05] | +| percentage10X | Threshold [0.03] | +| percentage20X | Threshold [0.03] | ### SNP Genotype Data key: SampleId, Chromosome, Position, Ref | Field | Match Type & Thresholds | |----------|-------------------------| -| Alt | Exact | -| Genotype | Exact | +| alt | Exact | +| genotype | Exact | + +### CDR3 Sequence (Cider) +Data key: SampleId, Cdr3Seq, Filter, Locus + +| Field | Match Type & Thresholds | +|-------|-------------------------| +| N/A | N/A | + +### CDR3 Locus Summary (Cider) +Data key: SampleId, Locus + +| Field | Match Type & Thresholds | +|---------------|-------------------------| +| passSequences | Threshold [5%] | + +### Telomere lengths +Data key: SampleId, Type (tumor or ref) + +| Field | Match Type & Thresholds | +|----------------|-------------------------| +| telomereLength | Threshold [5%] | ### Copy Number Only runs in DETAILED mode. @@ -364,9 +387,9 @@ Data key: SampleId, Chromosome, StartPosition, EndPosition | Field | Match Type & Thresholds | |----------------------|-------------------------| -| CopyNumber | Threshold [0.5, 15%] | -| MajorAlleleCopyNumer | Threshold [0.5, 15%] | -| Method | Exact | +| copyNumber | Threshold [0.5, 15%] | +| majorAlleleCopyNumber | Threshold [0.5, 15%] | +| method | Exact | ### Gene Copy Number Only runs in DETAILED mode.
@@ -375,10 +398,11 @@ Data key: SampleId, Gene | Field | Match Type & Thresholds | |---------------|-------------------------| -| MinCopyNumber | Threshold [0.5, 15%] | -| MaxCopyNumer | Threshold [0.5, 15%] | +| minCopyNumber | Threshold [0.5, 15%] | +| maxCopyNumber | Threshold [0.5, 15%] | ## Version History and Download Links +- [1.3](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.3) - [1.2](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.2) - [1.1](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.1) - [1.0](https://github.com/hartwigmedical/hmftools/releases/tag/compar-v1.0) From 918233e373c700e9df54fcd2b49d816a69b09469 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 24 Oct 2024 15:16:18 +0200 Subject: [PATCH 52/53] Compar: DEV-4061: Match header and fields for CDR3LocusSummary --- .../com/hartwig/hmftools/compar/cider/Cdr3LocusSummaryData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compar/src/main/java/com/hartwig/hmftools/compar/cider/Cdr3LocusSummaryData.java b/compar/src/main/java/com/hartwig/hmftools/compar/cider/Cdr3LocusSummaryData.java index 7791fab24e..554a40973a 100644 --- a/compar/src/main/java/com/hartwig/hmftools/compar/cider/Cdr3LocusSummaryData.java +++ b/compar/src/main/java/com/hartwig/hmftools/compar/cider/Cdr3LocusSummaryData.java @@ -39,7 +39,7 @@ public String key() @Override public List displayValues() { - return List.of(Cdr3LocusSummary.locus(), String.valueOf(Cdr3LocusSummary.passSequences())); + return List.of(String.valueOf(Cdr3LocusSummary.passSequences())); } @Override From 168ec28f70a1bc97cf05dabd23d799fa761d8bd9 Mon Sep 17 00:00:00 2001 From: David Koetsier Date: Thu, 31 Oct 2024 15:17:13 +0100 Subject: [PATCH 53/53] Compar: DEV-4061: Fix Cider in Readme --- compar/README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/compar/README.md b/compar/README.md index 5aa5bc269c..d8ee41965b 100644 --- a/compar/README.md
+++ b/compar/README.md @@ -360,11 +360,12 @@ Data key: SampleId, Chromosome, Position, Ref | genotype | Exact | ### CDR3 Sequence (Cider) -Data key: SampleId, Cdr3Seq, Filter, Locus +Data key: SampleId, Cdr3AA, Cdr3Seq -| Field | Match Type & Thresholds | -|-------|-------------------------| -| N/A | N/A | +| Field | Match Type & Thresholds | +|--------|-------------------------| +| Filter | Exact | +| Locus | Exact | ### CDR3 Locus Summary (Cider) Data key: SampleId, Locus