From 2bfdcf01a4bab2a13681df6aa8b61aa75a9d9f29 Mon Sep 17 00:00:00 2001 From: Charles Shale Date: Wed, 12 Jun 2024 09:12:33 +1000 Subject: [PATCH] Sage: use minimum sequence dictionary in sync fragment consensus read building --- .../hartwig/hmftools/sage/ReferenceData.java | 4 +++ .../sage/evidence/ReadContextCounter.java | 2 +- .../sage/evidence/ReadContextEvidence.java | 2 +- .../hmftools/sage/sync/CombinedSyncData.java | 14 ++++---- .../hmftools/sage/sync/FragmentSync.java | 35 +++++++++++++++++-- .../hmftools/sage/sync/FragmentSyncTest.java | 6 ++++ 6 files changed, 53 insertions(+), 10 deletions(-) diff --git a/sage/src/main/java/com/hartwig/hmftools/sage/ReferenceData.java b/sage/src/main/java/com/hartwig/hmftools/sage/ReferenceData.java index 32d7b79be2..d96f1cf9e0 100644 --- a/sage/src/main/java/com/hartwig/hmftools/sage/ReferenceData.java +++ b/sage/src/main/java/com/hartwig/hmftools/sage/ReferenceData.java @@ -21,12 +21,16 @@ import com.hartwig.hmftools.common.genome.bed.NamedBedFile; import com.hartwig.hmftools.common.genome.chromosome.Chromosome; import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome; +import com.hartwig.hmftools.common.genome.refgenome.RefGenomeSource; import com.hartwig.hmftools.common.hla.HlaCommon; import com.hartwig.hmftools.common.utils.config.ConfigBuilder; import com.hartwig.hmftools.common.region.BaseRegion; import com.hartwig.hmftools.common.variant.hotspot.VariantHotspot; import com.hartwig.hmftools.common.variant.hotspot.VariantHotspotFile; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.reference.IndexedFastaSequenceFile; public class ReferenceData diff --git a/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextCounter.java b/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextCounter.java index 8686f40d4b..cfd96a239d 100644 --- a/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextCounter.java +++ b/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextCounter.java @@ -326,7 +326,7 @@ public ReadMatchType processRead(final SAMRecord record, int numberOfEvents, @Nu return MAP_QUAL; } - if(mConfig.Quality.HighBaseMode && isChimericRead(record)) + if(mConfig.Quality.HighBaseMode && fragmentData == null && isChimericRead(record)) { addVariantVisRecord(record, MatchType.NONE, null, fragmentData); return CHIMERIC; diff --git a/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextEvidence.java b/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextEvidence.java index 9421057874..4b841b8ee5 100644 --- a/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextEvidence.java +++ b/sage/src/main/java/com/hartwig/hmftools/sage/evidence/ReadContextEvidence.java @@ -60,7 +60,7 @@ public ReadContextEvidence( mRefGenome = refGenome; mFactory = new ReadContextCounterFactory(config); mQualityRecalibrationMap = qualityRecalibrationMap; - mFragmentSync = new FragmentSync(this); + mFragmentSync = new FragmentSync(this, refGenome); mRefSequence = null; mReadCounters = null; diff --git a/sage/src/main/java/com/hartwig/hmftools/sage/sync/CombinedSyncData.java b/sage/src/main/java/com/hartwig/hmftools/sage/sync/CombinedSyncData.java index c3d0f08a97..bc88308c87 100644 --- a/sage/src/main/java/com/hartwig/hmftools/sage/sync/CombinedSyncData.java +++ b/sage/src/main/java/com/hartwig/hmftools/sage/sync/CombinedSyncData.java @@ -24,6 +24,7 @@ import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; +import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordSetBuilder; @@ -50,7 +51,7 @@ public CombinedSyncData() mCombinedEffectiveStart = 0; } - public static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final SAMRecord second) + public static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final SAMRecord second, final SAMFileHeader samHeader) { if(!positionsOverlap(first.getAlignmentStart(), first.getAlignmentEnd(), second.getAlignmentStart(), second.getAlignmentEnd())) { @@ -74,15 +75,18 @@ public static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final return new FragmentSyncOutcome(BASE_MISMATCH); } - return new FragmentSyncOutcome(combinedSyncData.buildSyncedRead(first, second), COMBINED); + return new FragmentSyncOutcome(combinedSyncData.buildSyncedRead(first, second, samHeader), COMBINED); } - private SAMRecord buildSyncedRead(final SAMRecord first, final SAMRecord second) + private SAMRecord buildSyncedRead(final SAMRecord first, final SAMRecord second, final SAMFileHeader samHeader) { SAMRecordSetBuilder recordBuilder = new SAMRecordSetBuilder(); recordBuilder.setUnmappedHasBasesAndQualities(false); - recordBuilder.setHeader(first.getHeader()); + if(samHeader != null) + recordBuilder.setHeader(samHeader); + else + recordBuilder.setHeader(first.getHeader()); SAMRecord combinedRecord = recordBuilder.addFrag( first.getReadName(), @@ -94,7 +98,6 @@ private SAMRecord buildSyncedRead(final SAMRecord first, final SAMRecord second) combinedRecord.setReadBases(mCombinedBases); combinedRecord.setAlignmentStart(mFragmentStart); - combinedRecord.setReferenceIndex(first.getReferenceIndex()); // also set ref name, same for mate ref below combinedRecord.setBaseQualities(mCombinedBaseQualities); combinedRecord.setMateAlignmentStart(mFragmentStart); @@ -414,5 +417,4 @@ else if(firstQual > secondQual) return new byte[] { secondBase, (byte)max(BASE_QUAL_MINIMUM, secondQual - firstQual) }; } } - } diff --git a/sage/src/main/java/com/hartwig/hmftools/sage/sync/FragmentSync.java b/sage/src/main/java/com/hartwig/hmftools/sage/sync/FragmentSync.java index 918b5d1c33..3f1d0cbd1a 100644 --- a/sage/src/main/java/com/hartwig/hmftools/sage/sync/FragmentSync.java +++ b/sage/src/main/java/com/hartwig/hmftools/sage/sync/FragmentSync.java @@ -5,24 +5,35 @@ import static com.hartwig.hmftools.sage.sync.FragmentSyncType.CIGAR_MISMATCH; import static com.hartwig.hmftools.sage.sync.FragmentSyncType.NO_OVERLAP; +import java.util.List; import java.util.Map; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome; +import com.hartwig.hmftools.common.genome.chromosome.MitochondrialChromosome; +import com.hartwig.hmftools.common.genome.refgenome.RefGenomeInterface; +import com.hartwig.hmftools.common.genome.refgenome.RefGenomeSource; +import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; public class FragmentSync { private final FragmentSyncReadHandler mReadHandler; private final Map mCachedReads; + private final SAMFileHeader mSAMHeader; private final int[] mSyncCounts; - public FragmentSync(final FragmentSyncReadHandler readHandler) + public FragmentSync(final FragmentSyncReadHandler readHandler, final RefGenomeInterface refGenomeSource) { mReadHandler = readHandler; mCachedReads = Maps.newHashMap(); mSyncCounts = new int[FragmentSyncType.values().length]; + mSAMHeader = buildRefSamHeader(refGenomeSource); } public void emptyCachedReads() @@ -47,7 +58,7 @@ public boolean handleOverlappingReads(final SAMRecord record) { try { - FragmentSyncOutcome syncOutcome = CombinedSyncData.formFragmentRead(otherRecord, record); + FragmentSyncOutcome syncOutcome = CombinedSyncData.formFragmentRead(otherRecord, record, mSAMHeader); mCachedReads.remove(record.getReadName()); SAMRecord fragmentRecord = syncOutcome.CombinedRecord; @@ -131,4 +142,24 @@ else if(syncOutcome.SyncType.processSeparately()) mCachedReads.put(record.getReadName(), record); return true; } + + private SAMFileHeader buildRefSamHeader(final RefGenomeInterface refGenomeInterface) + { + if(!(refGenomeInterface instanceof RefGenomeSource)) + return null; + + RefGenomeSource refGenomeSource = (RefGenomeSource)refGenomeInterface; + List sequenceRecords = Lists.newArrayList(); + + SAMSequenceDictionary refGenomeDict = refGenomeSource.refGenomeFile().getSequenceDictionary(); + SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); + + for(SAMSequenceRecord sequence : refGenomeDict.getSequences()) + { + if(HumanChromosome.contains(sequence.getSequenceName()) || MitochondrialChromosome.contains(sequence.getSequenceName())) + dictionary.addSequence(sequence); + } + + return new SAMFileHeader(dictionary); + } } diff --git a/sage/src/test/java/com/hartwig/hmftools/sage/sync/FragmentSyncTest.java b/sage/src/test/java/com/hartwig/hmftools/sage/sync/FragmentSyncTest.java index 5365f31388..b61852c274 100644 --- a/sage/src/test/java/com/hartwig/hmftools/sage/sync/FragmentSyncTest.java +++ b/sage/src/test/java/com/hartwig/hmftools/sage/sync/FragmentSyncTest.java @@ -23,6 +23,7 @@ import org.apache.logging.log4j.util.Strings; import org.junit.Test; +import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; public class FragmentSyncTest @@ -422,4 +423,9 @@ public void testTruncatedFragmentIndelMismatch() assertEquals(REF_BASES.substring(10, 40), readBases); assertEquals(FragmentSyncType.COMBINED, syncOutcome.SyncType); } + + private static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final SAMRecord second) + { + return CombinedSyncData.formFragmentRead(first, second, null); + } }