Skip to content

Commit

Permalink
Sage: use minimum sequence dictionary in sync fragment consensus read…
Browse files Browse the repository at this point in the history
… building
  • Loading branch information
charlesshale committed Jun 11, 2024
1 parent 2f374cb commit 2bfdcf0
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,16 @@
import com.hartwig.hmftools.common.genome.bed.NamedBedFile;
import com.hartwig.hmftools.common.genome.chromosome.Chromosome;
import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome;
import com.hartwig.hmftools.common.genome.refgenome.RefGenomeSource;
import com.hartwig.hmftools.common.hla.HlaCommon;
import com.hartwig.hmftools.common.utils.config.ConfigBuilder;
import com.hartwig.hmftools.common.region.BaseRegion;
import com.hartwig.hmftools.common.variant.hotspot.VariantHotspot;
import com.hartwig.hmftools.common.variant.hotspot.VariantHotspotFile;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;

public class ReferenceData
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ public ReadMatchType processRead(final SAMRecord record, int numberOfEvents, @Nu
return MAP_QUAL;
}

if(mConfig.Quality.HighBaseMode && isChimericRead(record))
if(mConfig.Quality.HighBaseMode && fragmentData == null && isChimericRead(record))
{
addVariantVisRecord(record, MatchType.NONE, null, fragmentData);
return CHIMERIC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public ReadContextEvidence(
mRefGenome = refGenome;
mFactory = new ReadContextCounterFactory(config);
mQualityRecalibrationMap = qualityRecalibrationMap;
mFragmentSync = new FragmentSync(this);
mFragmentSync = new FragmentSync(this, refGenome);

mRefSequence = null;
mReadCounters = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordSetBuilder;

Expand All @@ -50,7 +51,7 @@ public CombinedSyncData()
mCombinedEffectiveStart = 0;
}

public static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final SAMRecord second)
public static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final SAMRecord second, final SAMFileHeader samHeader)
{
if(!positionsOverlap(first.getAlignmentStart(), first.getAlignmentEnd(), second.getAlignmentStart(), second.getAlignmentEnd()))
{
Expand All @@ -74,15 +75,18 @@ public static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final
return new FragmentSyncOutcome(BASE_MISMATCH);
}

return new FragmentSyncOutcome(combinedSyncData.buildSyncedRead(first, second), COMBINED);
return new FragmentSyncOutcome(combinedSyncData.buildSyncedRead(first, second, samHeader), COMBINED);
}

private SAMRecord buildSyncedRead(final SAMRecord first, final SAMRecord second)
private SAMRecord buildSyncedRead(final SAMRecord first, final SAMRecord second, final SAMFileHeader samHeader)
{
SAMRecordSetBuilder recordBuilder = new SAMRecordSetBuilder();
recordBuilder.setUnmappedHasBasesAndQualities(false);

recordBuilder.setHeader(first.getHeader());
if(samHeader != null)
recordBuilder.setHeader(samHeader);
else
recordBuilder.setHeader(first.getHeader());

SAMRecord combinedRecord = recordBuilder.addFrag(
first.getReadName(),
Expand All @@ -94,7 +98,6 @@ private SAMRecord buildSyncedRead(final SAMRecord first, final SAMRecord second)

combinedRecord.setReadBases(mCombinedBases);
combinedRecord.setAlignmentStart(mFragmentStart);
combinedRecord.setReferenceIndex(first.getReferenceIndex()); // also set ref name, same for mate ref below

combinedRecord.setBaseQualities(mCombinedBaseQualities);
combinedRecord.setMateAlignmentStart(mFragmentStart);
Expand Down Expand Up @@ -414,5 +417,4 @@ else if(firstQual > secondQual)
return new byte[] { secondBase, (byte)max(BASE_QUAL_MINIMUM, secondQual - firstQual) };
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,35 @@
import static com.hartwig.hmftools.sage.sync.FragmentSyncType.CIGAR_MISMATCH;
import static com.hartwig.hmftools.sage.sync.FragmentSyncType.NO_OVERLAP;

import java.util.List;
import java.util.Map;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome;
import com.hartwig.hmftools.common.genome.chromosome.MitochondrialChromosome;
import com.hartwig.hmftools.common.genome.refgenome.RefGenomeInterface;
import com.hartwig.hmftools.common.genome.refgenome.RefGenomeSource;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;

public class FragmentSync
{
private final FragmentSyncReadHandler mReadHandler;
private final Map<String,SAMRecord> mCachedReads;
private final SAMFileHeader mSAMHeader;

private final int[] mSyncCounts;

public FragmentSync(final FragmentSyncReadHandler readHandler)
public FragmentSync(final FragmentSyncReadHandler readHandler, final RefGenomeInterface refGenomeSource)
{
mReadHandler = readHandler;
mCachedReads = Maps.newHashMap();
mSyncCounts = new int[FragmentSyncType.values().length];
mSAMHeader = buildRefSamHeader(refGenomeSource);
}

public void emptyCachedReads()
Expand All @@ -47,7 +58,7 @@ public boolean handleOverlappingReads(final SAMRecord record)
{
try
{
FragmentSyncOutcome syncOutcome = CombinedSyncData.formFragmentRead(otherRecord, record);
FragmentSyncOutcome syncOutcome = CombinedSyncData.formFragmentRead(otherRecord, record, mSAMHeader);
mCachedReads.remove(record.getReadName());

SAMRecord fragmentRecord = syncOutcome.CombinedRecord;
Expand Down Expand Up @@ -131,4 +142,24 @@ else if(syncOutcome.SyncType.processSeparately())
mCachedReads.put(record.getReadName(), record);
return true;
}

private SAMFileHeader buildRefSamHeader(final RefGenomeInterface refGenomeInterface)
{
if(!(refGenomeInterface instanceof RefGenomeSource))
return null;

RefGenomeSource refGenomeSource = (RefGenomeSource)refGenomeInterface;
List<SAMSequenceRecord> sequenceRecords = Lists.newArrayList();

SAMSequenceDictionary refGenomeDict = refGenomeSource.refGenomeFile().getSequenceDictionary();
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();

for(SAMSequenceRecord sequence : refGenomeDict.getSequences())
{
if(HumanChromosome.contains(sequence.getSequenceName()) || MitochondrialChromosome.contains(sequence.getSequenceName()))
dictionary.addSequence(sequence);
}

return new SAMFileHeader(dictionary);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.logging.log4j.util.Strings;
import org.junit.Test;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;

public class FragmentSyncTest
Expand Down Expand Up @@ -422,4 +423,9 @@ public void testTruncatedFragmentIndelMismatch()
assertEquals(REF_BASES.substring(10, 40), readBases);
assertEquals(FragmentSyncType.COMBINED, syncOutcome.SyncType);
}

private static FragmentSyncOutcome formFragmentRead(final SAMRecord first, final SAMRecord second)
{
return CombinedSyncData.formFragmentRead(first, second, null);
}
}

0 comments on commit 2bfdcf0

Please sign in to comment.