From 1d0d08cb1e6d8bb421194e8e88d1547b764c77e1 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Tue, 19 Sep 2023 14:14:49 -0400 Subject: [PATCH] Adding sqanti ML filter rule --- workflow/Snakefile | 5 +++++ workflow/rules/sqanti.smk | 30 ++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 6a6fcf4..58d1fc7 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -264,6 +264,10 @@ rule all: # Characterize and annotate collapsed transcripts from flair # @imported from `rule sqanti_qc` in rules/sqanti.smk join(workpath, "project", "counts", "novel", "sqanti.isoforms_classification.txt"), + # SQANTI3 ML Filter, known/novel isoform quantification + # Filters characterized transcripts from Sqanti QC + # @imported from `rule sqanti_ml_filter` in rules/sqanti.smk + join(workpath, "project", "counts", "novel", "sqanti.isoforms_MLresult_classification.txt"), # Nanopolish polyA tail length estimation, # needs index to map basecalled reads to # raw signal from the ONT sequencer @@ -332,3 +336,4 @@ include: join("rules", "editing_a-to-i.smk") include: join("rules", "polya.smk") include: join("rules", "diffexp.smk") include: join("rules", "altsplice.smk") +include: join("rules", "hooks.smk") \ No newline at end of file diff --git a/workflow/rules/sqanti.smk b/workflow/rules/sqanti.smk index e68eb0e..c062045 100644 --- a/workflow/rules/sqanti.smk +++ b/workflow/rules/sqanti.smk @@ -77,9 +77,35 @@ rule sqanti_ml_filter: https://github.com/ConesaLab/SQANTI3/wiki/ Github: https://github.com/ConesaLab/SQANTI3 @Input: - Sqanti Classification file (TSV) - @Output: + Sqanti Classification file (TSV), + Corrected Annotation (GTF), + Corrected Transcriptome (FASTA) + @Output: ML Filtered Sqanti Classification file (TSV), ML Filtered Corrected Annotation (GTF), ML Filtered Corrected Transcriptome (FASTA) """ + input: + txt = join(workpath, "project", "counts", "novel", 
"sqanti.isoforms_classification.txt"), + fa = join(workpath, "project", "counts", "novel", "sqanti.isoforms_corrected.fasta"), + gtf = join(workpath, "project", "counts", "novel", "sqanti.isoforms_corrected.gtf"), + output: + txt = join(workpath, "project", "counts", "novel", "sqanti.isoforms_MLresult_classification.txt"), + fa = join(workpath, "project", "counts", "novel", "sqanti.isoforms.filtered.fasta"), + gtf = join(workpath, "project", "counts", "novel", "sqanti.isoforms.filtered.gtf"), + params: + rname = "sqanti_ml_filter", + prefix = "sqanti.isoforms", + outdir = join(workpath, "project", "counts", "novel"), + container: depending(config['images']['sqanti3'], use_singularity), + threads: int(allocated("threads", "sqanti_ml_filter", cluster)), + shell: """ + # Applies ML filter on selected + # SQANTI3 QC classification file + sqanti3_filter.py ML \\ + --output {params.prefix} \\ + --dir {params.outdir} \\ + --gtf {input.gtf} \\ + --isoforms {input.fa} \\ + {input.txt} + """ \ No newline at end of file