From 11446ed858ba365dbdeb6de867103b29ae5bb74d Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Thu, 4 Mar 2021 12:02:32 +0900 Subject: [PATCH 1/3] migrate docker container for UniverSC --- docker/cellranger/Dockerfile | 1 - docker/universc/Dockerfile | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 docker/cellranger/Dockerfile create mode 100644 docker/universc/Dockerfile diff --git a/docker/cellranger/Dockerfile b/docker/cellranger/Dockerfile deleted file mode 100644 index 2725749b..00000000 --- a/docker/cellranger/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM tomkellygenetics/cellranger_clean:3.0.2.9001 diff --git a/docker/universc/Dockerfile b/docker/universc/Dockerfile new file mode 100644 index 00000000..56970607 --- /dev/null +++ b/docker/universc/Dockerfile @@ -0,0 +1 @@ +FROM tomkellygenetics/universc:1.0.3 From 9ab4a3ba750ac8368589426e0e9e9c132985880d Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Thu, 4 Mar 2021 12:24:21 +0900 Subject: [PATCH 2/3] replace Cell Ranger call with UniverSC for 10x Genomics --- bin/scrape_software_versions.py | 1 + conf/base.config | 6 +++--- docs/output.md | 9 +++++++-- main.nf | 5 +++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 4570153d..90b2b7bd 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -12,6 +12,7 @@ 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], 'bcl2fastq': ['v_bcl2fastq.txt', r"bcl2fastq v(\S+)"], 'CellRanger': ['v_cellranger.txt', r"cellranger mkfastq (\S+)"] + 'UniverSC': ['v_universc.txt', r"UniverSC v(\S+)"] #'CellRangerATAC': ['v_cellrangeratac.txt', r"CellRangerATAC, version (\S+)"], #'CellRangerDNA': ['v_cellrangerdna.txt', r"CellRangerDNA, version (\S+)"], } diff --git a/conf/base.config b/conf/base.config index a8fac399..93ee5a73 100755 --- a/conf/base.config +++ b/conf/base.config @@ -54,11 +54,11 @@ process { withName: 
bcl2fastq_problem_SS { container = 'nfcore/demultiplex:bcl2fastq-2.20.0' } - withName: cellRangerCount { - container = 'nfcore/demultiplex:cellranger-3.0.2.9001' + withName: UniverSC { + container = 'nfcore/demultiplex:universc-1.0.3' } withName: cellRangerMkFastQ { - container = 'nfcore/demultiplex:cellranger-3.0.2.9001' + container = 'nfcore/demultiplex:universc-1.0.3' } } diff --git a/docs/output.md b/docs/output.md index 5f89d99c..b02f2e78 100755 --- a/docs/output.md +++ b/docs/output.md @@ -18,8 +18,9 @@ and processes data using the following steps: * Recheck newly made sample sheet for any errors or problem samples that did not match any indexes in the Stats.json file. If there is still an issue the pipeline will exit at this stage. * [bcl2fastq](#bcl2fastq) - converting bcl files to fastq, and demultiplexing (CONDITIONAL) * Processes that only run if there are 10X samples on the sample sheet input (CONDITIONAL): - * [CellRanger](#cellranger) - demultiplexes raw base call (BCL) files generated by Illumina sequencers into FASTQ files and is a wrapper around Illumina's bcl2fastq - * [CellRangerCount](#cellrangercount) - performs alignment, filtering, barcode counting, and UMI counting + * [CellRanger](#cellranger) - demultiplexes raw base call (BCL) files generated by Illumina sequencers into FASTQ files and is a wrapper around Illumina's bcl2fastq to support 10x Genomics sample Index Sets +* Processes that only run if there are single-cell samples on the sample sheet input (CONDITIONAL): + * [UniverSC](#universc) - performs alignment, filtering, barcode counting, and UMI counting * [FastQC](#fastqc) - read quality control * [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline @@ -54,6 +55,10 @@ and processes data using the following steps: * `outs/metrics_summary.csv` * Run summary metrics in CSV format +## UniverSC + +[UniverSC](https://github.com/minoda-lab/universc) is a flexible cross-platform single-cell data processing pipeline 
that enables demultiplexing of data from any UMI-based technology. `launch_universc.sh` automatically renames and converts FASTQ files for compatibility with `cellranger count`, which is then called. Presets, including barcode whitelists, are provided for various technologies. This provides the same summary information as for 10x above but does not support the "Loupe" browser. + ## FastQC [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. diff --git a/main.nf b/main.nf index 26b3d569..f8a4c0b6 100755 --- a/main.nf +++ b/main.nf @@ -436,7 +436,7 @@ cr_fqname_fqfile_ch tuple(sampleID, projectName, refGenome, dataType, fastqDir) } .set { cr_grouped_fastq_dir_sample_ch } -process cellRangerCount { +process UniverSC { tag "${projectName}/${sampleID}" publishDir "${params.outdir}/${runName}", mode: 'copy', saveAs: { filename -> @@ -459,7 +459,7 @@ process cellRangerCount { script: genome_ref_conf_filepath = params.cellranger_genomes.get(refGenome, false) """ - cellranger count --id=$sampleID --transcriptome=${genome_ref_conf_filepath.tenx_transcriptomes} --fastqs=$fastqDir --sample=$sampleID + bash universc/launch_universc.sh --id $sampleID --technology "10x" --reference ${genome_ref_conf_filepath.tenx_transcriptomes} --file ${fastqDir}/${sampleID} """ } @@ -804,6 +804,7 @@ process get_software_versions { multiqc --version > v_multiqc.txt echo \$(bcl2fastq --version 2>&1) > v_bcl2fastq.txt cellranger mkfastq --version > v_cellranger.txt + bash universc/launch_universc.sh --version | tail -2 | head -n 1 | cut -d" " -f3 > v_universc.txt #cellranger-atac --version > v_cellrangeratac.txt #cellranger-dna --version > v_cellrangerdna.txt scrape_software_versions.py &> software_versions_mqc.yaml From 
f048eea17939e9cc39ae72f9cc62fcb40768a6fa Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Thu, 4 Mar 2021 12:28:57 +0900 Subject: [PATCH 3/3] update docs --- CHANGELOG.md | 4 ++++ README.md | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 446405b1..788ed836 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## v1.1alpha + +Migrates from Cell Ranger to UniverSC, which supports additional UMI-based single-cell technologies. + ## v1.0dev - [date] Initial release of nf-core/demultiplex, created with the [nf-core](http://nf-co.re/) template. diff --git a/README.md b/README.md index 8fccb441..9843aee9 100755 --- a/README.md +++ b/README.md @@ -30,7 +30,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 4. Single cell 10X sample processes (CONDITIONAL): NOTE: Must create CONFIG to point to CellRanger genome References 1. Cell Ranger mkfastq runs only when 10X samples exist. This will run the process with [`CellRanger`](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger), [`CellRanger ATAC`](https://support.10xgenomics.com/single-cell-atac/software/pipelines/latest/what-is-cell-ranger-atac), and [`Cell Ranger DNA`](https://support.10xgenomics.com/single-cell-dna/software/pipelines/latest/what-is-cell-ranger-dna) depending on which sample sheet has been created. - 2. Cell Ranger Count runs only when 10X samples exist. 
This will run the process with [`Cell Ranger Count`](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count), [`Cell Ranger ATAC Count`](https://support.10xgenomics.com/single-cell-atac/software/pipelines/latest/using/count), and [`Cell Ranger DNA CNV`](https://support.10xgenomics.com/single-cell-dna/software/pipelines/latest/using/cnv)depending on the output from Cell Ranger mkfastq. 10X reference genomes can be downloaded from the 10X site, a new config would have to be created to point to the location of these. Must add config to point Cell Ranger to genome references if used outside the Crick profile. + 2a. Cell Ranger Count runs only when 10X samples exist. This will run the process with [`Cell Ranger Count`](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count), [`Cell Ranger ATAC Count`](https://support.10xgenomics.com/single-cell-atac/software/pipelines/latest/using/count), and [`Cell Ranger DNA CNV`](https://support.10xgenomics.com/single-cell-dna/software/pipelines/latest/using/cnv) depending on the output from Cell Ranger mkfastq. 10X reference genomes can be downloaded from the 10X site; a new config would have to be created to point to the location of these. Must add config to point Cell Ranger to genome references if used outside the Crick profile. + 2b. [UniverSC](https://github.com/minoda-lab/universc) runs for all other supported single-cell technologies (e.g., DropSeq, ICELL8, SmartSeq3, SureCell) when samples from these technologies are given. 5. [`bcl2fastq`](http://emea.support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html) (CONDITIONAL): 1. Runs on either the original sample sheet that had no error prone samples or on the newly created sample sheet created from the extra steps. 2. This is only run when there are samples left on the sample sheet after removing the single cell samples.