diff --git a/CHANGELOG.md b/CHANGELOG.md index 392f7718..12fc7b06 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#275](https://github.com/nf-core/demultiplex/pull/275) Update samshee module from nf-core. - [#276](https://github.com/nf-core/demultiplex/pull/276) Template update for nf-core/tools v3.0.2 ### `Fixed` diff --git a/bin/validate_samplesheet.py b/bin/validate_samplesheet.py deleted file mode 100755 index 987e3441..00000000 --- a/bin/validate_samplesheet.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 - -from samshee.samplesheetv2 import read_samplesheetv2 -from samshee.validation import illuminasamplesheetv2schema, illuminasamplesheetv2logic, validate -import json -import sys - -def validate_samplesheet(filename, custom_schema_file=None): - # Load the custom schema if provided - if custom_schema_file: - with open(custom_schema_file, 'r') as f: - custom_schema = json.load(f) - custom_validator = lambda doc: validate(doc, custom_schema) - else: - custom_validator = None - - # Prepare the list of validators - validators = [illuminasamplesheetv2schema, illuminasamplesheetv2logic] - if custom_validator: - validators.append(custom_validator) - # Read and validate the sample sheet - try: - sheet = read_samplesheetv2(filename, validation=validators) - print(f"Validation successful for {filename}") - except Exception as e: - print(f"Validation failed: {e}") - -if __name__ == "__main__": - if len(sys.argv) < 2 or len(sys.argv) > 3: - print("Usage: validate_samplesheet.py [custom_schema.json]") - sys.exit(1) - samplesheet_file = sys.argv[1] - schema_file = sys.argv[2] if len(sys.argv) == 3 else None - - validate_samplesheet(samplesheet_file, schema_file) diff --git a/conf/modules.config b/conf/modules.config index 883a9590..a9093c16 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -231,9 +231,18 @@ process { mode: 
params.publish_dir_mode ] } - // Samshee should fail the entire pipeline immediately as it validated the illumina samplesheet to be valid before the pipeline runs. As such, it should not be running more than once & if it fails should stop the pipeline withName: SAMSHEE { + ext.args = [ + params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "", + params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "", + params.v1_schema ? "--output-format sectioned" : "", + ].join(" ").trim() errorStrategy = "terminate" + publishDir = [ + path: { "${params.outdir}/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } diff --git a/conf/test_full.config b/conf/test_full.config index 40209e9a..d591fdc4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,5 +17,5 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/samplesheet_full.csv' demultiplexer = 'bcl2fastq' - skip_tools = 'samshee' + v1_schema = true } diff --git a/docs/usage.md b/docs/usage.md index 3753857e..1fd50c51 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -97,9 +97,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -:::warning -Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -::: +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. 
Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). The above pipeline run specified with a params file in yaml format: @@ -130,7 +129,16 @@ The trimming process in our demultiplexing pipeline has been updated to ensure c ## samshee (Samplesheet validator) -samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--validator_schema`, which accepts a JSON schema validator file. Users can specify this file to enforce additional validation rules beyond the default ones provided by the tool. To use this feature, simply provide the path to the JSON schema validator file via the `--validator_schema` parameter in the pipeline configuration. This enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information about the tool or how to write the schema JSON file, please refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). +samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameters `--json_schema_validator`, which accepts a JSON schema validation string; `--name_schema_validator`, which accepts a schema name string; and `--file_schema_validator`, which accepts a JSON schema validation file. Users can specify additional validation rules beyond the default ones provided by the tool using any or all of these parameters. This enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information, refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee).
+ +> [!NOTE] +> Samshee assumes all Illumina samplesheets are v2. If working with samples that have an Illumina v1 samplesheet, set the parameter `--v1_schema` to true. +> When using `--json_schema_validator` or `--name_schema_validator`, please note that each expects a JSON value in string format. For example: +> +> ```bash +> --json_schema_validator '{"required": ["Data"]}' +> --name_schema_validator '{"$ref": "urn:samshee:illuminav2/v1"}' +> ``` ### Updating the pipeline @@ -150,15 +158,13 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -:::tip -If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -::: +> [!TIP] +> If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. ## Core Nextflow arguments -:::note -These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -::: +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` @@ -166,9 +172,8 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
-:::info -We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -::: +> [!NOTE] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). diff --git a/modules.json b/modules.json index 3711c3a6..65ceb04c 100644 --- a/modules.json +++ b/modules.json @@ -65,6 +65,11 @@ "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, + "samshee": { + "branch": "master", + "git_sha": "3c464e75051db485c1b37ab9f1ea2182fb3d3533", + "installed_by": ["modules"] + }, "seqtk/sample": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/local/samshee/README.md b/modules/local/samshee/README.md deleted file mode 100644 index 3e8745bc..00000000 --- a/modules/local/samshee/README.md +++ /dev/null @@ -1,84 +0,0 @@ -# Guide to Writing a `validation.json` Schema File - -## Introduction - -A JSON schema defines the structure and constraints of JSON data. This guide will help you create a `validation.json` schema file for use with Samshee to perform additional checks on Illumina® Sample Sheet v2 files. - -## JSON Schema Basics - -JSON Schema is a powerful tool for validating the structure of JSON data. It allows you to specify required fields, data types, and constraints. Here are some common components: - -- **`$schema`**: Declares the JSON Schema version being used. 
-- **`type`**: Specifies the data type (e.g., `object`, `array`, `string`, `number`). -- **`properties`**: Defines the properties of an object and their constraints. -- **`required`**: Lists properties that must be present in the object. -- **`items`**: Specifies the schema for items in an array. - -## Example Schema - -Here’s an example of a `validation.json` schema file for an Illumina® Sample Sheet: - -```json -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "Header": { - "type": "object", - "properties": { - "InvestigatorName": { - "type": "string" - }, - "ExperimentName": { - "type": "string" - } - }, - "required": ["InvestigatorName", "ExperimentName"] - }, - "Reads": { - "type": "object", - "properties": { - "Read1": { - "type": "integer", - "minimum": 1 - }, - "Read2": { - "type": "integer", - "minimum": 1 - } - }, - "required": ["Read1", "Read2"] - }, - "BCLConvert": { - "type": "object", - "properties": { - "Index": { - "type": "string", - "pattern": "^[ACGT]{8}$" // Example pattern for 8-base indices - } - } - } - }, - "required": ["Header", "Reads"] -} -``` - -### Explanation of the Example - -- **`$schema`**: Specifies the JSON Schema version (draft-07). -- **`type`**: Defines the main type as `object`. -- **`properties`**: Lists the properties of the object: -- **`Header`**: An object with required `InvestigatorName` and `ExperimentName` fields. -- **`Reads`**: An object with required `Read1` and `Read2` fields that must be integers greater than or equal to 1. -- **`BCLConvert`**: An object with an optional `Index` field that must be a string matching a pattern for 8-base indices. -- **`required`**: Lists required properties at the top level. - -### Tips for Writing JSON Schemas - -1. **Start Simple**: Begin with basic constraints and gradually add complexity. -2. **Use Online Validators**: Validate your schema using online tools to ensure it adheres to the JSON Schema specification. -3. 
**Refer to Schema Documentation**: Consult the [JSON Schema documentation](https://json-schema.org/) for detailed guidance. - -### Conclusion - -By defining a JSON schema, you can enforce specific rules and ensure that your Illumina® Sample Sheet v2 files meet your required structure and constraints. Use this guide to create and validate your `validation.json` schema files effectively. diff --git a/modules/local/samshee/environment.yml b/modules/local/samshee/environment.yml deleted file mode 100644 index f92e0eee..00000000 --- a/modules/local/samshee/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - python>=3.9 - - pip - - pip: # FIXME https://github.com/nf-core/modules/issues/5814 - - samshee==0.1.12 diff --git a/modules/local/samshee/meta.yml b/modules/local/samshee/meta.yml deleted file mode 100644 index 145ddd24..00000000 --- a/modules/local/samshee/meta.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: samshee -description: Module to validate illumina® Sample Sheet v2 files. -keywords: - - samplesheet - - illumina - - bclconvert - - bcl2fastq -tools: - - samshee: - description: A schema-agnostic parser and writer for illumina® sample sheets v2 and similar documents. - homepage: https://github.com/lit-regensburg/samshee - documentation: https://github.com/lit-regensburg/samshee/blob/main/README.md - tool_dev_url: https://github.com/lit-regensburg/samshee - licence: [MIT license] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', lane:1 ] - - samplesheet: - type: file - description: "illumina v2 samplesheet" - pattern: "*.{csv}" -output: - - versions: - type: file - description: File containing software version - pattern: "versions.yml" -authors: - - "@nschcolnicov" -maintainers: - - "@nschcolnicov" diff --git a/modules/local/samshee/tests/main.nf.test b/modules/local/samshee/tests/main.nf.test deleted file mode 100644 index d76c98f4..00000000 --- a/modules/local/samshee/tests/main.nf.test +++ /dev/null @@ -1,51 +0,0 @@ -// nf-core modules test cellranger/mkfastq -nextflow_process { - - name "Test Process samshee" - script "../main.nf" - config "./nextflow.config" - process "SAMSHEE" - - tag "modules" - - test("test samplesheet") { - - when { - process { - """ - input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] - input[1] = [] - """ - } - } - - then { - assertAll( - { assert process.success } - ) - } - - } - - test("stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true), [] ] - input[1] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - ) - } - - } - -} diff --git a/modules/local/samshee/tests/nextflow.config b/modules/local/samshee/tests/nextflow.config deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/nf-core/samshee/environment.yml b/modules/nf-core/samshee/environment.yml new file mode 100644 index 00000000..35a8e2e7 --- /dev/null +++ b/modules/nf-core/samshee/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::samshee=0.2.1 + - python=3.13.0 diff --git a/modules/local/samshee/main.nf b/modules/nf-core/samshee/main.nf similarity index 50% rename from modules/local/samshee/main.nf rename 
to modules/nf-core/samshee/main.nf index acbf928c..6d7ba2e6 100644 --- a/modules/local/samshee/main.nf +++ b/modules/nf-core/samshee/main.nf @@ -4,47 +4,40 @@ process SAMSHEE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'oras://community.wave.seqera.io/library/python_pip_samshee:84a770c9853c725d' : - 'community.wave.seqera.io/library/python_pip_samshee:e8a5c47ec32efa42' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/65/659cdc3068a6fbce17ccb199bb3afc8600c65940743c1a0214b3bf0eed4df1a3/data' : + 'community.wave.seqera.io/library/pip_samshee:9b655e3c18eee356' }" input: tuple val(meta), path(samplesheet) - path(validator_schema) //optional + path(file_schema_validator) output: - // Module is meant to stop the pipeline if validation fails - path "versions.yml", emit: versions + tuple val(meta), path("*_formatted.csv"), emit: samplesheet + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def arg_validator_schema = validator_schema ? "${validator_schema}" : "" + def arg_file_schema_validator = file_schema_validator ? "--schema '{\"\$ref\": \"file:${file_schema_validator}\"}'" : "" + def args = task.ext.args ?: "" """ # Run validation command and capture output - output=\$(validate_samplesheet.py "${samplesheet}" "${arg_validator_schema}" 2>&1) - status=\$? 
- # Check if validation failed - if echo "\$output" | grep -q "Validation failed:"; then - echo "\$output" # Print output for debugging - exit 1 # Fail the process if validation failed - fi + python -m samshee $samplesheet \ + $args \ + $arg_file_schema_validator \ + > ${samplesheet.baseName}_formatted.csv cat <<-END_VERSIONS > versions.yml "${task.process}": samshee: \$( python -m pip show --version samshee | grep "Version" | sed -e "s/Version: //g" ) python: \$( python --version | sed -e "s/Python //g" ) END_VERSIONS - - # If no validation errors, process exits with status 0 - exit \$status """ stub: """ + touch ${samplesheet.baseName}_formatted.csv cat <<-END_VERSIONS > versions.yml "${task.process}": samshee: \$( python -m pip show --version samshee | grep "Version" | sed -e "s/Version: //g" ) diff --git a/modules/nf-core/samshee/meta.yml b/modules/nf-core/samshee/meta.yml new file mode 100644 index 00000000..50789dfb --- /dev/null +++ b/modules/nf-core/samshee/meta.yml @@ -0,0 +1,47 @@ +name: samshee +description: Module to validate illumina® Sample Sheet v2 files. +keywords: + - samplesheet + - illumina + - bclconvert + - bcl2fastq +tools: + - samshee: + description: A schema-agnostic parser and writer for illumina® sample sheets v2 and similar documents. + homepage: https://github.com/lit-regensburg/samshee + documentation: https://github.com/lit-regensburg/samshee/blob/main/README.md + tool_dev_url: https://github.com/lit-regensburg/samshee + licence: [MIT license] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', lane:1 ] + - samplesheet: + type: file + description: "illumina v2 samplesheet" + pattern: "*.{csv}" + - - file_schema_validator: + type: string + description: "Optional JSON file used additional samplesheet validation settings" +output: + - samplesheet: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', lane:1 ] + - "*_formatted.csv": + type: file + description: "illumina v2 samplesheet" + - versions: + - versions.yml: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@nschcolnicov" +maintainers: + - "@nschcolnicov" diff --git a/modules/nf-core/samshee/tests/main.nf.test b/modules/nf-core/samshee/tests/main.nf.test new file mode 100644 index 00000000..5bec682f --- /dev/null +++ b/modules/nf-core/samshee/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process samshee" + script "../main.nf" + process "SAMSHEE" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "samshee" + + test("test samplesheet_v1") { + + when { + params { + v1_schema = true + json_schema_validator = '{"required": ["Data"]}' + name_schema_validator = null + } + process { + """ + input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/miseq_35147139/miseq_35147139_samplesheet.csv", checkIfExists: true) ] + input[1] = file("schema.json") + new File("schema.json").text = '''{ + "\$schema": "https://json-schema.org/draft/2020-12/schema", + "required": ["Settings"] + }''' + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + test("test samplesheet_v2") { + + when { + params { + v1_schema = null + json_schema_validator = null + name_schema_validator = '{"$ref": "urn:samshee:illuminav2/v1"}' + } + process { + """ + input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] + input[1] = [] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + + test("stub") { + + options "-stub" + + when { + params { + v1_schema = null + json_schema_validator = null + name_schema_validator = null + } + process { + """ + input[0] = [ [ id: 'test', 
lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] + input[1] = [] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/samshee/tests/main.nf.test.snap b/modules/nf-core/samshee/tests/main.nf.test.snap new file mode 100644 index 00000000..b3729eba --- /dev/null +++ b/modules/nf-core/samshee/tests/main.nf.test.snap @@ -0,0 +1,107 @@ +{ + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" + ], + "samplesheet": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-16T15:25:40.722007136" + }, + "test samplesheet_v1": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": 1 + }, + "miseq_35147139_samplesheet_formatted.csv:md5,2a6ee5b13242aeefdeeaa98671f1ee26" + ] + ], + "1": [ + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" + ], + "samplesheet": [ + [ + { + "id": "test", + "lane": 1 + }, + "miseq_35147139_samplesheet_formatted.csv:md5,2a6ee5b13242aeefdeeaa98671f1ee26" + ] + ], + "versions": [ + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-16T15:25:02.353128191" + }, + "test samplesheet_v2": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + ] + ], + "1": [ + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" + ], + "samplesheet": [ + [ + { + "id": "test", + "lane": 1 + }, + 
"SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + ] + ], + "versions": [ + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-16T15:25:24.540910786" + } +} \ No newline at end of file diff --git a/modules/nf-core/samshee/tests/nextflow.config b/modules/nf-core/samshee/tests/nextflow.config new file mode 100644 index 00000000..ecf1ff66 --- /dev/null +++ b/modules/nf-core/samshee/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: SAMSHEE { + ext.args = [ + params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "", + params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "", + params.v1_schema ? "--output-format sectioned" : "", + ].join(" ").trim() + } +} diff --git a/nextflow.config b/nextflow.config index 8d9a6b81..02217b53 100755 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,12 @@ params { input = null demultiplexer = "bclconvert" // enum string [bclconvert, bcl2fastq, bases2fastq, fqtk, sgdemux, mkfastq] + // Options: samshee, Illumina samplesheet validator + v1_schema = false // [true, false] + json_schema_validator = null // string + name_schema_validator = null // string + file_schema_validator = null // file .json + // Options: trimming trim_fastq = true // [true, false] remove_adapter = true // [true, false] @@ -32,9 +38,6 @@ params { // Options: CheckQC checkqc_config = [] // file .yaml - // Options: Illumina samplesheet validator - validator_schema = null // file .json - // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index bddff01c..679f7d04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -31,10 +31,22 @@ "format": "path", "description": "Path to Kraken2 DB to use for screening" }, - "validator_schema": { + "json_schema_validator": { + "type": "string", + "description": "String in JSON 
format to be passed to samshee module for samplesheet validation" + }, + "name_schema_validator": { + "type": "string", + "description": "Schema name to be passed to samshee module for samplesheet validation" + }, + "file_schema_validator": { "type": "string", "format": "file-path", - "description": "Path to Illumina v2 samplesheet validator .json file" + "description": "Local JSON file to be passed to samshee module for samplesheet validation" + }, + "v1_schema": { + "type": "boolean", + "description": "Whether or not the Illumina samplesheet is v1" } } }, diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf index bc960b7b..84fd1693 100644 --- a/workflows/demultiplex.nf +++ b/workflows/demultiplex.nf @@ -28,11 +28,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' include { UNTAR as UNTAR_FLOWCELL } from '../modules/nf-core/untar/main' include { UNTAR as UNTAR_KRAKEN_DB } from '../modules/nf-core/untar/main' include { MD5SUM } from '../modules/nf-core/md5sum/main' - -// -// MODULE: Local modules -// -include { SAMSHEE } from '../modules/local/samshee/main' +include { SAMSHEE } from '../modules/nf-core/samshee/main' // // FUNCTION @@ -63,11 +59,11 @@ workflow DEMULTIPLEX { strandedness = params.strandedness // string: auto, reverse, forward, unstranded // Channel inputs - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - ch_multiqc_reports = Channel.empty() - checkqc_config = params.checkqc_config ? Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml - ch_validator_schema = params.validator_schema ? Channel.fromPath(params.validator_schema, checkIfExists: true) : [] // file validator_schema.json + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_multiqc_reports = Channel.empty() + checkqc_config = params.checkqc_config ?
Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml + ch_file_schema_validator = params.file_schema_validator ? Channel.fromPath(params.file_schema_validator, checkIfExists: true) : [] // file schema.json // Remove adapter from Illumina samplesheet to avoid adapter trimming in demultiplexer tools if (params.remove_adapter && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])) { @@ -95,10 +91,13 @@ workflow DEMULTIPLEX { // RUN samplesheet_validator samshee if (!("samshee" in skip_tools) && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])){ SAMSHEE ( - ch_samplesheet.map{ meta, samplesheet, flowcell, lane -> [ meta, samplesheet ] }, - ch_validator_schema + ch_samplesheet.map{ meta, samplesheet, flowcell, lane -> [meta,samplesheet] }, + ch_file_schema_validator ) ch_versions = ch_versions.mix(SAMSHEE.out.versions) + ch_samplesheet = ch_samplesheet + .join(SAMSHEE.out.samplesheet) + .map{ meta, samplesheet, flowcell, lane, samplesheet_formatted -> [ meta, samplesheet_formatted, flowcell, lane ] } } // Convenience