Replace the local INTERPROSCAN module with the nf-core interproscan module.

What this patch does:
  * Deletes modules/local/interproscan.nf and adds modules/nf-core/interproscan
    (pinned in modules.json at 3f5420aa22e00bd030a2556dfdffc9e164ec0ec5).
  * Drops '-f TSV' and '-dp' from the INTERPROSCAN ext.args in the configs and
    the README example, because the nf-core module already passes
    `-f ${out_ext}` and `-dp` on its own command line.
  * Passes 'tsv' as the new `out_ext` input from the FUNCTIONAL_ANNOTATION
    subworkflow.
  * Removes the README note recommending a local InterProScan installation.
  * Re-indents modules.json to four spaces.

Deviations from the pinned nf-core module (modules/nf-core/interproscan/main.nf):
  * The unknown-format warning is logged BEFORE out_ext is reset to 'tsv', so
    the message reports the rejected value instead of always printing "tsv"
    (script and stub sections).
  * Only a trailing ".gz" is stripped from the input name; the previous
    replace(".gz", "") rewrote uncompressed names that merely contain ".gz",
    pointing interproscan.sh at a file that does not exist.

Other adjustments:
  * The subworkflow include names ".../interproscan/main", matching the
    existing blast/makeblastdb include.

NOTE(review): the post-image blob ids on the `index` lines of
modules/nf-core/interproscan/main.nf and subworkflows/functional_annotation/main.nf
predate the adjustments above; regenerate them if the ids are relied upon.
NOTE(review): the output prefix now defaults to meta.id instead of the chunk
basename used by the deleted local module; confirm nothing downstream relies
on per-chunk unique file names.
NOTE(review): modules/nf-core/interproscan/main.nf now differs from the pinned
git_sha; confirm whether a module patch should be recorded in modules.json.

diff --git a/config/functional_annotation_modules.config b/config/functional_annotation_modules.config
index 0992ec3..51926cc 100644
--- a/config/functional_annotation_modules.config
+++ b/config/functional_annotation_modules.config
@@ -17,11 +17,9 @@ process {
     }
     withName: 'INTERPROSCAN' {
         ext.args = [
-            '-f TSV',
             '--iprlookup',
             '--goterms',
             '-pa',
-            '-dp',
             '-t p'
         ].join(' ').trim()
     }
diff --git a/config/test.config b/config/test.config
index 4608a7b..23f22fb 100644
--- a/config/test.config
+++ b/config/test.config
@@ -29,12 +29,11 @@ if ( params.subworkflow == 'functional_annotation' ) {
     }
     process {
         withName: 'FUNCTIONAL_ANNOTATION:INTERPROSCAN' {
+            cpus = 2
             ext.args = [
-                '-f TSV',
                 // '--iprlookup',
                 // '--goterms',
                 // '-pa',
-                // '-dp',
                 '-t p'
             ].join(' ').trim()
         }
diff --git a/modules.json b/modules.json
index f032637..4db56fb 100644
--- a/modules.json
+++ b/modules.json
@@ -1,41 +1,58 @@
 {
-  "name": "NBIS Genome Annotation Workflow",
-  "homePage": "",
-  "repos": {
-    "https://github.com/nf-core/modules.git": {
-      "modules": {
-        "nf-core": {
-          "blast/makeblastdb": {
-            "branch": "master",
-            "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
-            "installed_by": ["modules"],
-            "patch": "modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff"
-          },
-          "busco": {
-            "branch": "master",
-            "git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de",
-            "installed_by": ["modules"]
-          },
-          "fastp": {
-            "branch": "master",
-            "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd",
-            "installed_by": ["modules"]
-          },
-          "fastqc": {
-            "branch": "master",
-            "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53",
-            "installed_by": ["modules"]
-          },
-          "multiqc": {
-            "branch": "master",
-            "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80",
-            "installed_by": ["modules"]
-          }
+    "name": "NBIS Genome Annotation Workflow",
+    "homePage": "",
+    "repos": {
+        "https://github.com/nf-core/modules.git": {
+            "modules": {
+                "nf-core": {
+                    "blast/makeblastdb": {
+                        "branch": "master",
+                        "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
+                        "installed_by": [
+                            "modules"
+                        ],
+                        "patch": "modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff"
+                    },
+                    "busco": {
+                        "branch": "master",
+                        "git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastp": {
+                        "branch": "master",
+                        "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "interproscan": {
+                        "branch": "master",
+                        "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    },
+                    "multiqc": {
+                        "branch": "master",
+                        "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80",
+                        "installed_by": [
+                            "modules"
+                        ]
+                    }
+                }
+            },
+            "subworkflows": {
+                "nf-core": {}
+            }
         }
-      },
-      "subworkflows": {
-        "nf-core": {}
-      }
     }
-  }
 }
diff --git a/modules/local/interproscan.nf b/modules/local/interproscan.nf
deleted file mode 100644
index 099c608..0000000
--- a/modules/local/interproscan.nf
+++ /dev/null
@@ -1,34 +0,0 @@
-process INTERPROSCAN {
-    tag "${protein_fasta.baseName}"
-    label 'process_single'
-
-    conda "bioconda::interproscan=5.55_88.0"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/interproscan:5.55_88.0--hec16e2b_1':
-        'biocontainers/interproscan:5.55_88.0--hec16e2b_1' }"
-
-    input:
-    tuple val(meta), path(protein_fasta)
-
-    output:
-    tuple val(meta), path('*.tsv'), emit: tsv
-    path "versions.yml"           , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${protein_fasta.baseName}"
-    """
-    interproscan.sh \\
-        $args \\
-        -i $protein_fasta \\
-        -o ${prefix}.tsv
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        interproscan: \$( interproscan.sh --version | sed '1!d; s/.*version //' )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/interproscan/environment.yml b/modules/nf-core/interproscan/environment.yml
new file mode 100644
index 0000000..36ec6fa
--- /dev/null
+++ b/modules/nf-core/interproscan/environment.yml
@@ -0,0 +1,7 @@
+name: interproscan
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::interproscan=5.59_91.0
diff --git a/modules/nf-core/interproscan/main.nf b/modules/nf-core/interproscan/main.nf
new file mode 100644
index 0000000..b22149b
--- /dev/null
+++ b/modules/nf-core/interproscan/main.nf
@@ -0,0 +1,91 @@
+process INTERPROSCAN {
+    tag "$meta.id"
+    label 'process_long'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/interproscan:5.59_91.0--hec16e2b_1' :
+        'biocontainers/interproscan:5.59_91.0--hec16e2b_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+    val(out_ext)
+
+    output:
+    tuple val(meta), path('*.tsv') , optional: true, emit: tsv
+    tuple val(meta), path('*.xml') , optional: true, emit: xml
+    tuple val(meta), path('*.gff3'), optional: true, emit: gff3
+    tuple val(meta), path('*.json'), optional: true, emit: json
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def is_compressed = fasta.name.endsWith(".gz")
+    // Strip only a trailing ".gz" so uncompressed names that merely contain ".gz" are left intact
+    def fasta_name = fasta.name.replaceAll(/\.gz$/, "")
+
+    def appl = "-appl TIGRFAM,FunFam,SFLD,PANTHER,Gene3D,Hamap,ProSiteProfiles,Coils,SMART,CDD,PRINTS,PIRSR,ProSitePatterns,AntiFam,Pfam,MobiDBLite"
+    if ( args.contains("-appl") ) {
+        appl = ""
+    }
+    switch ( out_ext ) {
+        case "tsv": break
+        case "xml": break
+        case "gff3": break
+        case "json": break
+        default:
+            // Warn before resetting out_ext so the message reports the rejected value
+            log.warn("Unknown output file format provided (${out_ext}): selecting tsv as fallback");
+            out_ext = 'tsv';
+            break
+    }
+
+    // -dp (disable precalculation) is on so no online dependency
+    """
+    if [ "${is_compressed}" == "true" ]; then
+        gzip -c -d ${fasta} > ${fasta_name}
+    fi
+
+    interproscan.sh \\
+        -cpu ${task.cpus} \\
+        -i ${fasta_name} \\
+        -f ${out_ext} \\
+        -dp \\
+        ${args} \\
+        ${appl} \\
+        -o ${prefix}.${out_ext}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        interproscan: \$(echo \$(interproscan.sh --version 2>&1) | head -n 1 | sed 's/^.*InterProScan version//' | sed 's/\\s*InterProScan.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    switch ( out_ext ) {
+        case "tsv": break
+        case "xml": break
+        case "gff3": break
+        case "json": break
+        default:
+            // Warn before resetting out_ext so the message reports the rejected value
+            log.warn("Unknown output file format provided (${out_ext}): selecting tsv as fallback");
+            out_ext = 'tsv';
+            break
+    }
+
+    """
+    touch ${prefix}.${out_ext}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        interproscan: \$(echo \$(interproscan.sh --version 2>&1) | head -n 1 | sed 's/^.*InterProScan version//' | sed 's/\\s*InterProScan.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/interproscan/meta.yml b/modules/nf-core/interproscan/meta.yml
new file mode 100644
index 0000000..e984bd3
--- /dev/null
+++ b/modules/nf-core/interproscan/meta.yml
@@ -0,0 +1,54 @@
+name: "interproscan"
+description: Produces protein annotations and predictions from an amino acids FASTA file
+keywords:
+  - annotation
+  - fasta
+  - interproscan
+tools:
+  - "interproscan":
+      description: "InterPro integrates together predictive information about proteins function from a number of partner resources"
+      homepage: "https://www.ebi.ac.uk/interpro/search/sequence/"
+      documentation: "https://interproscan-docs.readthedocs.io"
+      tool_dev_url: "https://github.com/ebi-pf-team/interproscan"
+      doi: "10.1093/bioinformatics/btu031"
+      licence: "['GPL v3']"
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Input fasta file containing the amino acid query sequences
+      pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+  - out_ext:
+      type: string
+      description: Specify the type of output file to be generated
+      pattern: "tsv|xml|gff3|json"
+output:
+  - tsv:
+      type: file
+      description: Tab separated file containing with detailed hits
+      pattern: "*.{tsv}"
+  - xml:
+      type: file
+      description: XML file containing with detailed hits
+      pattern: "*.{xml}"
+  - gff3:
+      type: file
+      description: GFF3 file containing with detailed hits
+      pattern: "*.{gff3}"
+  - json:
+      type: file
+      description: JSON file containing with detailed hits
+      pattern: "*.{json}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@toniher"
+maintainers:
+  - "@toniher"
+  - "@vagkaratzas"
diff --git a/modules/nf-core/interproscan/tests/main.nf.test b/modules/nf-core/interproscan/tests/main.nf.test
new file mode 100644
index 0000000..7e77774
--- /dev/null
+++ b/modules/nf-core/interproscan/tests/main.nf.test
@@ -0,0 +1,65 @@
+nextflow_process {
+
+    name "Test Process INTERPROSCAN"
+    script "../main.nf"
+    process "INTERPROSCAN"
+    config "./nextflow.config"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "interproscan"
+
+    test("Annotates set of input proteins in an output tsv file") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true)
+                ]
+                input[1] = 'tsv'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.tsv).match("tsv") },
+                { assert process.out.versions }
+            )
+        }
+
+    }
+
+    test("Annotates set of zipped input proteins in an output xml file") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.test_data['sarscov2']['genome']['proteome_fasta_gz'], checkIfExists: true)
+                ]
+                input[1] = 'xml'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.xml).match("xml") },
+                { assert process.out.versions }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/interproscan/tests/main.nf.test.snap b/modules/nf-core/interproscan/tests/main.nf.test.snap
new file mode 100644
index 0000000..903b8c1
--- /dev/null
+++ b/modules/nf-core/interproscan/tests/main.nf.test.snap
@@ -0,0 +1,28 @@
+{
+    "tsv": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsv:md5,0e12562859bec64decc90e3dedd1a52e"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-01T11:58:56.075236858"
+    },
+    "xml": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.xml:md5,7a211c1a4761e2b9b8700e6e9abbb15f"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-01T12:29:19.50051319"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/interproscan/tests/nextflow.config b/modules/nf-core/interproscan/tests/nextflow.config
new file mode 100644
index 0000000..2043e2c
--- /dev/null
+++ b/modules/nf-core/interproscan/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: INTERPROSCAN {
+        ext.args = '-appl Coils'
+    }
+}
diff --git a/modules/nf-core/interproscan/tests/tags.yml b/modules/nf-core/interproscan/tests/tags.yml
new file mode 100644
index 0000000..ddb90f8
--- /dev/null
+++ b/modules/nf-core/interproscan/tests/tags.yml
@@ -0,0 +1,2 @@
+interproscan:
+  - modules/nf-core/interproscan/**
diff --git a/subworkflows/functional_annotation/README.md b/subworkflows/functional_annotation/README.md
index 134de19..fef2b10 100644
--- a/subworkflows/functional_annotation/README.md
+++ b/subworkflows/functional_annotation/README.md
@@ -26,23 +26,6 @@ nextflow run NBISweden/pipelines-nextflow \
     -params-file params.yml
 ```
 
-> **note**
->
-> The Interproscan conda package is temperamental. Please use a local installation
-> by overriding the workflow configuration.
->
-> `nextflow.config`:
->
-> ```nextflow
-> process {
->     withName: 'INTERPROSCAN' {
->         conda = null
->         container = null
->         module = 'bioinfo-tools:InterProScan/5.30-69.0' // Load Uppmax modules `bioinfo-tools` and `InterProScan/5.30-69.0`
->     }
-> }
-> ```
-
 ## Parameters
 
 - General:
@@ -65,7 +48,7 @@ These command line tool parameters can be changed by overriding the `ext.args` v
 ```nextflow
 process {
     withName: 'INTERPROSCAN' {
-        ext.args = '-f TSV --iprlookup --goterms -pa -dp -t p'
+        ext.args = '--iprlookup --goterms -pa -t p'
     }
 }
 ```
diff --git a/subworkflows/functional_annotation/main.nf b/subworkflows/functional_annotation/main.nf
index 9073ae1..bf30c17 100644
--- a/subworkflows/functional_annotation/main.nf
+++ b/subworkflows/functional_annotation/main.nf
@@ -1,7 +1,7 @@
 include { BLAST_MAKEBLASTDB } from "$projectDir/modules/nf-core/blast/makeblastdb/main"
 include { AGAT_EXTRACTSEQUENCES as GFF2PROTEIN } from "$projectDir/modules/local/agat/extractsequences"
 include { BLAST_BLASTP } from "$projectDir/modules/local/blast/blastp"
-include { INTERPROSCAN } from "$projectDir/modules/local/interproscan"
+include { INTERPROSCAN } from "$projectDir/modules/nf-core/interproscan/main"
 include { AGAT_MANAGEFUNCTIONALANNOTATION as MERGE_FUNCTIONAL_ANNOTATION } from "$projectDir/modules/local/agat/managefunctionalannotation"
 
 workflow FUNCTIONAL_ANNOTATION {
@@ -43,7 +43,10 @@ workflow FUNCTIONAL_ANNOTATION {
         GFF2PROTEIN.out.proteins.splitFasta( by: params.records_per_file, file: true ),
         blastdb_ch.map{ meta, db -> db }.collect()
     )
-    INTERPROSCAN( GFF2PROTEIN.out.proteins.splitFasta( by: params.records_per_file, file: true ) )
+    INTERPROSCAN(
+        GFF2PROTEIN.out.proteins.splitFasta( by: params.records_per_file, file: true ),
+        'tsv'
+    )
     MERGE_FUNCTIONAL_ANNOTATION(
         gff_file,
         BLAST_BLASTP.out.txt.map{ meta, txt -> txt }.collectFile( name: 'blast_merged.tsv' ),