From 97e152d4e5d5324b4e8612afe83eef1c91f3328e Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 14:13:38 +0100
Subject: [PATCH 01/36] correct eggnog mapper version bug when not used

---
 bin/scrape_software_versions.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 86f979f..6b74a59 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -2,6 +2,7 @@
 from __future__ import print_function
 from collections import OrderedDict
 import re
+import os 
 
 regexes = {
     'metagWGS': ['v_pipeline.txt', r"(\S+)"],
@@ -49,11 +50,12 @@ results['Eggnog-Mapper'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
-    with open(v[0]) as x:
-        versions = x.read()
-        match = re.search(v[1], versions)
-        if match:
-            results[k] = "v{}".format(match.group(1))
+    if os.path.exists(v[0]):
+        with open(v[0]) as x:
+            versions = x.read()
+            match = re.search(v[1], versions)
+            if match:
+                results[k] = "v{}".format(match.group(1))
 
 # Remove software set to false in results
 for k in results:
-- 
GitLab


From 820a630e34053129857b16a7bf49b9bbaee39bb9 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:54:20 +0100
Subject: [PATCH 02/36] correct host + taxonomy_dir paths

---
 functional_tests/expected_processes_HiFi.tsv |  4 +--
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index 4555d12..e278d4c 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
 mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
 mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 1f0ab4c..9d09ccf 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
-- 
GitLab


From 2cf7e07b329788872da1befee222ed5c117d672a Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 03/36] update docs + add environment variables

---
 docs/usage.md                      |  8 +++---
 functional_tests/README.md         | 42 +++++++++++++++++++++---------
 functional_tests/launch_example.sh |  2 +-
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 2644fc8..2237db7 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,9 +10,9 @@
 
    > ```
    > sample,fastq_1,fastq_2
-   > a1,$DASTASET/a1_R1.fastq.gz,$DASTASET/a1_R2.fastq.gz
-   > a2,$DASTASET/a2_R1.fastq.gz,$DASTASET/a2_R2.fastq.gz
-   > c,$DASTASET/c_R1.fastq.gz,$DASTASET/c_R2.fastq.gz
+   > a1,$DATASET/a1_R1.fastq.gz,$DATASET/a1_R2.fastq.gz
+   > a2,$DATASET/a2_R1.fastq.gz,$DATASET/a2_R2.fastq.gz
+   > c,$DATASET/c_R1.fastq.gz,$DATASET/c_R2.fastq.gz
    > ```
 
 4. Run a basic script:
@@ -33,7 +33,7 @@
    > nextflow run -profile test_genotoul_workq metagwgs/main.nf \
    > --type 'SR' \
    > --input 'metagwgs-test-datasets/small/input/samplesheet.csv' \
-   > --skip_host_filter --skip_kaiju
+   > --skip_host_filter --skip_kaiju --stop_at_clean
    > ```
 
    > **NOTE:** you can change Nextflow and Singularity versions with other versions available on the cluster (see all versions with `search_module ToolName`). Nextflow version must be >= v20 and Singularity version must be >= v3.
diff --git a/functional_tests/README.md b/functional_tests/README.md
index 387ff1c..4d56af4 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -5,18 +5,17 @@
 1. Install metagwgs as described here: [installation doc](../docs/installation.md)
 2. Get datasets: two datasets are currently available for these functional tests at `https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git`
 
-    Replace "\<dataset\>" with either "small" or "mag":
     ```
-    git clone --branch <dataset> git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
+    git clone git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
 
     or
 
-    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/archive/<dataset>/metagwgs-test-datasets-<dataset>.tar.gz
+    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git
     ```
-3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-10-19.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
+3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-12-16.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
     - **Kaiju** (_kaijudb_refseq_2020-05-25_)
     - **Diamond** (_refseq_bacteria_2021-05-20_)
-    - **NCBI Taxonomy** (_taxonomy_2021-08-23_)
+    - **NCBI Taxonomy** (_taxonomy_2021-12-7_)
     - **Eggnog Mapper** (_eggnog-mapper-2.0.4-rf1_)
 
 
@@ -30,15 +29,34 @@ To launch functional tests, you need to be located at the root of the folder whe
 - by providing the results folder of a pipeline already exectuted
 ```
 cd test_folder
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results
+export METAG_PATH="/path/to/sources"
+export DATASET="/path/to/metagwgs-test-datasets"
+python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results
 ```
 - by providing a script which will launch the nextflow pipeline [see example](./launch_example.sh) (this example is designed for the "small" dataset with --min_contigs_cpm>1000, using slurm)
-```
-mkdir test_folder
-cd test_folder
-cp <metagwgs-src>/functional_tests/launch_example.sh ./
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results --script launch_example.sh
-```
+
+    1. create working directory 
+    ```
+    mkdir test_folder
+    cd test_folder
+    ```
+    
+    2. set environment variables and load module
+    
+    ```
+    export METAG_PATH="/path/to/sources"
+    export DATASET="/path/to/metagwgs-test-datasets"
+    export DATABANK="/path/to/FT_banks_2021-12-16"
+    export EGGNOG_DB="$DATABANK/eggnog-mapper-2.0.4-rf1/data"
+    module load system/Python-3.7.4
+    ```
+    
+    3. launch functional test
+    
+    ```
+    cp $METAG_PATH/functional_tests/launch_example.sh ./
+    python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results --script launch_example.sh
+    ```
 
 >**NOTE: more information on the command used to produce each dataset in [small](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/small) and [mag](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/mag) READMEs**
 
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 3a95935..7d69bdd 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq main.nf --type 'SR' --input 'metagwgs-test-datasets/small/input/samplesheet.csv' --host_fasta 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir 'FT_banks_2021-10-19/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank 'FT_banks_2021-10-19/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir 'FT_banks_2021-10-19/eggnog-mapper-2.0.4-rf1/data' --taxonomy_dir 'FT_banks_2021-10-19/taxonomy_2021-08-23' --stop_at_clean -with-report -with-timeline -with-trace -with-dag"
\ No newline at end of file
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 5a626c908acc874ae88acb3329a4f52f642b3c5d Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 10:44:00 +0100
Subject: [PATCH 04/36] remove diamond bank necessity when not use

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 46bccb4..9a05ad1 100644
--- a/main.nf
+++ b/main.nf
@@ -166,7 +166,7 @@ workflow {
     skip_clean = true
   }
 
-  if ( !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
+  if ( !(params.stop_at_clean) && !(params.stop_at_assembly) && !(params.stop_at_filtering) && !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
       exit 1, "You must specify --stop_at_structural_annot or specify a diamond bank with --diamond_bank"
   }
   header = getAndCheckHeader()
-- 
GitLab


From 702803ceb01eebc0445e03298f4713511bb5489d Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 14:13:38 +0100
Subject: [PATCH 05/36] correct eggnog mapper version bug while not use

---
 bin/scrape_software_versions.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 86f979f..6b74a59 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -2,6 +2,7 @@
 from __future__ import print_function
 from collections import OrderedDict
 import re
+import os 
 
 regexes = {
     'metagWGS': ['v_pipeline.txt', r"(\S+)"],
@@ -49,11 +50,12 @@ results['Eggnog-Mapper'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
-    with open(v[0]) as x:
-        versions = x.read()
-        match = re.search(v[1], versions)
-        if match:
-            results[k] = "v{}".format(match.group(1))
+    if os.path.exists(v[0]):
+        with open(v[0]) as x:
+            versions = x.read()
+            match = re.search(v[1], versions)
+            if match:
+                results[k] = "v{}".format(match.group(1))
 
 # Remove software set to false in results
 for k in results:
-- 
GitLab


From 9bbcb27268b5e90df5ebb885008dbf8b54bfce90 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:54:20 +0100
Subject: [PATCH 06/36] correct host + taxonomy_dir paths

---
 functional_tests/expected_processes_HiFi.tsv |  4 +--
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index 4555d12..e278d4c 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
 mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
 mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 1f0ab4c..9d09ccf 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
-- 
GitLab


From c343d7af71e7b2aec7e8b8b865a3202e6d92b16a Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 07/36] update docs + add environement variables

---
 docs/usage.md                      |  8 +++---
 functional_tests/README.md         | 42 +++++++++++++++++++++---------
 functional_tests/launch_example.sh |  2 +-
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 924523d..a476cfb 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,9 +10,9 @@
 
    > ```
    > sample,fastq_1,fastq_2
-   > a1,$DASTASET/a1_R1.fastq.gz,$DASTASET/a1_R2.fastq.gz
-   > a2,$DASTASET/a2_R1.fastq.gz,$DASTASET/a2_R2.fastq.gz
-   > c,$DASTASET/c_R1.fastq.gz,$DASTASET/c_R2.fastq.gz
+   > a1,$DATASET/a1_R1.fastq.gz,$DATASET/a1_R2.fastq.gz
+   > a2,$DATASET/a2_R1.fastq.gz,$DATASET/a2_R2.fastq.gz
+   > c,$DATASET/c_R1.fastq.gz,$DATASET/c_R2.fastq.gz
    > ```
 
 4. Run a basic script:
@@ -33,7 +33,7 @@
    > nextflow run -profile test_genotoul_workq metagwgs/main.nf \
    > --type 'SR' \
    > --input 'metagwgs-test-datasets/small/input/samplesheet.csv' \
-   > --skip_host_filter --skip_kaiju
+   > --skip_host_filter --skip_kaiju --stop_at_clean
    > ```
 
    > **NOTE:** you can change Nextflow and Singularity versions with other versions available on the cluster (see all versions with `search_module ToolName`). Nextflow version must be >= v20 and Singularity version must be >= v3.
diff --git a/functional_tests/README.md b/functional_tests/README.md
index 387ff1c..4d56af4 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -5,18 +5,17 @@
 1. Install metagwgs as described here: [installation doc](../docs/installation.md)
 2. Get datasets: two datasets are currently available for these functional tests at `https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git`
 
-    Replace "\<dataset\>" with either "small" or "mag":
     ```
-    git clone --branch <dataset> git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
+    git clone git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
 
     or
 
-    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/archive/<dataset>/metagwgs-test-datasets-<dataset>.tar.gz
+    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git
     ```
-3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-10-19.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
+3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-12-16.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
     - **Kaiju** (_kaijudb_refseq_2020-05-25_)
     - **Diamond** (_refseq_bacteria_2021-05-20_)
-    - **NCBI Taxonomy** (_taxonomy_2021-08-23_)
+    - **NCBI Taxonomy** (_taxonomy_2021-12-7_)
     - **Eggnog Mapper** (_eggnog-mapper-2.0.4-rf1_)
 
 
@@ -30,15 +29,34 @@ To launch functional tests, you need to be located at the root of the folder whe
 - by providing the results folder of a pipeline already exectuted
 ```
 cd test_folder
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results
+export METAG_PATH="/path/to/sources"
+export DATASET="/path/to/metagwgs-test-datasets"
+python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results
 ```
 - by providing a script which will launch the nextflow pipeline [see example](./launch_example.sh) (this example is designed for the "small" dataset with --min_contigs_cpm>1000, using slurm)
-```
-mkdir test_folder
-cd test_folder
-cp <metagwgs-src>/functional_tests/launch_example.sh ./
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results --script launch_example.sh
-```
+
+    1. create working directory 
+    ```
+    mkdir test_folder
+    cd test_folder
+    ```
+    
+    2. set environment variables and load module
+    
+    ```
+    export METAG_PATH="/path/to/sources"
+    export DATASET="/path/to/metagwgs-test-datasets"
+    export DATABANK="/path/to/FT_banks_2021-12-16"
+    export EGGNOG_DB="$DATABANK/eggnog-mapper-2.0.4-rf1/data"
+    module load system/Python-3.7.4
+    ```
+    
+    3. launch functional test
+    
+    ```
+    cp $METAG_PATH/functional_tests/launch_example.sh ./
+    python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results --script launch_example.sh
+    ```
 
 >**NOTE: more information on the command used to produce each dataset in [small](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/small) and [mag](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/mag) READMEs**
 
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 3a95935..7d69bdd 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq main.nf --type 'SR' --input 'metagwgs-test-datasets/small/input/samplesheet.csv' --host_fasta 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir 'FT_banks_2021-10-19/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank 'FT_banks_2021-10-19/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir 'FT_banks_2021-10-19/eggnog-mapper-2.0.4-rf1/data' --taxonomy_dir 'FT_banks_2021-10-19/taxonomy_2021-08-23' --stop_at_clean -with-report -with-timeline -with-trace -with-dag"
\ No newline at end of file
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From db4d7d20063c9b56554a8635544ec29e477f0ffb Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 10:44:00 +0100
Subject: [PATCH 08/36] remove diamond bank necessity when not use

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 46bccb4..9a05ad1 100644
--- a/main.nf
+++ b/main.nf
@@ -166,7 +166,7 @@ workflow {
     skip_clean = true
   }
 
-  if ( !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
+  if ( !(params.stop_at_clean) && !(params.stop_at_assembly) && !(params.stop_at_filtering) && !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
       exit 1, "You must specify --stop_at_structural_annot or specify a diamond bank with --diamond_bank"
   }
   header = getAndCheckHeader()
-- 
GitLab


From d0f68b10971e5dbcc8b183bc26ba86ba2d02e735 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 13:02:29 +0100
Subject: [PATCH 09/36] update functional_tests with new profiles

---
 functional_tests/README.md                   |  2 +-
 functional_tests/expected_processes_HiFi.tsv | 10 +++---
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 functional_tests/launch_example.sh           |  2 +-
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/functional_tests/README.md b/functional_tests/README.md
index 4d56af4..25f1b36 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -150,7 +150,7 @@ To use it :
   ``` 
   cut -f 1 $METAG_PATH/functional_tests/expected_processes_sr.tsv  | tail -n +2 > $OUTDIR/cmd_sr.sh
   ``` 
-  > the commands use profile `test_genotoul_workq`
+  > the commands use profile `test,genotoul`
   - replace path in the samplesheet : 
   ``` 
   sed -i -e "s,\$DATASET,$DATASET,g" $DATASET/small/input/samplesheet.csv
diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index e278d4c..cf9eacf 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 9d09ccf..4f146c8 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 7d69bdd..0905b24 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 5eb82430fb12fdbfeb8e93e198398e912f025011 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 16:54:03 +0100
Subject: [PATCH 10/36] Check gff validity (issue : check point prokka)

---
 modules/prokka.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index 44c3475..bb2c305 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -7,11 +7,18 @@ process PROKKA {
    output:
    tuple val(sampleId), path("PROKKA_${sampleId}"), emit: prokka_results
    path "PROKKA_${sampleId}/${sampleId}.txt", emit: report
+   path "PROKKA_${sampleId}/${sampleId}_gff3_validator.txt",emit: gff3_validator_results
 
   script:
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
+
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  then
+    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
+  fi
   """
 }
 
-- 
GitLab


From 819c6301ff906158b2fd62f70698013234f7d4a8 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Thu, 13 Jan 2022 09:46:12 +0100
Subject: [PATCH 11/36] replace bedtools bamtofastq with samtools fastq (direct
 fastq.gz)

---
 modules/prokka.nf              | 7 +------
 modules/reads_deduplication.nf | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index bb2c305..d2341c3 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -13,12 +13,7 @@ process PROKKA {
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
-
-  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  then
-    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
-  fi
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff 
   """
 }
 
diff --git a/modules/reads_deduplication.nf b/modules/reads_deduplication.nf
index 72b8ff3..c675efd 100644
--- a/modules/reads_deduplication.nf
+++ b/modules/reads_deduplication.nf
@@ -24,8 +24,7 @@ process READS_DEDUPLICATION {
   samtools idxstats ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.idxstats
   samtools flagstat ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.flagstat
   samtools sort -n -o ${sampleId}.filtered.sort.bam ${sampleId}.filtered.bam
-  bedtools bamtofastq -i ${sampleId}.filtered.sort.bam -fq ${sampleId}_R1_dedup.fastq -fq2 ${sampleId}_R2_dedup.fastq
-  gzip ${sampleId}_R1_dedup.fastq ; gzip ${sampleId}_R2_dedup.fastq
+  samtools fastq -N -1 ${sampleId}_R1_dedup.fastq.gz -2 ${sampleId}_R2_dedup.fastq.gz ${sampleId}.filtered.sort.bam 
   rm ${sampleId}.sort.bam
   rm ${sampleId}.fixmate.bam
   rm ${sampleId}.fixmate.positionsort.bam
-- 
GitLab


From df0479a16d85f14fe7a8cfb6a8d23db47287049a Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 14:13:38 +0100
Subject: [PATCH 12/36] correct eggnog mapper version bug when not used

---
 bin/scrape_software_versions.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 86f979f..6b74a59 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -2,6 +2,7 @@
 from __future__ import print_function
 from collections import OrderedDict
 import re
+import os 
 
 regexes = {
     'metagWGS': ['v_pipeline.txt', r"(\S+)"],
@@ -49,11 +50,12 @@ results['Eggnog-Mapper'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
-    with open(v[0]) as x:
-        versions = x.read()
-        match = re.search(v[1], versions)
-        if match:
-            results[k] = "v{}".format(match.group(1))
+    if os.path.exists(v[0]):
+        with open(v[0]) as x:
+            versions = x.read()
+            match = re.search(v[1], versions)
+            if match:
+                results[k] = "v{}".format(match.group(1))
 
 # Remove software set to false in results
 for k in results:
-- 
GitLab


From e34af630ebc3b4d2a7ed04456727c74be6ac7f6d Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:54:20 +0100
Subject: [PATCH 13/36] correct host + taxonomy_dir paths

---
 functional_tests/expected_processes_HiFi.tsv |  4 +--
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index 4555d12..e278d4c 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
 mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
 mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 1f0ab4c..9d09ccf 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
-- 
GitLab


From 96af8f1f6c65851c15232df8420947c559c665db Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 14/36] update docs + add environment variables

---
 docs/usage.md                      |  8 +++---
 functional_tests/README.md         | 42 +++++++++++++++++++++---------
 functional_tests/launch_example.sh |  2 +-
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 924523d..a476cfb 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,9 +10,9 @@
 
    > ```
    > sample,fastq_1,fastq_2
-   > a1,$DASTASET/a1_R1.fastq.gz,$DASTASET/a1_R2.fastq.gz
-   > a2,$DASTASET/a2_R1.fastq.gz,$DASTASET/a2_R2.fastq.gz
-   > c,$DASTASET/c_R1.fastq.gz,$DASTASET/c_R2.fastq.gz
+   > a1,$DATASET/a1_R1.fastq.gz,$DATASET/a1_R2.fastq.gz
+   > a2,$DATASET/a2_R1.fastq.gz,$DATASET/a2_R2.fastq.gz
+   > c,$DATASET/c_R1.fastq.gz,$DATASET/c_R2.fastq.gz
    > ```
 
 4. Run a basic script:
@@ -33,7 +33,7 @@
    > nextflow run -profile test_genotoul_workq metagwgs/main.nf \
    > --type 'SR' \
    > --input 'metagwgs-test-datasets/small/input/samplesheet.csv' \
-   > --skip_host_filter --skip_kaiju
+   > --skip_host_filter --skip_kaiju --stop_at_clean
    > ```
 
    > **NOTE:** you can change Nextflow and Singularity versions with other versions available on the cluster (see all versions with `search_module ToolName`). Nextflow version must be >= v20 and Singularity version must be >= v3.
diff --git a/functional_tests/README.md b/functional_tests/README.md
index 387ff1c..4d56af4 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -5,18 +5,17 @@
 1. Install metagwgs as described here: [installation doc](../docs/installation.md)
 2. Get datasets: two datasets are currently available for these functional tests at `https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git`
 
-    Replace "\<dataset\>" with either "small" or "mag":
     ```
-    git clone --branch <dataset> git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
+    git clone git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
 
     or
 
-    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/archive/<dataset>/metagwgs-test-datasets-<dataset>.tar.gz
+    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git
     ```
-3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-10-19.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
+3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-12-16.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
     - **Kaiju** (_kaijudb_refseq_2020-05-25_)
     - **Diamond** (_refseq_bacteria_2021-05-20_)
-    - **NCBI Taxonomy** (_taxonomy_2021-08-23_)
+    - **NCBI Taxonomy** (_taxonomy_2021-12-7_)
     - **Eggnog Mapper** (_eggnog-mapper-2.0.4-rf1_)
 
 
@@ -30,15 +29,34 @@ To launch functional tests, you need to be located at the root of the folder whe
 - by providing the results folder of a pipeline already exectuted
 ```
 cd test_folder
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results
+export METAG_PATH="/path/to/sources"
+export DATASET="/path/to/metagwgs-test-datasets"
+python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results
 ```
 - by providing a script which will launch the nextflow pipeline [see example](./launch_example.sh) (this example is designed for the "small" dataset with --min_contigs_cpm>1000, using slurm)
-```
-mkdir test_folder
-cd test_folder
-cp <metagwgs-src>/functional_tests/launch_example.sh ./
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results --script launch_example.sh
-```
+
+    1. create working directory 
+    ```
+    mkdir test_folder
+    cd test_folder
+    ```
+    
+    2. set environment variables and load module
+    
+    ```
+    export METAG_PATH="/path/to/sources"
+    export DATASET="/path/to/metagwgs-test-datasets"
+    export DATABANK="/path/to/FT_banks_2021-12-16"
+    export EGGNOG_DB="$DATABANK/eggnog-mapper-2.0.4-rf1/data"
+    module load system/Python-3.7.4
+    ```
+    
+    3. launch functional test
+    
+    ```
+    cp $METAG_PATH/functional_tests/launch_example.sh ./
+    python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results --script launch_example.sh
+    ```
 
 >**NOTE: more information on the command used to produce each dataset in [small](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/small) and [mag](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/mag) READMEs**
 
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 3a95935..7d69bdd 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq main.nf --type 'SR' --input 'metagwgs-test-datasets/small/input/samplesheet.csv' --host_fasta 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir 'FT_banks_2021-10-19/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank 'FT_banks_2021-10-19/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir 'FT_banks_2021-10-19/eggnog-mapper-2.0.4-rf1/data' --taxonomy_dir 'FT_banks_2021-10-19/taxonomy_2021-08-23' --stop_at_clean -with-report -with-timeline -with-trace -with-dag"
\ No newline at end of file
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 80566ab3a1d9311e8e9d2a2706d9b94f3775dacb Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 10:44:00 +0100
Subject: [PATCH 15/36] remove diamond bank necessity when not use

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 46bccb4..9a05ad1 100644
--- a/main.nf
+++ b/main.nf
@@ -166,7 +166,7 @@ workflow {
     skip_clean = true
   }
 
-  if ( !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
+  if ( !(params.stop_at_clean) && !(params.stop_at_assembly) && !(params.stop_at_filtering) && !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
       exit 1, "You must specify --stop_at_structural_annot or specify a diamond bank with --diamond_bank"
   }
   header = getAndCheckHeader()
-- 
GitLab


From 9311b5c435dd8a3fe521cde6c45d3e7ed91b330a Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 13:02:29 +0100
Subject: [PATCH 16/36] update functional_tests with new profiles

---
 functional_tests/README.md                   |  2 +-
 functional_tests/expected_processes_HiFi.tsv | 10 +++---
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 functional_tests/launch_example.sh           |  2 +-
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/functional_tests/README.md b/functional_tests/README.md
index 4d56af4..25f1b36 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -150,7 +150,7 @@ To use it :
   ``` 
   cut -f 1 $METAG_PATH/functional_tests/expected_processes_sr.tsv  | tail -n +2 > $OUTDIR/cmd_sr.sh
   ``` 
-  > the commands use profile `test_genotoul_workq`
+  > the commands use profile `test,genotoul`
   - replace path in the samplesheet : 
   ``` 
   sed -i -e "s,\$DATASET,$DATASET,g" $DATASET/small/input/samplesheet.csv
diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index e278d4c..cf9eacf 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 9d09ccf..4f146c8 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 7d69bdd..0905b24 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From aec8331d0787656b960147f1734458881ca6a4a0 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 16:54:03 +0100
Subject: [PATCH 17/36] Check gff validity (issue: check point prokka)

---
 modules/prokka.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index 44c3475..bb2c305 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -7,11 +7,18 @@ process PROKKA {
    output:
    tuple val(sampleId), path("PROKKA_${sampleId}"), emit: prokka_results
    path "PROKKA_${sampleId}/${sampleId}.txt", emit: report
+   path "PROKKA_${sampleId}/${sampleId}_gff3_validator.txt",emit: gff3_validator_results
 
   script:
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
+
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  then
+    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
+  fi
   """
 }
 
-- 
GitLab


From 06beeb7669392ac6b56c596db1bedbbd36979d25 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Thu, 13 Jan 2022 09:46:12 +0100
Subject: [PATCH 18/36] replace bedtools bamtofastq with samtools fastq (direct
 fastq.gz)

---
 modules/prokka.nf              | 7 +------
 modules/reads_deduplication.nf | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index bb2c305..d2341c3 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -13,12 +13,7 @@ process PROKKA {
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
-
-  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  then
-    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
-  fi
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff 
   """
 }
 
diff --git a/modules/reads_deduplication.nf b/modules/reads_deduplication.nf
index 72b8ff3..c675efd 100644
--- a/modules/reads_deduplication.nf
+++ b/modules/reads_deduplication.nf
@@ -24,8 +24,7 @@ process READS_DEDUPLICATION {
   samtools idxstats ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.idxstats
   samtools flagstat ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.flagstat
   samtools sort -n -o ${sampleId}.filtered.sort.bam ${sampleId}.filtered.bam
-  bedtools bamtofastq -i ${sampleId}.filtered.sort.bam -fq ${sampleId}_R1_dedup.fastq -fq2 ${sampleId}_R2_dedup.fastq
-  gzip ${sampleId}_R1_dedup.fastq ; gzip ${sampleId}_R2_dedup.fastq
+  samtools fastq -N -1 ${sampleId}_R1_dedup.fastq.gz -2 ${sampleId}_R2_dedup.fastq.gz ${sampleId}.filtered.sort.bam 
   rm ${sampleId}.sort.bam
   rm ${sampleId}.fixmate.bam
   rm ${sampleId}.fixmate.positionsort.bam
-- 
GitLab


From 0fa08f53c1383bb4d20bb5656169fee4711d8101 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 14:13:38 +0100
Subject: [PATCH 19/36] correct eggnog mapper version bug when not used

---
 bin/scrape_software_versions.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 86f979f..6b74a59 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -2,6 +2,7 @@
 from __future__ import print_function
 from collections import OrderedDict
 import re
+import os 
 
 regexes = {
     'metagWGS': ['v_pipeline.txt', r"(\S+)"],
@@ -49,11 +50,12 @@ results['Eggnog-Mapper'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
-    with open(v[0]) as x:
-        versions = x.read()
-        match = re.search(v[1], versions)
-        if match:
-            results[k] = "v{}".format(match.group(1))
+    if os.path.exists(v[0]):
+        with open(v[0]) as x:
+            versions = x.read()
+            match = re.search(v[1], versions)
+            if match:
+                results[k] = "v{}".format(match.group(1))
 
 # Remove software set to false in results
 for k in results:
-- 
GitLab


From c95e3a73f2840b70e5c333b0eed9cf50cf9ac97a Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:54:20 +0100
Subject: [PATCH 20/36] correct host + taxonomy_dir paths

---
 functional_tests/expected_processes_HiFi.tsv |  4 +--
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index 4555d12..e278d4c 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
 mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
 mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 1f0ab4c..9d09ccf 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
-- 
GitLab


From b8c84a53c78a6d50dd3ae3be962f8de2bcbddb50 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 21/36] update docs + add environement variables

---
 docs/usage.md                      |  8 +++---
 functional_tests/README.md         | 42 +++++++++++++++++++++---------
 functional_tests/launch_example.sh |  2 +-
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 924523d..a476cfb 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,9 +10,9 @@
 
    > ```
    > sample,fastq_1,fastq_2
-   > a1,$DASTASET/a1_R1.fastq.gz,$DASTASET/a1_R2.fastq.gz
-   > a2,$DASTASET/a2_R1.fastq.gz,$DASTASET/a2_R2.fastq.gz
-   > c,$DASTASET/c_R1.fastq.gz,$DASTASET/c_R2.fastq.gz
+   > a1,$DATASET/a1_R1.fastq.gz,$DATASET/a1_R2.fastq.gz
+   > a2,$DATASET/a2_R1.fastq.gz,$DATASET/a2_R2.fastq.gz
+   > c,$DATASET/c_R1.fastq.gz,$DATASET/c_R2.fastq.gz
    > ```
 
 4. Run a basic script:
@@ -33,7 +33,7 @@
    > nextflow run -profile test_genotoul_workq metagwgs/main.nf \
    > --type 'SR' \
    > --input 'metagwgs-test-datasets/small/input/samplesheet.csv' \
-   > --skip_host_filter --skip_kaiju
+   > --skip_host_filter --skip_kaiju --stop_at_clean
    > ```
 
    > **NOTE:** you can change Nextflow and Singularity versions with other versions available on the cluster (see all versions with `search_module ToolName`). Nextflow version must be >= v20 and Singularity version must be >= v3.
diff --git a/functional_tests/README.md b/functional_tests/README.md
index 387ff1c..4d56af4 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -5,18 +5,17 @@
 1. Install metagwgs as described here: [installation doc](../docs/installation.md)
 2. Get datasets: two datasets are currently available for these functional tests at `https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git`
 
-    Replace "\<dataset\>" with either "small" or "mag":
     ```
-    git clone --branch <dataset> git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
+    git clone git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
 
     or
 
-    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/archive/<dataset>/metagwgs-test-datasets-<dataset>.tar.gz
+    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git
     ```
-3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-10-19.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
+3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-12-16.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
     - **Kaiju** (_kaijudb_refseq_2020-05-25_)
     - **Diamond** (_refseq_bacteria_2021-05-20_)
-    - **NCBI Taxonomy** (_taxonomy_2021-08-23_)
+    - **NCBI Taxonomy** (_taxonomy_2021-12-7_)
     - **Eggnog Mapper** (_eggnog-mapper-2.0.4-rf1_)
 
 
@@ -30,15 +29,34 @@ To launch functional tests, you need to be located at the root of the folder whe
 - by providing the results folder of a pipeline already exectuted
 ```
 cd test_folder
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results
+export METAG_PATH="/path/to/sources"
+export DATASET="/path/to/metagwgs-test-datasets"
+python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results
 ```
 - by providing a script which will launch the nextflow pipeline [see example](./launch_example.sh) (this example is designed for the "small" dataset with --min_contigs_cpm>1000, using slurm)
-```
-mkdir test_folder
-cd test_folder
-cp <metagwgs-src>/functional_tests/launch_example.sh ./
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results --script launch_example.sh
-```
+
+    1. create working directory 
+    ```
+    mkdir test_folder
+    cd test_folder
+    ```
+    
+    2. set environment variables and load module
+    
+    ```
+    export METAG_PATH="/path/to/sources"
+    export DATASET="/path/to/metagwgs-test-datasets"
+    export DATABANK="/path/to/FT_banks_2021-12-16"
+    export EGGNOG_DB="$DATABANK/eggnog-mapper-2.0.4-rf1/data"
+    module load system/Python-3.7.4
+    ```
+    
+    3. launch functional test
+    
+    ```
+    cp $METAG_PATH/functional_tests/launch_example.sh ./
+    python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results --script launch_example.sh
+    ```
 
 >**NOTE: more information on the command used to produce each dataset in [small](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/small) and [mag](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/mag) READMEs**
 
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 3a95935..7d69bdd 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq main.nf --type 'SR' --input 'metagwgs-test-datasets/small/input/samplesheet.csv' --host_fasta 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir 'FT_banks_2021-10-19/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank 'FT_banks_2021-10-19/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir 'FT_banks_2021-10-19/eggnog-mapper-2.0.4-rf1/data' --taxonomy_dir 'FT_banks_2021-10-19/taxonomy_2021-08-23' --stop_at_clean -with-report -with-timeline -with-trace -with-dag"
\ No newline at end of file
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 944d0efb99401970b3ce20bbeba4624f7660462c Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 10:44:00 +0100
Subject: [PATCH 22/36] remove diamond bank necessity when not use

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index 46bccb4..9a05ad1 100644
--- a/main.nf
+++ b/main.nf
@@ -166,7 +166,7 @@ workflow {
     skip_clean = true
   }
 
-  if ( !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
+  if ( !(params.stop_at_clean) && !(params.stop_at_assembly) && !(params.stop_at_filtering) && !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
       exit 1, "You must specify --stop_at_structural_annot or specify a diamond bank with --diamond_bank"
   }
   header = getAndCheckHeader()
-- 
GitLab


From e6fd319d34a195744761b992089c8e27fa080daf Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 13:02:29 +0100
Subject: [PATCH 23/36] update functional_tests with new profiles

---
 functional_tests/README.md                   |  2 +-
 functional_tests/expected_processes_HiFi.tsv | 10 +++---
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 functional_tests/launch_example.sh           |  2 +-
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/functional_tests/README.md b/functional_tests/README.md
index 4d56af4..25f1b36 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -150,7 +150,7 @@ To use it :
   ``` 
   cut -f 1 $METAG_PATH/functional_tests/expected_processes_sr.tsv  | tail -n +2 > $OUTDIR/cmd_sr.sh
   ``` 
-  > the commands use profile `test_genotoul_workq`
+  > the commands use profile `test,genotoul`
   - replace path in the samplesheet : 
   ``` 
   sed -i -e "s,\$DATASET,$DATASET,g" $DATASET/small/input/samplesheet.csv
diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index e278d4c..cf9eacf 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 9d09ccf..4f146c8 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 7d69bdd..0905b24 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 69c1bb09c0157c5a34fd35c3bdb1d5fe7aa05281 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 16:54:03 +0100
Subject: [PATCH 24/36] Check gff validity (issue : check point prokka)

---
 modules/prokka.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index 44c3475..bb2c305 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -7,11 +7,18 @@ process PROKKA {
    output:
    tuple val(sampleId), path("PROKKA_${sampleId}"), emit: prokka_results
    path "PROKKA_${sampleId}/${sampleId}.txt", emit: report
+   path "PROKKA_${sampleId}/${sampleId}_gff3_validator.txt",emit: gff3_validator_results
 
   script:
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
+
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  then
+    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
+  fi
   """
 }
 
-- 
GitLab


From c2523f7e15ba935177ec4eec7ab1cb9fe8c8fb1d Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Thu, 13 Jan 2022 09:46:12 +0100
Subject: [PATCH 25/36] replace bedtools bamtofastq with samtools fastq (direct
 fastq.gz)

---
 modules/prokka.nf              | 7 +------
 modules/reads_deduplication.nf | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index bb2c305..d2341c3 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -13,12 +13,7 @@ process PROKKA {
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
-
-  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  then
-    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
-  fi
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff 
   """
 }
 
diff --git a/modules/reads_deduplication.nf b/modules/reads_deduplication.nf
index 72b8ff3..c675efd 100644
--- a/modules/reads_deduplication.nf
+++ b/modules/reads_deduplication.nf
@@ -24,8 +24,7 @@ process READS_DEDUPLICATION {
   samtools idxstats ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.idxstats
   samtools flagstat ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.flagstat
   samtools sort -n -o ${sampleId}.filtered.sort.bam ${sampleId}.filtered.bam
-  bedtools bamtofastq -i ${sampleId}.filtered.sort.bam -fq ${sampleId}_R1_dedup.fastq -fq2 ${sampleId}_R2_dedup.fastq
-  gzip ${sampleId}_R1_dedup.fastq ; gzip ${sampleId}_R2_dedup.fastq
+  samtools fastq -N -1 ${sampleId}_R1_dedup.fastq.gz -2 ${sampleId}_R2_dedup.fastq.gz ${sampleId}.filtered.sort.bam 
   rm ${sampleId}.sort.bam
   rm ${sampleId}.fixmate.bam
   rm ${sampleId}.fixmate.positionsort.bam
-- 
GitLab


From 2b61f6742221afba3e61fd61026dcf1fba9459c4 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 14:13:38 +0100
Subject: [PATCH 26/36] Fix Eggnog-Mapper version scraping when it is not used

---
 bin/scrape_software_versions.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py
index 86f979f..6b74a59 100755
--- a/bin/scrape_software_versions.py
+++ b/bin/scrape_software_versions.py
@@ -2,6 +2,7 @@
 from __future__ import print_function
 from collections import OrderedDict
 import re
+import os 
 
 regexes = {
     'metagWGS': ['v_pipeline.txt', r"(\S+)"],
@@ -49,11 +50,12 @@ results['Eggnog-Mapper'] = '<span style="color:#999999;\">N/A</span>'
 
 # Search each file using its regex
 for k, v in regexes.items():
-    with open(v[0]) as x:
-        versions = x.read()
-        match = re.search(v[1], versions)
-        if match:
-            results[k] = "v{}".format(match.group(1))
+    if os.path.exists(v[0]):
+        with open(v[0]) as x:
+            versions = x.read()
+            match = re.search(v[1], versions)
+            if match:
+                results[k] = "v{}".format(match.group(1))
 
 # Remove software set to false in results
 for k in results:
-- 
GitLab


From 6b4dd74d90456253a3712d5540bcbf637d7e1552 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:54:20 +0100
Subject: [PATCH 27/36] correct host + taxonomy_dir paths

---
 functional_tests/expected_processes_HiFi.tsv |  4 +--
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index 4555d12..e278d4c 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
 mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
 mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 1f0ab4c..9d09ccf 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-08-23 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
-- 
GitLab


From 29195043e4a67b39c830a67f69072ce27f19bbc4 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 28/36] update docs + add environement variables

---
 functional_tests/README.md | 42 +++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/functional_tests/README.md b/functional_tests/README.md
index 5b4be13..4d56af4 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -5,18 +5,17 @@
 1. Install metagwgs as described here: [installation doc](../docs/installation.md)
 2. Get datasets: two datasets are currently available for these functional tests at `https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git`
 
-    Replace "\<dataset\>" with either "small" or "mag":
     ```
-    git clone --branch <dataset> git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
+    git clone git@forgemia.inra.fr:genotoul-bioinfo/metagwgs-test-datasets.git
 
     or
 
-    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/archive/<dataset>/metagwgs-test-datasets-<dataset>.tar.gz
+    wget https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets.git
     ```
-3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-12-16.tar.gz	) and decompress its contents in any folder. This archive contains data banks for:
+3. Get data banks: download [this archive](http://genoweb.toulouse.inra.fr/~choede/FT_banks_2021-12-16.tar.gz) and decompress its contents in any folder. This archive contains data banks for:
     - **Kaiju** (_kaijudb_refseq_2020-05-25_)
     - **Diamond** (_refseq_bacteria_2021-05-20_)
-    - **NCBI Taxonomy** (_taxonomy_2021-08-23_)
+    - **NCBI Taxonomy** (_taxonomy_2021-12-7_)
     - **Eggnog Mapper** (_eggnog-mapper-2.0.4-rf1_)
 
 
@@ -30,15 +29,34 @@ To launch functional tests, you need to be located at the root of the folder whe
 - by providing the results folder of a pipeline already exectuted
 ```
 cd test_folder
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results
+export METAG_PATH="/path/to/sources"
+export DATASET="/path/to/metagwgs-test-datasets"
+python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results
 ```
 - by providing a script which will launch the nextflow pipeline [see example](./launch_example.sh) (this example is designed for the "small" dataset with --min_contigs_cpm>1000, using slurm)
-```
-mkdir test_folder
-cd test_folder
-cp <metagwgs-src>/functional_tests/launch_example.sh ./
-python <metagwgs-src>/functional_tests/main.py -step 07_taxo_affi -exp_dir metagwgs-test-datasets/small/output -obs_dir ./results --script launch_example.sh
-```
+
+    1. create working directory 
+    ```
+    mkdir test_folder
+    cd test_folder
+    ```
+    
+    2. set environment variables and load module
+    
+    ```
+    export METAG_PATH="/path/to/sources"
+    export DATASET="/path/to/metagwgs-test-datasets"
+    export DATABANK="/path/to/FT_banks_2021-12-16"
+    export EGGNOG_DB="$DATABANK/eggnog-mapper-2.0.4-rf1/data"
+    module load system/Python-3.7.4
+    ```
+    
+    3. launch functional test
+    
+    ```
+    cp $METAG_PATH/functional_tests/launch_example.sh ./
+    python $METAG_PATH/functional_tests/main.py -step 07_taxo_affi -exp_dir $DATASET/small/output -obs_dir ./results --script launch_example.sh
+    ```
 
 >**NOTE: more information on the command used to produce each dataset in [small](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/small) and [mag](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs-test-datasets/-/tree/mag) READMEs**
 
-- 
GitLab


From 96471aed1daaea4386c362e216f47057188df38e Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 10:44:00 +0100
Subject: [PATCH 29/36] remove diamond bank necessity when not use

---
 main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.nf b/main.nf
index de00ac8..67a944c 100644
--- a/main.nf
+++ b/main.nf
@@ -165,7 +165,7 @@ workflow {
     skip_clean = true
   }
 
-  if ( !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
+  if ( !(params.stop_at_clean) && !(params.stop_at_assembly) && !(params.stop_at_filtering) && !(params.stop_at_structural_annot) && !(params.diamond_bank) ) {
       exit 1, "You must specify --stop_at_structural_annot or specify a diamond bank with --diamond_bank"
   }
   header = getAndCheckHeader()
-- 
GitLab


From 828d606bf45a4fdc4fcf5500badd95e1de633adb Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 30/36] update docs + add environement variables

---
 docs/usage.md                      | 8 ++++----
 functional_tests/launch_example.sh | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index 1a1c665..def331d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,9 +10,9 @@
 
    > ```
    > sample,fastq_1,fastq_2
-   > a1,$DASTASET/a1_R1.fastq.gz,$DASTASET/a1_R2.fastq.gz
-   > a2,$DASTASET/a2_R1.fastq.gz,$DASTASET/a2_R2.fastq.gz
-   > c,$DASTASET/c_R1.fastq.gz,$DASTASET/c_R2.fastq.gz
+   > a1,$DATASET/a1_R1.fastq.gz,$DATASET/a1_R2.fastq.gz
+   > a2,$DATASET/a2_R1.fastq.gz,$DATASET/a2_R2.fastq.gz
+   > c,$DATASET/c_R1.fastq.gz,$DATASET/c_R2.fastq.gz
    > ```
 
 4. Run a basic script:
@@ -33,7 +33,7 @@
    > nextflow run -profile test_genotoul_workq metagwgs/main.nf \
    > --type 'SR' \
    > --input 'metagwgs-test-datasets/small/input/samplesheet.csv' \
-   > --skip_host_filter --skip_kaiju
+   > --skip_host_filter --skip_kaiju --stop_at_clean
    > ```
 
    > **NOTE:** you can change Nextflow and Singularity versions with other versions available on the cluster (see all versions with `search_module ToolName`). Nextflow version must be >= v20 and Singularity version must be >= v3.
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 3a95935..7d69bdd 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq main.nf --type 'SR' --input 'metagwgs-test-datasets/small/input/samplesheet.csv' --host_fasta 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index 'metagwgs-test-datasets/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir 'FT_banks_2021-10-19/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank 'FT_banks_2021-10-19/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir 'FT_banks_2021-10-19/eggnog-mapper-2.0.4-rf1/data' --taxonomy_dir 'FT_banks_2021-10-19/taxonomy_2021-08-23' --stop_at_clean -with-report -with-timeline -with-trace -with-dag"
\ No newline at end of file
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 43329377357dbb06d8a1bad0afc6d4b303d945a6 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 13:02:29 +0100
Subject: [PATCH 31/36] update functional_tests with new profiles

---
 functional_tests/README.md                   |  2 +-
 functional_tests/expected_processes_HiFi.tsv | 10 +++---
 functional_tests/expected_processes_sr.tsv   | 34 ++++++++++----------
 functional_tests/launch_example.sh           |  2 +-
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/functional_tests/README.md b/functional_tests/README.md
index 4d56af4..25f1b36 100644
--- a/functional_tests/README.md
+++ b/functional_tests/README.md
@@ -150,7 +150,7 @@ To use it :
   ``` 
   cut -f 1 $METAG_PATH/functional_tests/expected_processes_sr.tsv  | tail -n +2 > $OUTDIR/cmd_sr.sh
   ``` 
-  > the commands use profile `test_genotoul_workq`
+  > the commands use profile `test,genotoul`
   - replace path in the samplesheet : 
   ``` 
   sed -i -e "s,\$DATASET,$DATASET,g" $DATASET/small/input/samplesheet.csv
diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index e278d4c..cf9eacf 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 9d09ccf..4f146c8 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 7d69bdd..0905b24 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-- 
GitLab


From 04ddeaa0d0a0b5356172e19f8f92db3256577dea Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 16:54:03 +0100
Subject: [PATCH 32/36] Check gff validity (issue : check point prokka)

---
 modules/prokka.nf | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index 44c3475..bb2c305 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -7,11 +7,18 @@ process PROKKA {
    output:
    tuple val(sampleId), path("PROKKA_${sampleId}"), emit: prokka_results
    path "PROKKA_${sampleId}/${sampleId}.txt", emit: report
+   path "PROKKA_${sampleId}/${sampleId}_gff3_validator.txt",emit: gff3_validator_results
 
   script:
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
+
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
+  then
+    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
+  fi
   """
 }
 
-- 
GitLab


From 66af6629c459db2e19362387a978902323309aa2 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Thu, 13 Jan 2022 09:46:12 +0100
Subject: [PATCH 33/36] replace bedtools bamtofastq with samtools fastq (direct
 fastq.gz)

---
 modules/prokka.nf              | 7 +------
 modules/reads_deduplication.nf | 3 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/modules/prokka.nf b/modules/prokka.nf
index bb2c305..d2341c3 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -13,12 +13,7 @@ process PROKKA {
   """
   prokka --metagenome --noanno --rawproduct --outdir PROKKA_${sampleId} --prefix ${sampleId} ${assembly_file} --centre X --compliant --cpus ${task.cpus}
   rm PROKKA_${sampleId}/*.gbk
-
-  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff > PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  if grep -Fxqv "input is valid GFF3" PROKKA_${sampleId}/${sampleId}_gff3_validator.txt
-  then
-    rm PROKKA_${sampleId}/${sampleId}_gff3_validator.txt 
-  fi
+  gt gff3validator PROKKA_${sampleId}/${sampleId}.gff 
   """
 }
 
diff --git a/modules/reads_deduplication.nf b/modules/reads_deduplication.nf
index 72b8ff3..c675efd 100644
--- a/modules/reads_deduplication.nf
+++ b/modules/reads_deduplication.nf
@@ -24,8 +24,7 @@ process READS_DEDUPLICATION {
   samtools idxstats ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.idxstats
   samtools flagstat ${sampleId}.filtered.bam > ${sampleId}.count_reads_on_contigs.flagstat
   samtools sort -n -o ${sampleId}.filtered.sort.bam ${sampleId}.filtered.bam
-  bedtools bamtofastq -i ${sampleId}.filtered.sort.bam -fq ${sampleId}_R1_dedup.fastq -fq2 ${sampleId}_R2_dedup.fastq
-  gzip ${sampleId}_R1_dedup.fastq ; gzip ${sampleId}_R2_dedup.fastq
+  samtools fastq -N -1 ${sampleId}_R1_dedup.fastq.gz -2 ${sampleId}_R2_dedup.fastq.gz ${sampleId}.filtered.sort.bam 
   rm ${sampleId}.sort.bam
   rm ${sampleId}.fixmate.bam
   rm ${sampleId}.fixmate.positionsort.bam
-- 
GitLab


From 20dc1171f1236472c1d6206f8268b4abf5a632ca Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Mon, 10 Jan 2022 15:55:47 +0100
Subject: [PATCH 34/36] update docs + add environment variables

---
 functional_tests/launch_example.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index 0905b24..baef67d 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,4 +1,8 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
+<<<<<<< HEAD
 	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+=======
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+>>>>>>> update docs + add environement variables
-- 
GitLab


From ee4c12a05ee96642f5203fe8bc181a693bb6b7c6 Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Tue, 11 Jan 2022 13:02:29 +0100
Subject: [PATCH 35/36] update functional_tests with new profiles

---
 functional_tests/launch_example.sh | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index baef67d..3750a6d 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,8 +1,5 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-<<<<<<< HEAD
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
-=======
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test_genotoul_workq $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
->>>>>>> update docs + add environement variables
+
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
\ No newline at end of file
-- 
GitLab


From a689edb8c4ac21e991bc2abbbd4f18d494ef87ea Mon Sep 17 00:00:00 2001
From: Maina Vienne <maina.vienne@inrae.fr>
Date: Fri, 14 Jan 2022 09:30:56 +0100
Subject: [PATCH 36/36] update functional test to take into account new
 taxonomy parameters

---
 functional_tests/expected_processes_HiFi.tsv |  4 +--
 functional_tests/expected_processes_sr.tsv   | 32 ++++++++++----------
 functional_tests/launch_example.sh           |  3 +-
 modules/prokka.nf                            |  1 -
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/functional_tests/expected_processes_HiFi.tsv b/functional_tests/expected_processes_HiFi.tsv
index cf9eacf..ca9dbbd 100644
--- a/functional_tests/expected_processes_HiFi.tsv
+++ b/functional_tests/expected_processes_HiFi.tsv
@@ -1,6 +1,6 @@
 cmd	outputdir	DATABASES:INDEX_KAIJU	DATABASES:DOWNLOAD_TAXONOMY_DB	DATABASES:EGGNOG_MAPPER_DB	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:MINIMAP2	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
-mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/hifi_all ; cd $OUTDIR/hifi_all ;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump	$OUTDIR/hifi_all	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
 mkdir $OUTDIR/hifi_stop_at_structural_annot;cd $OUTDIR/hifi_stop_at_structural_annot; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --stop_at_structural_annot	$OUTDIR/hifi_stop_at_structural_annot	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 mkdir $OUTDIR/skip_func_annot-skip_taxo_affi; cd $OUTDIR/skip_func_annot-skip_taxo_affi;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 –skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot;cp ../nextflow.config .;  nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --eggnog_mapper_db_dir `echo $EGGNOG_DB`  --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd  --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump --skip_func_annot	$OUTDIR/skip_func_annot	0	0	0	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
 mkdir $OUTDIR/skip_taxo_affi; cd $OUTDIR/skip_taxo_affi;nextflow run -profile test,genotoul $METAG_PATH/main.nf --type HIFI --input `echo $DATASET`/hifi/input/samplesheet.csv --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --skip_taxo_affi	$OUTDIR/skip_taxo_affi	0	0	0	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/expected_processes_sr.tsv b/functional_tests/expected_processes_sr.tsv
index 4f146c8..26f8a39 100644
--- a/functional_tests/expected_processes_sr.tsv
+++ b/functional_tests/expected_processes_sr.tsv
@@ -1,22 +1,22 @@
 cmd	outputdir	SR:S01_CLEAN_QC:FASTQC_RAW	SR:S01_CLEAN_QC:CUTADAPT	SR:S01_CLEAN_QC:SICKLE	SR:S01_CLEAN_QC:HOST_FILTER	SR:S01_CLEAN_QC:FASTQC_CLEANED	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:KAIJU	SR:S01_CLEAN_QC:KAIJU_AND_MERGE:MERGE_KAIJU	SR:S02_ASSEMBLY:ASSEMBLY	SR:S02_ASSEMBLY:ASSEMBLY_QUAST	SR:S02_ASSEMBLY:READS_DEDUPLICATION	SR:S03_FILTERING:CHUNK_ASSEMBLY_FILTER	SR:S03_FILTERING:MERGE_ASSEMBLY_FILTER	SH:S04_STRUCTURAL_ANNOT:PROKKA	SH:S04_FILTERED_QUAST	SH:S04_STRUCTURAL_ANNOT:RENAME_CONTIGS_AND_GENES	SH:S05_ALIGNMENT:DIAMOND	SH:S05_ALIGNMENT:BWA_MEM	SH:S06_FUNC_ANNOT:CD_HIT:INDIVIDUAL_CD_HIT	SH:S06_FUNC_ANNOT:EGGNOG_MAPPER	SH:S06_FUNC_ANNOT:BEST_HITS	SH:S06_FUNC_ANNOT:QUANTIFICATION:FEATURE_COUNTS	SH:S06_FUNC_ANNOT:CD_HIT:GLOBAL_CD_HIT	SH:S06_FUNC_ANNOT:QUANTIFICATION:QUANTIFICATION_TABLE	SH:S06_FUNC_ANNOT:MERGE_QUANT_ANNOT_BEST	SH:S06_FUNC_ANNOT:FUNCTIONAL_ANNOT_TABLE	SH:S07_TAXO_AFFI:ASSIGN_TAXONOMY	SH:S07_TAXO_AFFI:QUANTIF_AND_TAXONOMIC_TABLE_CONTIGS
 mkdir $OUTDIR/stop_at_clean ; cd $OUTDIR/stop_at_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 -with-report -with-timeline -with-trace --stop_at_clean	$OUTDIR/stop_at_clean	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter --skip_kaiju	$OUTDIR/skip_sickle-skip_host_filter-skip_kaiju	1	1	0	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_host_filter ; cd $OUTDIR/skip_sickle-skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_host_filter	$OUTDIR/skip_sickle-skip_host_filter	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle-skip_kaiju ; cd $OUTDIR/skip_sickle-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle --skip_kaiju	$OUTDIR/skip_sickle-skip_kaiju	1	1	0	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_sickle ; cd $OUTDIR/skip_sickle ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_sickle	$OUTDIR/skip_sickle	1	1	0	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter-skip_kaiju ; cd $OUTDIR/skip_host_filter-skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter --skip_kaiju	$OUTDIR/skip_host_filter-skip_kaiju	1	1	1	0	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_host_filter ; cd $OUTDIR/skip_host_filter ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_host_filter	$OUTDIR/skip_host_filter	1	1	1	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_kaiju ; cd $OUTDIR/skip_kaiju ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_clean --skip_kaiju	$OUTDIR/skip_kaiju	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_assembly ; cd $OUTDIR/stop_at_assembly ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_assembly	$OUTDIR/stop_at_assembly	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_clean ; cd $OUTDIR/skip_clean ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_assembly --skip_clean	$OUTDIR/skip_clean	0	0	0	0	0	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_filtering ; cd $OUTDIR/stop_at_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_filtering	$OUTDIR/stop_at_filtering	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/stop_at_structural_annot ; cd $OUTDIR/stop_at_structural_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_structural_annot	$OUTDIR/stop_at_structural_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_filtering ; cd $OUTDIR/skip_filtering ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --stop_at_structural_annot --skip_filtering	$OUTDIR/skip_filtering	1	1	1	1	1	1	1	1	1	1	0	0	1	1	1	0	0	0	0	0	0	0	0	0	0	0	0
 																												
-mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
-mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
-mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
-mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --taxonomy_dir `echo $DATABANK`/taxonomy_2021-12-7 -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
+mkdir $OUTDIR/all ; cd $OUTDIR/all ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace	$OUTDIR/all	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1
+mkdir $OUTDIR/skip_func_annot-skip_taxo_affi ; cd $OUTDIR/skip_func_annot-skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --skip_func_annot --skip_taxo_affi	$OUTDIR/skip_func_annot-skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	0	0
+mkdir $OUTDIR/skip_func_annot ; cd $OUTDIR/skip_func_annot ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --skip_func_annot	$OUTDIR/skip_func_annot	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0	0	0	0	0	0	0	1	1
+mkdir $OUTDIR/skip_taxo_affi ; cd $OUTDIR/skip_taxo_affi ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input `echo $DATASET`/small/input/samplesheet.csv --host_fasta `echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa --host_index "`echo $DATASET`/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}" --kaiju_db_dir `echo $DATABANK`/kaijudb_refseq_2020-05-25 --diamond_bank `echo $DATABANK`/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd --eggnog_mapper_db_dir `echo $EGGNOG_DB` --accession2taxid `echo $DATABANK`/taxonomy_2021-12-7/prot.accession2taxid.FULL --taxdump `echo $DATABANK`/taxonomy_2021-12-7/new_taxdump -with-report -with-timeline -with-trace --skip_taxo_affi	$OUTDIR/skip_taxo_affi	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	1	0	0
diff --git a/functional_tests/launch_example.sh b/functional_tests/launch_example.sh
index ecacf0d..0bbc87d 100644
--- a/functional_tests/launch_example.sh
+++ b/functional_tests/launch_example.sh
@@ -1,5 +1,4 @@
 #!/bin/bash
 
 sbatch -W -p workq -J functional_test --mem=6G \
-
-	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --taxonomy_dir '$DATABANK/taxonomy_2021-12-7' -with-report -with-timeline -with-trace -with-dag"
+	--wrap="module load bioinfo/Nextflow-v21.04.1 ; module load system/singularity-3.7.3 ; nextflow run -profile test,genotoul $METAG_PATH/main.nf --type 'SR' --input '$DATASET/small/input/samplesheet.csv' --host_fasta '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa' --host_index '$DATASET/small/input/host/Homo_sapiens.GRCh38_chr21.fa.{amb,ann,bwt,pac,sa}' --kaiju_db_dir '$DATABANK/kaijudb_refseq_2020-05-25' --min_contigs_cpm 1000 --diamond_bank '$DATABANK/refseq_bacteria_2021-05-20/refseq_bacteria_100000.dmnd' --eggnog_mapper_db_dir '$EGGNOG_DB' --accession2taxid '$DATABANK/taxonomy_2021-12-7/prot.accession2taxid.FULL' --taxdump '$DATABANK/taxonomy_2021-12-7/new_taxdump' -with-report -with-timeline -with-trace -with-dag"
diff --git a/modules/prokka.nf b/modules/prokka.nf
index d2341c3..984e972 100644
--- a/modules/prokka.nf
+++ b/modules/prokka.nf
@@ -7,7 +7,6 @@ process PROKKA {
    output:
    tuple val(sampleId), path("PROKKA_${sampleId}"), emit: prokka_results
    path "PROKKA_${sampleId}/${sampleId}.txt", emit: report
-   path "PROKKA_${sampleId}/${sampleId}_gff3_validator.txt",emit: gff3_validator_results
 
   script:
   """
-- 
GitLab