From 24cb56374693a8fd6605e5a8f5585581ff0cd577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noirot=20C=C3=A9line?= <celine.noirot@inra.fr> Date: Thu, 18 Aug 2022 09:29:27 +0200 Subject: [PATCH] Do checkpoint on values after Cdhit #30 --- modules/cd_hit.nf | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/modules/cd_hit.nf b/modules/cd_hit.nf index af6913c..d1e7252 100644 --- a/modules/cd_hit.nf +++ b/modules/cd_hit.nf @@ -30,7 +30,7 @@ process GLOBAL_CD_HIT { val pct_id output: - path "All-cd-hit-est.${pct_id}.fasta" + path "All-cd-hit-est.${pct_id}.fasta", emit: fasta_clusters path "table_clstr.txt", emit: clstr_table @@ -44,6 +44,38 @@ process GLOBAL_CD_HIT { } +// Consistency check after CD-HIT: cluster and sequence counts must agree between the cluster table and the FASTA files, else the task exits 1. +process CHECK_METRICS { + label 'CD_HIT' + + input: + path ech_fastas + path table_clstr + path fasta_cluster + + script: + """ + cat ${ech_fastas} | grep -c ">" > nb_inputed_seq_in_fasta.txt + grep -c ">" ${fasta_cluster} > nb_clusters_in_fasta.txt + cut -f1 ${table_clstr} | sort -u | wc -l > nb_clusters.txt + cat ${table_clstr} | wc -l > nb_seq_clustered.txt + + DIFF=\$(diff nb_clusters.txt nb_clusters_in_fasta.txt || true) + if [ "\$DIFF" != "" ] + then + echo "Error: nb cluster after cdhit (individual + global) not consistant [table VS fasta]" + exit 1 + fi + + DIFF=\$(diff nb_seq_clustered.txt nb_inputed_seq_in_fasta.txt || true) + if [ "\$DIFF" != "" ] + then + echo "Error: nb contigs after cdhit (individual + global) not consistant [table VS fasta]" + exit 1 + fi + """ + +} workflow CD_HIT { @@ -53,14 +85,13 @@ ch_percentage_identity // channel: val main: INDIVIDUAL_CD_HIT( ch_assembly, ch_percentage_identity ) - ch_individual_clusters = INDIVIDUAL_CD_HIT.out.clstr_fasta.collect() - GLOBAL_CD_HIT(ch_individual_clusters , ch_percentage_identity ) + ch_ffn = ch_assembly.flatMap{it -> it[1]}.collect() + CHECK_METRICS(ch_ffn, GLOBAL_CD_HIT.out.clstr_table , GLOBAL_CD_HIT.out.fasta_clusters) emit: individual_clstr_table 
 = INDIVIDUAL_CD_HIT.out.clstr_table global_clstr_table = GLOBAL_CD_HIT.out.clstr_table -} - +} \ No newline at end of file -- GitLab