From 7502d1b5e685ab23995986ae10fa6bae9f2d77ef Mon Sep 17 00:00:00 2001 From: lcouderc Date: Thu, 29 Jun 2017 11:38:38 +0200 Subject: [PATCH] Link the automatic error correction with the auto mode (--read_correction auto) --- scripts/components_assembly.py | 21 ++++++++++++++++----- scripts/matam_assembly.py | 19 +++++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/scripts/components_assembly.py b/scripts/components_assembly.py index f6292e2..456b088 100755 --- a/scripts/components_assembly.py +++ b/scripts/components_assembly.py @@ -146,6 +146,10 @@ def estimate_coverage(reads_fq, contigs_fa): def assemble_component(assembler_name, in_fastq, workdir, read_correction, cpu, coverage_threshold): + + if read_correction != 'auto' and coverage_threshold is not None: + logger.warning("Coverage_threshold %s makes no sense when read_correction is not auto. This argument will be ignored" % coverage_threshold) + logger.debug('Assembling: %s' % in_fastq) assembler_factory = AssemblerFactory() assembler = assembler_factory.get(assembler_name) @@ -155,12 +159,12 @@ def assemble_component(assembler_name, estimated_cov = estimate_coverage(in_fastq, fasta_file) logger.debug("Estimated coverage:%s, %s" % (estimated_cov, in_fastq)) - # Re-run the assembly with error correction activated - if estimated_cov is not None and estimated_cov > coverage_threshold: + # Re-run the assembly with error correction activated when read_correction == auto + if read_correction == 'auto' and estimated_cov is not None and estimated_cov > coverage_threshold: assembler.build_command_line(in_fastq, workdir, 'yes', cpu) fasta_file = assembler.run() estimated_cov2 = estimate_coverage(in_fastq, fasta_file) - logger.debug("Estimated coverage, before:%s, after:%s, %s" % (estimated_cov, estimated_cov2, in_fastq)) + logger.debug("Estimated coverage, before:%s, after:%s, component:%s, cov_threshold:%s" % (estimated_cov, estimated_cov2, in_fastq, coverage_threshold)) # suspicious when estimate_cov is not None and estimated_cov2 is None if estimated_cov2 is None: @@ -201,7 +205,7 @@ def _get_workdir(fq): def assemble_all_components(assembler_name, fastq, read_metanode_component_filepath, components_lca_filepath, out_contigs_fasta, workdir, - cpu, read_correction, coverage_threshold=20): + cpu, read_correction, coverage_threshold): logger.info("Save components to fastq files") @@ -278,9 +282,16 @@ def assemble_all_components(assembler_name, default = 'no', help = 'Set the assembler read correction step. ' 'Default is %(default)s') + parser.add_argument( '--contig_coverage_threshold', + action = 'store', + type = int, + choices = [20, 50], + default = 20, + help = "When the contig's coverage is sufficient (default %(default)s, then run the assembler with read_correction enabled for this contig.") args = parser.parse_args() + assemble_all_components(args.assembler, args.input_fastq.name, args.reads_metanode.name, args.components_lca.name, args.output_fasta, args.workdir, - args.cpu, args.read_correction) + args.cpu, args.read_correction, args.contig_coverage_threshold) diff --git a/scripts/matam_assembly.py b/scripts/matam_assembly.py index 1b79af1..6c75a2b 100755 --- a/scripts/matam_assembly.py +++ b/scripts/matam_assembly.py @@ -412,9 +412,18 @@ def parse_arguments(): action = 'store', type = str, choices = ['no', 'yes', 'auto'], - default = 'no', + default = 'auto', help = 'Set the assembler read correction step. ' + "'auto' means assemble the components with read correction enabled when the components coverage are sufficient (20X) and assemble the other components without read correction. " 'Default is %(default)s') + # --contig_coverage_threshold + # this option is not shown to the user + group_contig.add_argument( '--contig_coverage_threshold', + action = 'store', + type = int, + choices = [20, 50], + default = 20, + help = argparse.SUPPRESS) # Scaffolding group_scaff = parser.add_argument_group('Scaffolding') @@ -497,6 +506,10 @@ def parse_arguments(): parser.print_help() raise Exception("quorum not in range [0.51,1]") + # contig_coverage_threshold default value makes no sense when args.read_correction is not auto + if args.read_correction != 'auto': + args.contig_coverage_threshold = None + # Set debug parameters if args.debug: args.verbose = True @@ -588,6 +601,8 @@ def print_intro(args): # Contigs assembly cmd_line += '--read_correction {0} '.format(args.read_correction) + if args.read_correction == 'auto': + cmd_line += '--contig_coverage_threshold {0} '.format(args.contig_coverage_threshold) # Scaffolding if args.contigs_binning: @@ -1181,7 +1196,7 @@ def main(): components_assembly.assemble_all_components(args.assembler, sortme_output_fastx_filepath, read_metanode_component_filepath, components_lca_filepath, contigs_filepath, contigs_assembly_wkdir, - args.cpu, args.read_correction) + args.cpu, args.read_correction, args.contig_coverage_threshold) if not args.keep_tmp: shutil.rmtree(contigs_assembly_wkdir)