Skip to content

Commit

Permalink
Link the automatic error correction with the auto mode (--read_correc…
Browse files Browse the repository at this point in the history
…tion auto)
  • Loading branch information
loic-couderc committed Jun 29, 2017
1 parent 0881a10 commit 7502d1b
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 7 deletions.
21 changes: 16 additions & 5 deletions scripts/components_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ def estimate_coverage(reads_fq, contigs_fa):
def assemble_component(assembler_name,
in_fastq, workdir,
read_correction, cpu, coverage_threshold):

if read_correction != 'auto' and coverage_threshold is not None:
logger.warning("Coverage_threshold %s makes no sense when read_correction is not auto. This argument will be ignored" % coverage_threshold)

logger.debug('Assembling: %s' % in_fastq)
assembler_factory = AssemblerFactory()
assembler = assembler_factory.get(assembler_name)
Expand All @@ -155,12 +159,12 @@ def assemble_component(assembler_name,
estimated_cov = estimate_coverage(in_fastq, fasta_file)
logger.debug("Estimated coverage:%s, %s" % (estimated_cov, in_fastq))

# Re-run the assembly with error correction activated
if estimated_cov is not None and estimated_cov > coverage_threshold:
# Re-run the assembly with error correction activated when read_correction == auto
if read_correction == 'auto' and estimated_cov is not None and estimated_cov > coverage_threshold:
assembler.build_command_line(in_fastq, workdir, 'yes', cpu)
fasta_file = assembler.run()
estimated_cov2 = estimate_coverage(in_fastq, fasta_file)
logger.debug("Estimated coverage, before:%s, after:%s, %s" % (estimated_cov, estimated_cov2, in_fastq))
logger.debug("Estimated coverage, before:%s, after:%s, component:%s, cov_threshold:%s" % (estimated_cov, estimated_cov2, in_fastq, coverage_threshold))

# suspicious when estimate_cov is not None and estimated_cov2 is None
if estimated_cov2 is None:
Expand Down Expand Up @@ -201,7 +205,7 @@ def _get_workdir(fq):
def assemble_all_components(assembler_name,
fastq, read_metanode_component_filepath, components_lca_filepath,
out_contigs_fasta, workdir,
cpu, read_correction, coverage_threshold=20):
cpu, read_correction, coverage_threshold):


logger.info("Save components to fastq files")
Expand Down Expand Up @@ -278,9 +282,16 @@ def assemble_all_components(assembler_name,
default = 'no',
help = 'Set the assembler read correction step. '
'Default is %(default)s')
parser.add_argument( '--contig_coverage_threshold',
action = 'store',
type = int,
choices = [20, 50],
default = 20,
help = "When the contig's coverage is sufficient (default %(default)s, then run the assembler with read_correction enabled for this contig.")

args = parser.parse_args()

assemble_all_components(args.assembler,
args.input_fastq.name, args.reads_metanode.name, args.components_lca.name,
args.output_fasta, args.workdir,
args.cpu, args.read_correction)
args.cpu, args.read_correction, args.contig_coverage_threshold)
19 changes: 17 additions & 2 deletions scripts/matam_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,9 +412,18 @@ def parse_arguments():
action = 'store',
type = str,
choices = ['no', 'yes', 'auto'],
default = 'no',
default = 'auto',
help = 'Set the assembler read correction step. '
"'auto' means assemble the components with read correction enabled when the components coverage are sufficient (20X) and assemble the other components without read correction. "
'Default is %(default)s')
# --contig_coverage_threshold
# this option is not shown to the user
group_contig.add_argument( '--contig_coverage_threshold',
action = 'store',
type = int,
choices = [20, 50],
default = 20,
help = argparse.SUPPRESS)

# Scaffolding
group_scaff = parser.add_argument_group('Scaffolding')
Expand Down Expand Up @@ -497,6 +506,10 @@ def parse_arguments():
parser.print_help()
raise Exception("quorum not in range [0.51,1]")

# contig_coverage_threshold default value makes no sense when args.read_correction is not auto
if args.read_correction != 'auto':
args.contig_coverage_threshold = None

# Set debug parameters
if args.debug:
args.verbose = True
Expand Down Expand Up @@ -588,6 +601,8 @@ def print_intro(args):

# Contigs assembly
cmd_line += '--read_correction {0} '.format(args.read_correction)
if args.read_correction == 'auto':
cmd_line += '--contig_coverage_threshold {0} '.format(args.contig_coverage_threshold)

# Scaffolding
if args.contigs_binning:
Expand Down Expand Up @@ -1181,7 +1196,7 @@ def main():
components_assembly.assemble_all_components(args.assembler,
sortme_output_fastx_filepath, read_metanode_component_filepath, components_lca_filepath,
contigs_filepath, contigs_assembly_wkdir,
args.cpu, args.read_correction)
args.cpu, args.read_correction, args.contig_coverage_threshold)

if not args.keep_tmp:
shutil.rmtree(contigs_assembly_wkdir)
Expand Down

0 comments on commit 7502d1b

Please sign in to comment.