From 41dfe8bb91fb77d7173c07a09c0375d3f171f60d Mon Sep 17 00:00:00 2001 From: Victor Reys <132575181+VGPReys@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:21:39 +0200 Subject: [PATCH] create textual output (#343) * create textual output * test_cli_resclust.py lint * Filename matching issue * test_cli_resclust.py lint * Update cli_resclust.py lint --- src/arctic3d/cli_resclust.py | 50 +++++++++++++++++++++++++++++++----- tests/test_cli_resclust.py | 21 +++++++++++++++ 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/arctic3d/cli_resclust.py b/src/arctic3d/cli_resclust.py index 7b70459..9afccf4 100644 --- a/src/arctic3d/cli_resclust.py +++ b/src/arctic3d/cli_resclust.py @@ -23,6 +23,8 @@ `linkage` : the linkage strategy. `criterion` : the criterion to extract the clusters. + + `output` : the path where to output clusters data. """ import argparse import sys @@ -36,6 +38,7 @@ get_clustering_dict, ) from arctic3d.modules.input import Input +from arctic3d.modules.output import create_output_folder argument_parser = argparse.ArgumentParser() @@ -88,6 +91,13 @@ "--chain", help="Segment ID to be considered", required=False ) +argument_parser.add_argument( + "--output", + help="Path to the generated output dictionary", + type=str, + required=False, +) + def load_args(arguments): """ @@ -128,7 +138,15 @@ def maincli(): cli(argument_parser, main) -def main(input_arg, residue_list, chain, threshold, linkage, criterion): +def main( + input_arg, + residue_list, + chain, + threshold, + linkage, + criterion, + output, +): """Main function.""" log.setLevel("INFO") @@ -192,14 +210,32 @@ def main(input_arg, residue_list, chain, threshold, linkage, criterion): ) cl_dict = get_clustering_dict(clusters, unique_sorted_resids) - for el in cl_dict.keys(): - log.info( - f"cluster {el}, residues" - f" {' '.join([str(res) for res in cl_dict[el]])}" - ) else: log.info("Only one residue, no clustering performed.") - log.info(f"cluster 1, residues {unique_sorted_resids[0]}") + # fake cluster dict with only one entry + cl_dict = {1: unique_sorted_resids} + + # log data + for el in cl_dict.keys(): + log.info( + f"cluster {el}, residues" + f" {' '.join([str(res) for res in cl_dict[el]])}" + ) + + # check if data must be flushed to output file + if output: + # initiate output directory + output_basepath = create_output_folder(output, uniprot_id='resclust') + # write txt file + output_fname = f'{output_basepath}/clustered_residues.out' + log.info(f'writing clusters data in "{output_fname}"') + with open(output_fname, 'w') as filout: + for el in cl_dict.keys(): + filout.write( + f"cluster {el} -> " + f"{' '.join([str(res) for res in cl_dict[el]])}" + "\n" + ) if __name__ == "__main__": diff --git a/tests/test_cli_resclust.py b/tests/test_cli_resclust.py index 06f19d7..6eae59c 100644 --- a/tests/test_cli_resclust.py +++ b/tests/test_cli_resclust.py @@ -2,6 +2,9 @@ import pytest +import os +import shutil + from arctic3d.cli_resclust import main from . import golden_data @@ -22,6 +25,7 @@ def test_resclust_cli(example_pdbpath): 7.0, "average", "distance", + None, ) @@ -35,6 +39,7 @@ def test_wrong_residue_list(example_pdbpath): 9.0, "average", "distance", + None, ) assert e.type == SystemExit assert e.value.code == 1 @@ -49,4 +54,20 @@ def test_resclust_maxclust(example_pdbpath): 2, "average", "maxclust", + None, + ) + + +def test_resclust_genoutput(example_pdbpath): + main( + example_pdbpath, + "100,101,102,133,134,135", + None, + 2, + "average", + "maxclust", + "resclustout", ) + assert os.path.exists("resclustout") is True + assert os.path.exists("resclustout/clustered_residues.out") is True + shutil.rmtree("resclustout")