From b6a29dc45b017f77d81489ca85c62f87818ef4a1 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Thu, 26 Sep 2024 14:38:57 -0400 Subject: [PATCH 1/5] Add test_parse_results --- tests/unit/test_parse.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_parse.py b/tests/unit/test_parse.py index 131457d..21f827e 100644 --- a/tests/unit/test_parse.py +++ b/tests/unit/test_parse.py @@ -1,7 +1,7 @@ import tempfile import unittest -from unassigner.parse import parse_fasta, parse_desc, load_fasta, write_fasta +from unassigner.parse import parse_fasta, parse_results, load_fasta, write_fasta class FastaTests(unittest.TestCase): @@ -40,5 +40,20 @@ def test_write_fasta(self): self.assertEqual(f.read(), ">a\nCCGGT\n>b\nTTTTTTTTT\n") +class ResultsTests(unittest.TestCase): + def test_parse_results(self): + results = [ + "query_id\tspecies\ttypestrain_id\tregion_mismatches\tregion_positions\tprobability_incompatible\n", + "Seq1\tA\tB\t1\t2\t0.5\n", + "Seq1\tC\tD\t3\t1500\t-2.062794379753541e-12\n", + "Seq2\tNA\tNA\tNA\tNA\tNA\n", + ] + + with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8") as f: + f.writelines(results) + f.seek(0) + content = list(parse_results(f)) + + if __name__ == "__main__": unittest.main() From b3b88f3ce1d43f3aef2a2ae02bc22657b8d48a42 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Thu, 26 Sep 2024 14:45:11 -0400 Subject: [PATCH 2/5] Pass in as generator rather than list --- tests/unit/test_parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_parse.py b/tests/unit/test_parse.py index 21f827e..101069c 100644 --- a/tests/unit/test_parse.py +++ b/tests/unit/test_parse.py @@ -52,7 +52,7 @@ def test_parse_results(self): with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8") as f: f.writelines(results) f.seek(0) - content = list(parse_results(f)) + content = list(parse_results((l for l in f.readlines()))) if __name__ == "__main__": From 
c7f938074bab481317c06438a1d067a5354d656e Mon Sep 17 00:00:00 2001 From: Ulthran Date: Thu, 26 Sep 2024 14:50:20 -0400 Subject: [PATCH 3/5] Cast NA to none for numerical fields --- unassigner/parse.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/unassigner/parse.py b/unassigner/parse.py index 445881a..60fbeba 100644 --- a/unassigner/parse.py +++ b/unassigner/parse.py @@ -98,6 +98,12 @@ def parse_greengenes_accessions(f): yield line.split("\t") +def cast_num_or_na(val, cast_func): + if val == "NA": + return None + return cast_func(val) + + def parse_results(f): float_fields = ["probability_incompatible"] int_fields = ["region_mismatches", "region_positions"] @@ -110,7 +116,7 @@ def parse_results(f): res = dict(zip(fields, vals)) for field, val in res.items(): if field in float_fields: - res[field] = float(val) + res[field] = cast_num_or_na(val, float) elif field in int_fields: - res[field] = int(val) + res[field] = cast_num_or_na(val, int) yield res From e2b4d8adfcf09a89002377f6994d3cb370de6ec6 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Thu, 26 Sep 2024 15:02:19 -0400 Subject: [PATCH 4/5] Add codacy badge and remove unnecessary action spec --- .github/workflows/test.yml | 16 +++------------- README.md | 1 + 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 864c1c2..8ab27cd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,8 +13,8 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 + - name: Set up Python + uses: actions/setup-python@v5 with: python-version: '3.x' @@ -90,14 +90,4 @@ jobs: - name: Lint Code Base run: | - black --check . 
- - codacy-analysis-cli: - name: Codacy Analysis CLI - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Run Codacy Analysis CLI - uses: codacy/codacy-analysis-cli-action@master \ No newline at end of file + black --check . \ No newline at end of file diff --git a/README.md b/README.md index 0f6e7c1..1187299 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Tests](https://github.com/PennChopMicrobiomeProgram/unassigner/actions/workflows/pr.yml/badge.svg)](https://github.com/PennChopMicrobiomeProgram/unassigner/actions/workflows/pr.yml) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/9ee8fc7bc3e940bb812b35006e95937d)](https://app.codacy.com/gh/PennChopMicrobiomeProgram/unassigner/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![codecov](https://codecov.io/gh/PennChopMicrobiomeProgram/unassigner/graph/badge.svg?token=LAFU84K088)](https://codecov.io/gh/PennChopMicrobiomeProgram/unassigner) [![PyPI](https://badge.fury.io/py/unassigner.svg)](https://pypi.org/project/unassigner/) [![Bioconda](https://anaconda.org/bioconda/unassigner/badges/downloads.svg)](https://anaconda.org/bioconda/unassigner/) From 057290c3813688b96dc072f9a6ad90815611e691 Mon Sep 17 00:00:00 2001 From: Ulthran Date: Thu, 26 Sep 2024 15:29:26 -0400 Subject: [PATCH 5/5] Check that parsed results are as expected --- tests/unit/test_parse.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_parse.py b/tests/unit/test_parse.py index 101069c..fe5712b 100644 --- a/tests/unit/test_parse.py +++ b/tests/unit/test_parse.py @@ -54,6 +54,11 @@ def test_parse_results(self): f.seek(0) content = list(parse_results((l for l in f.readlines()))) + self.assertEqual(content[0]["query_id"], "Seq1") + self.assertEqual(content[2]["species"], "NA") + self.assertEqual(content[2]["region_mismatches"], None) + self.assertEqual(content[2]["probability_incompatible"], None) + if __name__ == "__main__": 
unittest.main()