Skip to content

Commit

Permalink
Merge pull request #31 from PennChopMicrobiomeProgram/30-parse_result…
Browse files Browse the repository at this point in the history
…s-fails-with-nas-in-output

Upgrade parse_results to handle NAs
  • Loading branch information
kylebittinger authored Oct 3, 2024
2 parents b1850fc + 057290c commit c7901de
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 16 deletions.
16 changes: 3 additions & 13 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

- name: Set up Python 3.10
uses: actions/setup-python@v3
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'

Expand Down Expand Up @@ -90,14 +90,4 @@ jobs:

- name: Lint Code Base
run: |
black --check .
codacy-analysis-cli:
name: Codacy Analysis CLI
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Run Codacy Analysis CLI
uses: codacy/codacy-analysis-cli-action@master
black --check .
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

<!-- Begin badges -->
[![Tests](https://github.com/PennChopMicrobiomeProgram/unassigner/actions/workflows/pr.yml/badge.svg)](https://github.com/PennChopMicrobiomeProgram/unassigner/actions/workflows/pr.yml)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/9ee8fc7bc3e940bb812b35006e95937d)](https://app.codacy.com/gh/PennChopMicrobiomeProgram/unassigner/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
[![codecov](https://codecov.io/gh/PennChopMicrobiomeProgram/unassigner/graph/badge.svg?token=LAFU84K088)](https://codecov.io/gh/PennChopMicrobiomeProgram/unassigner)
[![PyPI](https://badge.fury.io/py/unassigner.svg)](https://pypi.org/project/unassigner/)
[![Bioconda](https://anaconda.org/bioconda/unassigner/badges/downloads.svg)](https://anaconda.org/bioconda/unassigner/)
Expand Down
22 changes: 21 additions & 1 deletion tests/unit/test_parse.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import tempfile
import unittest

from unassigner.parse import parse_fasta, parse_desc, load_fasta, write_fasta
from unassigner.parse import parse_fasta, parse_results, load_fasta, write_fasta


class FastaTests(unittest.TestCase):
Expand Down Expand Up @@ -40,5 +40,25 @@ def test_write_fasta(self):
self.assertEqual(f.read(), ">a\nCCGGT\n>b\nTTTTTTTTT\n")


class ResultsTests(unittest.TestCase):
    """Tests for parse_results on tab-separated results text."""

    def test_parse_results(self):
        """Numeric columns are converted and "NA" placeholders become None."""
        results = [
            "query_id\tspecies\ttypestrain_id\tregion_mismatches\tregion_positions\tprobability_incompatible\n",
            "Seq1\tA\tB\t1\t2\t0.5\n",
            "Seq1\tC\tD\t3\t1500\t-2.062794379753541e-12\n",
            "Seq2\tNA\tNA\tNA\tNA\tNA\n",
        ]

        with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8") as f:
            f.writelines(results)
            f.seek(0)
            # Hand parse_results an iterator of lines; it must be consumed
            # before the temporary file is closed.
            content = list(parse_results(iter(f.readlines())))

        self.assertEqual(len(content), 3)
        self.assertEqual(content[0]["query_id"], "Seq1")

        # Ordinary rows: int and float fields are converted from text.
        self.assertEqual(content[0]["region_mismatches"], 1)
        self.assertEqual(content[0]["region_positions"], 2)
        self.assertEqual(content[0]["probability_incompatible"], 0.5)

        # NA row: text fields keep the literal "NA", numeric fields are None.
        self.assertEqual(content[2]["species"], "NA")
        self.assertIsNone(content[2]["region_mismatches"])
        self.assertIsNone(content[2]["probability_incompatible"])


if __name__ == "__main__":
unittest.main()
10 changes: 8 additions & 2 deletions unassigner/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ def parse_greengenes_accessions(f):
yield line.split("\t")


def cast_num_or_na(val, cast_func):
    """Convert a results field to a number, mapping "NA" to None.

    Args:
        val: Raw field value, e.g. "3", "0.5" or "NA".
        cast_func: Callable applied to non-NA values, such as int or float.

    Returns:
        None when val is the "NA" placeholder, otherwise cast_func(val).

    Raises:
        Whatever cast_func raises for a value it cannot convert
        (ValueError for int and float).
    """
    # int() and float() both ignore surrounding whitespace, so the NA check
    # does too; otherwise "NA\n" would reach cast_func and raise.
    if isinstance(val, str) and val.strip() == "NA":
        return None
    return cast_func(val)


def parse_results(f):
float_fields = ["probability_incompatible"]
int_fields = ["region_mismatches", "region_positions"]
Expand All @@ -110,7 +116,7 @@ def parse_results(f):
res = dict(zip(fields, vals))
for field, val in res.items():
if field in float_fields:
res[field] = float(val)
res[field] = cast_num_or_na(val, float)
elif field in int_fields:
res[field] = int(val)
res[field] = cast_num_or_na(val, int)
yield res

0 comments on commit c7901de

Please sign in to comment.