Skip to content

Commit

Permalink
Merge commit 'a5f90ff2a284bde3827b191363649071a83dc7df' into gn_downl…
Browse files Browse the repository at this point in the history
…oad-fixing
  • Loading branch information
EugeneDu-GA committed Sep 19, 2024
2 parents 5b087d1 + a5f90ff commit d49ec26
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 41 deletions.
41 changes: 26 additions & 15 deletions gnssanalysis/gn_io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,43 @@
MB = 1024 * 1024


def path2bytes(path_or_bytes: _Union[_Path, str, bytes]) -> bytes:
    """Main file reading function. Checks file extension and calls appropriate reading function.

    Passes through bytes if given, thus one may now routinely leave it at the top of a specific
    file reading function and be able to call that function with bytes or a str / Path path without
    additional modifications.

    :param _Union[_Path, str, bytes] path_or_bytes: input file path, or the file content already read as bytes
    :return bytes: bytes object, decompressed if necessary
    :raise TypeError: input was not a Path, str, or bytes
    :raise FileNotFoundError: path didn't resolve to a file
    :raise Exception: wrapped exception for all other exceptions raised
    :raise EOFError: if input bytes is empty, input file is empty, or decompressed result of input file is empty.
    """
    if isinstance(path_or_bytes, bytes):  # no reading is necessary - pass through.
        if len(path_or_bytes) == 0:
            raise EOFError("Input bytes object was empty!")
        return path_or_bytes

    # Normalise the path to a plain string so the suffix checks below work for both str and Path
    if isinstance(path_or_bytes, _Path):
        path_string = path_or_bytes.as_posix()
    elif isinstance(path_or_bytes, str):
        path_string = path_or_bytes
    else:
        raise TypeError("Must be Path, str, or bytes")

    try:
        # The reader is selected purely from the file name suffix
        if path_string.endswith(".Z"):
            databytes = _lzw2bytes(path_string)
        elif path_string.endswith(".gz"):
            databytes = _gz2bytes(path_string)
        else:
            databytes = _txt2bytes(path_string)
    except FileNotFoundError:
        # Propagate unchanged (bare raise keeps the original traceback) so callers can catch it specifically
        raise
    except Exception as e:
        # Chain the original exception so the root cause is not lost behind the wrapper
        raise Exception(f"Error reading file '{path_string}'. Exception: {e}") from e

    if len(databytes) == 0:
        raise EOFError(f"Input file (or decompressed result of it) was empty. Path: '{path_string}'")
    return databytes


Expand Down
23 changes: 17 additions & 6 deletions gnssanalysis/gn_io/sp3.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import io as _io
import os as _os
import re as _re
from typing import Literal, Union, List, Tuple
from typing import Literal, Optional, Union, List, Tuple
from pathlib import Path

import numpy as _np
import pandas as _pd
Expand Down Expand Up @@ -238,7 +239,16 @@ def _process_sp3_block(
return temp_sp3


def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _pd.DataFrame:
def description_for_path_or_bytes(path_or_bytes: Union[str, Path, bytes]) -> str:
    """Return a human-readable description of an input source, for use in log messages.

    :param Union[str, Path, bytes] path_or_bytes: a file path (str or Path), or file content as bytes
    :return str: the path converted to a string, or a fixed placeholder when the input is not a str or Path
    """
    if isinstance(path_or_bytes, (str, Path)):
        return str(path_or_bytes)
    # Anything that is not a path (i.e. raw bytes) has no path to report
    return "Data passed as bytes: no path available"


def read_sp3(
sp3_path_or_bytes: Union[str, Path, bytes], pOnly: bool = True, nodata_to_nan: bool = True
) -> _pd.DataFrame:
"""Reads an SP3 file and returns the data as a pandas DataFrame.
Expand All @@ -247,7 +257,8 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _
:param bool nodata_to_nan: If True, converts 0.000000 (indicating nodata) to NaN in the SP3 POS column
and converts 999999* (indicating nodata) to NaN in the SP3 CLK column. Defaults to True.
:return pandas.DataFrame: The SP3 data as a DataFrame.
:raise FileNotFoundError: If the SP3 file specified by sp3_path does not exist.
:raise FileNotFoundError: If the SP3 file specified by sp3_path_or_bytes does not exist.
:raise Exception: For other errors reading SP3 file/bytes
:note: The SP3 file format is a standard format used for representing precise satellite ephemeris and clock data.
This function reads the SP3 file, parses the header information, and extracts the data into a DataFrame.
Expand All @@ -256,7 +267,7 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _
(mm/ps) and remove unnecessary columns. If pOnly is True, only P* values are included in the DataFrame.
If nodata_to_nan is True, nodata values in the SP3 POS and CLK columns are converted to NaN.
"""
content = _gn_io.common.path2bytes(str(sp3_path))
content = _gn_io.common.path2bytes(sp3_path_or_bytes) # Will raise EOFError if file empty

# Match comment lines, including the trailing newline (so that it gets removed in a second too): ^(\/\*.*$\n)
comments: list = _RE_SP3_COMMENT_STRIP.findall(content)
Expand Down Expand Up @@ -306,13 +317,13 @@ def read_sp3(sp3_path: str, pOnly: bool = True, nodata_to_nan: bool = True) -> _
logging.warning(
f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + _gn_const.J2000_ORIGIN} "
f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first."
f"SP3 path is: '{description_for_path_or_bytes(sp3_path_or_bytes)}'. Duplicates will be removed, keeping first."
)
# Now dedupe them, keeping the first of any clashes:
sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")]
# Write header data to dataframe attributes:
sp3_df.attrs["HEADER"] = parsed_header
sp3_df.attrs["path"] = sp3_path
sp3_df.attrs["path"] = sp3_path_or_bytes if type(sp3_path_or_bytes) in (str, Path) else ""
return sp3_df


Expand Down
51 changes: 31 additions & 20 deletions tests/test_common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import unittest
from unittest.mock import patch, mock_open, MagicMock
from pathlib import Path
import logging
from unittest.mock import patch
from pyfakefs.fake_filesystem_unittest import TestCase

# Assuming the function path2bytes is in a module named common
from gnssanalysis.gn_io.common import path2bytes


Expand Down Expand Up @@ -34,19 +32,32 @@ def test_bytes_input(self):
result = path2bytes(b"test data")
self.assertEqual(result, b"test data")

@patch("gnssanalysis.gn_io.common._logging.error")
def test_file_not_found(self, mock_logging_error):
with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=FileNotFoundError):
print("testing path")
result = path2bytes("nonexistent.txt")
self.assertIsNone(result)
mock_logging_error.assert_called_once_with("File nonexistent.txt not found. Returning empty bytes.")

@patch("gnssanalysis.gn_io.common._logging.error")
def test_generic_exception(self, mock_logging_error):
with patch("gnssanalysis.gn_io.common._txt2bytes", side_effect=Exception("Generic error")):
result = path2bytes("test.txt")
self.assertIsNone(result)
mock_logging_error.assert_called_once_with(
"Error reading file test.txt with error Generic error. Returning empty bytes."
)

class TestPath2BytesWithFakeFs(TestCase):
    """Tests of path2bytes that read files created through the pyfakefs `self.fs` fixture."""

    def setUp(self):
        # Initialise pyfakefs so that self.fs is available to every test
        self.setUpPyfakefs()

    def test_file_not_found_and_file_read(self):
        # Only 'testfile.txt' is created; the first lookup deliberately targets a different name
        self.fs.create_file("testfile.txt", contents=b"hello")
        with self.assertRaises(FileNotFoundError):
            path2bytes("nonexistent.txt")

        # The file that does exist must read back with its exact contents
        content = path2bytes("testfile.txt")
        self.assertEqual(content, b"hello")

    def test_empty_file_exception(self):
        # A zero-length file is expected to raise EOFError (as is a valid archive that expands to nothing)
        self.fs.create_file("emptyfile.txt", contents=b"")
        with self.assertRaises(EOFError):
            path2bytes("emptyfile.txt")

    def test_invalid_archive_expand_exception(self):
        # Files with archive extensions but non-archive content must raise when unpacked
        archive_names = ("invalidarchive.gz", "invalidarchive.Z")
        for archive_name in archive_names:
            self.fs.create_file(archive_name, contents=b"hello")
        for archive_name in archive_names:
            with self.assertRaises(Exception):
                path2bytes(archive_name)

0 comments on commit d49ec26

Please sign in to comment.