Move pathlib_or_pypath from a csv_reader module-level function to a
TextFileReader classmethod, use it in CSVReader.read and JSONReader.read, and
add read_json tests for in-memory string and BytesIO inputs.

diff --git a/modin/engines/base/io/text/csv_reader.py b/modin/engines/base/io/text/csv_reader.py
index ceb9d2883fc..5a4c1df4ca8 100644
--- a/modin/engines/base/io/text/csv_reader.py
+++ b/modin/engines/base/io/text/csv_reader.py
@@ -18,24 +18,6 @@
 import sys
 
 
-def pathlib_or_pypath(filepath_or_buffer):
-    try:
-        import py
-
-        if isinstance(filepath_or_buffer, py.path.local):
-            return True
-    except ImportError:  # pragma: no cover
-        pass
-    try:
-        import pathlib
-
-        if isinstance(filepath_or_buffer, pathlib.Path):
-            return True
-    except ImportError:  # pragma: no cover
-        pass
-    return False
-
-
 class CSVReader(TextFileReader):
     @classmethod
     def read(cls, filepath_or_buffer, **kwargs):
@@ -43,7 +25,7 @@ def read(cls, filepath_or_buffer, **kwargs):
             if not cls.file_exists(filepath_or_buffer):
                 return cls.single_worker_read(filepath_or_buffer, **kwargs)
             filepath_or_buffer = cls.get_path(filepath_or_buffer)
-        elif not pathlib_or_pypath(filepath_or_buffer):
+        elif not cls.pathlib_or_pypath(filepath_or_buffer):
             return cls.single_worker_read(filepath_or_buffer, **kwargs)
         compression_type = cls.infer_compression(
             filepath_or_buffer, kwargs.get("compression")
diff --git a/modin/engines/base/io/text/json_reader.py b/modin/engines/base/io/text/json_reader.py
index 226ac801a32..da3087bebd9 100644
--- a/modin/engines/base/io/text/json_reader.py
+++ b/modin/engines/base/io/text/json_reader.py
@@ -21,7 +21,12 @@
 class JSONReader(TextFileReader):
     @classmethod
     def read(cls, path_or_buf, **kwargs):
-        path_or_buf = cls.get_path(path_or_buf)
+        if isinstance(path_or_buf, str):
+            if not cls.file_exists(path_or_buf):
+                return cls.single_worker_read(path_or_buf, **kwargs)
+            path_or_buf = cls.get_path(path_or_buf)
+        elif not cls.pathlib_or_pypath(path_or_buf):
+            return cls.single_worker_read(path_or_buf, **kwargs)
         if not kwargs.get("lines", False):
             return cls.single_worker_read(path_or_buf, **kwargs)
         columns = pandas.read_json(
diff --git a/modin/engines/base/io/text/text_file_reader.py b/modin/engines/base/io/text/text_file_reader.py
index bd86388dcde..e971a40954c 100644
--- a/modin/engines/base/io/text/text_file_reader.py
+++ b/modin/engines/base/io/text/text_file_reader.py
@@ -52,3 +52,21 @@ def build_partition(cls, partition_ids, row_lengths, column_widths):
                 for i in range(len(partition_ids))
             ]
         )
+
+    @classmethod
+    def pathlib_or_pypath(cls, filepath_or_buffer):
+        try:
+            import py
+
+            if isinstance(filepath_or_buffer, py.path.local):
+                return True
+        except ImportError:  # pragma: no cover
+            pass
+        try:
+            import pathlib
+
+            if isinstance(filepath_or_buffer, pathlib.Path):
+                return True
+        except ImportError:  # pragma: no cover
+            pass
+        return False
diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py
index 9a55e61acda..c30f4414d89 100644
--- a/modin/pandas/test/test_io.py
+++ b/modin/pandas/test/test_io.py
@@ -23,7 +23,13 @@
 import shutil
 import sqlalchemy as sa
 
-from .utils import df_equals
+from .utils import (
+    df_equals,
+    json_short_string,
+    json_short_bytes,
+    json_long_string,
+    json_long_bytes,
+)
 
 from modin import __execution_engine__
 
@@ -474,6 +480,18 @@ def test_from_json_lines():
     teardown_json_file()
 
 
+@pytest.mark.parametrize(
+    "data", [json_short_string, json_short_bytes, json_long_string, json_long_bytes],
+)
+def test_read_json_string_bytes(data):
+    with pytest.warns(UserWarning):
+        modin_df = pd.read_json(data)
+    # For I/O objects we need to rewind to reuse the same object.
+    if hasattr(data, "seek"):
+        data.seek(0)
+    df_equals(modin_df, pandas.read_json(data))
+
+
 def test_from_html():
     setup_html_file(SMALL_ROW_SIZE)
 
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 1c517a6fb18..1faaf386ac3 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -16,6 +16,7 @@
 from pandas.util.testing import assert_almost_equal, assert_frame_equal
 import modin.pandas as pd
 from modin.pandas.utils import to_pandas
+from io import BytesIO
 
 random_state = np.random.RandomState(seed=42)
 
@@ -300,6 +301,48 @@
 
 # END parametrizations of common kwargs
 
+json_short_string = """[{"project": "modin"}]"""
+json_long_string = """{
+        "quiz": {
+            "sport": {
+                "q1": {
+                    "question": "Which one is correct team name in NBA?",
+                    "options": [
+                        "New York Bulls",
+                        "Los Angeles Kings",
+                        "Golden State Warriros",
+                        "Huston Rocket"
+                    ],
+                    "answer": "Huston Rocket"
+                }
+            },
+            "maths": {
+                "q1": {
+                    "question": "5 + 7 = ?",
+                    "options": [
+                        "10",
+                        "11",
+                        "12",
+                        "13"
+                    ],
+                    "answer": "12"
+                },
+                "q2": {
+                    "question": "12 - 8 = ?",
+                    "options": [
+                        "1",
+                        "2",
+                        "3",
+                        "4"
+                    ],
+                    "answer": "4"
+                }
+            }
+        }
+    }"""
+json_long_bytes = BytesIO(json_long_string.encode(encoding="UTF-8"))
+json_short_bytes = BytesIO(json_short_string.encode(encoding="UTF-8"))
+
 
 def df_equals(df1, df2):
     """Tests if df1 and df2 are equal.