Skip to content

Commit

Permalink
Merge pull request #1400 from devin-petersohn/bugs/1379
Browse files Browse the repository at this point in the history
  • Loading branch information
devin-petersohn authored Apr 28, 2020
2 parents 199fe79 + f0dc421 commit 85bc894
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 21 deletions.
20 changes: 1 addition & 19 deletions modin/engines/base/io/text/csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,14 @@
import sys


def pathlib_or_pypath(filepath_or_buffer):
    """Check whether the argument is a path object rather than a str or buffer.

    Parameters
    ----------
    filepath_or_buffer : object
        The value handed to a reader function.

    Returns
    -------
    bool
        True if ``filepath_or_buffer`` is an instance of ``py.path.local``
        or ``pathlib.Path``; False otherwise.
    """
    # Collect whichever path classes can be imported in this environment;
    # a library that is not installed simply contributes nothing.
    known_path_types = []
    try:
        import py

        known_path_types.append(py.path.local)
    except ImportError:  # pragma: no cover
        pass
    try:
        import pathlib

        known_path_types.append(pathlib.Path)
    except ImportError:  # pragma: no cover
        pass
    # isinstance against an empty tuple is False, matching the fall-through.
    return isinstance(filepath_or_buffer, tuple(known_path_types))


class CSVReader(TextFileReader):
@classmethod
def read(cls, filepath_or_buffer, **kwargs):
if isinstance(filepath_or_buffer, str):
if not cls.file_exists(filepath_or_buffer):
return cls.single_worker_read(filepath_or_buffer, **kwargs)
filepath_or_buffer = cls.get_path(filepath_or_buffer)
elif not pathlib_or_pypath(filepath_or_buffer):
elif not cls.pathlib_or_pypath(filepath_or_buffer):
return cls.single_worker_read(filepath_or_buffer, **kwargs)
compression_type = cls.infer_compression(
filepath_or_buffer, kwargs.get("compression")
Expand Down
7 changes: 6 additions & 1 deletion modin/engines/base/io/text/json_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
class JSONReader(TextFileReader):
@classmethod
def read(cls, path_or_buf, **kwargs):
path_or_buf = cls.get_path(path_or_buf)
if isinstance(path_or_buf, str):
if not cls.file_exists(path_or_buf):
return cls.single_worker_read(path_or_buf, **kwargs)
path_or_buf = cls.get_path(path_or_buf)
elif not cls.pathlib_or_pypath(path_or_buf):
return cls.single_worker_read(path_or_buf, **kwargs)
if not kwargs.get("lines", False):
return cls.single_worker_read(path_or_buf, **kwargs)
columns = pandas.read_json(
Expand Down
18 changes: 18 additions & 0 deletions modin/engines/base/io/text/text_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,21 @@ def build_partition(cls, partition_ids, row_lengths, column_widths):
for i in range(len(partition_ids))
]
)

@classmethod
def pathlib_or_pypath(cls, filepath_or_buffer):
    """Tell whether the input is a ``py.path.local`` or ``pathlib.Path``.

    Parameters
    ----------
    filepath_or_buffer : object
        The value handed to a reader's ``read`` method.

    Returns
    -------
    bool
        True when the value is an instance of either path class,
        False for anything else (including when neither library imports).
    """
    is_path_object = False
    try:
        import py

        is_path_object = isinstance(filepath_or_buffer, py.path.local)
    except ImportError:  # pragma: no cover
        pass
    # Only consult pathlib when the py.path check did not already match.
    if not is_path_object:
        try:
            import pathlib

            is_path_object = isinstance(filepath_or_buffer, pathlib.Path)
        except ImportError:  # pragma: no cover
            pass
    return is_path_object
20 changes: 19 additions & 1 deletion modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@
import shutil
import sqlalchemy as sa

from .utils import df_equals
from .utils import (
df_equals,
json_short_string,
json_short_bytes,
json_long_string,
json_long_bytes,
)

from modin import __execution_engine__

Expand Down Expand Up @@ -474,6 +480,18 @@ def test_from_json_lines():
teardown_json_file()


@pytest.mark.parametrize(
    "data", [json_short_string, json_short_bytes, json_long_string, json_long_bytes],
)
def test_read_json_string_bytes(data):
    """Compare ``read_json`` results for raw JSON text and in-memory byte buffers.

    The read through ``pd`` is expected to emit a ``UserWarning``; its result
    must equal what ``pandas.read_json`` produces for the same input.
    """
    with pytest.warns(UserWarning):
        modin_result = pd.read_json(data)

    # A buffer has been consumed by the first read; move back to the start
    # so the second reader sees the same content. Strings have no ``seek``.
    if hasattr(data, "seek"):
        data.seek(0)

    pandas_result = pandas.read_json(data)
    df_equals(modin_result, pandas_result)


def test_from_html():
setup_html_file(SMALL_ROW_SIZE)

Expand Down
43 changes: 43 additions & 0 deletions modin/pandas/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from pandas.util.testing import assert_almost_equal, assert_frame_equal
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from io import BytesIO

random_state = np.random.RandomState(seed=42)

Expand Down Expand Up @@ -300,6 +301,48 @@

# END parametrizations of common kwargs

# In-memory JSON fixtures: one short and one long document, each available
# both as text and as a UTF-8 encoded BytesIO buffer.

# Short document: a one-element array holding a single flat object.
json_short_string = """[{"project": "modin"}]"""
# Long document: a nested object several levels deep that includes arrays.
# NOTE: the payload text (including its spelling) is fixture data; keep as-is.
json_long_string = """{
"quiz": {
"sport": {
"q1": {
"question": "Which one is correct team name in NBA?",
"options": [
"New York Bulls",
"Los Angeles Kings",
"Golden State Warriros",
"Huston Rocket"
],
"answer": "Huston Rocket"
}
},
"maths": {
"q1": {
"question": "5 + 7 = ?",
"options": [
"10",
"11",
"12",
"13"
],
"answer": "12"
},
"q2": {
"question": "12 - 8 = ?",
"options": [
"1",
"2",
"3",
"4"
],
"answer": "4"
}
}
}
}"""
# Byte-buffer variants of the two documents above. These are single
# module-level BytesIO objects, so a consumer that reads one must seek(0)
# before reading it again.
json_long_bytes = BytesIO(json_long_string.encode(encoding="UTF-8"))
json_short_bytes = BytesIO(json_short_string.encode(encoding="UTF-8"))


def df_equals(df1, df2):
"""Tests if df1 and df2 are equal.
Expand Down

0 comments on commit 85bc894

Please sign in to comment.