-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* 🩹 Attach original `__module__` to `func_factory` registered functions * ✨ Allow configuration file to save default options; 💥Replace option `warn_builtin_names` with `import_names_conflict` (#73) * 🐛 Register `base.factor()` and accept grouped data (#108) * ✨ Support `base.glimpse()` (#107, machow/siuba#409) * ✅ Add tests for `base.factor()` with grouped data * 🔖 0.8.0 * 📝 Update CHANGELOG
- Loading branch information
Showing
25 changed files
with
694 additions
and
283 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,34 @@ | ||
"""Import all constants, verbs and functions""" | ||
|
||
_locs = locals() | ||
|
||
from . import base as _base | ||
_base_conflict_names = _base._conflict_names | ||
for _key in _base.__all__: | ||
if _key not in _base_conflict_names: | ||
_locs[_key] = getattr(_base, _key) | ||
|
||
from . import dplyr as _dplyr | ||
_dplyr_conflict_names = _dplyr._conflict_names | ||
for _key in _dplyr.__all__: | ||
if _key not in _dplyr_conflict_names: | ||
_locs[_key] = getattr(_dplyr, _key) | ||
|
||
from .core.defaults import f | ||
from .base import ( | ||
_no_warn as _, | ||
) # don't override from datar.all import _no_warn | ||
from .base import _builtin_names as _base_builtin_names | ||
from .base import * | ||
from .base import _warn as _ | ||
from .forcats import * | ||
from .datar import * | ||
from .dplyr import _no_warn as _ | ||
from .dplyr import _builtin_names as _dplyr_builtin_names | ||
from .dplyr import * | ||
from .dplyr import _warn as _ | ||
from .tibble import * | ||
from .tidyr import * | ||
from .base import rank # overwrite dplyr.rank | ||
|
||
_builtin_names = _base_builtin_names.copy() | ||
_builtin_names.update(_dplyr_builtin_names) | ||
# builtin names included | ||
__all__ = [var_ for var_ in locals() if not var_.startswith("_")] | ||
|
||
for name in _builtin_names: | ||
# let __getattr__ handle the builtins, otherwise | ||
# from datar.all import filter | ||
# will not warn | ||
del locals()[name] | ||
from .core.import_names_conflict import ( | ||
handle_import_names_conflict as _handle_import_names_conflict, | ||
) | ||
|
||
from .core.warn_builtin_names import ( | ||
warn_builtin_names as _warn_builtin_names, | ||
__all__, _getattr = _handle_import_names_conflict( | ||
_locs, | ||
_base_conflict_names | _dplyr_conflict_names, | ||
) | ||
|
||
__getattr__ = _warn_builtin_names(**_builtin_names) | ||
if _getattr is not None: | ||
__getattr__ = _getattr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
"""Provides glimpse""" | ||
import textwrap | ||
import html | ||
from functools import singledispatch | ||
from shutil import get_terminal_size | ||
|
||
from pipda import register_verb | ||
|
||
from ..core.tibble import TibbleGrouped, TibbleRowwise | ||
from ..core.backends.pandas import DataFrame | ||
from ..core.backends.pandas.core.groupby import SeriesGroupBy | ||
|
||
|
||
@singledispatch
def formatter(x):
    """Render one data frame element as text for the glimpse view.

    This is the generic fallback of a single-dispatch function: it returns
    ``str(x)``. Type-specific renderings are added with
    ``formatter.register``.
    """
    rendered = str(x)
    return rendered
|
||
|
||
@formatter.register(DataFrame)
def _dataframe_formatter(x):
    """Summarise a data frame element as ``<DF {rows}x{columns}>``."""
    n_rows = x.shape[0]
    n_cols = x.shape[1]
    return f"<DF {n_rows}x{n_cols}>"
|
||
|
||
@formatter.register(str)
def _str_formatter(x):
    """Render a string element via ``repr`` so that it is shown quoted."""
    return f"{x!r}"
|
||
|
||
def _is_notebook() -> bool: # pragma: no cover | ||
"""Check if the current environment is notebook""" | ||
try: | ||
from IPython import get_ipython | ||
shell = get_ipython().__class__.__name__ | ||
if shell == "ZMQInteractiveShell": | ||
return True # Jupyter notebook or qtconsole | ||
elif shell == "TerminalInteractiveShell": | ||
return False # Terminal running IPython | ||
else: | ||
return False # Other type (?) | ||
except (ImportError, NameError): | ||
return False # Probably standard Python interpreter | ||
|
||
|
||
class Glimpse:
    """Transposed, one-line-per-column overview of a data frame.

    Every column is rendered as its name, its dtype and as many leading
    values as fit into the available width.

    Args:
        x: The data to be glimpsed
        width: The width of the output. A falsy value falls back to the
            terminal width (100 columns when it cannot be determined).
        formatter: The formatter to use to format data elements
    """

    def __init__(self, x, width, formatter) -> None:
        self.x = x
        self.width = width or get_terminal_size((100, 20)).columns
        self.formatter = formatter
        # (column-name width, dtype width); computed by
        # _calculate_output_widths() before the plain-text rendering.
        self.colwidths = (0, 0)

    def __repr__(self) -> str:
        return f"<Glimpse: {self.__hash__()}>"

    def __str__(self) -> str:
        """Plain-text rendering: general lines followed by one line per column."""
        self._calculate_output_widths()
        return "\n".join(
            (
                "\n".join(self._general()),
                "\n".join(self._variables()),
            )
        )

    def _repr_html_(self):
        """HTML rendering: general lines as ``<div>``s, columns as table rows."""
        out = [
            # Group names are user data, so escape them before embedding.
            f"<div><i>{html.escape(gen)}</i></div>"
            for gen in self._general()
        ]
        out.append("<table>")
        out.extend(self._variables(fmt="html"))
        out.append("</table>")
        return "\n".join(out)

    def _general(self):
        """Return the header lines: row/column counts and grouping, if any."""
        summary = (
            f"Rows: {self.x.shape[0]}",
            f"Columns: {self.x.shape[1]}",
        )
        if not isinstance(self.x, TibbleGrouped):
            return summary

        groups = ", ".join(str(name) for name in self.x.group_vars)
        group_title = (
            "Rowwise" if isinstance(self.x, TibbleRowwise) else "Groups"
        )
        ngroups = self.x._datar["grouped"].grouper.ngroups
        return summary + (f"{group_title}: {groups} [{ngroups}]",)

    def _calculate_output_widths(self):
        """Compute the padded widths of the name and dtype columns.

        ``default=0`` keeps a data frame without any columns from raising
        ``ValueError`` in ``max()``.
        """
        colname_width = max(
            (len(str(colname)) for colname in self.x.columns),
            default=0,
        )
        # +2 accounts for the surrounding "<" and ">" of the dtype
        dtype_width = (
            max((len(str(dtype)) for dtype in self.x.dtypes), default=0) + 2
        )
        self.colwidths = (colname_width, dtype_width)

    def _variables(self, fmt="str"):
        """Yield one formatted line (or HTML table row) per column."""
        for col in self.x:
            column = self.x[col]
            dtype = column.dtype
            # For a grouped column, take the values from the underlying
            # object (`.obj`).
            values = (
                column.obj.values
                if isinstance(column, SeriesGroupBy)
                else column.values
            )
            yield self._format_variable(col, dtype, values, fmt=fmt)

    def _format_variable(self, col, dtype, data, fmt="str"):
        """Dispatch to the plain-text (``"str"``) or the HTML formatter."""
        if fmt == "str":
            return self._format_variable_str(col, dtype, data)

        return self._format_variable_html(col, dtype, data)

    def _format_data(self, data):
        """Format the data for the glimpse view

        Formatting 10 elements in a batch in case of a long dataframe.
        Since we don't need to format all the data, but only the first few
        till the line (terminal width or provided width) overflows.
        """
        out = ""
        placeholder = "…"
        chunk_size = 10
        # 4 = the ". " prefix plus the two single-space separators used by
        # the plain-text layout. Clamp to 1 so that a width narrower than
        # the name/dtype columns degrades to the placeholder instead of
        # making textwrap.shorten() raise ValueError.
        avail = max(1, self.width - 4 - sum(self.colwidths))
        i = 0
        while not out.endswith(placeholder) and i < data.size:
            if out:
                out += ", "
            out += ", ".join(
                self.formatter(d) for d in data[i:i + chunk_size]
            )
            i += chunk_size
            out = textwrap.shorten(
                out,
                break_long_words=True,
                break_on_hyphens=True,
                width=avail,
                placeholder=placeholder,
            )
        return out

    def _format_variable_str(self, col, dtype, data):
        """Format one column as a padded plain-text line."""
        # Column labels are not necessarily strings (e.g. integers), so
        # convert before padding, as _calculate_output_widths() does.
        name_col = str(col).ljust(self.colwidths[0])
        dtype_col = f"<{dtype}>".ljust(self.colwidths[1])
        data_col = self._format_data(data)
        return f". {name_col} {dtype_col} {data_col}"

    def _format_variable_html(self, col, dtype, data):
        """Format one column as an HTML table row."""
        # Escape the label and the "<dtype>" marker; a raw "<int64>" would
        # be parsed as an (unknown) HTML tag and not be displayed.
        name_col = f". <b>{html.escape(str(col))}</b>"
        dtype_col = f"<i>&lt;{html.escape(str(dtype))}&gt;</i>"
        data_col = html.escape(self._format_data(data))
        return (
            f"<tr><th style=\"text-align: left\">{name_col}</th>"
            f"<td style=\"text-align: left\">{dtype_col}</td>"
            f"<td style=\"text-align: left\">{data_col}</td></tr>"
        )

    def show(self):
        """Show the glimpse view"""
        if _is_notebook():  # pragma: no cover
            from IPython.display import display, HTML

            display(HTML(self._repr_html_()))
        else:
            print(self)
|
||
|
||
@register_verb(DataFrame)
def glimpse(x, width=None, formatter=formatter):
    """Show a glimpse of the data: one line per column.

    Builds a ``Glimpse`` view of ``x`` and shows it right away.

    Args:
        x: An object to glimpse at.
        width: Width of output, defaults to the width of the console.
        formatter: A single-dispatch function to format a single element.
    """
    view = Glimpse(x, width=width, formatter=formatter)
    view.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.