Skip to content

Commit

Permalink
🔖 0.8.1 (#113)
Browse files Browse the repository at this point in the history
* 🐛 month_abb and month_name being truncated (#112)

* 🐛 Fix unite() not keeping other columns (#111)

* 🔖 0.8.1
  • Loading branch information
pwwang authored Apr 19, 2022
1 parent 0814390 commit 4a73b09
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 43 deletions.
2 changes: 1 addition & 1 deletion datar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)

__all__ = ("f", "get_versions")
__version__ = "0.8.0"
__version__ = "0.8.1"

apply_init_callbacks()

Expand Down
22 changes: 4 additions & 18 deletions datar/base/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,7 @@
letters = np.array(list(ascii_letters[:26]), dtype='<U1')
LETTERS = np.array(list(ascii_letters[26:]), dtype='<U1')

month_abb = np.array(
[
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec",
],
dtype='<U1',
)

month_name = np.array(
[
"January",
Expand All @@ -42,5 +26,7 @@
"November",
"December",
],
dtype='<U1',
dtype='<U9',
)

month_abb = np.array(month_name, dtype='<U3')
18 changes: 9 additions & 9 deletions datar/tidyr/unite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from typing import Union

from pipda import register_verb

from ..core.backends import pandas as pd
from ..core.backends.pandas import DataFrame, Series
from pipda import register_verb

from ..core.contexts import Context
from ..core.utils import vars_select, regcall
from ..core.tibble import reconstruct_tibble

from ..base import setdiff
from ..dplyr import ungroup


Expand Down Expand Up @@ -38,9 +40,11 @@ def unite(
"""
all_columns = data.columns
if not columns:
unite_idx = range(data.shape[1])
columns = all_columns
else:
columns = all_columns[vars_select(all_columns, *columns)]
unite_idx = vars_select(data, columns)
columns = all_columns[unite_idx]

out = regcall(ungroup, data).copy()

Expand All @@ -54,15 +58,11 @@ def unite(
united = united.transform(lambda x: sep.join(str(elem) for elem in x))

# get indexes to relocate
unite_cols = out.columns.get_indexer_for(columns)
insert_at = int(unite_cols.min())
insert_at = int(min(unite_idx))
out.insert(insert_at, col, united, allow_duplicates=True)

if remove:
out_cols = [
i for i in range(out.shape[1])
if i <= insert_at and i - 1 not in unite_cols
]
out = out.iloc[:, out_cols]
to_remove = [i if i < insert_at else i + 1 for i in unite_idx]
out = out.iloc[:, regcall(setdiff, range(out.shape[1]), to_remove)]

return reconstruct_tibble(data, out)
5 changes: 5 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 0.8.1

- 🐛 Fix `month_abb` and `month_name` being truncated (#112)
- 🐛 Fix `unite()` not keeping other columns (#111)

## 0.8.0

- ✨ Support `base.glimpse()` (#107, machow/siuba#409)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "datar"
version = "0.8.0"
version = "0.8.1"
description = "Port of dplyr and other related R packages in python, using pipda."
authors = ["pwwang <[email protected]>"]
readme = "README.md"
Expand Down
58 changes: 44 additions & 14 deletions tests/tidyr/test_unite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,47 @@
import pytest # noqa

from datar.all import *
from datar.datasets import table1
from datar.core.backends.pandas.testing import assert_frame_equal
from ..conftest import assert_iterable_equal


def test_unite_pastes_columns_togeter_and_removes_old_col():
df = tibble(x="a", y="b")
out = df >> unite('z', f[f.x:])
out = df >> unite("z", f[f.x :])
assert_frame_equal(out, tibble(z="a_b"))


def test_unite_does_not_remove_new_col_in_case_of_name_clash():
df = tibble(x = "a", y = "b")
out = df >> unite('x', f[f.x:])
df = tibble(x="a", y="b")
out = df >> unite("x", f[f.x :])
cols = out >> names()
assert_iterable_equal(cols, ["x"])
assert_iterable_equal(out.x, ["a_b"])


def test_unite_preserves_grouping():
df = tibble(g = 1, x = "a") >> group_by(f.g)
rs = df >> unite('x', f.x)
df = tibble(g=1, x="a") >> group_by(f.g)
rs = df >> unite("x", f.x)
assert_frame_equal(df, rs)
assert group_vars(df) == group_vars(rs)


def test_drops_grouping_when_needed():
df = tibble(g = 1, x = "a") >> group_by(f.g)
rs = df >> unite('gx', f.g, f.x)
df = tibble(g=1, x="a") >> group_by(f.g)
rs = df >> unite("gx", f.g, f.x)
assert_iterable_equal(rs.gx, ["1_a"])
assert group_vars(rs) == []


def test_empty_var_spec_uses_all_vars():
df = tibble(x = "a", y = "b")
assert_iterable_equal(df >> unite("z"), tibble(z = "a_b"))
df = tibble(x="a", y="b")
assert_iterable_equal(df >> unite("z"), tibble(z="a_b"))


def test_can_remove_missing_vars_on_request():
df = expand_grid(x = ["a", NA], y = ["b", NA])
out = df >> unite("z", f[f.x:], na_rm = TRUE)
df = expand_grid(x=["a", NA], y=["b", NA])
out = df >> unite("z", f[f.x :], na_rm=TRUE)

assert_iterable_equal(out.z, c("a_b", "a", "b", ""))

Expand All @@ -65,6 +68,33 @@ def test_can_remove_missing_vars_on_request():

# GH#105
def test_sep_none_does_not_join_strings():
df = tibble(x = "a", y = "b")
out = df >> unite('z', f[f.x:], sep = None)
assert_frame_equal(out, tibble(z = [["a", "b"]]))
df = tibble(x="a", y="b")
out = df >> unite("z", f[f.x :], sep=None)
assert_frame_equal(out, tibble(z=[["a", "b"]]))


# GH#111
# Regression test: uniting a subset of columns must leave the remaining
# columns in the result. Here `country` and `year` of `table1` are united
# into a new column `z`; the expected frame still carries `cases` and
# `population` after it.
def test_unite_keeping_other_columns():
# `f.z` names the new united column; `f.country` and `f.year` are the inputs.
out = unite(table1, f.z, f.country, f.year)
assert_frame_equal(
out,
tibble(
# United values are "<country>_<year>".
z=[
"Afghanistan_1999",
"Afghanistan_2000",
"Brazil_1999",
"Brazil_2000",
"China_1999",
"China_2000",
],
# Columns that were not united must be kept as they are.
cases=[745, 2666, 37737, 80488, 212258, 213766],
population=[
19987071,
20595360,
172006362,
174504898,
1272915272,
1280428583,
],
),
)

0 comments on commit 4a73b09

Please sign in to comment.