🔖 0.8.1 (#113)

* 🐛 month_abb and month_name being truncated (#112)
* 🐛 Fix unite() not keeping other columns (#111)
* 🔖 0.8.1
pwwang · Apr 19, 2022 · 4a73b09
1 parent 0814390
commit 4a73b09
Show file tree

Hide file tree

Showing 6 changed files with 64 additions and 43 deletions.
diff --git a/datar/__init__.py b/datar/__init__.py
@@ -13,7 +13,7 @@
 )
 
 __all__ = ("f", "get_versions")
-__version__ = "0.8.0"
+__version__ = "0.8.1"
 
 apply_init_callbacks()
 

diff --git a/datar/base/constants.py b/datar/base/constants.py
@@ -10,23 +10,7 @@
 letters = np.array(list(ascii_letters[:26]), dtype='<U1')
 LETTERS = np.array(list(ascii_letters[26:]), dtype='<U1')
 
-month_abb = np.array(
-    [
-        "Jan",
-        "Feb",
-        "Mar",
-        "Apr",
-        "May",
-        "Jun",
-        "Jul",
-        "Aug",
-        "Sep",
-        "Oct",
-        "Nov",
-        "Dec",
-    ],
-    dtype='<U1',
-)
+
 month_name = np.array(
     [
         "January",
@@ -42,5 +26,7 @@
         "November",
         "December",
     ],
-    dtype='<U1',
+    dtype='<U9',
 )
+
+month_abb = np.array(month_name, dtype='<U3')
diff --git a/datar/tidyr/unite.py b/datar/tidyr/unite.py
@@ -2,14 +2,16 @@
 
 from typing import Union
 
+from pipda import register_verb
+
 from ..core.backends import pandas as pd
 from ..core.backends.pandas import DataFrame, Series
-from pipda import register_verb
 
 from ..core.contexts import Context
 from ..core.utils import vars_select, regcall
 from ..core.tibble import reconstruct_tibble
 
+from ..base import setdiff
 from ..dplyr import ungroup
 
 
@@ -38,9 +40,11 @@ def unite(
     """
     all_columns = data.columns
     if not columns:
+        unite_idx = range(data.shape[1])
         columns = all_columns
     else:
-        columns = all_columns[vars_select(all_columns, *columns)]
+        unite_idx = vars_select(data, columns)
+        columns = all_columns[unite_idx]
 
     out = regcall(ungroup, data).copy()
 
@@ -54,15 +58,11 @@ def unite(
         united = united.transform(lambda x: sep.join(str(elem) for elem in x))
 
     # get indexes to relocate
-    unite_cols = out.columns.get_indexer_for(columns)
-    insert_at = int(unite_cols.min())
+    insert_at = int(min(unite_idx))
     out.insert(insert_at, col, united, allow_duplicates=True)
 
     if remove:
-        out_cols = [
-            i for i in range(out.shape[1])
-            if i <= insert_at and i - 1 not in unite_cols
-        ]
-        out = out.iloc[:, out_cols]
+        to_remove = [i if i < insert_at else i + 1 for i in unite_idx]
+        out = out.iloc[:, regcall(setdiff, range(out.shape[1]), to_remove)]
 
     return reconstruct_tibble(data, out)
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 0.8.1
+
+- 🐛 Fix `month_abb` and `month_name` being truncated (#112)
+- 🐛 Fix `unite()` not keeping other columns (#111)
+
 ## 0.8.0
 
 - ✨ Support `base.glimpse()` (#107, machow/siuba#409)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "datar"
-version = "0.8.0"
+version = "0.8.1"
 description = "Port of dplyr and other related R packages in python, using pipda."
 authors = ["pwwang <[email protected]>"]
 readme = "README.md"

diff --git a/tests/tidyr/test_unite.py b/tests/tidyr/test_unite.py
@@ -3,44 +3,47 @@
 import pytest  # noqa
 
 from datar.all import *
+from datar.datasets import table1
 from datar.core.backends.pandas.testing import assert_frame_equal
 from ..conftest import assert_iterable_equal
 
 
 def test_unite_pastes_columns_togeter_and_removes_old_col():
     df = tibble(x="a", y="b")
-    out = df >> unite('z', f[f.x:])
+    out = df >> unite("z", f[f.x :])
     assert_frame_equal(out, tibble(z="a_b"))
 
 
 def test_unite_does_not_remove_new_col_in_case_of_name_clash():
-    df = tibble(x = "a", y = "b")
-    out = df >> unite('x', f[f.x:])
+    df = tibble(x="a", y="b")
+    out = df >> unite("x", f[f.x :])
     cols = out >> names()
     assert_iterable_equal(cols, ["x"])
     assert_iterable_equal(out.x, ["a_b"])
 
 
 def test_unite_preserves_grouping():
-    df = tibble(g = 1, x = "a") >> group_by(f.g)
-    rs = df >> unite('x', f.x)
+    df = tibble(g=1, x="a") >> group_by(f.g)
+    rs = df >> unite("x", f.x)
     assert_frame_equal(df, rs)
     assert group_vars(df) == group_vars(rs)
 
 
 def test_drops_grouping_when_needed():
-    df = tibble(g = 1, x = "a") >> group_by(f.g)
-    rs = df >> unite('gx', f.g, f.x)
+    df = tibble(g=1, x="a") >> group_by(f.g)
+    rs = df >> unite("gx", f.g, f.x)
     assert_iterable_equal(rs.gx, ["1_a"])
     assert group_vars(rs) == []
 
+
 def test_empty_var_spec_uses_all_vars():
-    df = tibble(x = "a", y = "b")
-    assert_iterable_equal(df >> unite("z"), tibble(z = "a_b"))
+    df = tibble(x="a", y="b")
+    assert_iterable_equal(df >> unite("z"), tibble(z="a_b"))
+
 
 def test_can_remove_missing_vars_on_request():
-    df = expand_grid(x = ["a", NA], y = ["b", NA])
-    out = df >> unite("z", f[f.x:], na_rm = TRUE)
+    df = expand_grid(x=["a", NA], y=["b", NA])
+    out = df >> unite("z", f[f.x :], na_rm=TRUE)
 
     assert_iterable_equal(out.z, c("a_b", "a", "b", ""))
 
@@ -65,6 +68,33 @@ def test_can_remove_missing_vars_on_request():
 
 # GH#105
 def test_sep_none_does_not_join_strings():
-    df = tibble(x = "a", y = "b")
-    out = df >> unite('z', f[f.x:], sep = None)
-    assert_frame_equal(out, tibble(z = [["a", "b"]]))
+    df = tibble(x="a", y="b")
+    out = df >> unite("z", f[f.x :], sep=None)
+    assert_frame_equal(out, tibble(z=[["a", "b"]]))
+
+
+# GH#111
+def test_unite_keeping_other_columns():
+    out = unite(table1, f.z, f.country, f.year)
+    assert_frame_equal(
+        out,
+        tibble(
+            z=[
+                "Afghanistan_1999",
+                "Afghanistan_2000",
+                "Brazil_1999",
+                "Brazil_2000",
+                "China_1999",
+                "China_2000",
+            ],
+            cases=[745, 2666, 37737, 80488, 212258, 213766],
+            population=[
+                19987071,
+                20595360,
+                172006362,
+                174504898,
+                1272915272,
+                1280428583,
+            ],
+        ),
+    )