From 4a73b09d0862118d5ddc3e1175ea465a74d56320 Mon Sep 17 00:00:00 2001
From: pwwang <1188067+pwwang@users.noreply.github.com>
Date: Mon, 18 Apr 2022 18:05:05 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=96=200.8.1=20(#113)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 🐛 Fix month_abb and month_name being truncated (#112)

* 🐛 Fix unite() not keeping other columns (#111)

* 🔖 0.8.1
---
 datar/__init__.py         |  2 +-
 datar/base/constants.py   | 22 +++------------
 datar/tidyr/unite.py      | 18 ++++++------
 docs/CHANGELOG.md         |  5 ++++
 pyproject.toml            |  2 +-
 tests/tidyr/test_unite.py | 58 +++++++++++++++++++++++++++++----------
 6 files changed, 64 insertions(+), 43 deletions(-)

diff --git a/datar/__init__.py b/datar/__init__.py
index 33e52cbc..7e8a3d37 100644
--- a/datar/__init__.py
+++ b/datar/__init__.py
@@ -13,7 +13,7 @@
 )
 
 __all__ = ("f", "get_versions")
-__version__ = "0.8.0"
+__version__ = "0.8.1"
 
 apply_init_callbacks()
 
diff --git a/datar/base/constants.py b/datar/base/constants.py
index 81ef4157..7daff60b 100644
--- a/datar/base/constants.py
+++ b/datar/base/constants.py
@@ -10,23 +10,7 @@
 letters = np.array(list(ascii_letters[:26]), dtype='<U1')
 LETTERS = np.array(list(ascii_letters[26:]), dtype='<U1')
 
-month_abb = np.array(
-    [
-        "Jan",
-        "Feb",
-        "Mar",
-        "Apr",
-        "May",
-        "Jun",
-        "Jul",
-        "Aug",
-        "Sep",
-        "Oct",
-        "Nov",
-        "Dec",
-    ],
-    dtype='<U1',
-)
+
 month_name = np.array(
     [
         "January",
@@ -42,5 +26,7 @@
         "November",
         "December",
     ],
-    dtype='<U1',
+    dtype='<U9',
 )
+
+month_abb = np.array(month_name, dtype='<U3')
diff --git a/datar/tidyr/unite.py b/datar/tidyr/unite.py
index 2fb0a3dd..015b1be3 100644
--- a/datar/tidyr/unite.py
+++ b/datar/tidyr/unite.py
@@ -2,14 +2,16 @@
 
 from typing import Union
 
+from pipda import register_verb
+
 from ..core.backends import pandas as pd
 from ..core.backends.pandas import DataFrame, Series
-from pipda import register_verb
 
 from ..core.contexts import Context
 from ..core.utils import vars_select, regcall
 from ..core.tibble import reconstruct_tibble
 
+from ..base import setdiff
 from ..dplyr import ungroup
 
 
@@ -38,9 +40,11 @@ def unite(
     """
     all_columns = data.columns
     if not columns:
+        unite_idx = range(data.shape[1])
         columns = all_columns
     else:
-        columns = all_columns[vars_select(all_columns, *columns)]
+        unite_idx = vars_select(data, columns)
+        columns = all_columns[unite_idx]
 
     out = regcall(ungroup, data).copy()
 
@@ -54,15 +58,11 @@ def unite(
         united = united.transform(lambda x: sep.join(str(elem) for elem in x))
 
     # get indexes to relocate
-    unite_cols = out.columns.get_indexer_for(columns)
-    insert_at = int(unite_cols.min())
+    insert_at = int(min(unite_idx))
     out.insert(insert_at, col, united, allow_duplicates=True)
 
     if remove:
-        out_cols = [
-            i for i in range(out.shape[1])
-            if i <= insert_at and i - 1 not in unite_cols
-        ]
-        out = out.iloc[:, out_cols]
+        to_remove = [i if i < insert_at else i + 1 for i in unite_idx]
+        out = out.iloc[:, regcall(setdiff, range(out.shape[1]), to_remove)]
 
     return reconstruct_tibble(data, out)
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 460a5dbf..6724adcd 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 0.8.1
+
+- 🐛 Fix `month_abb` and `month_name` being truncated (#112)
+- 🐛 Fix `unite()` not keeping other columns (#111)
+
 ## 0.8.0
 
 - ✨ Support `base.glimpse()` (#107, machow/siuba#409)
diff --git a/pyproject.toml b/pyproject.toml
index a7d0502d..0b9ad70e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "datar"
-version = "0.8.0"
+version = "0.8.1"
 description = "Port of dplyr and other related R packages in python, using pipda."
 authors = ["pwwang <pwwang@pwwang.com>"]
 readme = "README.md"
diff --git a/tests/tidyr/test_unite.py b/tests/tidyr/test_unite.py
index 31195ba5..31bcac5d 100644
--- a/tests/tidyr/test_unite.py
+++ b/tests/tidyr/test_unite.py
@@ -3,44 +3,47 @@
 import pytest  # noqa
 
 from datar.all import *
+from datar.datasets import table1
 from datar.core.backends.pandas.testing import assert_frame_equal
 from ..conftest import assert_iterable_equal
 
 
 def test_unite_pastes_columns_togeter_and_removes_old_col():
     df = tibble(x="a", y="b")
-    out = df >> unite('z', f[f.x:])
+    out = df >> unite("z", f[f.x :])
     assert_frame_equal(out, tibble(z="a_b"))
 
 
 def test_unite_does_not_remove_new_col_in_case_of_name_clash():
-    df = tibble(x = "a", y = "b")
-    out = df >> unite('x', f[f.x:])
+    df = tibble(x="a", y="b")
+    out = df >> unite("x", f[f.x :])
     cols = out >> names()
     assert_iterable_equal(cols, ["x"])
     assert_iterable_equal(out.x, ["a_b"])
 
 
 def test_unite_preserves_grouping():
-    df = tibble(g = 1, x = "a") >> group_by(f.g)
-    rs = df >> unite('x', f.x)
+    df = tibble(g=1, x="a") >> group_by(f.g)
+    rs = df >> unite("x", f.x)
     assert_frame_equal(df, rs)
     assert group_vars(df) == group_vars(rs)
 
 
 def test_drops_grouping_when_needed():
-    df = tibble(g = 1, x = "a") >> group_by(f.g)
-    rs = df >> unite('gx', f.g, f.x)
+    df = tibble(g=1, x="a") >> group_by(f.g)
+    rs = df >> unite("gx", f.g, f.x)
     assert_iterable_equal(rs.gx, ["1_a"])
     assert group_vars(rs) == []
 
+
 def test_empty_var_spec_uses_all_vars():
-    df = tibble(x = "a", y = "b")
-    assert_iterable_equal(df >> unite("z"), tibble(z = "a_b"))
+    df = tibble(x="a", y="b")
+    assert_iterable_equal(df >> unite("z"), tibble(z="a_b"))
+
 
 def test_can_remove_missing_vars_on_request():
-    df = expand_grid(x = ["a", NA], y = ["b", NA])
-    out = df >> unite("z", f[f.x:], na_rm = TRUE)
+    df = expand_grid(x=["a", NA], y=["b", NA])
+    out = df >> unite("z", f[f.x :], na_rm=TRUE)
 
     assert_iterable_equal(out.z, c("a_b", "a", "b", ""))
 
@@ -65,6 +68,33 @@ def test_can_remove_missing_vars_on_request():
 
 # GH#105
 def test_sep_none_does_not_join_strings():
-    df = tibble(x = "a", y = "b")
-    out = df >> unite('z', f[f.x:], sep = None)
-    assert_frame_equal(out, tibble(z = [["a", "b"]]))
+    df = tibble(x="a", y="b")
+    out = df >> unite("z", f[f.x :], sep=None)
+    assert_frame_equal(out, tibble(z=[["a", "b"]]))
+
+
+# GH#111
+def test_unite_keeping_other_columns():
+    out = unite(table1, f.z, f.country, f.year)
+    assert_frame_equal(
+        out,
+        tibble(
+            z=[
+                "Afghanistan_1999",
+                "Afghanistan_2000",
+                "Brazil_1999",
+                "Brazil_2000",
+                "China_1999",
+                "China_2000",
+            ],
+            cases=[745, 2666, 37737, 80488, 212258, 213766],
+            population=[
+                19987071,
+                20595360,
+                172006362,
+                174504898,
+                1272915272,
+                1280428583,
+            ],
+        ),
+    )