diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7885edbf..f0f2cedd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,3 +6,40 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml +- repo: local + hooks: + - id: masterpylintrc + name: Overwrite local .pylintrc by master one + entry: cp ../.pylintrc ./.pylintrc + pass_filenames: false + always_run: true + language: system +- repo: https://github.com/pre-commit/mirrors-pylint + rev: v2.4.4 + hooks: + - id: pylint + files: ^datar/.+$ + pass_filenames: false + types: [python] + args: [datar] +- repo: local + hooks: + - id: poetry2setuppy + name: Convert pyproject.toml to setup.py + entry: dephell deps convert --from=poetry --to=setup.py + language: system + files: pyproject.toml + pass_filenames: false + - id: poetry2requirements + name: Convert pyproject.toml to requirements.txt + entry: dephell deps convert --from=poetry --to=requirements.txt + language: system + files: pyproject.toml + pass_filenames: false + - id: pytest + name: Run pytest + entry: pytest + language: system + args: [tests/] + pass_filenames: false + files: ^tests/.+$|^pipda/.+$ diff --git a/datar/__init__.py b/datar/__init__.py index 1602653b..92553d2b 100644 --- a/datar/__init__.py +++ b/datar/__init__.py @@ -6,4 +6,4 @@ f = Symbolic() # pylint: disable=invalid-name -__version__ = '0.0.0' +__version__ = '0.0.1' diff --git a/datar/tidyr/verbs.py b/datar/tidyr/verbs.py index 1a272264..d4e34373 100644 --- a/datar/tidyr/verbs.py +++ b/datar/tidyr/verbs.py @@ -2,13 +2,13 @@ from typing import Any, Callable, Mapping, Optional, Type, Union import numpy -from numpy.core.fromnumeric import shape import pandas -from pandas import DataFrame, Series +from pandas import DataFrame +from pandas.core.groupby.generic import DataFrameGroupBy from pipda import register_verb, Context -from ..core.utils import select_columns, list_diff -from ..core.types import IntOrIter, StringOrIter, is_scalar +from ..core.utils import objectize, select_columns, list_diff +from ..core.types import DataFrameType, IntOrIter, StringOrIter, is_scalar @register_verb(DataFrame, context=Context.SELECT) def pivot_longer( @@ -207,13 +207,31 @@ def get_new_colname(cols, names): return ret -@register_verb(DataFrame, context=Context.EVAL) +@register_verb((DataFrame, DataFrameGroupBy), context=Context.EVAL) def uncount( - _data: DataFrame, + _data: DataFrameType, weights: IntOrIter, _remove: bool = True, _id: Optional[str] = None, -) -> DataFrame: +) -> DataFrameType: + """Duplicating rows according to a weighting variable + + Args: + _data: A data frame + weights: A vector of weights. Evaluated in the context of data + _remove: If TRUE, and weights is the name of a column in data, + then this column is removed. + _id: Supply a string to create a new variable which gives a + unique identifier for each created row (0-based). + + Returns: + dataframe with rows repeated. + """ + gnames = ( + _data.grouper.names + if isinstance(_data, DataFrameGroupBy) else None + ) + _data = objectize(_data) if is_scalar(weights): weights = [weights] * _data.shape[0] @@ -231,7 +249,9 @@ def uncount( ret = _data.loc[indexes, rest_columns] if _remove else _data.loc[indexes, :] if _id: - return ret.groupby(rest_columns).apply( + ret = ret.groupby(rest_columns).apply( lambda df: df.assign(**{_id: range(df.shape[0])}) ).reset_index(drop=True, level=0) + if gnames: + return ret.groupby(gnames, dropna=False) return ret diff --git a/pyproject.toml b/pyproject.toml index 8517c181..526e31c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datar" -version = "0.0.0" +version = "0.0.1" description = "Probably the closest port of tidyr, dplyr and tibble in python" authors = ["pwwang "] license = "MIT" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..0a18811b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +modkit +pandas==1.*,>=1.2.0 +pipda +pytest +pytest-cov diff --git a/setup.py b/setup.py index 916f80da..444591c4 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ setup( long_description=readme, name='datar', - version='0.0.0', + version='0.0.1', description='Probably the closest port of tidyr, dplyr and tibble in python', python_requires='==3.*,>=3.7.1', author='pwwang',