Skip to content

Commit

Permalink
Merge pull request #30 from LukasZahradnik/feature/formatter-and-cleanup
Browse files Browse the repository at this point in the history
Add Black formatter and cleanup
  • Loading branch information
jakubpeleska authored Feb 21, 2024
2 parents 1031a65 + b4eedcc commit d33abfa
Show file tree
Hide file tree
Showing 55 changed files with 1,995 additions and 1,781 deletions.
17 changes: 17 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Lint workflow: runs the Black action on every pull request and on pushes
# to the branches listed below.
name: Lint

on:
  pull_request:
  push:
    branches:
      - master
      - 'releases/**'

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: psf/black@stable
        with:
          # Restrict Black to the package sources.
          src: "./db_transformer"
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"editor.formatOnSave": true,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
}
}
42 changes: 25 additions & 17 deletions db_transformer/data/converter/column/cat_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,22 @@
from db_transformer.schema.columns import CategoricalColumnDef
from db_transformer.schema.schema import ColumnDef

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["CategoricalConverter"]


class CategoricalConverter(SeriesConverter[CategoricalColumnDef]):
def __init__(
    self,
    mapper: Optional[Union[SimpleStringSeriesMapper, SeriesMapper]] = None,
) -> None:
    """Create the converter, optionally pinning the value mapper to use.

    Args:
        mapper: Mapper that is passed through ``get_string_mapper`` and kept
            on ``self.mapper``. When ``None`` (the default), ``self.mapper``
            stays ``None`` and ``_guess_value_set`` falls back to
            ``SIMPLE_STRING_SERIES_MAPPERS``.
    """
    super().__init__()
    if mapper is None:
        self.mapper = None
    else:
        self.mapper = get_string_mapper(mapper)

def __call__(self,
column_def: CategoricalColumnDef,
column: pd.Series,
) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
def __call__(
self,
column_def: CategoricalColumnDef,
column: pd.Series,
) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
distinct_vals, mapper = self._guess_value_set(column_def.card, column)

# give None index of 0
Expand All @@ -37,13 +39,15 @@ def __call__(self,

out_column = mapper(column).map(value_map)

return (out_column, ), (column_def, )
return (out_column,), (column_def,)

def _guess_value_set(self, cardinality: int, column: pd.Series) -> Tuple[List[Any], SeriesMapper]:
def _guess_value_set(
self, cardinality: int, column: pd.Series
) -> Tuple[List[Any], SeriesMapper]:
failed_mappings: List[Tuple[str, int, Optional[Exception]]] = []

if self.mapper is not None:
mappers = {'user_provided': self.mapper}
mappers = {"user_provided": self.mapper}
else:
mappers = SIMPLE_STRING_SERIES_MAPPERS

Expand All @@ -58,13 +62,17 @@ def _guess_value_set(self, cardinality: int, column: pd.Series) -> Tuple[List[An

def _exception_to_str(e: Optional[Exception]) -> str:
if e is None:
return ''
return ""

return f" (failed: {e})"

errormsg = [
f" -> {mapping_name} (cardinality {card}){_exception_to_str(e)}" for mapping_name, card, e in failed_mappings]

raise RuntimeError(f"Expected {cardinality} unique values, "
f"but the following operations on values provided the following cardinalities instead:\n"
+ '\n'.join(errormsg))
f" -> {mapping_name} (cardinality {card}){_exception_to_str(e)}"
for mapping_name, card, e in failed_mappings
]

raise RuntimeError(
f"Expected {cardinality} unique values, "
f"but the following operations on values provided the following cardinalities instead:\n"
+ "\n".join(errormsg)
)
6 changes: 4 additions & 2 deletions db_transformer/data/converter/column/converter_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@

from .series_converter import SeriesConverter

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["ConverterList"]


class ConverterList(SeriesConverter):
def __init__(self, *converters: SeriesConverter) -> None:
    """Remember the converters this list is made of.

    Args:
        *converters: Converters stored, in the order given, on
            ``self.converters``.
    """
    # Run the base initialiser so the class cooperates with its bases.
    super().__init__()
    self.converters = converters

def __call__(self, column_def: ColumnDef, column: pd.Series) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
def __call__(
self, column_def: ColumnDef, column: pd.Series
) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
out: List[pd.Series] = []
out_column_defs: List[ColumnDef] = []

Expand Down
38 changes: 25 additions & 13 deletions db_transformer/data/converter/column/default_datetime_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@

from .pandas_converter import PandasConverter

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["DateConverter", "DateTimeConverter", "TimestampConverter", "TimeConverter"]


class DateConverter(PandasConverter):
    """Split a date column into a year column and a day-of-year column."""

    def __init__(self, skip_if_allsame=True) -> None:
        """Register the ``_year`` and ``_dayofyear`` segments.

        Args:
            skip_if_allsame: Forwarded unchanged to ``PandasConverter.__init__``.
        """

        def year_of(s):
            return s.dt.year, NumericColumnDef()

        def day_of_year(s):
            return s.dt.dayofyear, NumericColumnDef()

        # Each segment pairs a name (presumably a column-name suffix; confirm
        # in PandasConverter.__call__) with the function producing its values.
        super().__init__(
            ("_year", year_of),
            ("_dayofyear", day_of_year),
            skip_if_allsame=skip_if_allsame,
        )


def _get_seconds_since_midnight(s: pd.Series) -> pd.Series:
    """Return the whole seconds elapsed since midnight for each datetime in *s*.

    Missing timestamps are mapped to ``0``; fractions of a second are dropped.

    Args:
        s: Series of datetime values (it must support the ``.dt`` accessor).

    Returns:
        An ``int64`` series aligned with *s*.
    """
    since_midnight = s - s.dt.normalize()
    seconds = since_midnight / pd.Timedelta("1 second")
    # Use an explicit 64-bit dtype (as TimestampConverter does) instead of the
    # platform-dependent ``int``.
    return seconds.fillna(0).astype("int64")


def _get_seconds_since_midnight_time(t: Optional[datetime.time]) -> Optional[int]:
Expand All @@ -36,19 +37,29 @@ class DateTimeConverter(PandasConverter):

def __init__(self, skip_if_allsame=True) -> None:
    """Register the year, day-of-year and seconds-since-midnight segments.

    Args:
        skip_if_allsame: Forwarded unchanged to ``PandasConverter.__init__``.
    """

    def year_of(s):
        return s.dt.year, NumericColumnDef()

    def day_of_year(s):
        return s.dt.dayofyear, NumericColumnDef()

    def seconds_of_day(s):
        return _get_seconds_since_midnight(s), NumericColumnDef()

    super().__init__(
        ("_year", year_of),
        ("_dayofyear", day_of_year),
        ("_seconds_since_midnight", seconds_of_day),
        skip_if_allsame=skip_if_allsame,
    )


class TimeConverter(PandasConverter):
    """Turn a column of time-of-day values into seconds since midnight."""

    def __init__(self, skip_if_allsame=True) -> None:
        """Register the single, unnamed seconds-since-midnight segment.

        Args:
            skip_if_allsame: Forwarded unchanged to ``PandasConverter.__init__``.
        """

        def seconds_of_day(s):
            # Element-wise conversion; the helper handles one value at a time.
            return s.map(_get_seconds_since_midnight_time), NumericColumnDef()

        super().__init__(("", seconds_of_day), skip_if_allsame=skip_if_allsame)


Expand All @@ -57,5 +68,6 @@ class TimestampConverter(PandasConverter):

def __init__(self, skip_if_allsame=True) -> None:
    """Register the single, unnamed integer-seconds segment.

    Args:
        skip_if_allsame: Forwarded unchanged to ``PandasConverter.__init__``.
    """

    def to_seconds(s):
        # Cast to int64 and floor-divide by 10**9.
        # NOTE(review): presumably the int64 values are nanoseconds since the
        # epoch, making the result Unix seconds -- confirm against callers.
        return s.astype("int64") // 10**9, NumericColumnDef()

    super().__init__(("", to_seconds), skip_if_allsame=skip_if_allsame)
8 changes: 5 additions & 3 deletions db_transformer/data/converter/column/identity_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

from .series_converter import SeriesConverter

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["IdentityConverter"]


class IdentityConverter(SeriesConverter):
    """Converter that passes a column through untouched."""

    def __call__(
        self, column_def: ColumnDef, column: pd.Series
    ) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
        """Return *column* and *column_def* as-is, each wrapped in a 1-tuple."""
        series_out = (column,)
        defs_out = (column_def,)
        return series_out, defs_out
6 changes: 4 additions & 2 deletions db_transformer/data/converter/column/omit_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

from .series_converter import SeriesConverter

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["OmitConverter"]


class OmitConverter(SeriesConverter):
    """Converter that drops the column: it produces no series and no definitions."""

    def __call__(
        self, column_def: ColumnDef, column: pd.Series
    ) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
        """Ignore both arguments and return a pair of empty tuples."""
        no_series = tuple()
        no_column_defs = tuple()
        return no_series, no_column_defs
22 changes: 14 additions & 8 deletions db_transformer/data/converter/column/pandas_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,27 @@

from .series_converter import SeriesConverter

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["PandasConverter"]


class PandasConverter(SeriesConverter):
def __init__(
    self,
    *segments: Tuple[str, Callable[[pd.Series], Tuple[pd.Series, ColumnDef]]],
    skip_if_allsame=True
) -> None:
    """Store the segments this converter is built from.

    Args:
        *segments: ``(name, func)`` pairs, where ``func`` maps a series to a
            ``(series, column_def)`` pair. Kept on ``self.segments``.
        skip_if_allsame: Flag kept on ``self.skip_if_allsame``.
    """
    # Run the base initialiser so the class cooperates with its bases.
    super().__init__()
    self.segments = segments
    self.skip_if_allsame = skip_if_allsame

@classmethod
def single(
    cls, func: Callable[[pd.Series], Tuple[pd.Series, ColumnDef]], skip_if_allsame=True
) -> SeriesConverter:
    """Build a converter with one segment, *func*, registered under the empty name.

    Args:
        func: Maps a series to a ``(series, column_def)`` pair.
        skip_if_allsame: Forwarded unchanged to the constructor.

    Returns:
        A new ``PandasConverter``.
    """
    only_segment = ("", func)
    # Always builds a plain PandasConverter rather than ``cls``.
    return PandasConverter(only_segment, skip_if_allsame=skip_if_allsame)

def __call__(
self, column_def: ColumnDef, column: pd.Series
) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
out: List[pd.Series] = []
out_column_defs: List[ColumnDef] = []

Expand Down
26 changes: 15 additions & 11 deletions db_transformer/data/converter/column/per_type_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,23 @@
import pandas as pd

from db_transformer.data.converter.column.series_converter import SeriesConverter
from db_transformer.data.utils.column_def_matching import ColumnDefMatcherLike, find_value_for_matcher, get_matcher
from db_transformer.data.utils.column_def_matching import (
ColumnDefMatcherLike,
find_value_for_matcher,
get_matcher,
)
from db_transformer.schema.schema import ColumnDef

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = ["PerTypeSeriesConverter"]


class PerTypeSeriesConverter(SeriesConverter[ColumnDef]):
def __init__(self, *converters: Tuple[ColumnDefMatcherLike, SeriesConverter]) -> None:
    """Store the converters together with their normalised matchers.

    Args:
        *converters: ``(matcher_like, converter)`` pairs. Each matcher-like
            is passed through ``get_matcher``; the resulting pairs are kept,
            in the order given, on ``self.converters``.
    """
    # Run the base initialiser so the class cooperates with its bases.
    super().__init__()
    self.converters = [
        (get_matcher(matcher_like), converter) for matcher_like, converter in converters
    ]

def __call__(self,
column_def: ColumnDef,
column: pd.Series) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
def __call__(
self, column_def: ColumnDef, column: pd.Series
) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
converter = find_value_for_matcher(self.converters, column_def)

if converter is None:
Expand All @@ -28,9 +31,10 @@ def __call__(self,
raise RuntimeError(f"Failed to convert {column_def} using {converter}") from e

if len(series) != len(this_column_defs):
raise ValueError(f"{converter} returned {len(series)} pd.Series objects, "
f"but {len(this_column_defs)} column definition objects "
f"for column def {column_def}.")
raise ValueError(
f"{converter} returned {len(series)} pd.Series objects, "
f"but {len(this_column_defs)} column definition objects "
f"for column def {column_def}."
)

return series, this_column_defs

8 changes: 5 additions & 3 deletions db_transformer/data/converter/column/series_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@

from db_transformer.schema.columns import ColumnDef

# Type of column definition a concrete converter accepts.
_TColumnDef = TypeVar("_TColumnDef", bound=ColumnDef)

# Python only consults the lowercase ``__all__`` for ``from module import *``;
# an uppercase ``__ALL__`` is an ordinary variable with no effect.
__all__ = [
    "SeriesConverter",
]


class SeriesConverter(Generic[_TColumnDef], ABC):
    """Abstract base for callables that convert one column into output columns."""

    @abstractmethod
    def __call__(
        self, column_def: _TColumnDef, column: pd.Series
    ) -> Tuple[Sequence[pd.Series], Sequence[ColumnDef]]:
        """Convert *column*, which is described by *column_def*.

        Args:
            column_def: Definition of the input column.
            column: Values of the input column.

        Returns:
            A ``(series, column_defs)`` pair: the produced series and the
            column definitions describing them.
        """
Loading

0 comments on commit d33abfa

Please sign in to comment.