Redefine public API (#435)
* Redefine public API

* Remove api.utils

* Remove api.rst

* remove empty __all__

* Add api modules to import test

* fix import issue with _zmsgpack
fjetter authored Mar 15, 2021
1 parent 563a56b commit 1856b2e
Showing 35 changed files with 170 additions and 309 deletions.
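The user-facing effect of this commit is an import-path consolidation: functions that were previously imported from implementation modules are now exposed through the ``kartothek.api.*`` modules. A minimal before/after sketch, using only module and function names that appear in the documentation changes below:

    # Before: imports from implementation modules (as in the old docs)
    from kartothek.io.eager import read_table, store_dataframes_as_dataset
    from kartothek.core.cube.cube import Cube
    from kartothek.io.eager_cube import build_cube, query_cube

    # After: imports from the redefined public API modules
    from kartothek.api.dataset import read_table, store_dataframes_as_dataset
    from kartothek.api.cube import Cube, build_cube, query_cube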
5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
@@ -95,6 +95,11 @@ jobs:
run: |
python -c "import kartothek"
python -c "import kartothek.api"
python -c "import kartothek.api.consistency"
python -c "import kartothek.api.cube"
python -c "import kartothek.api.dataset"
python -c "import kartothek.api.discover"
python -c "import kartothek.api.serialization"
python -c "import kartothek.cli"
python -c "import kartothek.core"
python -c "import kartothek.io"
2 changes: 1 addition & 1 deletion CHANGES.rst
@@ -393,7 +393,7 @@ Version 3.2.0 (2019-07-25)
using improper types. The behavior now is to raise in these situations.
- Predicate pushdown and :func:`~kartothek.serialization.filter_array_like` will now properly handle pandas Categoricals.
- Add :meth:`~kartothek.io.dask.bag.read_dataset_as_dataframe_bag`
- Add :meth:`~kartothek.io.dask.bag.read_dataset_as_metapartitions_bag`
- Add `kartothek.io.dask.bag.read_dataset_as_metapartitions_bag`


Version 3.1.1 (2019-07-12)
174 changes: 0 additions & 174 deletions docs/api.rst

This file was deleted.

18 changes: 9 additions & 9 deletions docs/guide/cube/examples.rst
@@ -6,7 +6,7 @@ This is a quick walk through the basic functionality of Kartothek Cubes.

First, we want to create a cube for geodata:

>>> from kartothek.core.cube.cube import Cube
>>> from kartothek.api.cube import Cube
>>> cube = Cube(
... uuid_prefix="geodata",
... dimension_columns=["city", "day"],
@@ -56,7 +56,7 @@ Kartothek cube should be initially filled with the following information:

We use the simple :py:mod:`kartothek.io.eager_cube` backend to store the data:

>>> from kartothek.io.eager_cube import build_cube
>>> from kartothek.api.cube import build_cube
>>> datasets_build = build_cube(
... data=df_weather,
... store=store,
@@ -95,7 +95,7 @@ Extend
``````
Now let's say we would also like to have longitude and latitude data in our cube.

>>> from kartothek.io.eager_cube import extend_cube
>>> from kartothek.api.cube import extend_cube
>>> df_location = pd.read_csv(
... filepath_or_buffer=StringIO("""
... city country latitude longitude
@@ -155,7 +155,7 @@ Query
Now the whole beauty of Kartothek Cube does not come from storing multiple datasets, but especially from retrieving the data in a
very comfortable way. It is possible to treat the entire cube as a single, large DataFrame:

>>> from kartothek.io.eager_cube import query_cube
>>> from kartothek.api.cube import query_cube
>>> query_cube(
... cube=cube,
... store=store,
@@ -189,7 +189,7 @@ more helpful for distributed backends like `Distributed`_:

The query system also supports selection and projection:

>>> from kartothek.core.cube.conditions import C
>>> from kartothek.api.cube import C
>>> query_cube(
... cube=cube,
... store=store,
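The selection itself sits in the folded remainder of this hunk; a hedged sketch of what such a call can look like (``C``, ``query_cube``, and the ``avg_temp`` column come from the example above, while the ``conditions``/``payload_columns`` keywords and the threshold are assumptions not visible in this diff):

    >>> query_cube(
    ...     cube=cube,
    ...     store=store,
    ...     conditions=C("avg_temp") > 10,   # selection: illustrative threshold, assumed keyword
    ...     payload_columns=["avg_temp"],    # projection: restrict returned columns, assumed keyword
    ... )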
@@ -214,7 +214,7 @@ Query and Extend can be combined to build powerful transformation pipelines. To
`Distributed`_, Kartothek Cube requires the user to pass a :term:`Store Factory` instead of a store. This ensures that no file
handles, TCP connections, or other non-transportable objects are shared.
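A minimal sketch of such a :term:`Store Factory` (hedged; ``storefact.get_store_from_url`` and the in-memory URL are assumptions, not part of this diff):

    >>> from functools import partial
    >>> from storefact import get_store_from_url
    >>> # A factory is a zero-argument callable that builds a fresh store on each call;
    >>> # only the picklable URL travels to the workers, never an open store object.
    >>> store_factory = partial(get_store_from_url, "hmemory://")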

>>> from kartothek.io.dask.bag_cube import (
>>> from kartothek.api.cube import (
... extend_cube_from_bag,
... query_cube_bag,
... )
@@ -275,7 +275,7 @@ Append
``````
New rows can be added to the cube using an append operation:

>>> from kartothek.io.eager_cube import append_to_cube
>>> from kartothek.api.cube import append_to_cube
>>> df_weather2 = pd.read_csv(
... filepath_or_buffer=StringIO("""
... avg_temp city country day
@@ -321,7 +321,7 @@ Remove
``````
You can remove entire partitions from the cube using the remove operation:

>>> from kartothek.io.eager_cube import remove_partitions
>>> from kartothek.api.cube import remove_partitions
>>> datasets_after_removal = remove_partitions(
... cube=cube,
... store=store,
@@ -346,7 +346,7 @@ Delete
``````
You can also delete entire datasets (or the entire cube):

>>> from kartothek.io.eager_cube import delete_cube
>>> from kartothek.api.cube import delete_cube
>>> datasets_still_in_cube = delete_cube(
... cube=cube,
... store=store,
4 changes: 2 additions & 2 deletions docs/guide/dask_indexing.rst
@@ -13,7 +13,7 @@ Calculating a dask index is usually a very expensive operation which requires da
import pandas as pd
from tempfile import TemporaryDirectory
from kartothek.io.eager import store_dataframes_as_dataset
from kartothek.api.dataset import store_dataframes_as_dataset
dataset_dir = TemporaryDirectory()
@@ -38,7 +38,7 @@ Calculating a dask index is usually a very expensive operation which requires da
.. ipython:: python
import dask.dataframe as dd
from kartothek.io.dask.dataframe import update_dataset_from_ddf, read_dataset_as_ddf
from kartothek.api.dataset import update_dataset_from_ddf, read_dataset_as_ddf
df
15 changes: 7 additions & 8 deletions docs/guide/examples.rst
@@ -16,8 +16,7 @@ Setup a store
.. ipython:: python
import pandas as pd
from kartothek.io.eager import store_dataframes_as_dataset
from kartothek.io.eager import read_table
from kartothek.api.dataset import read_table, store_dataframes_as_dataset
df = pd.DataFrame({"Name": ["Paul", "Lisa"], "Age": [32, 29]})
@@ -50,7 +49,7 @@ Write
.. ipython:: python
import pandas as pd
from kartothek.io.eager import store_dataframes_as_dataset
from kartothek.api.dataset import store_dataframes_as_dataset
#  Now, define the actual partitions. This list will, most of the time,
# be the intermediate result of a previously executed pipeline which e.g. pulls
@@ -87,7 +86,7 @@ Read
.. ipython:: python
import pandas as pd
from kartothek.io.eager import read_dataset_as_dataframes
from kartothek.api.dataset import read_dataset_as_dataframes
#  Create the pipeline with a minimal set of configs
list_of_partitions = read_dataset_as_dataframes(
@@ -118,7 +117,7 @@ Write
.. ipython:: python
import pandas as pd
from kartothek.io.iter import store_dataframes_as_dataset__iter
from kartothek.api.dataset import store_dataframes_as_dataset__iter
input_list_of_partitions = [
{
@@ -148,7 +147,7 @@ Read
.. ipython:: python
import pandas as pd
from kartothek.io.iter import read_dataset_as_dataframes__iterator
from kartothek.api.dataset import read_dataset_as_dataframes__iterator
#  Create the pipeline with a minimal set of configs
list_of_partitions = read_dataset_as_dataframes__iterator(
@@ -182,7 +181,7 @@ Write
.. ipython:: python
import pandas as pd
from kartothek.io.dask.delayed import store_delayed_as_dataset
from kartothek.api.dataset import store_delayed_as_dataset
input_list_of_partitions = [
{
@@ -219,7 +218,7 @@ Read
import dask
import pandas as pd
from kartothek.io.dask.delayed import read_dataset_as_delayed
from kartothek.api.dataset import read_dataset_as_delayed
tasks = read_dataset_as_delayed(dataset_uuid="MyFirstDatasetDask", store=store_url)
tasks
6 changes: 3 additions & 3 deletions docs/guide/getting_started.rst
@@ -97,7 +97,7 @@ to store the ``DataFrame`` ``df`` that we already have in memory.

.. ipython:: python
from kartothek.io.eager import store_dataframes_as_dataset
from kartothek.api.dataset import store_dataframes_as_dataset
df.dtypes.equals(another_df.dtypes) # both have the same schema
@@ -236,7 +236,7 @@ table of the dataset as a pandas DataFrame.

.. ipython:: python
from kartothek.io.eager import read_table
from kartothek.api.dataset import read_table
read_table("a_unique_dataset_identifier", store_url, table="table")
@@ -248,7 +248,7 @@ represent the `tables` of the dataset. For example,

.. ipython:: python
from kartothek.io.iter import read_dataset_as_dataframes__iterator
from kartothek.api.dataset import read_dataset_as_dataframes__iterator
for partition_index, df_dict in enumerate(
read_dataset_as_dataframes__iterator(dataset_uuid="two-tables", store=store_url)