Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update readme and tests #11

Merged
merged 11 commits into from
Oct 31, 2024
13 changes: 13 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[run]
# All files under source are checked, even if not otherwise referenced.
source = .

# More strict: Check transitions between lines, not just individual lines.
branch = True

omit = setup.py

[report]
show_missing = True
skip_covered = True
fail_under = 100
19 changes: 16 additions & 3 deletions .github/workflows/smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
python-version: [3.8]
python-version:
- '3.9'
- '3.12'
opendp-version:
- '==0.8.0'
- ''
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand All @@ -35,10 +40,18 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pytest
python -m pip install pytest coverage

- name: Install package
run: python -m pip install -e .

- name: (Re)install opendp
run: python -m pip install opendp${{ matrix.opendp-version }}

- name: Test
run: cd test && pytest -v
# OpenDP doesn't have pretty reprs in older versions,
# so skip the doc test in that case.
run: coverage run -m pytest -v ${{ matrix.opendp-version && '-k "not README"' || '' }}

- name: Check coverage
run: coverage report
5 changes: 5 additions & 0 deletions .pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[pytest]
addopts = --doctest-glob '*.md' --doctest-modules

# If an xfail test starts passing unexpectedly, that should count as a failure:
xfail_strict=true
92 changes: 70 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@

The OpenDP logger makes it possible to serialize and deserialize OpenDP Measurements/Transformations to/from JSON.

## Usage

## Serialize
### Serialize
Enable logging (globally) before you build your transformations and/or measurements:

```python
Expand All @@ -16,33 +17,80 @@ enable_logging()
```
Once this is enabled, Transformations/Measurements have a method `.to_json()` that returns a JSON string.

## Deserialize
### Deserialize
Deserialize a JSON string into a Transformation/Measurement by invoking `opendp_logger.make_load_json`.

# Example
### Example
```python
from opendp_logger import enable_logging
from opendp.mod import enable_features
>>> from opendp_logger import enable_logging, make_load_json
>>> import opendp.prelude as dp

enable_logging()
enable_features("contrib")

import opendp.transformations as trans
>>> enable_logging()
>>> dp.enable_features("contrib")

preprocessor = (
# load data into a dataframe where columns are of type Vec<str>
trans.make_split_dataframe(separator=",", col_names=["hello", "world"])
>>
# select a column of the dataframe
trans.make_select_column(key="income", TOA=str)
)
>>> preprocessor = (
... # load data into a dataframe where columns are of type Vec<str>
... dp.t.make_split_dataframe(separator=",", col_names=["hello", "world"])
... >>
... # select a column of the dataframe
... dp.t.make_select_column(key="income", TOA=str)
... )

# serialize the chain to json
json_obj = preprocessor.to_json()
print("json:", json_obj)
>>> # serialize the chain to json
>>> json_obj = preprocessor.to_json(indent=2)
>>> print(json_obj)
{
"ast": {
"_type": "constructor",
"func": "make_chain_tt",
"module": "combinators",
"args": [
{
"_type": "constructor",
"func": "make_select_column",
"module": "transformations",
"kwargs": {
"key": "income",
"TOA": "String"
}
},
{
"_type": "constructor",
"func": "make_split_dataframe",
"module": "transformations",
"kwargs": {
"separator": ",",
"col_names": {
"_type": "list",
"_items": [
"hello",
"world"
]
}
}
}
]
}
}

from opendp_logger import make_load_json
>>> # reconstruct the obj from the json string
>>> make_load_json(json_obj)
Transformation(
input_domain = AtomDomain(T=String),
output_domain = VectorDomain(AtomDomain(T=String)),
input_metric = SymmetricDistance(),
output_metric = SymmetricDistance())

# reconstruct the obj from the json string
test = make_load_json(json_obj)
```

## Development

```shell
git clone https://github.com/opendp/opendp-logger.git
cd opendp-logger
python3 -m venv .venv
source .venv/bin/activate
pip install pytest
pip install -e .
pytest -v
```
20 changes: 8 additions & 12 deletions opendp_logger/deserialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,13 @@
import json
import builtins

import pkg_resources

try:
OPENDP_VERSION = pkg_resources.get_distribution("opendp").version
except pkg_resources.DistributionNotFound:
OPENDP_VERSION = "development"

__all__ = ["make_load_json", "make_load_ast"]


def decode_ast(obj):
if isinstance(obj, dict):
if obj.get("_type") == "type":
return getattr(builtins, dp.RuntimeType.parse(obj["name"]))
return getattr(builtins, dp.RuntimeType.parse(obj["name"])) # pragma: no cover

if obj.get("_type") == "list":
return [decode_ast(i) for i in obj["_items"]]
Expand All @@ -44,9 +37,12 @@ def make_load_json(parse_str: str):


def make_load_ast(obj, force=False):
if obj["version"] != OPENDP_VERSION and not force:
raise ValueError(
f"OpenDP version in parsed object ({obj['version']}) does not match the current installation ({OPENDP_VERSION}). Set `force=True` to try to load anyways."
)
# TODO: Reenable when we can get the OpenDP version:
# https://github.com/opendp/opendp/issues/2103
#
# if obj["version"] != OPENDP_VERSION and not force:
# raise ValueError(
# f"OpenDP version in parsed object ({obj['version']}) does not match the current installation ({OPENDP_VERSION}). Set `force=True` to try to load anyways."
# )

return decode_ast(obj["ast"])
10 changes: 5 additions & 5 deletions opendp_logger/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import json
from functools import wraps

from opendp_logger.deserialize import OPENDP_VERSION

import importlib

__all__ = ["enable_logging"]
Expand Down Expand Up @@ -39,14 +37,14 @@ def wrapper(*args, **kwargs):
}
args and chain.log.setdefault("args", args)
kwargs and chain.log.setdefault("kwargs", kwargs)
return chain
return chain # pragma: no cover (if isinstance is false)

return wrapper


def to_ast(item):
if isinstance(item, LOGGED_CLASSES):
if not hasattr(item, "log"):
if not hasattr(item, "log"): # pragma: no cover
msg = "invoke `opendp_logger.enable_logging()` before constructing your measurement"
raise ValueError(msg)

Expand All @@ -64,7 +62,9 @@ def to_ast(item):

def to_json(chain, *args, **kwargs):
return json.dumps(
{"version": OPENDP_VERSION, "ast": chain.to_ast()}, *args, **kwargs
# TODO: Include OpenDP version
# https://github.com/opendp/opendp/issues/2103
{"ast": chain.to_ast()}, *args, **kwargs
)


Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
],
keywords='opendp logger ast',
packages=find_packages(),
python_requires=">=3.8, <4",
python_requires=">=3.9, <4",
install_requires=[
"opendp >= 0.8.0"
],
Expand Down
Loading