Skip to content

Commit

Permalink
39 experiment with jinja2 (#40)
Browse files Browse the repository at this point in the history
* jinja2 experiment

* testing dataclass

* testing dataclass

* moving jinja2 up

* remove outdated test
  • Loading branch information
tschm authored Dec 25, 2023
1 parent 4ccbf40 commit 6ac79fa
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 21 deletions.
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ coverage: install ## test and coverage
xdg-open htmlcov/index.html 2> /dev/null; \
fi


.PHONY: help
help: ## Display this help screen
@echo -e "\033[1mAvailable commands:\033[0m"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ programming languages. However, they are not very efficient.
Here we use their binary counterpart, bson files. Bson files are much more efficient
but somewhat lack the flexibility of json files. Here we rely on the [bson](https://pypi.org/project/bson/)
package to read and write bson files. We are interested in parsing dictionaries
of numpy arrays, pandas and polars dataframe as fast as possible.
of numpy arrays, pandas and polars dataframes as fast as possible.

There might be faster ways to achieve this goal and we are open to suggestions
and pull requests.
Expand Down
16 changes: 15 additions & 1 deletion cvx/bson/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from io import BytesIO
from typing import Any, Union

Expand Down Expand Up @@ -43,7 +44,17 @@ def encode(data: Union[np.ndarray, pd.DataFrame, pl.DataFrame]) -> Any:
result.seek(0)
return result.read()

raise TypeError(f"Invalid Datatype {type(data)}")
converted = json.dumps(data).encode(encoding="utf-8")
arr = bytes("cvx", "utf-8")
return arr + converted

# return bytes.
# print(encoded_tuple)
# decoded_color = encoded_color.decode()
# orginal_form = json.load(decoded_color)
# return

# raise TypeError(f"Invalid Datatype {type(data)}")


def decode(data: bytes) -> Union[np.ndarray, pd.DataFrame, pl.DataFrame]:
Expand All @@ -67,5 +78,8 @@ def decode(data: bytes) -> Union[np.ndarray, pd.DataFrame, pl.DataFrame]:
if header == b"PAR":
return pd.read_parquet(BytesIO(data))

if header == b"cvx":
return json.loads(data[3:].decode())

# if still here we try numpy
return pa.ipc.read_tensor(data).to_numpy()
13 changes: 13 additions & 0 deletions cvx/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2023 Stanford University Convex Optimization Group
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
31 changes: 31 additions & 0 deletions cvx/data/data_api.py.jinja2
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2023 Stanford University Convex Optimization Group
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DataAPI for {{ strategy }}"""
# This class has been generated by cvxbson
from dataclasses import dataclass, field
from typing import Dict

import polars as pl

from cvx.bson.dataclass import Data

@dataclass(frozen=True)
class DataAPI(Data):
# List of tables
{% for name in names.keys() %}
{{ name }}: pl.DataFrame
{% endfor %}

# Mapping from short name to full name
tables: Dict[str, str] = field(default_factory=lambda: {{ names }})
31 changes: 31 additions & 0 deletions cvx/data/render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2023 Stanford University Convex Optimization Group
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a dataclass from a template"""
from pathlib import Path

from jinja2 import Template


def render(names, strategy) -> str:
    """Render the source code of a ``DataAPI`` dataclass from the template.

    The template ``data_api.py.jinja2`` is expected to sit in the same
    directory as this module.

    Args:
        names: Dictionary mapping the short name of every table
            (e.g. ``prices``) to its longer descriptive name. The short
            names become the field names of the generated class; the whole
            dictionary is embedded as the default of its ``tables`` field.
        strategy: Name of the strategy; it is inserted into the docstring
            of the generated module.

    Returns:
        The rendered template as a string. The caller decides where to
        store it (e.g. copy & paste it into a new file).
    """
    # the template is located next to this module
    template_file = Path(__file__).parent / "data_api.py.jinja2"

    # read_text opens *and* closes the file; the previous bare
    # open(...).read() left the file handle to the garbage collector
    template = Template(template_file.read_text(encoding="utf-8"))

    return template.render(names=names, strategy=strategy)
48 changes: 48 additions & 0 deletions experiment/demo1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from dataclasses import dataclass, field
from typing import Dict

import polars as pl

from cvx.bson.dataclass import Data


@dataclass(frozen=True)
class DataAPI(Data):
    """Immutable container holding the three demo tables A, B and C."""

    # One polars frame per expected table, addressed by its short name
    A: pl.DataFrame
    B: pl.DataFrame
    C: pl.DataFrame

    # Short name -> full name. A dict is mutable, hence the factory:
    # every instance receives its own fresh dictionary.
    tables: Dict[str, str] = field(
        default_factory=lambda: {"A": "AAA", "B": "BBB", "C": "CCC"},
    )


if __name__ == "__main__":
    # Small demo: build the API, store it as bson and read it back
    import numpy as np

    def strategy(api: DataAPI):
        # the strategy only talks to the API,
        # it has no idea how the data is stored
        print(api.A)

    frame_a = pl.DataFrame(np.random.rand(10, 5))
    frame_b = pl.DataFrame(np.random.rand(20, 3))
    frame_c = pl.DataFrame(np.random.rand(50, 5))
    data = DataAPI(A=frame_a, B=frame_b, C=frame_c)

    # the mapping of names and the individual tables are plain attributes
    print(data.tables)
    for table in (data.A, data.B, data.C):
        print(table)

    # all tables end up in one single bson file
    bson_file = "xxx.bson"
    print(data.to_bson(bson_file))

    # reconstruct the API from that file
    data2 = DataAPI.from_bson(bson_file)
    print(data2)

    strategy(data2)
16 changes: 16 additions & 0 deletions experiment/fill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from cvx.data.render import render

if __name__ == "__main__":
    # Each table has a short name (e.g. prices) and a longer descriptive
    # name. The short name becomes the variable name in the generated code.
    # Eventually this dictionary should be gained through reflection.
    table_names = {"A": "AAA", "B": "BBB", "C": "CCC"}
    strategy_name = "s239"

    generated = render(table_names, strategy_name)
    print(generated)

# open the template
# template = Template(open("creator.py.jinja2").read())

# we copy & paste the new class into a new file
# print(template.render(names=names, strategy="s239"))
Binary file added experiment/xxx.bson
Binary file not shown.
94 changes: 90 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pyarrow = "*"
bson = "0.5.10"
polars = "*"
typing_extensions = "*"
jinja2 = "*"

[tool.poetry.group.test.dependencies]
pytest = "*"
Expand All @@ -26,7 +27,6 @@ pre-commit = "*"
[tool.poetry.group.dev.dependencies]
loguru = "*"


[[tool.poetry.source]]
name = "PyPI"
priority = "primary"
Expand Down
13 changes: 0 additions & 13 deletions tests/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,6 @@ def test_write(tmp_path, shape):
np.allclose(x["a"], data["a"])


def test_wrong_type(tmp_path):
    """
    Writing a dictionary with an unsupported value type has to
    raise a TypeError

    Args:
        tmp_path: temporary path fixture
    """
    unsupported = {"B": 3}
    target = tmp_path / "maffay.bson"

    with pytest.raises(TypeError):
        write_bson(data=unsupported, file=target)


def test_vector(tmp_path):
"""
Test that a vector is written and read correctly
Expand Down

0 comments on commit 6ac79fa

Please sign in to comment.