Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add ibis project #2

Merged
merged 1 commit into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added 002_ibis/audits/.gitkeep
Empty file.
8 changes: 8 additions & 0 deletions 002_ibis/audits/assert_positive_order_ids.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
AUDIT (
name assert_positive_order_ids,
);

SELECT *
FROM @this_model
WHERE
item_id < 0
8 changes: 8 additions & 0 deletions 002_ibis/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
gateways:
my_gateway:
connection:
type: duckdb
catalogs:
local: 'data/local.duckdb'
model_defaults:
dialect: 'duckdb'
3 changes: 3 additions & 0 deletions 002_ibis/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import os

DB_PATH = os.path.join(os.path.dirname(__file__), "data/local.duckdb")
Empty file added 002_ibis/data/.keep
Empty file.
Empty file added 002_ibis/macros/.gitkeep
Empty file.
Empty file added 002_ibis/macros/__init__.py
Empty file.
Empty file added 002_ibis/models/.gitkeep
Empty file.
14 changes: 14 additions & 0 deletions 002_ibis/models/full_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
MODEL (
name ibis.full_model,
kind FULL,
cron '@daily',
grain item_id,
audits (assert_positive_order_ids),
);

SELECT
item_id,
COUNT(DISTINCT id) AS num_orders,
FROM
ibis.incremental_model
GROUP BY item_id
43 changes: 43 additions & 0 deletions 002_ibis/models/ibis_full_model_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import typing as t
from datetime import datetime

import ibis # type: ignore
import pandas as pd
from constants import DB_PATH # type: ignore
from sqlglot import exp

from sqlmesh import ExecutionContext, model
from sqlmesh.core.model import FullKind


@model(
"ibis.ibis_full_model_python",
kind=FullKind(),
columns={
"item_id": "int",
"num_orders": "int",
},
audits=["assert_positive_order_ids"],
description="This model uses ibis to transform a `table` object and return a dataframe",
)
def execute(
context: ExecutionContext,
start: datetime,
end: datetime,
execution_time: datetime,
**kwargs: t.Any,
) -> pd.DataFrame:
# get physical table name
upstream_model = exp.to_table(context.table("ibis.incremental_model"))
# connect ibis to database
con = ibis.duckdb.connect(DB_PATH)

# retrieve table
incremental_model = con.table(name=upstream_model.name, schema=upstream_model.db)

# build query
count = incremental_model.id.nunique()
aggregate = incremental_model.group_by("item_id").aggregate(num_orders=count)
query = aggregate.order_by("item_id")

return query.to_pandas()
28 changes: 28 additions & 0 deletions 002_ibis/models/ibis_full_model_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import ibis # type: ignore
from ibis.expr.operations import Namespace, UnboundTable # type: ignore

from sqlmesh.core.macros import MacroEvaluator
from sqlmesh.core.model import model


@model(
"ibis.ibis_full_model_sql",
is_sql=True,
kind="FULL",
audits=["assert_positive_order_ids"],
description="This model uses ibis to generate and return a SQL string",
)
def entrypoint(evaluator: MacroEvaluator) -> str:
# create table reference
incremental_model = UnboundTable(
name="incremental_model",
schema={"id": "int32", "item_id": "int32", "ds": "varchar"},
namespace=Namespace(database="local", schema="ibis"),
).to_expr()

# build query
count = incremental_model.id.nunique()
aggregate = incremental_model.group_by("item_id").aggregate(num_orders=count)
query = aggregate.order_by("item_id")

return ibis.to_sql(query)
18 changes: 18 additions & 0 deletions 002_ibis/models/incremental_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
MODEL (
name ibis.incremental_model,
kind INCREMENTAL_BY_TIME_RANGE (
time_column event_date
),
start '2020-01-01',
cron '@daily',
grain (id, event_date)
);

SELECT
id,
item_id,
event_date,
FROM
ibis.seed_model
WHERE
event_date BETWEEN @start_date AND @end_date
12 changes: 12 additions & 0 deletions 002_ibis/models/seed_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
MODEL (
name ibis.seed_model,
kind SEED (
path '../seeds/seed_data.csv'
),
columns (
id INTEGER,
item_id INTEGER,
event_date DATE
),
grain (id, event_date)
);
Empty file added 002_ibis/seeds/.gitkeep
Empty file.
8 changes: 8 additions & 0 deletions 002_ibis/seeds/seed_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
id,item_id,event_date
1,2,2020-01-01
2,1,2020-01-01
3,3,2020-01-03
4,1,2020-01-04
5,1,2020-01-05
6,1,2020-01-06
7,1,2020-01-07