Skip to content

Commit

Permalink
add ibis project
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-sha committed Apr 26, 2024
1 parent 289339c commit 3e36cc5
Show file tree
Hide file tree
Showing 15 changed files with 142 additions and 0 deletions.
Empty file added 002_ibis/audits/.gitkeep
Empty file.
8 changes: 8 additions & 0 deletions 002_ibis/audits/assert_positive_order_ids.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
AUDIT (
name assert_positive_order_ids,
);

SELECT *
FROM @this_model
WHERE
item_id < 0
8 changes: 8 additions & 0 deletions 002_ibis/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
gateways:
my_gateway:
connection:
type: duckdb
catalogs:
local: 'data/local.duckdb'
model_defaults:
dialect: 'duckdb'
3 changes: 3 additions & 0 deletions 002_ibis/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import os

DB_PATH = os.path.join(os.path.dirname(__file__), "data/local.duckdb")
Empty file added 002_ibis/data/.keep
Empty file.
Empty file added 002_ibis/macros/.gitkeep
Empty file.
Empty file added 002_ibis/macros/__init__.py
Empty file.
Empty file added 002_ibis/models/.gitkeep
Empty file.
14 changes: 14 additions & 0 deletions 002_ibis/models/full_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
MODEL (
name ibis.full_model,
kind FULL,
cron '@daily',
grain item_id,
audits (assert_positive_order_ids),
);

SELECT
item_id,
COUNT(DISTINCT id) AS num_orders,
FROM
ibis.incremental_model
GROUP BY item_id
43 changes: 43 additions & 0 deletions 002_ibis/models/ibis_full_model_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import typing as t
from datetime import datetime

import ibis # type: ignore
import pandas as pd
from constants import DB_PATH # type: ignore
from sqlglot import exp

from sqlmesh import ExecutionContext, model
from sqlmesh.core.model import FullKind


@model(
"ibis.ibis_full_model_python",
kind=FullKind(),
columns={
"item_id": "int",
"num_orders": "int",
},
audits=["assert_positive_order_ids"],
description="This model uses ibis to transform a `table` object and return a dataframe",
)
def execute(
context: ExecutionContext,
start: datetime,
end: datetime,
execution_time: datetime,
**kwargs: t.Any,
) -> pd.DataFrame:
# get physical table name
upstream_model = exp.to_table(context.table("ibis.incremental_model"))
# connect ibis to database
con = ibis.duckdb.connect(DB_PATH)

# retrieve table
incremental_model = con.table(name=upstream_model.name, schema=upstream_model.db)

# build query
count = incremental_model.id.nunique()
aggregate = incremental_model.group_by("item_id").aggregate(num_orders=count)
query = aggregate.order_by("item_id")

return query.to_pandas()
28 changes: 28 additions & 0 deletions 002_ibis/models/ibis_full_model_sql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import ibis # type: ignore
from ibis.expr.operations import Namespace, UnboundTable # type: ignore

from sqlmesh.core.macros import MacroEvaluator
from sqlmesh.core.model import model


@model(
"ibis.ibis_full_model_sql",
is_sql=True,
kind="FULL",
audits=["assert_positive_order_ids"],
description="This model uses ibis to generate and return a SQL string",
)
def entrypoint(evaluator: MacroEvaluator) -> str:
# create table reference
incremental_model = UnboundTable(
name="incremental_model",
schema={"id": "int32", "item_id": "int32", "ds": "varchar"},
namespace=Namespace(database="local", schema="ibis"),
).to_expr()

# build query
count = incremental_model.id.nunique()
aggregate = incremental_model.group_by("item_id").aggregate(num_orders=count)
query = aggregate.order_by("item_id")

return ibis.to_sql(query)
18 changes: 18 additions & 0 deletions 002_ibis/models/incremental_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
MODEL (
name ibis.incremental_model,
kind INCREMENTAL_BY_TIME_RANGE (
time_column event_date
),
start '2020-01-01',
cron '@daily',
grain (id, event_date)
);

SELECT
id,
item_id,
event_date,
FROM
ibis.seed_model
WHERE
event_date BETWEEN @start_date AND @end_date
12 changes: 12 additions & 0 deletions 002_ibis/models/seed_model.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
MODEL (
name ibis.seed_model,
kind SEED (
path '../seeds/seed_data.csv'
),
columns (
id INTEGER,
item_id INTEGER,
event_date DATE
),
grain (id, event_date)
);
Empty file added 002_ibis/seeds/.gitkeep
Empty file.
8 changes: 8 additions & 0 deletions 002_ibis/seeds/seed_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
id,item_id,event_date
1,2,2020-01-01
2,1,2020-01-01
3,3,2020-01-03
4,1,2020-01-04
5,1,2020-01-05
6,1,2020-01-06
7,1,2020-01-07

0 comments on commit 3e36cc5

Please sign in to comment.