Skip to content

Commit

Permalink
probably time to commit all this shit
Browse files Browse the repository at this point in the history
  • Loading branch information
Tania Sabzizar committed Feb 18, 2024
1 parent 19b9994 commit 35c495c
Show file tree
Hide file tree
Showing 13 changed files with 602 additions and 131 deletions.
6 changes: 1 addition & 5 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,5 @@ clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"

# seeds:
# +database: tania_test
# +schema: IM

models:
+databricks_compute: "Compute1"

117 changes: 64 additions & 53 deletions models/legacy/customer_orders.sql
Original file line number Diff line number Diff line change
@@ -1,55 +1,66 @@
{{
config(
materialized='incremental'
)
}}

WITH paid_orders as (select Orders.ID as order_id,
Orders.USER_ID as customer_id,
Orders.ORDER_DATE AS order_placed_at,
Orders.STATUS AS order_status,
p.total_amount_paid,
p.payment_finalized_date,
C.FIRST_NAME as customer_first_name,
C.LAST_NAME as customer_last_name
FROM default.jaffle_shop_orders as Orders
left join (select ORDERID as order_id, max(CREATED) as payment_finalized_date, sum(AMOUNT) / 100.0 as total_amount_paid
from default.stripe_payments
where STATUS <> 'fail'
group by 1) p ON orders.ID = p.order_id
left join default.jaffle_shop_customers C on orders.USER_ID = C.ID ),

customer_orders
as (select C.ID as customer_id
, min(ORDER_DATE) as first_order_date
, max(ORDER_DATE) as most_recent_order_date
, count(ORDERS.ID) AS number_of_orders
from default.jaffle_shop_customers C
left join default.jaffle_shop_orders as Orders
on orders.USER_ID = C.ID
group by 1)

select
p.*,
ROW_NUMBER() OVER (ORDER BY p.order_id) as transaction_seq,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY p.order_id) as customer_sales_seq,
CASE WHEN c.first_order_date = p.order_placed_at
THEN 'new'
ELSE 'return' END as nvsr,
x.clv_bad as customer_lifetime_value,
c.first_order_date as fdos
FROM paid_orders p
left join customer_orders as c USING (customer_id)
LEFT OUTER JOIN
(
select
p.order_id,
sum(t2.total_amount_paid) as clv_bad
from paid_orders p
left join paid_orders t2 on p.customer_id = t2.customer_id and p.order_id >= t2.order_id
group by 1
order by p.order_id
) x on x.order_id = p.order_id
ORDER BY order_id
select
orders.id as order_id,
orders.user_id as customer_id,
last_name as surname,
first_name as givenname,
first_order_date,
order_count,
total_lifetime_value,
round(amount/100.0,2) as order_value_dollars,
orders.status as order_status,
payments.status as payment_status
from {{ source('jaffle_shop', 'orders') }} as orders

join (
select
first_name || ' ' || last_name as name,
*
from {{ source('jaffle_shop', 'customers') }}
) customers
on orders.user_id = customers.id

join (

select
b.id as customer_id,
b.name as full_name,
b.last_name as surname,
b.first_name as givenname,
min(order_date) as first_order_date,
min(case when a.status NOT IN ('returned','return_pending') then order_date end) as first_non_returned_order_date,
max(case when a.status NOT IN ('returned','return_pending') then order_date end) as most_recent_non_returned_order_date,
COALESCE(max(user_order_seq),0) as order_count,
COALESCE(count(case when a.status != 'returned' then 1 end),0) as non_returned_order_count,
sum(case when a.status NOT IN ('returned','return_pending') then ROUND(c.amount/100.0,2) else 0 end) as total_lifetime_value,
sum(case when a.status NOT IN ('returned','return_pending') then ROUND(c.amount/100.0,2) else 0 end)/NULLIF(count(case when a.status NOT IN ('returned','return_pending') then 1 end),0) as avg_non_returned_order_value,
array_agg(distinct a.id) as order_ids

from (
select
row_number() over (partition by user_id order by order_date, id) as user_order_seq,
*
from {{ source('jaffle_shop', 'orders') }}
) a

join (
select
first_name || ' ' || last_name as name,
*
from {{ source('jaffle_shop', 'customers') }}
) b
on a.user_id = b.id

left outer join {{ source('stripe', 'payment') }} c
on a.id = c.orderid

where a.status NOT IN ('pending') and c.status != 'fail'

group by b.id, b.name, b.last_name, b.first_name

) customer_order_history
on orders.user_id = customer_order_history.customer_id

left outer join {{ source('stripe', 'payment') }} payments
on orders.id = payments.orderid

where payments.status != 'fail'
203 changes: 156 additions & 47 deletions models/marts/fct_customer_orders.sql
Original file line number Diff line number Diff line change
@@ -1,47 +1,156 @@
WITH paid_orders AS (SELECT Orders.ID AS order_id,
Orders.USER_ID AS customer_id,
Orders.ORDER_DATE AS order_placed_at,
Orders.STATUS AS order_status,
p.total_amount_paid,
p.payment_finalized_date,
C.FIRST_NAME AS customer_first_name,
C.LAST_NAME AS customer_lASt_name
FROM default.jaffle_shop_orders AS Orders
LEFT JOIN (SELECT orderid AS order_id, max(CREATED) AS payment_finalized_date, sum(AMOUNT) / 100.0 AS total_amount_paid
FROM default.stripe_payments
WHERE STATUS <> 'fail'
GROUP BY 1) p ON orders.ID = p.order_id
LEFT JOIN default.jaffle_shop_customers C ON orders.USER_ID = C.ID ),

customer_orders
AS (SELECT C.ID AS customer_id
, min(ORDER_DATE) AS first_order_date
, max(ORDER_DATE) AS most_recent_order_date
, count(ORDERS.ID) AS number_of_orders
FROM default.jaffle_shop_customers C
LEFT JOIN default.jaffle_shop_orders AS Orders
ON orders.USER_ID = C.ID
GROUP BY 1)

SELECT
p.*,
ROW_NUMBER() OVER (ORDER BY p.order_id) AS transactiON_seq,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY p.order_id) AS customer_sales_seq,
CASE WHEN c.first_order_date = p.order_placed_at
THEN 'new'
ELSE 'return' END AS nvsr,
x.clv_bad AS customer_lifetime_value,
c.first_order_date AS fdos
FROM paid_orders p
LEFT JOIN customer_orders AS c USING (customer_id)
LEFT OUTER JOIN
(
SELECT
p.order_id,
sum(t2.total_amount_paid) AS clv_bad
FROM paid_orders p
LEFT JOIN paid_orders t2 ON p.customer_id = t2.customer_id and p.order_id >= t2.order_id
GROUP BY 1
ORDER BY p.order_id
) x ON x.order_id = p.order_id
ORDER BY order_id
-- with statement
WITH

-- import CTEs

base_customers AS (

SELECT *FROM {{ source('jaffle_shop', 'customers') }}

),

orders AS (

SELECT *FROM {{ source('jaffle_shop', 'orders') }}

),

payments AS (

SELECT *FROM {{ source('stripe', 'payment') }}

),


-- Logical CTEs
customers as (

select

first_name || ' ' || last_name as name,
*

from base_customers

),

a as (

select

row_number() over (
partition by user_id
order by order_date, id
) as user_order_seq,
*

from orders

),

b as (

select

first_name || ' ' || last_name as name,
*

from base_customers

),

customer_order_history as (

select

b.id as customer_id,
b.name as full_name,
b.last_name as surname,
b.first_name as givenname,

min(order_date) as first_order_date,

min(case
when a.status not in ('returned','return_pending')
then order_date
end) as first_non_returned_order_date,

max(case
when a.status not in ('returned','return_pending')
then order_date
end) as most_recent_non_returned_order_date,

coalesce(max(user_order_seq),0) as order_count,

coalesce(count(case
when a.status != 'returned'
then 1 end),
0
) as non_returned_order_count,

sum(case
when a.status not in ('returned','return_pending')
then round(c.amount/100.0,2)
else 0
end) as total_lifetime_value,

sum(case
when a.status not in ('returned','return_pending')
then round(c.amount/100.0,2)
else 0
end)
/ nullif(count(case
when a.status not in ('returned','return_pending')
then 1 end),
0
) as avg_non_returned_order_value,

array_agg(distinct a.id) as order_ids

from a

join b
on a.user_id = b.id

left outer join payments as c
on a.id = c.orderid

where a.status not in ('pending') and c.status != 'fail'

group by b.id, b.name, b.last_name, b.first_name

),

-- Final CTEs
final as (

select

orders.id as order_id,
orders.user_id as customer_id,
last_name as surname,
first_name as givenname,
first_order_date,
order_count,
total_lifetime_value,
round(amount/100.0,2) as order_value_dollars,
orders.status as order_status,
payments.status as payment_status

from orders

join customers
on orders.user_id = customers.id

join customer_order_history
on orders.user_id = customer_order_history.customer_id

left outer join payments
on orders.id = payments.orderid

where payments.status != 'fail'

)

-- Simple Select Statement
select * from final
10 changes: 10 additions & 0 deletions models/order_payment_method_amounts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{% set payment_methods = ["bank_transfer", "credit_card", "gift_card"] %}

select
order_id,
{% for payment_method in payment_methods %}
sum(case when payment_method = '{{payment_method}}' then amount end) as {{payment_method}}_amount,
{% endfor %}
sum(amount) as total_amount
from {{ ref('stg_payments') }}
group by 1
7 changes: 7 additions & 0 deletions models/staging/jaffle_shop/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version: 2

sources:
- name: stripe
database: default
tables:
- name: payment
8 changes: 8 additions & 0 deletions models/staging/stripe/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2

sources:
- name: jaffle_shop
database: default
tables:
- name: customers
- name: orders
25 changes: 25 additions & 0 deletions models/staging/stripe/stg_payments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
with source as (

{#-
Normally we would select from the table here, but we are using seeds to load
our data in this project
#}
select * from {{ ref('raw_payments') }}

),

renamed as (

select
id as payment_id,
order_id,
payment_method,

-- `amount` is currently stored in cents, so we convert it to dollars
amount / 100 as amount

from source

)

select * from renamed
Loading

0 comments on commit 35c495c

Please sign in to comment.