diff --git a/benchmarking/tpcds/queries/58.sql b/benchmarking/tpcds/queries/58.sql index c92ce111cc..b7738d03dd 100644 --- a/benchmarking/tpcds/queries/58.sql +++ b/benchmarking/tpcds/queries/58.sql @@ -11,7 +11,7 @@ WITH ss_items AS WHERE d_week_seq = (SELECT d_week_seq FROM date_dim - WHERE d_date = '2000-01-03')) + WHERE d_date = DATE '2000-01-03')) AND ss_sold_date_sk = d_date_sk GROUP BY i_item_id), cs_items AS @@ -27,7 +27,7 @@ WITH ss_items AS WHERE d_week_seq = (SELECT d_week_seq FROM date_dim - WHERE d_date = '2000-01-03')) + WHERE d_date = DATE '2000-01-03')) AND cs_sold_date_sk = d_date_sk GROUP BY i_item_id), ws_items AS @@ -43,7 +43,7 @@ WITH ss_items AS WHERE d_week_seq = (SELECT d_week_seq FROM date_dim - WHERE d_date = '2000-01-03')) + WHERE d_date = DATE '2000-01-03')) AND ws_sold_date_sk = d_date_sk GROUP BY i_item_id) SELECT ss_items.item_id, diff --git a/benchmarking/tpcds/queries/83.sql b/benchmarking/tpcds/queries/83.sql index fdc74f0028..0b4dfd95be 100644 --- a/benchmarking/tpcds/queries/83.sql +++ b/benchmarking/tpcds/queries/83.sql @@ -11,9 +11,9 @@ WITH sr_items AS WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim - WHERE d_date IN ('2000-06-30', - '2000-09-27', - '2000-11-17'))) + WHERE d_date IN (DATE'2000-06-30', + DATE'2000-09-27', + DATE'2000-11-17'))) AND sr_returned_date_sk = d_date_sk GROUP BY i_item_id), cr_items AS @@ -29,9 +29,9 @@ WITH sr_items AS WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim - WHERE d_date IN ('2000-06-30', - '2000-09-27', - '2000-11-17'))) + WHERE d_date IN (DATE'2000-06-30', + DATE'2000-09-27', + DATE'2000-11-17'))) AND cr_returned_date_sk = d_date_sk GROUP BY i_item_id), wr_items AS @@ -47,9 +47,9 @@ WITH sr_items AS WHERE d_week_seq IN (SELECT d_week_seq FROM date_dim - WHERE d_date IN ('2000-06-30', - '2000-09-27', - '2000-11-17'))) + WHERE d_date IN (DATE'2000-06-30', + DATE'2000-09-27', + DATE'2000-11-17'))) AND wr_returned_date_sk = d_date_sk GROUP BY i_item_id) SELECT sr_items.item_id , diff --git a/benchmarking/tpcds/queries/95.sql b/benchmarking/tpcds/queries/95.sql index 2a57ab3380..0537a2f54b 100644 --- a/benchmarking/tpcds/queries/95.sql +++ b/benchmarking/tpcds/queries/95.sql @@ -13,7 +13,7 @@ FROM web_sales ws1 , date_dim , customer_address , web_site -WHERE d_date BETWEEN '1999-02-01' AND cast('1999-04-02' AS date) +WHERE d_date BETWEEN DATE'1999-02-01' AND cast('1999-04-02' AS date) AND ws1.ws_ship_date_sk = d_date_sk AND ws1.ws_ship_addr_sk = ca_address_sk AND ca_state = 'IL' diff --git a/src/daft-dsl/src/expr/mod.rs b/src/daft-dsl/src/expr/mod.rs index dca5abe311..60fc08fc96 100644 --- a/src/daft-dsl/src/expr/mod.rs +++ b/src/daft-dsl/src/expr/mod.rs @@ -153,6 +153,8 @@ pub enum Expr { Subquery(Subquery), #[display("{_0}, {_1}")] InSubquery(ExprRef, Subquery), + #[display("{_0}")] + Exists(Subquery), } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Hash, Eq)] @@ -709,7 +711,9 @@ impl Expr { Self::Agg(agg_expr) => agg_expr.semantic_id(schema), Self::ScalarFunction(sf) => scalar_function_semantic_id(sf, schema), - Self::Subquery(..) | Self::InSubquery(..) => FieldID::new("__subquery__"), // todo: better/unique id + Self::Subquery(..) | Self::InSubquery(..) | Self::Exists(..) => { + FieldID::new("__subquery__") + } // todo: better/unique id } } @@ -719,6 +723,7 @@ impl Expr { Self::Column(..) => vec![], Self::Literal(..) => vec![], Self::Subquery(..) => vec![], + Self::Exists(..) => vec![], // One child. Self::Not(expr) @@ -753,7 +758,7 @@ impl Expr { pub fn with_new_children(&self, children: Vec) -> Self { match self { // no children - Self::Column(..) | Self::Literal(..) | Self::Subquery(..) => { + Self::Column(..) | Self::Literal(..) | Self::Subquery(..) | Self::Exists(..) => { assert!(children.is_empty(), "Should have no children"); self.clone() } @@ -990,6 +995,7 @@ impl Expr { Ok(first_field.clone()) } Self::InSubquery(expr, _) => Ok(Field::new(expr.name(), DataType::Boolean)), + Self::Exists(_) => Ok(Field::new("exists", DataType::Boolean)), } } @@ -1022,6 +1028,7 @@ impl Expr { Self::IfElse { if_true, .. } => if_true.name(), Self::Subquery(subquery) => subquery.name(), Self::InSubquery(expr, _) => expr.name(), + Self::Exists(subquery) => subquery.name(), } } @@ -1096,7 +1103,8 @@ impl Expr { | Expr::FillNull(..) | Expr::ScalarFunction { .. } | Expr::Subquery(..) - | Expr::InSubquery(..) => Err(io::Error::new( + | Expr::InSubquery(..) + | Expr::Exists(..) => Err(io::Error::new( io::ErrorKind::Other, "Unsupported expression for SQL translation", )), diff --git a/src/daft-dsl/src/optimization.rs b/src/daft-dsl/src/optimization.rs index 68d360cb00..e84035a572 100644 --- a/src/daft-dsl/src/optimization.rs +++ b/src/daft-dsl/src/optimization.rs @@ -35,7 +35,8 @@ pub fn requires_computation(e: &Expr) -> bool { | Expr::Between { .. } | Expr::IfElse { .. } | Expr::Subquery { .. } - | Expr::InSubquery { .. } => true, + | Expr::InSubquery { .. } + | Expr::Exists(..) => true, } } diff --git a/src/daft-logical-plan/src/ops/project.rs b/src/daft-logical-plan/src/ops/project.rs index 163acba2b9..93fe5aa375 100644 --- a/src/daft-logical-plan/src/ops/project.rs +++ b/src/daft-logical-plan/src/ops/project.rs @@ -199,7 +199,9 @@ fn replace_column_with_semantic_id( Transformed::yes(new_expr.into()) } else { match e.as_ref() { - Expr::Column(_) | Expr::Literal(_) | Expr::Subquery(_) => Transformed::no(e), + Expr::Column(_) | Expr::Literal(_) | Expr::Subquery(_) | Expr::Exists(_) => { + Transformed::no(e) + } Expr::Agg(agg_expr) => replace_column_with_semantic_id_aggexpr( agg_expr.clone(), subexprs_to_replace, diff --git a/src/daft-logical-plan/src/partitioning.rs b/src/daft-logical-plan/src/partitioning.rs index c33a90823d..78ac00b874 100644 --- a/src/daft-logical-plan/src/partitioning.rs +++ b/src/daft-logical-plan/src/partitioning.rs @@ -237,6 +237,7 @@ fn translate_clustering_spec_expr( }, Expr::Literal(_) => Ok(clustering_spec_expr.clone()), Expr::Subquery(_) => Ok(clustering_spec_expr.clone()), + Expr::Exists(_) => Ok(clustering_spec_expr.clone()), Expr::Alias(child, name) => { let newchild = translate_clustering_spec_expr(child, old_colname_to_new_colname)?; Ok(newchild.alias(name.clone())) diff --git a/src/daft-sql/src/planner.rs b/src/daft-sql/src/planner.rs index a35ba04847..1827c23147 100644 --- a/src/daft-sql/src/planner.rs +++ b/src/daft-sql/src/planner.rs @@ -1296,7 +1296,18 @@ impl SQLPlanner { }, ) } - SQLExpr::Exists { .. } => unsupported_sql_err!("EXISTS"), + SQLExpr::Exists { subquery, negated } => { + let mut this = self.clone(); + let subquery = this.plan_query(subquery)?; + let subquery = Subquery { + plan: subquery.build(), + }; + if *negated { + Ok(Expr::Exists(subquery).arced().not()) + } else { + Ok(Expr::Exists(subquery).arced()) + } + } SQLExpr::Subquery(subquery) => { let mut this = self.clone(); let subquery = this.plan_query(subquery)?; diff --git a/src/daft-table/src/lib.rs b/src/daft-table/src/lib.rs index adf490e9a5..82003f4e5f 100644 --- a/src/daft-table/src/lib.rs +++ b/src/daft-table/src/lib.rs @@ -596,6 +596,9 @@ impl Table { InSubquery(_expr, _subquery) => Err(DaftError::ComputeError( "IN should be optimized away before evaluation. This indicates a bug in the query optimizer.".to_string(), )), + Exists(_subquery) => Err(DaftError::ComputeError( + "EXISTS should be optimized away before evaluation. This indicates a bug in the query optimizer.".to_string(), + )), }?; if expected_field.name != series.field().name {