Skip to content

Commit

Permalink
add expression representation and refactor memo
Browse files Browse the repository at this point in the history
This commit adds the `src/expression` module which contains a very
simple representation of Cascades expressions.

The `Memo` trait interface and implementation have also changed, so that
they now correctly detect exact-match duplicates.

TODO(connortsui20): Add the duplicate detection to the other methods
that need them.
TODO(connortsui20): Add more tests.
  • Loading branch information
connortsui20 committed Nov 28, 2024
1 parent 0e54957 commit f92a48f
Show file tree
Hide file tree
Showing 8 changed files with 416 additions and 27 deletions.
121 changes: 121 additions & 0 deletions optd-mvp/src/expression/logical_expression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//! Definition of logical expressions / relations in the Cascades query optimization framework.
//!
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
//!
//! TODO figure out if each relation should be in a different submodule.
use crate::entities::*;
use serde::{Deserialize, Serialize};
use std::hash::{DefaultHasher, Hash, Hasher};

/// A logical expression (relation) in the Cascades framework.
///
/// Each variant wraps the struct that holds that relation's fields. Per the module docs, all of
/// those fields are currently placeholders.
#[derive(Clone, Debug)]
pub enum LogicalExpression {
/// A logical scan; see [`Scan`].
Scan(Scan),
/// A logical filter; see [`Filter`].
Filter(Filter),
/// A logical join; see [`Join`].
Join(Join),
}

/// Payload of a logical scan.
///
/// Derives `Serialize` / `Deserialize` so it can be converted to and from a JSON value.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Scan {
/// Placeholder field; presumably describes the schema of the scanned table (TODO confirm).
table_schema: String,
}

/// Payload of a logical filter.
///
/// Derives `Serialize` / `Deserialize` so it can be converted to and from a JSON value.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Filter {
/// ID of the child group (group IDs are plain `i32`s for now).
child: i32,
/// Placeholder field; presumably the filter predicate in string form (TODO confirm).
expression: String,
}

/// Payload of a logical join.
///
/// Derives `Serialize` / `Deserialize` so it can be converted to and from a JSON value.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Join {
/// ID of the left child group (group IDs are plain `i32`s for now).
left: i32,
/// ID of the right child group (group IDs are plain `i32`s for now).
right: i32,
/// Placeholder field; presumably the join condition in string form (TODO confirm).
expression: String,
}

/// Converts the database / JSON representation of a logical expression into an in-memory
/// [`LogicalExpression`].
///
/// The model's `kind` selects the variant (`0` = `Scan`, `1` = `Filter`, `2` = `Join`), and its
/// `data` is deserialized into that variant's struct.
///
/// # Panics
///
/// Panics if `data` cannot be deserialized into the struct selected by `kind`, or if `kind` is not
/// one of the values listed above.
///
/// TODO Use a macro instead.
impl From<logical_expression::Model> for LogicalExpression {
    fn from(value: logical_expression::Model) -> Self {
        match value.kind {
            0 => Self::Scan(
                serde_json::from_value(value.data)
                    .expect("unable to deserialize data into a logical `Scan`"),
            ),
            1 => Self::Filter(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a logical `Filter`"),
            ),
            2 => Self::Join(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a logical `Join`"),
            ),
            // Name the offending discriminant so an unexpected row is diagnosable from the panic
            // message alone.
            unknown => panic!("unknown logical expression kind: {unknown}"),
        }
    }
}

/// Converts an in-memory [`LogicalExpression`] into its database / JSON representation.
///
/// The variant determines `kind` (`Scan` = `0`, `Filter` = `1`, `Join` = `2`) and the variant's
/// struct is serialized into `data`. `fingerprint` is a hash over `kind` followed by `data`.
/// `id` and `group_id` are both set to `-1`; presumably these are placeholders to be filled in by
/// whoever stores the model (TODO confirm).
///
/// # Panics
///
/// Panics if the variant's struct cannot be serialized into a JSON value.
///
/// TODO Use a macro instead.
impl From<LogicalExpression> for logical_expression::Model {
    fn from(value: LogicalExpression) -> logical_expression::Model {
        // First reduce the expression to its discriminant and JSON payload.
        let (kind, data): (i16, serde_json::Value) = match value {
            LogicalExpression::Scan(scan) => (
                0,
                serde_json::to_value(scan).expect("unable to serialize logical `Scan`"),
            ),
            LogicalExpression::Filter(filter) => (
                1,
                serde_json::to_value(filter).expect("unable to serialize logical `Filter`"),
            ),
            LogicalExpression::Join(join) => (
                2,
                serde_json::to_value(join).expect("unable to serialize logical `Join`"),
            ),
        };

        // NOTE(review): the algorithm behind `DefaultHasher` is not guaranteed to stay the same
        // across Rust releases; confirm that is acceptable if fingerprints are persisted.
        let mut state = DefaultHasher::new();
        kind.hash(&mut state);
        data.hash(&mut state);
        // The `as` cast keeps all 64 bits and only reinterprets them as a signed integer.
        let fingerprint = state.finish() as i64;

        logical_expression::Model {
            id: -1,
            group_id: -1,
            fingerprint,
            kind,
            data,
        }
    }
}

#[cfg(test)]
pub use build::*;

/// Test-only helpers that build logical [`Expression`]s from their raw fields.
#[cfg(test)]
mod build {
    use super::*;
    use crate::expression::Expression;

    /// Builds an [`Expression::Logical`] holding a `Scan` with the given `table_schema`.
    pub fn scan(table_schema: String) -> Expression {
        let relation = LogicalExpression::Scan(Scan { table_schema });
        Expression::Logical(relation)
    }

    /// Builds an [`Expression::Logical`] holding a `Filter` whose child is `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let relation = LogicalExpression::Filter(Filter {
            child: child_group,
            expression,
        });
        Expression::Logical(relation)
    }

    /// Builds an [`Expression::Logical`] holding a `Join` of `left_group` and `right_group`.
    pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let relation = LogicalExpression::Join(Join {
            left: left_group,
            right: right_group,
            expression,
        });
        Expression::Logical(relation)
    }
}
62 changes: 62 additions & 0 deletions optd-mvp/src/expression/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//! In-memory representation of Cascades logical and physical expression / operators / relations.
//!
//! TODO more docs.
mod logical_expression;
pub use logical_expression::*;

mod physical_expression;
pub use physical_expression::*;

/// The in-memory representation of a Cascades expression.
///
/// An expression is either logical or physical; each variant wraps the corresponding type
/// re-exported from this module's submodules.
///
/// TODO more docs.
#[derive(Clone, Debug)]
pub enum Expression {
/// A logical expression; see [`LogicalExpression`].
Logical(LogicalExpression),
/// A physical expression; see [`PhysicalExpression`].
Physical(PhysicalExpression),
}

/// Converts the database / JSON representation of a logical expression into an in-memory one.
impl From<crate::entities::logical_expression::Model> for Expression {
fn from(value: crate::entities::logical_expression::Model) -> Self {
Self::Logical(value.into())
}
}

/// Converts the in-memory representation of a logical expression into the database / JSON version.
///
/// The wrapped [`LogicalExpression`] is unwrapped and converted via its own `From` implementation.
///
/// # Panics
///
/// This will panic if the [`Expression`] is [`Expression::Physical`].
impl From<Expression> for crate::entities::logical_expression::Model {
    fn from(value: Expression) -> Self {
        match value {
            Expression::Logical(expr) => expr.into(),
            Expression::Physical(_) => {
                panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression")
            }
        }
    }
}

/// Converts the database / JSON representation of a physical expression into an in-memory one.
impl From<crate::entities::physical_expression::Model> for Expression {
fn from(value: crate::entities::physical_expression::Model) -> Self {
Self::Physical(value.into())
}
}

/// Converts the in-memory representation of a physical expression into the database / JSON version.
///
/// The wrapped [`PhysicalExpression`] is unwrapped and converted via its own `From` implementation.
///
/// # Panics
///
/// This will panic if the [`Expression`] is [`Expression::Logical`].
impl From<Expression> for crate::entities::physical_expression::Model {
    fn from(value: Expression) -> Self {
        match value {
            Expression::Physical(expr) => expr.into(),
            Expression::Logical(_) => {
                panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression")
            }
        }
    }
}
121 changes: 121 additions & 0 deletions optd-mvp/src/expression/physical_expression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//! Definition of physical expressions / operators in the Cascades query optimization framework.
//!
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
//!
//! TODO figure out if each operator should be in a different submodule.
use crate::entities::*;
use serde::{Deserialize, Serialize};
use std::hash::{DefaultHasher, Hash, Hasher};

/// A physical expression (operator) in the Cascades framework.
///
/// Each variant wraps the struct that holds that operator's fields. Per the module docs, all of
/// those fields are currently placeholders.
#[derive(Clone, Debug)]
pub enum PhysicalExpression {
/// A physical table scan; see [`TableScan`].
TableScan(TableScan),
/// A physical filter; see [`PhysicalFilter`].
Filter(PhysicalFilter),
/// A physical hash join; see [`HashJoin`].
HashJoin(HashJoin),
}

/// Payload of a physical table scan.
///
/// Derives `Serialize` / `Deserialize` so it can be converted to and from a JSON value.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct TableScan {
/// Placeholder field; presumably describes the schema of the scanned table (TODO confirm).
table_schema: String,
}

/// Payload of a physical filter.
///
/// Derives `Serialize` / `Deserialize` so it can be converted to and from a JSON value.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct PhysicalFilter {
/// ID of the child group (group IDs are plain `i32`s for now).
child: i32,
/// Placeholder field; presumably the filter predicate in string form (TODO confirm).
expression: String,
}

/// Payload of a physical hash join.
///
/// Derives `Serialize` / `Deserialize` so it can be converted to and from a JSON value.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct HashJoin {
/// ID of the left child group (group IDs are plain `i32`s for now).
left: i32,
/// ID of the right child group (group IDs are plain `i32`s for now).
right: i32,
/// Placeholder field; presumably the join condition in string form (TODO confirm).
expression: String,
}

/// Converts the database / JSON representation of a physical expression into an in-memory
/// [`PhysicalExpression`].
///
/// The model's `kind` selects the variant (`0` = `TableScan`, `1` = `Filter`, `2` = `HashJoin`),
/// and its `data` is deserialized into that variant's struct.
///
/// # Panics
///
/// Panics if `data` cannot be deserialized into the struct selected by `kind`, or if `kind` is not
/// one of the values listed above.
///
/// TODO Use a macro instead.
impl From<physical_expression::Model> for PhysicalExpression {
    fn from(value: physical_expression::Model) -> Self {
        match value.kind {
            0 => Self::TableScan(
                serde_json::from_value(value.data)
                    .expect("unable to deserialize data into a physical `TableScan`"),
            ),
            1 => Self::Filter(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a physical `Filter`"),
            ),
            2 => Self::HashJoin(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a physical `HashJoin`"),
            ),
            // Name the offending discriminant so an unexpected row is diagnosable from the panic
            // message alone.
            unknown => panic!("unknown physical expression kind: {unknown}"),
        }
    }
}

/// Converts an in-memory [`PhysicalExpression`] into its database / JSON representation.
///
/// The variant determines `kind` (`TableScan` = `0`, `Filter` = `1`, `HashJoin` = `2`) and the
/// variant's struct is serialized into `data`. `fingerprint` is a hash over `kind` followed by
/// `data`. `id` and `group_id` are both set to `-1`; presumably these are placeholders to be
/// filled in by whoever stores the model (TODO confirm).
///
/// # Panics
///
/// Panics if the variant's struct cannot be serialized into a JSON value.
///
/// TODO Use a macro instead.
impl From<PhysicalExpression> for physical_expression::Model {
    fn from(value: PhysicalExpression) -> physical_expression::Model {
        // First reduce the expression to its discriminant and JSON payload.
        let (kind, data): (i16, serde_json::Value) = match value {
            PhysicalExpression::TableScan(scan) => (
                0,
                serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"),
            ),
            PhysicalExpression::Filter(filter) => (
                1,
                serde_json::to_value(filter).expect("unable to serialize physical `Filter`"),
            ),
            PhysicalExpression::HashJoin(join) => (
                2,
                serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"),
            ),
        };

        // NOTE(review): the algorithm behind `DefaultHasher` is not guaranteed to stay the same
        // across Rust releases; confirm that is acceptable if fingerprints are persisted.
        let mut state = DefaultHasher::new();
        kind.hash(&mut state);
        data.hash(&mut state);
        // The `as` cast keeps all 64 bits and only reinterprets them as a signed integer.
        let fingerprint = state.finish() as i64;

        physical_expression::Model {
            id: -1,
            group_id: -1,
            fingerprint,
            kind,
            data,
        }
    }
}

#[cfg(test)]
pub use build::*;

/// Test-only helpers that build physical [`Expression`]s from their raw fields.
#[cfg(test)]
mod build {
    use super::*;
    use crate::expression::Expression;

    /// Builds an [`Expression::Physical`] holding a `TableScan` with the given `table_schema`.
    pub fn table_scan(table_schema: String) -> Expression {
        let operator = PhysicalExpression::TableScan(TableScan { table_schema });
        Expression::Physical(operator)
    }

    /// Builds an [`Expression::Physical`] holding a `PhysicalFilter` whose child is `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let operator = PhysicalExpression::Filter(PhysicalFilter {
            child: child_group,
            expression,
        });
        Expression::Physical(operator)
    }

    /// Builds an [`Expression::Physical`] holding a `HashJoin` of `left_group` and `right_group`.
    pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let operator = PhysicalExpression::HashJoin(HashJoin {
            left: left_group,
            right: right_group,
            expression,
        });
        Expression::Physical(operator)
    }
}
2 changes: 2 additions & 0 deletions optd-mvp/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ mod entities;
mod memo;
use memo::MemoError;

mod expression;

/// The filename of the SQLite database for migration.
pub const DATABASE_FILENAME: &str = "sqlite.db";
/// The URL of the SQLite database for migration.
Expand Down
9 changes: 9 additions & 0 deletions optd-mvp/src/memo/interface.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//! This module defines the [`Memo`] trait, which defines the shared behavior of all memo tables
//! that can be used for query optimization in the Cascades framework.
use crate::OptimizerResult;
use thiserror::Error;

Expand Down Expand Up @@ -96,6 +99,9 @@ pub trait Memo {
/// [`MemoError::InvalidExpression`] error.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
///
/// FIXME: This needs to have a mechanism of reporting that a duplicate expression was found in
/// another group.
async fn add_logical_expression_to_group(
&self,
group_id: Self::GroupId,
Expand All @@ -114,6 +120,9 @@ pub trait Memo {
/// [`MemoError::InvalidExpression`] error.
///
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
///
/// FIXME: This needs to have a mechanism of reporting that a duplicate expression was found in
/// another group.
async fn add_physical_expression_to_group(
&self,
group_id: Self::GroupId,
Expand Down
Loading

0 comments on commit f92a48f

Please sign in to comment.