Skip to content

Commit

Permalink
add expression representation and refactor memo
Browse files Browse the repository at this point in the history
This commit adds the `src/expression` module which contains a very
simple representation of Cascades expressions.

The `Memo` trait interface and implementation have also changed: the memo
now correctly detects exact-match duplicates, and it no longer tracks
fingerprints for physical expressions (only for logical ones).

TODO: Add more tests.
TODO: Figure out how to test in CI.
  • Loading branch information
connortsui20 committed Nov 29, 2024
1 parent 0e54957 commit 24df49f
Show file tree
Hide file tree
Showing 19 changed files with 646 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ target/

# We will check in all code-generated entity files, as newer versions of `sea-orm-cli` might
# conflict with previous versions.
# **/entities
# **/entities
33 changes: 33 additions & 0 deletions optd-mvp/src/entities/fingerprint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0
use sea_orm::entity::prelude::*;

// Row model for the `fingerprint` table.
//
// Each row belongs to one logical expression (see the `belongs_to` relation on `Relation` in
// this file).
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "fingerprint")]
pub struct Model {
// Primary key of the row.
#[sea_orm(primary_key)]
pub id: i32,
// Foreign key referencing `logical_expression.id`.
pub logical_expression_id: i32,
// NOTE(review): presumably the same kind tag as `logical_expression.kind` -- confirm.
pub kind: i16,
// NOTE(review): presumably the fingerprint hash of the logical expression -- confirm.
pub hash: i64,
}

// Relations of the `fingerprint` entity.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
// `fingerprint.logical_expression_id` -> `logical_expression.id`; both updates and deletes of
// the referenced row cascade.
#[sea_orm(
belongs_to = "super::logical_expression::Entity",
from = "Column::LogicalExpressionId",
to = "super::logical_expression::Column::Id",
on_update = "Cascade",
on_delete = "Cascade"
)]
LogicalExpression,
}

// Resolves the related `logical_expression` entity through `Relation::LogicalExpression`.
impl Related<super::logical_expression::Entity> for Entity {
fn to() -> RelationDef {
Relation::LogicalExpression.def()
}
}

// Empty impl: no trait methods are overridden for this entity's `ActiveModel`.
impl ActiveModelBehavior for ActiveModel {}
9 changes: 8 additions & 1 deletion optd-mvp/src/entities/logical_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ pub struct Model {
#[sea_orm(primary_key)]
pub id: i32,
pub group_id: i32,
pub fingerprint: i64,
pub kind: i16,
pub data: Json,
}
Expand All @@ -23,10 +22,18 @@ pub enum Relation {
on_delete = "Cascade"
)]
CascadesGroup,
#[sea_orm(has_many = "super::fingerprint::Entity")]
Fingerprint,
#[sea_orm(has_many = "super::logical_children::Entity")]
LogicalChildren,
}

// Resolves the related `fingerprint` entities through `Relation::Fingerprint` (a `has_many`
// relation).
impl Related<super::fingerprint::Entity> for Entity {
fn to() -> RelationDef {
Relation::Fingerprint.def()
}
}

impl Related<super::logical_children::Entity> for Entity {
fn to() -> RelationDef {
Relation::LogicalChildren.def()
Expand Down
1 change: 1 addition & 0 deletions optd-mvp/src/entities/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
pub mod prelude;

pub mod cascades_group;
pub mod fingerprint;
pub mod logical_children;
pub mod logical_expression;
pub mod physical_children;
Expand Down
1 change: 0 additions & 1 deletion optd-mvp/src/entities/physical_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ pub struct Model {
#[sea_orm(primary_key)]
pub id: i32,
pub group_id: i32,
pub fingerprint: i64,
pub kind: i16,
pub data: Json,
}
Expand Down
3 changes: 1 addition & 2 deletions optd-mvp/src/entities/prelude.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0
#![allow(unused_imports)]

pub use super::cascades_group::Entity as CascadesGroup;
pub use super::fingerprint::Entity as Fingerprint;
pub use super::logical_children::Entity as LogicalChildren;
pub use super::logical_expression::Entity as LogicalExpression;
pub use super::physical_children::Entity as PhysicalChildren;
Expand Down
114 changes: 114 additions & 0 deletions optd-mvp/src/expression/logical_expression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//! Definition of logical expressions / relations in the Cascades query optimization framework.
//!
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
//!
//! TODO figure out if each relation should be in a different submodule.
use crate::entities::*;
use serde::{Deserialize, Serialize};

/// An in-memory logical expression / relation.
///
/// Each variant wraps the payload struct of one relation. The `From` conversions in this module
/// map the variants to the database `kind` values `0` (`Scan`), `1` (`Filter`) and `2` (`Join`).
#[derive(Clone, Debug)]
pub enum LogicalExpression {
Scan(Scan),
Filter(Filter),
Join(Join),
}

/// Payload of a logical scan, (de)serialized to and from the JSON `data` column.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Scan {
/// Placeholder field (see the module-level FIXME).
table_schema: String,
}

/// Payload of a logical filter, (de)serialized to and from the JSON `data` column.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Filter {
/// Group ID of the child (group IDs are plain `i32`s for now, see the module-level FIXME).
child: i32,
/// Placeholder field (see the module-level FIXME).
expression: String,
}

/// Payload of a logical join, (de)serialized to and from the JSON `data` column.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct Join {
/// Group ID of the left child (group IDs are plain `i32`s for now).
left: i32,
/// Group ID of the right child.
right: i32,
/// Placeholder field (see the module-level FIXME).
expression: String,
}

/// Converts a `logical_expression` database row into its in-memory representation.
///
/// The `kind` column selects the variant (`0` = `Scan`, `1` = `Filter`, `2` = `Join`) and the
/// JSON `data` column is deserialized into that variant's payload.
///
/// # Panics
///
/// Panics if `data` cannot be deserialized into the payload selected by `kind`, or if `kind` is
/// not one of the known logical expression kinds.
///
/// TODO Use a macro instead.
impl From<logical_expression::Model> for LogicalExpression {
    fn from(value: logical_expression::Model) -> Self {
        match value.kind {
            0 => Self::Scan(
                serde_json::from_value(value.data)
                    .expect("unable to deserialize data into a logical `Scan`"),
            ),
            1 => Self::Filter(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a logical `Filter`"),
            ),
            2 => Self::Join(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a logical `Join`"),
            ),
            // Name the offending value so a corrupted or newer row is diagnosable from the
            // panic message alone.
            kind => panic!("unknown logical expression kind `{kind}`"),
        }
    }
}

/// Converts an in-memory logical expression into a `logical_expression` database row.
///
/// The variant determines the `kind` tag (`Scan` = `0`, `Filter` = `1`, `Join` = `2`) and its
/// payload is serialized into the JSON `data` column. Both `id` and `group_id` are set to `-1`.
///
/// # Panics
///
/// Panics if the payload cannot be serialized to JSON.
///
/// TODO Use a macro instead.
impl From<LogicalExpression> for logical_expression::Model {
    fn from(value: LogicalExpression) -> logical_expression::Model {
        // Work out the kind tag and JSON payload first, then assemble the row once.
        let (kind, data) = match value {
            LogicalExpression::Scan(scan) => (
                0,
                serde_json::to_value(scan).expect("unable to serialize logical `Scan`"),
            ),
            LogicalExpression::Filter(filter) => (
                1,
                serde_json::to_value(filter).expect("unable to serialize logical `Filter`"),
            ),
            LogicalExpression::Join(join) => (
                2,
                serde_json::to_value(join).expect("unable to serialize logical `Join`"),
            ),
        };

        logical_expression::Model {
            id: -1,
            group_id: -1,
            kind,
            data,
        }
    }
}

#[cfg(test)]
pub use build::*;

#[cfg(test)]
mod build {
    //! Test-only helpers that construct logical expressions wrapped in [`Expression::Logical`].
    use super::*;
    use crate::expression::Expression;

    /// Builds a logical `Scan` expression.
    pub fn scan(table_schema: String) -> Expression {
        let scan = Scan { table_schema };
        Expression::Logical(LogicalExpression::Scan(scan))
    }

    /// Builds a logical `Filter` expression whose child is the group `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let filter = Filter {
            child: child_group,
            expression,
        };
        Expression::Logical(LogicalExpression::Filter(filter))
    }

    /// Builds a logical `Join` expression over the groups `left_group` and `right_group`.
    pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let join = Join {
            left: left_group,
            right: right_group,
            expression,
        };
        Expression::Logical(LogicalExpression::Join(join))
    }
}
62 changes: 62 additions & 0 deletions optd-mvp/src/expression/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//! In-memory representation of Cascades logical and physical expression / operators / relations.
//!
//! TODO more docs.
mod logical_expression;
pub use logical_expression::*;

mod physical_expression;
pub use physical_expression::*;

/// The representation of a Cascades expression.
///
/// An expression is either logical or physical. Conversions to and from the database row models
/// of both kinds are implemented via `From` in this module.
///
/// TODO more docs.
#[derive(Clone, Debug)]
pub enum Expression {
/// A logical expression / relation.
Logical(LogicalExpression),
/// A physical expression / operator.
Physical(PhysicalExpression),
}

/// Converts the database / JSON representation of a logical expression into an in-memory one.
impl From<crate::entities::logical_expression::Model> for Expression {
fn from(value: crate::entities::logical_expression::Model) -> Self {
Self::Logical(value.into())
}
}

/// Turns an in-memory logical expression into its database / JSON representation.
///
/// # Panics
///
/// Panics when the [`Expression`] is an [`Expression::Physical`].
impl From<Expression> for crate::entities::logical_expression::Model {
    fn from(value: Expression) -> Self {
        match value {
            Expression::Logical(logical) => logical.into(),
            Expression::Physical(_) => {
                panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression")
            }
        }
    }
}

/// Converts the database / JSON representation of a physical expression into an in-memory one.
impl From<crate::entities::physical_expression::Model> for Expression {
fn from(value: crate::entities::physical_expression::Model) -> Self {
Self::Physical(value.into())
}
}

/// Converts the in-memory representation of a physical expression into the database / JSON version.
///
/// # Panics
///
/// This will panic if the [`Expression`] is [`Expression::Logical`].
impl From<Expression> for crate::entities::physical_expression::Model {
    fn from(value: Expression) -> Self {
        // Only physical expressions have a physical row representation; anything else is a
        // caller bug.
        let Expression::Physical(expr) = value else {
            panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression");
        };

        expr.into()
    }
}
114 changes: 114 additions & 0 deletions optd-mvp/src/expression/physical_expression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//! Definition of physical expressions / operators in the Cascades query optimization framework.
//!
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
//!
//! TODO figure out if each operator should be in a different submodule.
use crate::entities::*;
use serde::{Deserialize, Serialize};

/// An in-memory physical expression / operator.
///
/// Each variant wraps the payload struct of one operator. The `From` conversions in this module
/// map the variants to the database `kind` values `0` (`TableScan`), `1` (`Filter`) and
/// `2` (`HashJoin`).
#[derive(Clone, Debug)]
pub enum PhysicalExpression {
TableScan(TableScan),
Filter(PhysicalFilter),
HashJoin(HashJoin),
}

/// Payload of a physical table scan, (de)serialized to and from the JSON `data` column.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct TableScan {
/// Placeholder field (see the module-level FIXME).
table_schema: String,
}

/// Payload of a physical filter, (de)serialized to and from the JSON `data` column.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct PhysicalFilter {
/// Group ID of the child (group IDs are plain `i32`s for now, see the module-level FIXME).
child: i32,
/// Placeholder field (see the module-level FIXME).
expression: String,
}

/// Payload of a physical hash join, (de)serialized to and from the JSON `data` column.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct HashJoin {
/// Group ID of the left child (group IDs are plain `i32`s for now).
left: i32,
/// Group ID of the right child.
right: i32,
/// Placeholder field (see the module-level FIXME).
expression: String,
}

/// Converts a `physical_expression` database row into its in-memory representation.
///
/// The `kind` column selects the variant (`0` = `TableScan`, `1` = `Filter`, `2` = `HashJoin`)
/// and the JSON `data` column is deserialized into that variant's payload.
///
/// # Panics
///
/// Panics if `data` cannot be deserialized into the payload selected by `kind`, or if `kind` is
/// not one of the known physical expression kinds.
///
/// TODO Use a macro instead.
impl From<physical_expression::Model> for PhysicalExpression {
    fn from(value: physical_expression::Model) -> Self {
        match value.kind {
            0 => Self::TableScan(
                serde_json::from_value(value.data)
                    .expect("unable to deserialize data into a physical `TableScan`"),
            ),
            1 => Self::Filter(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a physical `Filter`"),
            ),
            2 => Self::HashJoin(
                serde_json::from_value(value.data)
                    .expect("Unable to deserialize data into a physical `HashJoin`"),
            ),
            // Name the offending value so a corrupted or newer row is diagnosable from the
            // panic message alone.
            kind => panic!("unknown physical expression kind `{kind}`"),
        }
    }
}

/// Converts an in-memory physical expression into a `physical_expression` database row.
///
/// The variant determines the `kind` tag (`TableScan` = `0`, `Filter` = `1`, `HashJoin` = `2`)
/// and its payload is serialized into the JSON `data` column. Both `id` and `group_id` are set
/// to `-1`.
///
/// # Panics
///
/// Panics if the payload cannot be serialized to JSON.
///
/// TODO Use a macro instead.
impl From<PhysicalExpression> for physical_expression::Model {
    fn from(value: PhysicalExpression) -> physical_expression::Model {
        // Work out the kind tag and JSON payload first, then assemble the row once.
        let (kind, data) = match value {
            PhysicalExpression::TableScan(scan) => (
                0,
                serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"),
            ),
            PhysicalExpression::Filter(filter) => (
                1,
                serde_json::to_value(filter).expect("unable to serialize physical `Filter`"),
            ),
            PhysicalExpression::HashJoin(join) => (
                2,
                serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"),
            ),
        };

        physical_expression::Model {
            id: -1,
            group_id: -1,
            kind,
            data,
        }
    }
}

#[cfg(test)]
pub use build::*;

#[cfg(test)]
mod build {
    //! Test-only helpers that construct physical expressions wrapped in [`Expression::Physical`].
    use super::*;
    use crate::expression::Expression;

    /// Builds a physical `TableScan` expression.
    pub fn table_scan(table_schema: String) -> Expression {
        let scan = TableScan { table_schema };
        Expression::Physical(PhysicalExpression::TableScan(scan))
    }

    /// Builds a physical `Filter` expression whose child is the group `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let filter = PhysicalFilter {
            child: child_group,
            expression,
        };
        Expression::Physical(PhysicalExpression::Filter(filter))
    }

    /// Builds a physical `HashJoin` expression over the groups `left_group` and `right_group`.
    pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let join = HashJoin {
            left: left_group,
            right: right_group,
            expression,
        };
        Expression::Physical(PhysicalExpression::HashJoin(join))
    }
}
Loading

0 comments on commit 24df49f

Please sign in to comment.