-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FEAT] New Local Execution Model (#2437)
Prototype for new local execution model --------- Co-authored-by: Colin Ho <[email protected]> Co-authored-by: Colin Ho <[email protected]>
- Loading branch information
1 parent
c3d43cf
commit 0bd1d27
Showing
23 changed files
with
757 additions
and
14 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[dependencies] | ||
common-error = {path = "../common/error", default-features = false} | ||
daft-core = {path = "../daft-core", default-features = false} | ||
daft-dsl = {path = "../daft-dsl", default-features = false} | ||
daft-io = {path = "../daft-io", default-features = false} | ||
daft-micropartition = {path = "../daft-micropartition", default-features = false} | ||
daft-plan = {path = "../daft-plan", default-features = false} | ||
daft-scan = {path = "../daft-scan", default-features = false} | ||
dyn-clone = {workspace = true} | ||
futures = {workspace = true} | ||
lazy_static = {workspace = true} | ||
log = {workspace = true} | ||
pyo3 = {workspace = true, optional = true} | ||
snafu = {workspace = true} | ||
tokio = {workspace = true} | ||
tokio-stream = {workspace = true} | ||
|
||
[features] | ||
default = ["python"] | ||
python = ["dep:pyo3", "common-error/python", "daft-core/python", "daft-dsl/python", "daft-io/python", "daft-micropartition/python", "daft-plan/python", "daft-scan/python"] | ||
|
||
[package] | ||
edition = {workspace = true} | ||
name = "daft-local-execution" | ||
version = {workspace = true} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
use std::{collections::HashMap, sync::Arc}; | ||
|
||
use daft_dsl::Expr; | ||
use daft_micropartition::MicroPartition; | ||
use daft_plan::{ | ||
physical_ops::{Aggregate, Filter, InMemoryScan, Limit, Project, TabularScan}, | ||
PhysicalPlan, | ||
}; | ||
|
||
use crate::{ | ||
intermediate_ops::{filter::FilterOperator, project::ProjectOperator}, | ||
pipeline::Pipeline, | ||
sinks::{aggregate::AggregateSink, limit::LimitSink}, | ||
sources::{in_memory::InMemorySource, scan_task::ScanTaskSource}, | ||
}; | ||
|
||
pub fn physical_plan_to_pipeline( | ||
physical_plan: &Arc<PhysicalPlan>, | ||
psets: &HashMap<String, Vec<Arc<MicroPartition>>>, | ||
) -> Pipeline { | ||
match physical_plan.as_ref() { | ||
PhysicalPlan::InMemoryScan(InMemoryScan { in_memory_info, .. }) => { | ||
let partitions = psets | ||
.get(&in_memory_info.cache_key) | ||
.expect("Cache key not found"); | ||
Pipeline::new(Box::new(InMemorySource::new(partitions.clone()))) | ||
} | ||
PhysicalPlan::TabularScan(TabularScan { scan_tasks, .. }) => { | ||
Pipeline::new(Box::new(ScanTaskSource::new(scan_tasks.clone()))) | ||
} | ||
PhysicalPlan::Project(Project { | ||
input, projection, .. | ||
}) => { | ||
let current_pipeline = physical_plan_to_pipeline(input, psets); | ||
let proj_op = ProjectOperator::new(projection.clone()); | ||
current_pipeline.with_intermediate_operator(Box::new(proj_op)) | ||
} | ||
PhysicalPlan::Filter(Filter { input, predicate }) => { | ||
let current_pipeline = physical_plan_to_pipeline(input, psets); | ||
let filter_op = FilterOperator::new(predicate.clone()); | ||
current_pipeline.with_intermediate_operator(Box::new(filter_op)) | ||
} | ||
PhysicalPlan::Limit(Limit { limit, input, .. }) => { | ||
let current_pipeline = physical_plan_to_pipeline(input, psets); | ||
let sink = LimitSink::new(*limit as usize); | ||
let current_pipeline = current_pipeline.with_sink(Box::new(sink)); | ||
|
||
Pipeline::new(Box::new(current_pipeline)) | ||
} | ||
PhysicalPlan::Aggregate(Aggregate { | ||
input, | ||
aggregations, | ||
groupby, | ||
}) => { | ||
let current_pipeline = physical_plan_to_pipeline(input, psets); | ||
let sink = AggregateSink::new( | ||
aggregations | ||
.iter() | ||
.map(|agg| Arc::new(Expr::Agg(agg.clone()))) | ||
.collect::<Vec<_>>(), | ||
groupby.clone(), | ||
); | ||
let current_pipeline = current_pipeline.with_sink(Box::new(sink)); | ||
|
||
Pipeline::new(Box::new(current_pipeline)) | ||
} | ||
_ => { | ||
unimplemented!("Physical plan not supported: {}", physical_plan.name()); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use std::sync::Arc; | ||
|
||
use common_error::DaftResult; | ||
use daft_dsl::ExprRef; | ||
use daft_micropartition::MicroPartition; | ||
|
||
use super::intermediate_op::IntermediateOperator; | ||
|
||
#[derive(Clone)] | ||
pub struct FilterOperator { | ||
predicate: ExprRef, | ||
} | ||
|
||
impl FilterOperator { | ||
pub fn new(predicate: ExprRef) -> Self { | ||
Self { predicate } | ||
} | ||
} | ||
|
||
impl IntermediateOperator for FilterOperator { | ||
fn execute(&self, input: &Arc<MicroPartition>) -> DaftResult<Arc<MicroPartition>> { | ||
log::debug!("FilterOperator::execute"); | ||
let out = input.filter(&[self.predicate.clone()])?; | ||
Ok(Arc::new(out)) | ||
} | ||
|
||
fn name(&self) -> String { | ||
"FilterOperator".to_string() | ||
} | ||
} |
11 changes: 11 additions & 0 deletions
11
src/daft-local-execution/src/intermediate_ops/intermediate_op.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
use std::sync::Arc; | ||
|
||
use common_error::DaftResult; | ||
use daft_micropartition::MicroPartition; | ||
|
||
pub trait IntermediateOperator: dyn_clone::DynClone + Send + Sync { | ||
fn execute(&self, input: &Arc<MicroPartition>) -> DaftResult<Arc<MicroPartition>>; | ||
fn name(&self) -> String; | ||
} | ||
|
||
dyn_clone::clone_trait_object!(IntermediateOperator); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
pub mod filter; | ||
pub mod intermediate_op; | ||
pub mod project; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use std::sync::Arc; | ||
|
||
use common_error::DaftResult; | ||
use daft_dsl::ExprRef; | ||
use daft_micropartition::MicroPartition; | ||
|
||
use super::intermediate_op::IntermediateOperator; | ||
|
||
#[derive(Clone)] | ||
pub struct ProjectOperator { | ||
projection: Vec<ExprRef>, | ||
} | ||
|
||
impl ProjectOperator { | ||
pub fn new(projection: Vec<ExprRef>) -> Self { | ||
Self { projection } | ||
} | ||
} | ||
|
||
impl IntermediateOperator for ProjectOperator { | ||
fn execute(&self, input: &Arc<MicroPartition>) -> DaftResult<Arc<MicroPartition>> { | ||
log::debug!("ProjectOperator::execute"); | ||
let out = input.eval_expression_list(&self.projection)?; | ||
Ok(Arc::new(out)) | ||
} | ||
|
||
fn name(&self) -> String { | ||
"ProjectOperator".to_string() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
mod create_pipeline; | ||
mod intermediate_ops; | ||
mod pipeline; | ||
mod run; | ||
mod sinks; | ||
mod sources; | ||
|
||
use std::sync::Arc; | ||
|
||
use common_error::{DaftError, DaftResult}; | ||
use daft_micropartition::MicroPartition; | ||
pub use run::run_streaming; | ||
use snafu::Snafu; | ||
|
||
type Sender = tokio::sync::mpsc::Sender<DaftResult<Arc<MicroPartition>>>; | ||
type Receiver = tokio::sync::mpsc::Receiver<DaftResult<Arc<MicroPartition>>>; | ||
|
||
pub fn create_channel() -> (Sender, Receiver) { | ||
tokio::sync::mpsc::channel(1) | ||
} | ||
|
||
#[cfg(feature = "python")] | ||
use pyo3::prelude::*; | ||
|
||
#[derive(Debug, Snafu)] | ||
pub enum Error { | ||
#[snafu(display("Error joining spawned task: {}", source))] | ||
JoinError { source: tokio::task::JoinError }, | ||
#[snafu(display( | ||
"Sender of OneShot Channel Dropped before sending data over: {}", | ||
source | ||
))] | ||
OneShotRecvError { | ||
source: tokio::sync::oneshot::error::RecvError, | ||
}, | ||
} | ||
|
||
impl From<Error> for DaftError { | ||
fn from(err: Error) -> DaftError { | ||
DaftError::External(err.into()) | ||
} | ||
} | ||
|
||
/// Python module registration hook, compiled only with the `python` feature.
///
/// Currently a no-op: both arguments are ignored and `Ok(())` is returned.
#[cfg(feature = "python")]
pub fn register_modules(_py: Python, _parent: &PyModule) -> PyResult<()> {
    Ok(())
}
Oops, something went wrong.