powdr-labs · chriseth · Nov 13, 2024 · Nov 13, 2024 · Nov 14, 2024 · Nov 14, 2024
diff --git a/executor/Cargo.toml b/executor/Cargo.toml
@@ -14,6 +14,7 @@ powdr-parser-util.workspace = true
 powdr-pil-analyzer.workspace = true
 powdr-jit-compiler.workspace = true
 
+auto_enums = "0.8.5"
 itertools = "0.13"
 log = { version = "0.4.17" }
 rayon = "1.7.0"

diff --git a/executor/src/witgen/data_structures/finalizable_data.rs b/executor/src/witgen/data_structures/finalizable_data.rs
@@ -3,6 +3,7 @@ use std::{
     ops::{Index, IndexMut},
 };
 
+use auto_enums::auto_enum;
 use bit_vec::BitVec;
 use itertools::Itertools;
 use powdr_ast::analyzed::{PolyID, PolynomialType};
@@ -13,7 +14,7 @@ use crate::witgen::rows::Row;
 /// Sequence of rows of field elements, stored in a compact form.
 /// Optimized for contiguous column IDs, but works with any combination.
 #[derive(Clone)]
-struct CompactData<T: FieldElement> {
+pub struct CompactData<T> {
     /// The ID of the first column used in the table.
     first_column_id: u64,
     /// The length of a row in the table.
@@ -26,7 +27,7 @@ struct CompactData<T: FieldElement> {
 
 impl<T: FieldElement> CompactData<T> {
     /// Creates a new empty compact data storage.
-    fn new(column_ids: &[PolyID]) -> Self {
+    pub fn new(column_ids: &[PolyID]) -> Self {
         let col_id_range = column_ids.iter().map(|id| id.id).minmax();
         let (first_column_id, last_column_id) = col_id_range.into_option().unwrap();
         Self {
@@ -37,28 +38,28 @@ impl<T: FieldElement> CompactData<T> {
         }
     }
 
+    /// Returns `true` if no cell data is stored.
-    fn is_empty(&self) -> bool {
+    pub fn is_empty(&self) -> bool {
         self.data.is_empty()
     }
 
     /// Returns the number of stored rows.
+    ///
+    /// Cells are kept in one flat vector with `column_count` entries per row,
+    /// so the row count is the cell count divided by the row width.
-    fn len(&self) -> usize {
+    pub fn len(&self) -> usize {
         self.data.len() / self.column_count
     }
 
     /// Truncates the data to `len` rows.
+    ///
+    /// Both the cell values and their "known" flags are shortened by the same
+    /// number of cells so that the two stay in sync.
-    fn truncate(&mut self, len: usize) {
+    pub fn truncate(&mut self, len: usize) {
         self.data.truncate(len * self.column_count);
         self.known_cells.truncate(len * self.column_count);
     }
 
+    /// Removes all rows, i.e. all cell values together with their "known" flags.
-    fn clear(&mut self) {
+    pub fn clear(&mut self) {
         self.data.clear();
-        self.known_cells.clear();
+        // `BitVec::clear` (bit-vec 0.6) only resets the bits to `false` and keeps
+        // the length, which would leave the flags out of sync with the (now empty)
+        // data. `truncate(0)` removes the bits in every bit-vec version.
+        self.known_cells.truncate(0);
     }
 
     /// Appends a non-finalized row to the data, turning it into a finalized row.
-    fn push(&mut self, row: Row<T>) {
+    pub fn push(&mut self, row: Row<T>) {
         self.data.reserve(self.column_count);
         self.known_cells.reserve(self.column_count);
         for col_id in self.first_column_id..(self.first_column_id + self.column_count as u64) {
@@ -75,11 +76,69 @@ impl<T: FieldElement> CompactData<T> {
         }
     }
 
-    fn get(&self, row: usize, col: u64) -> (T, bool) {
+    /// Appends `count` new rows whose cells are all zero and marked as unknown.
+    pub fn append_new_rows(&mut self, count: usize) {
+        let new_cells = count * self.column_count;
+        self.data.resize(self.data.len() + new_cells, T::zero());
+        // `BitVec::grow` takes the number of bits to ADD (unlike `Vec::resize`,
+        // which takes the new total length), so only the increment is passed.
+        self.known_cells.grow(new_cells, false);
+    }
+
+    /// Maps a row index and a column ID to the position of that cell in the
+    /// flat storage. `col` is the absolute column ID; it is made relative to
+    /// `first_column_id` before being combined with the row.
+    fn index(&self, row: usize, col: u64) -> usize {
         let col = col - self.first_column_id;
-        let idx = row * self.column_count + col as usize;
+        row * self.column_count + col as usize
+    }
+
+    /// Returns the value stored for column ID `col` in row `row`, together
+    /// with a flag telling whether that cell is marked as known.
+    pub fn get(&self, row: usize, col: u64) -> (T, bool) {
+        let idx = self.index(row, col);
         (self.data[idx], self.known_cells[idx])
     }
+
+    /// Stores `value` in the cell for column ID `col` in row `row` and marks
+    /// the cell as known.
+    ///
+    /// Panics if the cell is already known and holds a different value.
+    pub fn set(&mut self, row: usize, col: u64, value: T) {
+        let idx = self.index(row, col);
+        // A known cell may only be "overwritten" with the value it already has.
+        assert!(!self.known_cells[idx] || self.data[idx] == value);
+        self.data[idx] = value;
+        self.known_cells.set(idx, true);
+    }
+
+    /// Returns an iterator over the known cells of row `row`, yielding each
+    /// cell's column ID together with a reference to its value.
+    pub fn known_values_in_row(&self, row: usize) -> impl Iterator<Item = (u64, &T)> {
+        (0..self.column_count).filter_map(move |i| {
+            // `index` expects an absolute column ID (it subtracts
+            // `first_column_id` itself), not the offset within the row.
+            let col_id = self.first_column_id + i as u64;
+            let idx = self.index(row, col_id);
+            self.known_cells[idx].then(|| (col_id, &self.data[idx]))
+        })
+    }
+}
+
+/// A mutable reference into CompactData that is meant to be used
+/// only for a certain block of rows, starting from row index zero.
+/// It allows negative row indices as well.
+pub struct CompactDataRef<'a, T> {
+    /// The underlying storage that is read and written through this reference.
+    data: &'a mut CompactData<T>,
+    /// Index (inside `data`) of the row that is addressed as row zero.
+    row_offset: usize,
+}
+
+impl<'a, T: FieldElement> CompactDataRef<'a, T> {
+    /// Creates a new reference to the data, supplying the offset of the row
+    /// that is supposed to be "row zero".
+    pub fn new(data: &'a mut CompactData<T>, row_offset: usize) -> Self {
+        Self { data, row_offset }
+    }
+
+    /// Returns the value of column `col` in row `row` (relative to row zero).
+    ///
+    /// Panics if the cell is not marked as known.
+    pub fn get(&self, row: i32, col: u32) -> T {
+        let (v, known) = self.data.get(self.inner_row(row), col as u64);
+        assert!(known);
+        v
+    }
+
+    /// Sets column `col` in row `row` (relative to row zero) to `value`.
+    pub fn set(&mut self, row: i32, col: u32, value: T) {
+        self.data.set(self.inner_row(row), col as u64, value);
+    }
+
+    /// Translates a (possibly negative) relative row into a row index of the
+    /// underlying data.
+    ///
+    /// Panics if the result would be negative or does not fit into `usize`,
+    /// instead of silently truncating `row_offset` to `i32` or wrapping around.
+    fn inner_row(&self, row: i32) -> usize {
+        self.row_offset
+            // Widening `i32` to `isize` is lossless on 32- and 64-bit targets.
+            .checked_add_signed(row as isize)
+            .expect("row index outside of the underlying data")
+    }
 }
 
 /// A data structure that stores witness data.
@@ -215,6 +274,41 @@ impl<T: FieldElement> FinalizableData<T> {
         }
     }
 
+    /// Returns an iterator over the values known in that row together with the PolyIDs.
+    ///
+    /// The match arms produce iterators of different concrete types;
+    /// `#[auto_enum(Iterator)]` wraps them in a generated enum so that a single
+    /// `impl Iterator` can be returned.
+    #[auto_enum(Iterator)]
+    pub fn known_values_in_row(
+        &self,
+        row: usize,
+    ) -> impl Iterator<Item = (PolyID, T)> + '_ {
+        match self.location_of_row(row) {
+            Location::PreFinalized(local) => {
+                let row = &self.pre_finalized_data[local];
+                self.column_ids
+                    .iter()
+                    .filter_map(move |id| row.value(id).map(|v| (*id, v)))
+            }
+            Location::Finalized(local) => {
+                // The compact storage only yields raw column IDs, so they are
+                // turned back into `PolyID`s of type `Committed` here.
+                self.finalized_data
+                    .known_values_in_row(local)
+                    .map(|(id, v)| {
+                        (
+                            PolyID {
+                                id,
+                                ptype: PolynomialType::Committed,
+                            },
+                            *v,
+                        )
+                    })
+            }
+            Location::PostFinalized(local) => {
+                let row = &self.post_finalized_data[local];
+                self.column_ids
+                    .iter()
+                    .filter_map(move |id| row.value(id).map(|v| (*id, v)))
+            }
+        }
+    }
+
     pub fn last(&self) -> Option<&Row<T>> {
         match self.location_of_last_row()? {
             Location::PreFinalized(local) => self.pre_finalized_data.get(local),
@@ -283,6 +377,18 @@ impl<T: FieldElement> FinalizableData<T> {
         }
     }
 
+    /// Extends the finalized data by `count` rows that are zero-initialized and
+    /// marked as "unknown".
+    /// The returned `CompactDataRef` addresses the first of the newly added rows
+    /// as its "row zero".
+    ///
+    /// Panics if there are any non-finalized rows at the end.
+    pub fn append_new_finalized_rows(&mut self, count: usize) -> CompactDataRef<'_, T> {
+        assert!(self.post_finalized_data.is_empty());
+        // The current row count is exactly the index the first new row will get.
+        let first_new_row = self.finalized_data.len();
+        self.finalized_data.append_new_rows(count);
+        CompactDataRef::new(&mut self.finalized_data, first_new_row)
+    }
+
     /// Takes all data out of the [FinalizableData] and returns it as a list of columns.
     /// Columns are represented as a tuple of:
     /// - A list of values

diff --git a/executor/src/witgen/jit/jit_processor.rs b/executor/src/witgen/jit/jit_processor.rs
@@ -0,0 +1,63 @@
+use bit_vec::BitVec;
+use powdr_number::FieldElement;
+
+use crate::witgen::{
+    data_structures::finalizable_data::CompactDataRef,
+    machines::{LookupCell, MachineParts},
+    util::try_to_simple_poly,
+    EvalError, FixedData, MutableState, QueryCallback,
+};
+
+/// Processor intended to answer lookups into a block machine via the JIT
+/// compiler. The compiler is not hooked up yet: `can_answer_lookup` always
+/// returns `false`.
+pub struct JitProcessor<'a, T: FieldElement> {
+    /// Stored at construction; not read in this file yet (hence the underscore).
+    _fixed_data: &'a FixedData<'a, T>,
+    /// The parts of the machine; used to look up connections by their ID.
+    parts: MachineParts<'a, T>,
+    /// Stored at construction; not read in this file yet (hence the underscore).
+    _block_size: usize,
+    /// Row, relative to row zero of the `CompactDataRef` passed to
+    /// `process_lookup_direct`, that receives the lookup inputs.
+    latch_row: usize,
+}
+
+impl<'a, T: FieldElement> JitProcessor<'a, T> {
+    /// Creates a processor for the machine described by `parts`.
+    pub fn new(
+        fixed_data: &'a FixedData<'a, T>,
+        parts: MachineParts<'a, T>,
+        block_size: usize,
+        latch_row: usize,
+    ) -> Self {
+        Self {
+            _fixed_data: fixed_data,
+            parts,
+            _block_size: block_size,
+            latch_row,
+        }
+    }
+
+    /// Tells whether this processor is able to answer the given lookup.
+    /// Both arguments are ignored for now and the answer is always `false`.
+    pub fn can_answer_lookup(&self, _identity_id: u64, _known_inputs: &BitVec) -> bool {
+        // TODO call the JIT compiler here.
+        false
+    }
+
+    /// Copies the input cells of `values` into the latch row of `data`.
+    /// Everything beyond that is not implemented yet, so this currently ends
+    /// in `unimplemented!()`.
+    pub fn process_lookup_direct<'b, 'c, 'd, Q: QueryCallback<T>>(
+        &self,
+        _mutable_state: &'b mut MutableState<'a, 'b, T, Q>,
+        connection_id: u64,
+        values: Vec<LookupCell<'c, T>>,
+        mut data: CompactDataRef<'d, T>,
+    ) -> Result<bool, EvalError<T>> {
+        let latch_row = self.latch_row as i32;
+
+        // Transfer inputs; output cells are left untouched.
+        let connection_rhs = self.parts.connections[&connection_id].right;
+        for (expression, cell) in connection_rhs.expressions.iter().zip(&values) {
+            if let LookupCell::Input(&input) = cell {
+                let column = try_to_simple_poly(expression).unwrap();
+                data.set(latch_row, column.poly_id.id as u32, input);
+            }
+        }
+
+        // Just some code here to avoid "unused" warnings.
+        // This code will not be called as long as `can_answer_lookup` returns false.
+        data.get(latch_row, 0);
+
+        unimplemented!();
+    }
+}
diff --git a/executor/src/witgen/jit/mod.rs b/executor/src/witgen/jit/mod.rs
@@ -0,0 +1 @@
+pub mod jit_processor;
diff --git a/executor/src/witgen/machines/block_machine.rs b/executor/src/witgen/machines/block_machine.rs
@@ -9,6 +9,7 @@ use crate::witgen::analysis::detect_connection_type_and_block_size;
 use crate::witgen::block_processor::BlockProcessor;
 use crate::witgen::data_structures::finalizable_data::FinalizableData;
 use crate::witgen::data_structures::multiplicity_counter::MultiplicityCounter;
+use crate::witgen::jit::jit_processor::JitProcessor;
 use crate::witgen::processor::{OuterQuery, Processor, SolverState};
 use crate::witgen::rows::{Row, RowIndex, RowPair};
 use crate::witgen::sequence_iterator::{
@@ -70,6 +71,9 @@ pub struct BlockMachine<'a, T: FieldElement> {
     /// Cache that states the order in which to evaluate identities
     /// to make progress most quickly.
     processing_sequence_cache: ProcessingSequenceCache,
+    /// The JIT processor for this machine, i.e. the component that tries to generate
+    /// witgen code based on which elements of the connection are known.
+    jit_processor: JitProcessor<'a, T>,
     name: String,
     multiplicity_counter: MultiplicityCounter,
 }
@@ -130,6 +134,7 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> {
                 latch_row,
                 parts.identities.len(),
             ),
+            jit_processor: JitProcessor::new(fixed_data, parts.clone(), block_size, latch_row),
         })
     }
 }
@@ -337,24 +342,28 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> {
         RowIndex::from_i64(self.rows() as i64 - 1, self.degree)
     }
 
-    fn get_row(&self, row: RowIndex) -> &Row<T> {
-        // The first block is a dummy block corresponding to rows (-block_size, 0),
-        // so we have to add the block size to the row index.
-        &self.data[(row + self.block_size).into()]
-    }
-
     fn process_plookup_internal<'b, Q: QueryCallback<T>>(
         &mut self,
-        mutable_state: &mut MutableState<'a, 'b, T, Q>,
+        mutable_state: &'b mut MutableState<'a, 'b, T, Q>,
         identity_id: u64,
         caller_rows: &'b RowPair<'b, 'a, T>,
     ) -> EvalResult<'a, T> {
         let outer_query = OuterQuery::new(caller_rows, self.parts.connections[&identity_id]);
 
         log::trace!("Start processing block machine '{}'", self.name());
         log::trace!("Left values of lookup:");
-        for l in &outer_query.left {
-            log::trace!("  {}", l);
+        if log::log_enabled!(log::Level::Trace) {
+            for l in &outer_query.left {
+                log::trace!("  {}", l);
+            }
+        }
+
+        let known_inputs = outer_query.left.iter().map(|e| e.is_constant()).collect();
+        if self
+            .jit_processor
+            .can_answer_lookup(identity_id, &known_inputs)
+        {
+            return self.process_lookup_via_jit(mutable_state, identity_id, outer_query);
         }
 
         // TODO this assumes we are always using the same lookup for this machine.
@@ -412,6 +421,35 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> {
         }
     }
 
+    /// Answers a lookup through the JIT processor instead of the regular block
+    /// processing: appends one new block of finalized rows to `self.data` and
+    /// hands it to the JIT processor to be filled.
+    ///
+    /// Only called when `can_answer_lookup` returned `true` for this lookup.
+    /// Panics if the machine has no room for another block or if the JIT
+    /// processor reports failure.
+    fn process_lookup_via_jit<'b, Q: QueryCallback<T>>(
+        &mut self,
+        mutable_state: &'b mut MutableState<'a, 'b, T, Q>,
+        identity_id: u64,
+        outer_query: OuterQuery<'a, 'b, T>,
+    ) -> EvalResult<'a, T> {
+        // One slot per expression on the left-hand side of the lookup.
+        let mut input_output_data = vec![T::zero(); outer_query.left.len()];
+        let values = outer_query.prepare_for_direct_lookup(&mut input_output_data);
+
+        assert!(
+            (self.rows() + self.block_size as DegreeType) < self.degree,
+            "Block machine is full (this should have been checked before)"
+        );
+        // Finalize everything still in progress: `append_new_finalized_rows`
+        // requires that no non-finalized rows remain at the end.
+        self.data
+            .finalize_range(self.first_in_progress_row..self.data.len());
+        // The block appended below is finalized from the start, so the first
+        // in-progress row is the one right after it.
+        self.first_in_progress_row = self.data.len() + self.block_size;
+        //TODO can we properly access the last row of the dummy block?
+        let data = self.data.append_new_finalized_rows(self.block_size);
+
+        let success =
+            self.jit_processor
+                .process_lookup_direct(mutable_state, identity_id, values, data)?;
+        // The JIT processor claimed it can answer this lookup, so failing is a bug.
+        assert!(success);
+
+        Ok(outer_query
+            .direct_lookup_to_eval_result(input_output_data)?
+            .report_side_effect())
+    }
+
     fn process<'b, Q: QueryCallback<T>>(
         &self,
         mutable_state: &mut MutableState<'a, 'b, T, Q>,
@@ -462,7 +500,7 @@ impl<'a, T: FieldElement> BlockMachine<'a, T> {
         new_block
             .get_mut(0)
             .unwrap()
-            .merge_with(self.get_row(self.last_row_index()))
+            .merge_with_values(self.data.known_values_in_row(self.data.len() - 1))
             .map_err(|_| {
                 EvalError::Generic(
                     "Block machine overwrites existing value with different value!".to_string(),