Skip to content

Commit

Permalink
clusterlin: permit passing in existing linearization to Linearize
Browse files (browse the repository at this point in the history)
This implements the LIMO algorithm for linearizing by improving an existing
linearization. See
https://delvingbitcoin.org/t/limo-combining-the-best-parts-of-linearization-search-and-merging
for details.
  • Loading branch information
sipa committed Jul 25, 2024
1 parent 97d9871 commit 2854979
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 9 deletions.
6 changes: 4 additions & 2 deletions src/bench/cluster_linearize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void BenchLinearizePerIterWorstCase(ClusterIndex ntx, benchmark::Bench& bench)
});
}

/** Benchmark for linearization of a trivial linear graph using just ancestor sort.
/** Benchmark for linearization improvement of a trivial linear graph using just ancestor sort.
*
* Its goal is measuring how much time linearization may take without any search iterations.
*
Expand All @@ -124,8 +124,10 @@ void BenchLinearizeNoItersWorstCase(ClusterIndex ntx, benchmark::Bench& bench)
{
const auto depgraph = MakeLinearGraph<SetType>(ntx);
uint64_t rng_seed = 0;
std::vector<ClusterIndex> old_lin(ntx);
for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i;
bench.run([&] {
Linearize(depgraph, /*max_iterations=*/0, rng_seed++);
Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin);
});
}

Expand Down
29 changes: 25 additions & 4 deletions src/cluster_linearize.h
Original file line number Diff line number Diff line change
Expand Up @@ -663,23 +663,27 @@ class SearchCandidateFinder
}
};

/** Find a linearization for a cluster.
/** Find or improve a linearization for a cluster.
*
* @param[in] depgraph Dependency graph of the cluster to be linearized.
* @param[in] max_iterations Upper bound on the number of optimization steps that will be done.
* @param[in] rng_seed A random number seed to control search order. This prevents peers
* from predicting exactly which clusters would be hard for us to
* linearize.
* @param[in] old_linearization An existing linearization for the cluster (which must be
* topologically valid), or empty.
* @return A pair of:
* - The resulting linearization.
* - The resulting linearization. It is guaranteed to be at least as
* good (in the feerate diagram sense) as old_linearization.
* - A boolean indicating whether the result is guaranteed to be
* optimal.
*
* Complexity: O(N * min(max_iterations + N, 2^N)) where N=depgraph.TxCount().
*/
template<typename SetType>
std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed) noexcept
std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, Span<const ClusterIndex> old_linearization = {}) noexcept
{
Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount());
if (depgraph.TxCount() == 0) return {{}, true};

uint64_t iterations_left = max_iterations;
Expand All @@ -690,9 +694,17 @@ std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& de
linearization.reserve(depgraph.TxCount());
bool optimal = true;

/** Chunking of what remains of the old linearization. */
LinearizationChunking old_chunking(depgraph, old_linearization);

while (true) {
// Initialize best as the best remaining ancestor set.
// Find the highest-feerate prefix of the remainder of old_linearization.
SetInfo<SetType> best_prefix;
if (old_chunking.NumChunksLeft()) best_prefix = old_chunking.GetChunk(0);

// Then initialize best to be either the best remaining ancestor set, or the first chunk.
auto best = anc_finder.FindCandidateSet();
if (!best_prefix.feerate.IsEmpty() && best_prefix.feerate >= best.feerate) best = best_prefix;

// Invoke bounded search to update best, with up to half of our remaining iterations as
// limit.
Expand All @@ -703,6 +715,12 @@ std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& de

if (iterations_done_now == max_iterations_now) {
optimal = false;
// If the search result is not (guaranteed to be) optimal, run intersections to make
// sure we don't pick something that makes us unable to reach further diagram points
// of the old linearization.
if (old_chunking.NumChunksLeft() > 0) {
best = old_chunking.Intersect(best);
}
}

// Add to output in topological order.
Expand All @@ -712,6 +730,9 @@ std::pair<std::vector<ClusterIndex>, bool> Linearize(const DepGraph<SetType>& de
anc_finder.MarkDone(best.transactions);
if (anc_finder.AllDone()) break;
src_finder.MarkDone(best.transactions);
if (old_chunking.NumChunksLeft() > 0) {
old_chunking.MarkDone(best.transactions);
}
}

return {std::move(linearization), optimal};
Expand Down
27 changes: 24 additions & 3 deletions src/test/fuzz/cluster_linearize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,9 @@ class ExhaustiveCandidateFinder

/** A simple linearization algorithm.
*
* This matches Linearize() in interface and behavior, though with fewer optimizations, and using
* just SimpleCandidateFinder rather than AncestorCandidateFinder and SearchCandidateFinder.
* This matches Linearize() in interface and behavior, though with fewer optimizations, lacking
* the ability to pass in an existing linearization, and using just SimpleCandidateFinder rather
* than AncestorCandidateFinder and SearchCandidateFinder.
*/
template<typename SetType>
std::pair<std::vector<ClusterIndex>, bool> SimpleLinearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations)
Expand Down Expand Up @@ -614,12 +615,32 @@ FUZZ_TARGET(clusterlin_linearize)
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed;
} catch (const std::ios_base::failure&) {}

// Optionally construct an old linearization for it.
std::vector<ClusterIndex> old_linearization;
{
uint8_t have_old_linearization{0};
try {
reader >> have_old_linearization;
} catch(const std::ios_base::failure&) {}
if (have_old_linearization & 1) {
old_linearization = ReadLinearization(depgraph, reader);
SanityCheck(depgraph, old_linearization);
}
}

// Invoke Linearize().
iter_count &= 0x7ffff;
auto [linearization, optimal] = Linearize(depgraph, iter_count, rng_seed);
auto [linearization, optimal] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
SanityCheck(depgraph, linearization);
auto chunking = ChunkLinearization(depgraph, linearization);

// Linearization must always be as good as the old one, if provided.
if (!old_linearization.empty()) {
auto old_chunking = ChunkLinearization(depgraph, old_linearization);
auto cmp = CompareChunks(chunking, old_chunking);
assert(cmp >= 0);
}

// If the iteration count is sufficiently high, an optimal linearization must be found.
// Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is
// 2 * (2^n - 1)
Expand Down

0 comments on commit 2854979

Please sign in to comment.