From 5e272627f62abf5a369d4c9eab257969d49f463e Mon Sep 17 00:00:00 2001 From: Hamish Peebles Date: Thu, 12 Dec 2024 12:02:29 +0000 Subject: [PATCH] Include some entropy when choosing the storage bucket for a file (#7048) --- backend/canisters/storage_index/CHANGELOG.md | 1 + backend/canisters/storage_index/impl/src/model/buckets.rs | 6 ++++-- .../storage_index/impl/src/queries/allocated_bucket.rs | 5 ++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/canisters/storage_index/CHANGELOG.md b/backend/canisters/storage_index/CHANGELOG.md index 8f2b23fc7f..9977945e5e 100644 --- a/backend/canisters/storage_index/CHANGELOG.md +++ b/backend/canisters/storage_index/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Update the canister creation fee to 0.5T ([#6700](https://github.com/open-chat-labs/open-chat/pull/6700)) - Expose size of each virtual stable memory in metrics ([#6981](https://github.com/open-chat-labs/open-chat/pull/6981)) - Use `GroupedTimerJobQueue` to sync events to storage buckets ([#7045](https://github.com/open-chat-labs/open-chat/pull/7045)) +- Include some entropy when choosing the storage bucket for a file ([#7048](https://github.com/open-chat-labs/open-chat/pull/7048)) ## [[2.0.1419](https://github.com/open-chat-labs/open-chat/releases/tag/v2.0.1419-storage_index)] - 2024-10-28 diff --git a/backend/canisters/storage_index/impl/src/model/buckets.rs b/backend/canisters/storage_index/impl/src/model/buckets.rs index 5a7bc7dd21..9e17e7fd8c 100644 --- a/backend/canisters/storage_index/impl/src/model/buckets.rs +++ b/backend/canisters/storage_index/impl/src/model/buckets.rs @@ -49,12 +49,14 @@ impl Buckets { } } - pub fn allocate(&self, blob_hash: Hash) -> Option { + pub fn allocate(&self, blob_hash: Hash, entropy: u64) -> Option { let bucket_count = self.active_buckets.len(); if bucket_count == 0 { None } else { - let usize_from_hash = u64::from_le_bytes(blob_hash[..8].try_into().unwrap()) as usize; + let mut bucket_allocation_hash = blob_hash; + bucket_allocation_hash.rotate_left((entropy % 32) as usize); + let usize_from_hash = u64::from_le_bytes(bucket_allocation_hash[..8].try_into().unwrap()) as usize; // Use a modified modulo of the hash to slightly favour the first bucket // so that they don't all run out of space at the same time diff --git a/backend/canisters/storage_index/impl/src/queries/allocated_bucket.rs b/backend/canisters/storage_index/impl/src/queries/allocated_bucket.rs index 01ff29caeb..e53294eaf5 100644 --- a/backend/canisters/storage_index/impl/src/queries/allocated_bucket.rs +++ b/backend/canisters/storage_index/impl/src/queries/allocated_bucket.rs @@ -33,15 +33,14 @@ fn allocated_bucket_impl(args: Args, state: &RuntimeState) -> Response { }); } + let now = state.env.now(); let bucket = state .data .files .bucket_for_blob(args.file_hash) - .or_else(|| state.data.buckets.allocate(args.file_hash)); + .or_else(|| state.data.buckets.allocate(args.file_hash, now)); if let Some(canister_id) = bucket { - let now = state.env.now(); - Success(SuccessResult { canister_id, file_id: generate_file_id(