Skip to content

Commit

Permalink
Reduce storage bucket memory usage (#7103)
Browse files Browse the repository at this point in the history
  • Loading branch information
hpeebles authored Dec 20, 2024
1 parent e27be5e commit 3bb8977
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 15 deletions.
6 changes: 6 additions & 0 deletions backend/canisters/storage_bucket/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Changed

- Reduce storage bucket memory usage ([#7103](https://github.com/open-chat-labs/open-chat/pull/7103))

## [[2.0.1532](https://github.com/open-chat-labs/open-chat/releases/tag/v2.0.1532-storage_bucket)] - 2024-12-19

### Changed

- Push any remaining events still queued in the old events system ([#7065](https://github.com/open-chat-labs/open-chat/pull/7065))

## [[2.0.1522](https://github.com/open-chat-labs/open-chat/releases/tag/v2.0.1522-storage_bucket)] - 2024-12-16
Expand Down
38 changes: 26 additions & 12 deletions backend/canisters/storage_bucket/impl/src/model/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ use crate::{calc_chunk_count, MAX_BLOB_SIZE_BYTES};
use candid::Principal;
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::collections::btree_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use storage_bucket_canister::upload_chunk_v2::Args as UploadChunkArgs;
use types::{AccessorId, CanisterId, FileAdded, FileId, FileMetaData, FileRemoved, Hash, TimestampMillis};
use utils::file_id::generate_file_id;
use utils::hasher::hash_bytes;

#[derive(Serialize, Deserialize, Default)]
pub struct Files {
files: HashMap<FileId, File>,
pending_files: HashMap<FileId, PendingFile>,
files: BTreeMap<FileId, File>,
pending_files: BTreeMap<FileId, PendingFile>,
reference_counts: ReferenceCounts,
accessors_map: AccessorsMap,
blobs: StableBlobStorage,
Expand All @@ -23,10 +23,15 @@ pub struct Files {

#[derive(Serialize, Deserialize, Clone)]
pub struct File {
#[serde(rename = "o", alias = "owner")]
pub owner: Principal,
#[serde(rename = "c", alias = "created")]
pub created: TimestampMillis,
pub accessors: HashSet<AccessorId>,
#[serde(rename = "a", alias = "accessors")]
pub accessors: BTreeSet<AccessorId>,
#[serde(rename = "h", alias = "hash")]
pub hash: Hash,
#[serde(rename = "m", alias = "mine_type")]
pub mime_type: String,
}

Expand Down Expand Up @@ -172,7 +177,7 @@ impl Files {
file_id: FileId,
canister_id: CanisterId,
file_id_seed: u128,
accessors: HashSet<AccessorId>,
accessors: BTreeSet<AccessorId>,
now: TimestampMillis,
) -> ForwardFileResult {
let (file, size) = match self.file_and_size(&file_id) {
Expand Down Expand Up @@ -379,7 +384,7 @@ impl Files {

#[derive(Serialize, Deserialize, Default)]
struct ReferenceCounts {
counts: HashMap<Hash, u32>,
counts: BTreeMap<Hash, u32>,
}

impl ReferenceCounts {
Expand Down Expand Up @@ -409,7 +414,7 @@ impl ReferenceCounts {

#[derive(Serialize, Deserialize, Default)]
struct AccessorsMap {
map: HashMap<AccessorId, HashSet<FileId>>,
map: BTreeMap<AccessorId, BTreeSet<FileId>>,
}

impl AccessorsMap {
Expand All @@ -435,23 +440,32 @@ impl AccessorsMap {
}
}

pub fn remove(&mut self, accessor_id: &AccessorId) -> Option<HashSet<FileId>> {
pub fn remove(&mut self, accessor_id: &AccessorId) -> Option<BTreeSet<FileId>> {
self.map.remove(accessor_id)
}
}

#[derive(Serialize, Deserialize)]
pub struct PendingFile {
#[serde(rename = "o", alias = "owner")]
pub owner: Principal,
#[serde(rename = "c", alias = "created")]
pub created: TimestampMillis,
#[serde(rename = "h", alias = "hash")]
pub hash: Hash,
#[serde(rename = "m", alias = "mime_type")]
pub mime_type: String,
pub accessors: HashSet<AccessorId>,
#[serde(rename = "a", alias = "accessors")]
pub accessors: BTreeSet<AccessorId>,
#[serde(rename = "u", alias = "chunk_size")]
pub chunk_size: u32,
#[serde(rename = "t", alias = "total_size")]
pub total_size: u64,
pub remaining_chunks: HashSet<u32>,
#[serde(with = "serde_bytes")]
#[serde(rename = "r", alias = "remaining_chunks")]
pub remaining_chunks: BTreeSet<u32>,
#[serde(rename = "b", alias = "bytes", with = "serde_bytes")]
pub bytes: Vec<u8>,
#[serde(rename = "e", alias = "expiry", skip_serializing_if = "Option::is_none")]
pub expiry: Option<TimestampMillis>,
}

Expand Down
11 changes: 8 additions & 3 deletions backend/canisters/storage_bucket/impl/src/model/users.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use candid::Principal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::BTreeMap;
use types::{FileId, RejectedReason};

#[derive(Serialize, Deserialize, Default)]
pub struct Users {
users: HashMap<Principal, UserRecord>,
users: BTreeMap<Principal, UserRecord>,
}

impl Users {
Expand Down Expand Up @@ -45,7 +45,7 @@ impl Users {

#[derive(Serialize, Deserialize, Default)]
pub struct UserRecord {
files_owned: HashMap<FileId, FileStatusInternal>,
files_owned: BTreeMap<FileId, FileStatusInternal>,
}

impl UserRecord {
Expand All @@ -64,13 +64,18 @@ impl UserRecord {

/// Status of a file owned by a user.
// Single-character serde renames shrink the serialized representation;
// the aliases keep previously-serialized data (full variant names) readable.
#[derive(Serialize, Deserialize)]
pub enum FileStatusInternal {
// Upload finished; payload records index-sync state (see `IndexSyncComplete`).
#[serde(rename = "c", alias = "Complete")]
Complete(IndexSyncComplete),
// Upload still in progress.
#[serde(rename = "u", alias = "Uploading")]
Uploading(IndexSyncComplete),
// File was rejected, with the reason.
#[serde(rename = "r", alias = "Rejected")]
Rejected(RejectedReason),
}

/// Whether syncing this file to the index has completed.
// NOTE(review): meaning inferred from the type name — confirm against usage.
// Single-character serde renames shrink the serialized representation;
// the aliases keep previously-serialized data readable.
#[derive(Serialize, Deserialize, Copy, Clone)]
pub enum IndexSyncComplete {
#[serde(rename = "y", alias = "Yes")]
Yes,
#[serde(rename = "n", alias = "No")]
No,
}

0 comments on commit 3bb8977

Please sign in to comment.