From 11e2d7f0f12d05baa58022e5ba4d6e424ef90654 Mon Sep 17 00:00:00 2001
From: Ruediger Klaehn
Date: Mon, 3 Jun 2024 11:24:24 +0300
Subject: [PATCH 01/18] Make TempTag non-Clone

I found that in most places it is not needed, and if somebody needs a
temp tag to be Clone they can always wrap it in an Arc.

Also, Clone support complicates extending the concept of temp tags
across the rpc boundary. With this change it will be possible to use
the same TempTag type both in the low level blobs store API and in the
higher level blobs API of iroh.
---
 iroh-blobs/src/store.rs     |  4 +++-
 iroh-blobs/src/store/fs.rs  | 27 ++++++++++++++++---------
 iroh-blobs/src/store/mem.rs | 23 +++++++++++++---------
 iroh-blobs/src/util.rs      | 39 ++++++++++++++++++++-----------------
 4 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/iroh-blobs/src/store.rs b/iroh-blobs/src/store.rs
index 0e8f35d301..029e00a65c 100644
--- a/iroh-blobs/src/store.rs
+++ b/iroh-blobs/src/store.rs
@@ -66,7 +66,9 @@ impl TempCounterMap {

     fn dec(&mut self, value: &HashAndFormat) {
         let HashAndFormat { hash, format } = value;
-        let counters = self.0.get_mut(hash).unwrap();
+        let Some(counters) = self.0.get_mut(hash) else {
+            return;
+        };
         counters.dec(*format);
         if counters.is_empty() {
             self.0.remove(hash);
diff --git a/iroh-blobs/src/store/fs.rs b/iroh-blobs/src/store/fs.rs
index e0a4d192f0..88d8a96fb1 100644
--- a/iroh-blobs/src/store/fs.rs
+++ b/iroh-blobs/src/store/fs.rs
@@ -68,7 +68,7 @@ use std::{
     collections::{BTreeMap, BTreeSet},
     io::{self, BufReader, Read},
     path::{Path, PathBuf},
-    sync::{Arc, RwLock},
+    sync::{Arc, RwLock, Weak},
     time::{Duration, SystemTime},
 };

@@ -111,7 +111,7 @@ use crate::{
             BoxedProgressSender, IdGenerator, IgnoreProgressSender, ProgressSendError,
             ProgressSender,
         },
-        raw_outboard_size, LivenessTracker, MemOrFile,
+        raw_outboard_size, MemOrFile, TagCounter, TagDrop,
     },
     Tag, TempTag, IROH_BLOCK_SIZE,
 };
@@ -775,20 +775,23 @@ impl Store {
 struct StoreInner {
     tx: flume::Sender<ActorMessage>,
     temp: Arc<RwLock<TempCounterMap>>,
+    temp_weak: Weak<RwLock<TempCounterMap>>,
     handle: Option<std::thread::JoinHandle<()>>,
     path_options: Arc<PathOptions>,
 }

-impl LivenessTracker for RwLock<TempCounterMap> {
-    fn on_clone(&self, content: &HashAndFormat) {
-        self.write().unwrap().inc(content);
-    }
-
+impl TagDrop for RwLock<TempCounterMap> {
     fn on_drop(&self, content: &HashAndFormat) {
         self.write().unwrap().dec(content);
     }
 }

+impl TagCounter for RwLock<TempCounterMap> {
+    fn on_create(&self, content: &HashAndFormat) {
+        self.write().unwrap().inc(content);
+    }
+}
+
 impl StoreInner {
     fn new_sync(path: PathBuf, options: Options, rt: tokio::runtime::Handle) -> io::Result<Self> {
         tracing::trace!(
@@ -807,6 +810,7 @@ impl StoreInner {
         );
         std::fs::create_dir_all(path.parent().unwrap())?;
         let temp: Arc<RwLock<TempCounterMap>> = Default::default();
+        let temp_weak = Arc::downgrade(&temp);
         let (actor, tx) = Actor::new(&path, options.clone(), temp.clone(), rt)?;
         let handle = std::thread::Builder::new()
             .name("redb-actor".to_string())
@@ -819,6 +823,7 @@ impl StoreInner {
         Ok(Self {
             tx,
             temp,
+            temp_weak,
             handle: Some(handle),
             path_options: Arc::new(options.path),
         })
@@ -1049,7 +1054,8 @@ impl StoreInner {
     }

     fn temp_tag(&self, content: HashAndFormat) -> TempTag {
-        TempTag::new(content, Some(self.temp.clone()))
+        self.temp.on_create(&content);
+        TempTag::new(content, Some(self.temp_weak.clone()))
     }

     fn import_file_sync(
@@ -1717,7 +1723,10 @@ impl ActorState {
         let inline_outboard =
             outboard_size <= self.options.inline.max_outboard_inlined && outboard_size != 0;
         // from here on, everything related to the hash is protected by the temp tag
-        let tag = TempTag::new(content_id, Some(self.temp.clone()));
+        
self.temp.on_create(&content_id); + let temp: Arc = self.temp.clone(); + let liveness = Arc::downgrade(&temp); + let tag = TempTag::new(content_id, Some(liveness)); let hash = *tag.hash(); self.protected.insert(hash); // move the data file into place, or create a reference to it diff --git a/iroh-blobs/src/store/mem.rs b/iroh-blobs/src/store/mem.rs index 7b14b2a14b..950bbdd8cc 100644 --- a/iroh-blobs/src/store/mem.rs +++ b/iroh-blobs/src/store/mem.rs @@ -23,7 +23,7 @@ use crate::{ }, util::{ progress::{BoxedProgressSender, IdGenerator, IgnoreProgressSender, ProgressSender}, - LivenessTracker, + TagCounter, TagDrop, }, Tag, TempTag, IROH_BLOCK_SIZE, }; @@ -43,13 +43,7 @@ pub struct Store { #[derive(Debug, Default)] struct StoreInner(RwLock); -impl LivenessTracker for StoreInner { - fn on_clone(&self, inner: &HashAndFormat) { - tracing::trace!("temp tagging: {:?}", inner); - let mut state = self.0.write().unwrap(); - state.temp.inc(inner); - } - +impl TagDrop for StoreInner { fn on_drop(&self, inner: &HashAndFormat) { tracing::trace!("temp tag drop: {:?}", inner); let mut state = self.0.write().unwrap(); @@ -57,6 +51,14 @@ impl LivenessTracker for StoreInner { } } +impl TagCounter for StoreInner { + fn on_create(&self, inner: &HashAndFormat) { + tracing::trace!("temp tagging: {:?}", inner); + let mut state = self.0.write().unwrap(); + state.temp.inc(inner); + } +} + impl Store { /// Create a new in memory store pub fn new() -> Self { @@ -217,7 +219,10 @@ impl super::Store for Store { } fn temp_tag(&self, tag: HashAndFormat) -> TempTag { - TempTag::new(tag, Some(self.inner.clone())) + self.inner.on_create(&tag); + let temp: Arc = self.inner.clone(); + let liveness = Arc::downgrade(&temp); + TempTag::new(tag, Some(liveness)) } async fn gc_start(&self) -> io::Result<()> { diff --git a/iroh-blobs/src/util.rs b/iroh-blobs/src/util.rs index b540b88562..260c0687ef 100644 --- a/iroh-blobs/src/util.rs +++ b/iroh-blobs/src/util.rs @@ -4,7 +4,7 @@ use bytes::Bytes; use derive_more::{Debug, Display, From, Into}; use range_collections::range_set::RangeSetRange; use serde::{Deserialize, Serialize}; -use std::{borrow::Borrow, fmt, sync::Arc, time::SystemTime}; +use std::{borrow::Borrow, fmt, sync::Weak, time::SystemTime}; use crate::{store::Store, BlobFormat, Hash, HashAndFormat, IROH_BLOCK_SIZE}; @@ -179,6 +179,13 @@ pub enum SetTagOption { Named(Tag), } +/// Trait used from temp tags to notify an abstract store that a temp tag is +/// being dropped. +pub trait TagDrop: std::fmt::Debug + Send + Sync + 'static { + /// Called on drop + fn on_drop(&self, inner: &HashAndFormat); +} + /// A trait for things that can track liveness of blobs and collections. /// /// This trait works together with [TempTag] to keep track of the liveness of a @@ -187,11 +194,9 @@ pub enum SetTagOption { /// It is important to include the format in the liveness tracking, since /// protecting a collection means protecting the blob and all its children, /// whereas protecting a raw blob only protects the blob itself. -pub trait LivenessTracker: std::fmt::Debug + Send + Sync + 'static { - /// Called on clone - fn on_clone(&self, inner: &HashAndFormat); - /// Called on drop - fn on_drop(&self, inner: &HashAndFormat); +pub trait TagCounter: TagDrop { + /// Called on creation of a temp tag + fn on_create(&self, inner: &HashAndFormat); } /// A hash and format pair that is protected from garbage collection. 
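For orientation, here is a minimal, self-contained model of the lifecycle this commit sets up, with toy types standing in for the real iroh-blobs definitions (the real TagDrop also requires Debug): creation goes through an explicit on_create style call on the store, and the temp tag only holds a Weak reference back, so a drop that arrives after the store is gone becomes a silent no-op instead of a panic.

use std::sync::{Arc, Mutex, Weak};

trait TagDrop: Send + Sync + 'static {
    fn on_drop(&self, content: &u64);
}

struct Counter(Mutex<u64>);

impl TagDrop for Counter {
    fn on_drop(&self, _content: &u64) {
        *self.0.lock().unwrap() -= 1;
    }
}

struct TempTag {
    inner: u64,
    liveness: Option<Weak<dyn TagDrop>>,
}

impl Drop for TempTag {
    fn drop(&mut self) {
        if let Some(liveness) = self.liveness.take() {
            // if the store is already gone, there is nothing to notify
            if let Some(liveness) = liveness.upgrade() {
                liveness.on_drop(&self.inner);
            }
        }
    }
}

fn main() {
    let store = Arc::new(Counter(Mutex::new(0)));
    *store.0.lock().unwrap() += 1; // the caller's explicit "on_create"
    let weak = {
        let strong: Arc<dyn TagDrop> = store.clone();
        Arc::downgrade(&strong)
    };
    let tag = TempTag { inner: 0, liveness: Some(weak) };
    drop(store); // the store can now go away first ...
    drop(tag); // ... and the late drop is silently ignored
}

This is also why TempCounterMap::dec above no longer unwraps: a decrement for an entry that has already vanished is tolerated rather than a panic.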
@@ -203,7 +208,7 @@ pub struct TempTag { /// The hash and format we are pinning inner: HashAndFormat, /// liveness tracker - liveness: Option>, + liveness: Option>, } impl TempTag { @@ -214,10 +219,12 @@ impl TempTag { /// The caller is responsible for increasing the refcount on creation and to /// make sure that temp tags that are created between a mark phase and a sweep /// phase are protected. - pub fn new(inner: HashAndFormat, liveness: Option>) -> Self { - if let Some(liveness) = liveness.as_ref() { - liveness.on_clone(&inner); - } + pub fn new(inner: HashAndFormat, liveness: Option>) -> Self { + // if let Some(liveness) = liveness.as_ref() { + // if let Some(liveness) = liveness.upgrade() { + // liveness.on_clone(&inner); + // } + // } Self { inner, liveness } } @@ -245,16 +252,12 @@ impl TempTag { } } -impl Clone for TempTag { - fn clone(&self) -> Self { - Self::new(self.inner, self.liveness.clone()) - } -} - impl Drop for TempTag { fn drop(&mut self) { if let Some(liveness) = self.liveness.as_ref() { - liveness.on_drop(&self.inner); + if let Some(liveness) = liveness.upgrade() { + liveness.on_drop(&self.inner); + } } } } From a7842d5112941c5e2323f03047b2edf351739e87 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Mon, 3 Jun 2024 11:50:03 +0300 Subject: [PATCH 02/18] refactor: DRY the temp tag creation --- iroh-blobs/src/store/fs.rs | 21 +++++--------------- iroh-blobs/src/store/mem.rs | 5 +---- iroh-blobs/src/util.rs | 38 ++++++++++++++++++++++++------------- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/iroh-blobs/src/store/fs.rs b/iroh-blobs/src/store/fs.rs index 88d8a96fb1..5febe54457 100644 --- a/iroh-blobs/src/store/fs.rs +++ b/iroh-blobs/src/store/fs.rs @@ -68,7 +68,7 @@ use std::{ collections::{BTreeMap, BTreeSet}, io::{self, BufReader, Read}, path::{Path, PathBuf}, - sync::{Arc, RwLock, Weak}, + sync::{Arc, RwLock}, time::{Duration, SystemTime}, }; @@ -775,7 +775,6 @@ impl Store { struct StoreInner { tx: flume::Sender, temp: Arc>, - temp_weak: Weak, handle: Option>, path_options: Arc, } @@ -810,7 +809,6 @@ impl StoreInner { ); std::fs::create_dir_all(path.parent().unwrap())?; let temp: Arc> = Default::default(); - let temp_weak = Arc::downgrade(&temp); let (actor, tx) = Actor::new(&path, options.clone(), temp.clone(), rt)?; let handle = std::thread::Builder::new() .name("redb-actor".to_string()) @@ -823,7 +821,6 @@ impl StoreInner { Ok(Self { tx, temp, - temp_weak, handle: Some(handle), path_options: Arc::new(options.path), }) @@ -986,7 +983,7 @@ impl StoreInner { )) })?; std::fs::create_dir_all(parent)?; - let temp_tag = self.temp_tag(HashAndFormat::raw(hash)); + let temp_tag = self.temp.temp_tag(HashAndFormat::raw(hash)); let (tx, rx) = oneshot::channel(); self.tx .send_async(ActorMessage::Export { @@ -1053,11 +1050,6 @@ impl StoreInner { Ok(rx.await?) 
} - fn temp_tag(&self, content: HashAndFormat) -> TempTag { - self.temp.on_create(&content); - TempTag::new(content, Some(self.temp_weak.clone())) - } - fn import_file_sync( &self, path: PathBuf, @@ -1147,7 +1139,7 @@ impl StoreInner { }; progress.blocking_send(ImportProgress::OutboardDone { id, hash })?; // from here on, everything related to the hash is protected by the temp tag - let tag = self.temp_tag(HashAndFormat { hash, format }); + let tag = self.temp.temp_tag(HashAndFormat { hash, format }); let hash = *tag.hash(); // blocking send for the import let (tx, rx) = flume::bounded(1); @@ -1429,7 +1421,7 @@ impl super::Store for Store { } fn temp_tag(&self, value: HashAndFormat) -> TempTag { - self.0.temp_tag(value) + self.0.temp.temp_tag(value) } async fn shutdown(&self) { @@ -1723,10 +1715,7 @@ impl ActorState { let inline_outboard = outboard_size <= self.options.inline.max_outboard_inlined && outboard_size != 0; // from here on, everything related to the hash is protected by the temp tag - self.temp.on_create(&content_id); - let temp: Arc = self.temp.clone(); - let liveness = Arc::downgrade(&temp); - let tag = TempTag::new(content_id, Some(liveness)); + let tag = self.temp.temp_tag(content_id); let hash = *tag.hash(); self.protected.insert(hash); // move the data file into place, or create a reference to it diff --git a/iroh-blobs/src/store/mem.rs b/iroh-blobs/src/store/mem.rs index 950bbdd8cc..e10849e2b7 100644 --- a/iroh-blobs/src/store/mem.rs +++ b/iroh-blobs/src/store/mem.rs @@ -219,10 +219,7 @@ impl super::Store for Store { } fn temp_tag(&self, tag: HashAndFormat) -> TempTag { - self.inner.on_create(&tag); - let temp: Arc = self.inner.clone(); - let liveness = Arc::downgrade(&temp); - TempTag::new(tag, Some(liveness)) + self.inner.temp_tag(tag) } async fn gc_start(&self) -> io::Result<()> { diff --git a/iroh-blobs/src/util.rs b/iroh-blobs/src/util.rs index 260c0687ef..0a0047c0fd 100644 --- a/iroh-blobs/src/util.rs +++ b/iroh-blobs/src/util.rs @@ -4,7 +4,12 @@ use bytes::Bytes; use derive_more::{Debug, Display, From, Into}; use range_collections::range_set::RangeSetRange; use serde::{Deserialize, Serialize}; -use std::{borrow::Borrow, fmt, sync::Weak, time::SystemTime}; +use std::{ + borrow::Borrow, + fmt, + sync::{Arc, Weak}, + time::SystemTime, +}; use crate::{store::Store, BlobFormat, Hash, HashAndFormat, IROH_BLOCK_SIZE}; @@ -194,9 +199,21 @@ pub trait TagDrop: std::fmt::Debug + Send + Sync + 'static { /// It is important to include the format in the liveness tracking, since /// protecting a collection means protecting the blob and all its children, /// whereas protecting a raw blob only protects the blob itself. -pub trait TagCounter: TagDrop { +pub trait TagCounter: TagDrop + Sized { /// Called on creation of a temp tag fn on_create(&self, inner: &HashAndFormat); + + /// Get this as a weak reference for use in temp tags + fn as_weak(self: &Arc) -> Weak { + let on_drop: Arc = self.clone(); + Arc::downgrade(&on_drop) + } + + /// Create a new temp tag for the given hash and format + fn temp_tag(self: &Arc, inner: HashAndFormat) -> TempTag { + self.on_create(&inner); + TempTag::new(inner, Some(self.as_weak())) + } } /// A hash and format pair that is protected from garbage collection. 
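The two provided methods above are the heart of the refactor: every store now mints temp tags through one code path instead of hand-rolling the on_create plus downgrade dance at each call site. A compact sketch of the pattern with toy types (a tuple stands in for TempTag), mainly to show why the methods take self: &Arc<Self>:

use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Weak};

trait TagDrop: Send + Sync + 'static {
    fn on_drop(&self, content: &u64);
}

trait TagCounter: TagDrop + Sized {
    fn on_create(&self, content: &u64);

    // Arc<Self> is required so the store can hand out a Weak<dyn TagDrop>
    // pointing back at itself.
    fn as_weak(self: &Arc<Self>) -> Weak<dyn TagDrop> {
        let on_drop: Arc<dyn TagDrop> = self.clone();
        Arc::downgrade(&on_drop)
    }

    // one shared "bump the count, hand out a weak drop hook" implementation
    fn temp_tag(self: &Arc<Self>, content: u64) -> (u64, Weak<dyn TagDrop>) {
        self.on_create(&content);
        (content, self.as_weak())
    }
}

#[derive(Default)]
struct Store(AtomicU64);

impl TagDrop for Store {
    fn on_drop(&self, _content: &u64) {
        self.0.fetch_sub(1, Ordering::SeqCst);
    }
}

impl TagCounter for Store {
    fn on_create(&self, _content: &u64) {
        self.0.fetch_add(1, Ordering::SeqCst);
    }
}

fn main() {
    let store = Arc::new(Store::default());
    let (content, weak) = store.temp_tag(42);
    if let Some(hook) = weak.upgrade() {
        hook.on_drop(&content); // what TempTag::drop does
    }
    assert_eq!(store.0.load(Ordering::SeqCst), 0);
}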
@@ -207,8 +224,8 @@ pub trait TagCounter: TagDrop { pub struct TempTag { /// The hash and format we are pinning inner: HashAndFormat, - /// liveness tracker - liveness: Option>, + /// optional callback to call on drop + on_drop: Option>, } impl TempTag { @@ -219,13 +236,8 @@ impl TempTag { /// The caller is responsible for increasing the refcount on creation and to /// make sure that temp tags that are created between a mark phase and a sweep /// phase are protected. - pub fn new(inner: HashAndFormat, liveness: Option>) -> Self { - // if let Some(liveness) = liveness.as_ref() { - // if let Some(liveness) = liveness.upgrade() { - // liveness.on_clone(&inner); - // } - // } - Self { inner, liveness } + pub fn new(inner: HashAndFormat, on_drop: Option>) -> Self { + Self { inner, on_drop } } /// The hash of the pinned item @@ -248,13 +260,13 @@ impl TempTag { // set the liveness tracker to None, so that the refcount is not decreased // during drop. This means that the refcount will never reach 0 and the // item will not be gced until the end of the process. - self.liveness = None; + self.on_drop = None; } } impl Drop for TempTag { fn drop(&mut self) { - if let Some(liveness) = self.liveness.as_ref() { + if let Some(liveness) = self.on_drop.as_ref() { if let Some(liveness) = liveness.upgrade() { liveness.on_drop(&self.inner); } From 573051cc61d6c657ec54f28f3089f0685ef43c66 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Mon, 3 Jun 2024 12:04:11 +0300 Subject: [PATCH 03/18] refactor: some renaming --- iroh-blobs/src/util.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/iroh-blobs/src/util.rs b/iroh-blobs/src/util.rs index 0a0047c0fd..751886492c 100644 --- a/iroh-blobs/src/util.rs +++ b/iroh-blobs/src/util.rs @@ -266,9 +266,9 @@ impl TempTag { impl Drop for TempTag { fn drop(&mut self) { - if let Some(liveness) = self.on_drop.as_ref() { - if let Some(liveness) = liveness.upgrade() { - liveness.on_drop(&self.inner); + if let Some(on_drop) = self.on_drop.take() { + if let Some(on_drop) = on_drop.upgrade() { + on_drop.on_drop(&self.inner); } } } From b443becc65c76379cc32431e57d35738a4007529 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Mon, 3 Jun 2024 12:59:34 +0300 Subject: [PATCH 04/18] WIP add batch API --- iroh/src/client/blobs.rs | 101 +++++++++++++++++++++++++++++++-- iroh/src/node/rpc.rs | 117 +++++++++++++++++++++++++++++++++++---- iroh/src/rpc_protocol.rs | 71 ++++++++++++++++++++++++ 3 files changed, 273 insertions(+), 16 deletions(-) diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 61d075e7fc..64f4f10b82 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -5,31 +5,36 @@ use std::{ io, path::PathBuf, pin::Pin, - sync::Arc, + sync::{Arc, Mutex}, task::{Context, Poll}, }; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context as _, Result}; use bytes::Bytes; use futures_lite::{Stream, StreamExt}; -use futures_util::SinkExt; +use futures_util::{FutureExt, SinkExt}; use iroh_base::{node_addr::AddrInfoOptions, ticket::BlobTicket}; use iroh_blobs::{ export::ExportProgress as BytesExportProgress, format::collection::Collection, get::db::DownloadProgress as BytesDownloadProgress, store::{ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress}, - BlobFormat, Hash, Tag, + util::TagDrop, + BlobFormat, Hash, HashAndFormat, Tag, TempTag, }; use iroh_net::NodeAddr; use portable_atomic::{AtomicU64, Ordering}; -use quic_rpc::{client::BoxStreamSync, RpcClient, ServiceConnection}; +use quic_rpc::{ + 
client::{BoxStreamSync, UpdateSink}, + RpcClient, ServiceConnection, +}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf}; use tokio_util::io::{ReaderStream, StreamReader}; use tracing::warn; use crate::rpc_protocol::{ + BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, BatchUpdate, BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, @@ -368,6 +373,92 @@ where } } +/// A scope in which blobs can be added. +#[derive(derive_more::Debug)] +struct BatchInner> { + /// The id of the scope. + id: u64, + /// The rpc client. + rpc: RpcClient, + /// The stream to send drop + #[debug(skip)] + updates: Mutex>, +} + +/// + +#[derive(derive_more::Debug)] +pub struct Batch>(Arc>); + +impl> TagDrop for BatchInner { + fn on_drop(&self, content: &HashAndFormat) { + let mut updates = self.updates.lock().unwrap(); + updates.send(BatchUpdate::Drop(*content)).now_or_never(); + } +} + +impl> Batch { + /// Write a blob by passing bytes. + pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { + let input = futures_lite::stream::once(Ok(bytes.into())); + self.add_stream(input, format).await + } + + /// Write a blob by passing a stream of bytes. + pub async fn add_stream( + &self, + mut input: impl Stream> + Send + Unpin + 'static, + format: BlobFormat, + ) -> Result { + let (mut sink, mut stream) = self + .0 + .rpc + .bidi(BatchAddStreamRequest { + scope: self.0.id, + format, + }) + .await?; + while let Some(item) = input.next().await { + match item { + Ok(chunk) => { + sink.send(BatchAddStreamUpdate::Chunk(chunk)) + .await + .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; + } + Err(err) => { + warn!("Abort send, reason: failed to read from source stream: {err:?}"); + sink.send(BatchAddStreamUpdate::Abort) + .await + .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; + break; + } + } + } + sink.close() + .await + .map_err(|err| anyhow!("Failed to close the stream: {err:?}"))?; + // this is needed for the remote to notice that the stream is closed + drop(sink); + let mut res = None; + while let Some(item) = stream.next().await { + match item? { + BatchAddStreamResponse::Abort(cause) => { + Err(cause)?; + } + BatchAddStreamResponse::Result { hash } => { + res = Some(hash); + } + } + } + let hash = res.context("Missing answer")?; + let t: Arc = self.0.clone(); + Ok(TempTag::new( + HashAndFormat { hash, format }, + Some(Arc::downgrade(&t)), + )) + } +} + /// Whether to wrap the added data in a collection. 
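To make the intended call pattern concrete, here is roughly how the new Batch type is used once the batch() constructor lands on the blobs client later in this series; it condenses the tests added there, with anyhow for error handling:

use iroh_blobs::BlobFormat;

async fn demo() -> anyhow::Result<()> {
    let node = iroh::node::Node::memory().spawn().await?;
    let client = &node.client().blobs;
    let batch = client.batch().await?;
    // data added via a batch is only protected while its TempTag lives
    let data: &[u8] = b"hello";
    let tag = batch.add_bytes(data, BlobFormat::Raw).await?;
    println!("added blob {}", tag.hash());
    drop(tag); // from here on, gc is free to collect the blob again
    Ok(())
}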
#[derive(Debug, Serialize, Deserialize)] pub enum WrapOption { diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index ba03e10486..0f50a253e2 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -39,17 +39,19 @@ use crate::client::blobs::{ use crate::client::tags::TagInfo; use crate::client::NodeStatus; use crate::rpc_protocol::{ - BlobAddPathRequest, BlobAddPathResponse, BlobAddStreamRequest, BlobAddStreamResponse, - BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, - BlobDownloadResponse, BlobExportRequest, BlobExportResponse, BlobGetCollectionRequest, - BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, - BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobValidateRequest, - CreateCollectionRequest, CreateCollectionResponse, DeleteTagRequest, DocExportFileRequest, - DocExportFileResponse, DocImportFileRequest, DocImportFileResponse, DocSetHashRequest, - ListTagsRequest, NodeAddrRequest, NodeConnectionInfoRequest, NodeConnectionInfoResponse, - NodeConnectionsRequest, NodeConnectionsResponse, NodeIdRequest, NodeRelayRequest, - NodeShutdownRequest, NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, NodeWatchRequest, - NodeWatchResponse, Request, RpcService, SetTagOption, + BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, BatchCreateRequest, + BatchCreateResponse, BatchUpdate, BlobAddPathRequest, BlobAddPathResponse, + BlobAddStreamRequest, BlobAddStreamResponse, BlobAddStreamUpdate, BlobConsistencyCheckRequest, + BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, BlobExportRequest, + BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, + BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, + BlobReadAtResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, + DeleteTagRequest, DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, + DocImportFileResponse, DocSetHashRequest, ListTagsRequest, NodeAddrRequest, + NodeConnectionInfoRequest, NodeConnectionInfoResponse, NodeConnectionsRequest, + NodeConnectionsResponse, NodeIdRequest, NodeRelayRequest, NodeShutdownRequest, + NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, NodeWatchRequest, NodeWatchResponse, + Request, RpcService, SetTagOption, }; use super::NodeInner; @@ -99,6 +101,11 @@ impl Handler { } CreateCollection(msg) => chan.rpc(msg, handler, Self::create_collection).await, BlobGetCollection(msg) => chan.rpc(msg, handler, Self::blob_get_collection).await, + BatchAddStreamRequest(msg) => { + chan.bidi_streaming(msg, handler, Self::batch_add_stream) + .await + } + BatchAddStreamUpdate(_msg) => Err(RpcServerError::UnexpectedUpdateMessage), ListTags(msg) => { chan.server_streaming(msg, handler, Self::blob_list_tags) .await @@ -131,6 +138,8 @@ impl Handler { .await } BlobAddStreamUpdate(_msg) => Err(RpcServerError::UnexpectedUpdateMessage), + BatchCreate(msg) => chan.bidi_streaming(msg, handler, Self::batch_create).await, + BatchUpdate(_msg) => Err(RpcServerError::UnexpectedUpdateMessage), AuthorList(msg) => { chan.server_streaming(msg, handler, |handler, req| { handler.inner.sync.author_list(req) @@ -840,6 +849,92 @@ impl Handler { }) } + fn batch_create( + self, + _: BatchCreateRequest, + mut updates: impl Stream + Send + Unpin + 'static, + ) -> impl Stream { + let scope_id = 0; + // let scope_id = self.inner.temp_tags.lock().unwrap().create(); + tokio::spawn(async move { + while let Some(item) = 
updates.next().await { + match item { + BatchUpdate::Drop(content) => { + // println!("dropping tag {} {}", scope_id, tag_id); + // self.inner + // .temp_tags + // .lock() + // .unwrap() + // .remove_one(scope_id, tag_id); + } + } + } + println!("dropping scope {}", scope_id); + // self.inner.temp_tags.lock().unwrap().remove(scope_id); + }); + futures_lite::stream::once(BatchCreateResponse::Id(scope_id)) + } + + fn batch_add_stream( + self, + msg: BatchAddStreamRequest, + stream: impl Stream + Send + Unpin + 'static, + ) -> impl Stream { + let (tx, rx) = flume::bounded(32); + let this = self.clone(); + + self.rt().spawn_pinned(|| async move { + if let Err(err) = this.batch_add_stream0(msg, stream, tx.clone()).await { + tx.send_async(BatchAddStreamResponse::Abort(err.into())) + .await + .ok(); + } + }); + rx.into_stream() + } + + async fn batch_add_stream0( + self, + msg: BatchAddStreamRequest, + stream: impl Stream + Send + Unpin + 'static, + progress: flume::Sender, + ) -> anyhow::Result<()> { + println!("batch_add_stream0"); + let progress = FlumeProgressSender::new(progress); + + let stream = stream.map(|item| match item { + BatchAddStreamUpdate::Chunk(chunk) => Ok(chunk), + BatchAddStreamUpdate::Abort => { + Err(io::Error::new(io::ErrorKind::Interrupted, "Remote abort")) + } + }); + + let import_progress = progress.clone().with_filter_map(move |x| match x { + _ => None, + }); + println!("collecting stream"); + let items: Vec<_> = stream.collect().await; + println!("stream collected"); + let stream = futures_lite::stream::iter(items.into_iter()); + let (temp_tag, _len) = self + .inner + .db + .import_stream(stream, BlobFormat::Raw, import_progress) + .await?; + println!("stream imported {:?}", temp_tag.inner().hash); + let hash = temp_tag.inner().hash; + // let tag = self + // .inner + // .temp_tags + // .lock() + // .unwrap() + // .create_one(msg.scope, temp_tag); + progress + .send(BatchAddStreamResponse::Result { hash }) + .await?; + Ok(()) + } + fn blob_add_stream( self, msg: BlobAddStreamRequest, diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index 7bfb5d60b3..1117773e0f 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -17,6 +17,7 @@ use iroh_blobs::{ format::collection::Collection, store::{BaoBlobSize, ConsistencyCheckProgress}, util::Tag, + HashAndFormat, }; use iroh_net::{ endpoint::{ConnectionInfo, NodeAddr}, @@ -53,6 +54,33 @@ use crate::{ }; pub use iroh_blobs::util::SetTagOption; +/// Request to create a new scope for temp tags +#[derive(Debug, Serialize, Deserialize)] +pub struct BatchCreateRequest; + +/// Update to a temp tag scope +#[derive(Debug, Serialize, Deserialize)] +pub enum BatchUpdate { + /// Drop of a remote temp tag + Drop(HashAndFormat), +} + +/// Response to a temp tag scope request +#[derive(Debug, Serialize, Deserialize)] +pub enum BatchCreateResponse { + /// We got the id of the scope + Id(u64), +} + +impl Msg for BatchCreateRequest { + type Pattern = BidiStreaming; +} + +impl BidiStreamingMsg for BatchCreateRequest { + type Update = BatchUpdate; + type Response = BatchCreateResponse; +} + /// A request to the node to provide the data at the given path /// /// Will produce a stream of [`AddProgress`] messages. 
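The interesting wrinkle in this protocol is how a BatchUpdate::Drop gets produced at all: TempTag fires its notification from a synchronous Drop impl, while the rpc update sink is async. The client code earlier in this patch bridges the two with now_or_never(), a best-effort send that never blocks. A sketch of that bridge, with a tokio channel standing in for the quic-rpc update sink:

use futures_util::FutureExt;
use std::sync::Mutex;

#[derive(Debug)]
enum BatchUpdate {
    Drop(u64), // stand-in for HashAndFormat
}

struct Updates(Mutex<tokio::sync::mpsc::Sender<BatchUpdate>>);

impl Updates {
    // called from TempTag::drop, which cannot await
    fn on_drop(&self, content: u64) {
        let updates = self.0.lock().unwrap();
        // fire the send if there is room; if it is lost, the server still
        // cleans up when the whole batch scope is dropped
        let _ = updates.send(BatchUpdate::Drop(content)).now_or_never();
    }
}

fn main() {
    let (tx, mut rx) = tokio::sync::mpsc::channel(16);
    let updates = Updates(Mutex::new(tx));
    updates.on_drop(7);
    assert!(matches!(rx.try_recv(), Ok(BatchUpdate::Drop(7))));
}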
@@ -1015,6 +1043,40 @@ impl BidiStreamingMsg for BlobAddStreamRequest { #[derive(Debug, Serialize, Deserialize, derive_more::Into)] pub struct BlobAddStreamResponse(pub AddProgress); +/// Write a blob from a byte stream +#[derive(Serialize, Deserialize, Debug)] +pub struct BatchAddStreamRequest { + /// What format to use for the blob + pub format: BlobFormat, + /// Scope to create the temp tag in + pub scope: u64, +} + +/// Write a blob from a byte stream +#[derive(Serialize, Deserialize, Debug)] +pub enum BatchAddStreamUpdate { + /// A chunk of stream data + Chunk(Bytes), + /// Abort the request due to an error on the client side + Abort, +} + +impl Msg for BatchAddStreamRequest { + type Pattern = BidiStreaming; +} + +impl BidiStreamingMsg for BatchAddStreamRequest { + type Update = BatchAddStreamUpdate; + type Response = BatchAddStreamResponse; +} + +/// Wrapper around [`AddProgress`]. +#[derive(Debug, Serialize, Deserialize)] +pub enum BatchAddStreamResponse { + Abort(RpcError), + Result { hash: Hash }, +} + /// Get stats for the running Iroh node #[derive(Serialize, Deserialize, Debug)] pub struct NodeStatsRequest {} @@ -1072,6 +1134,11 @@ pub enum Request { CreateCollection(CreateCollectionRequest), BlobGetCollection(BlobGetCollectionRequest), + BatchCreate(BatchCreateRequest), + BatchUpdate(BatchUpdate), + BatchAddStreamRequest(BatchAddStreamRequest), + BatchAddStreamUpdate(BatchAddStreamUpdate), + DeleteTag(DeleteTagRequest), ListTags(ListTagsRequest), @@ -1133,6 +1200,10 @@ pub enum Response { CreateCollection(RpcResult), BlobGetCollection(RpcResult), + BatchCreateResponse(BatchCreateResponse), + BatchRequest(BatchCreateRequest), + BatchAddStream(BatchAddStreamResponse), + ListTags(TagInfo), DeleteTag(RpcResult<()>), From 23c90d300a6b4a66f799b7dfa0f8eedd10a1f545 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Mon, 3 Jun 2024 14:02:59 +0300 Subject: [PATCH 05/18] WIP make batch API work and add some tests --- iroh-blobs/src/store/fs.rs | 4 ++ iroh-blobs/src/store/mem.rs | 4 ++ iroh-blobs/src/store/readonly_mem.rs | 6 ++- iroh-blobs/src/store/traits.rs | 5 ++- iroh-blobs/src/util.rs | 8 +++- iroh/src/client/blobs.rs | 22 ++++++---- iroh/src/node.rs | 62 +++++++++++++++++++++++++++- iroh/src/node/builder.rs | 2 +- iroh/src/node/rpc.rs | 37 +++++++---------- iroh/tests/batch.rs | 54 ++++++++++++++++++++++++ 10 files changed, 171 insertions(+), 33 deletions(-) create mode 100644 iroh/tests/batch.rs diff --git a/iroh-blobs/src/store/fs.rs b/iroh-blobs/src/store/fs.rs index 5febe54457..6308d58c61 100644 --- a/iroh-blobs/src/store/fs.rs +++ b/iroh-blobs/src/store/fs.rs @@ -1424,6 +1424,10 @@ impl super::Store for Store { self.0.temp.temp_tag(value) } + fn tag_drop(&self) -> Option<&dyn TagDrop> { + Some(self.0.temp.as_ref()) + } + async fn shutdown(&self) { self.0.shutdown().await; } diff --git a/iroh-blobs/src/store/mem.rs b/iroh-blobs/src/store/mem.rs index e10849e2b7..d98af09f04 100644 --- a/iroh-blobs/src/store/mem.rs +++ b/iroh-blobs/src/store/mem.rs @@ -222,6 +222,10 @@ impl super::Store for Store { self.inner.temp_tag(tag) } + fn tag_drop(&self) -> Option<&dyn TagDrop> { + Some(self.inner.as_ref()) + } + async fn gc_start(&self) -> io::Result<()> { Ok(()) } diff --git a/iroh-blobs/src/store/readonly_mem.rs b/iroh-blobs/src/store/readonly_mem.rs index 4b77698313..2ef0a2b89e 100644 --- a/iroh-blobs/src/store/readonly_mem.rs +++ b/iroh-blobs/src/store/readonly_mem.rs @@ -15,7 +15,7 @@ use crate::{ }, util::{ progress::{BoxedProgressSender, IdGenerator, ProgressSender}, - Tag, + 
Tag, TagDrop, }, BlobFormat, Hash, HashAndFormat, TempTag, IROH_BLOCK_SIZE, }; @@ -324,6 +324,10 @@ impl super::Store for Store { TempTag::new(inner, None) } + fn tag_drop(&self) -> Option<&dyn TagDrop> { + None + } + async fn gc_start(&self) -> io::Result<()> { Ok(()) } diff --git a/iroh-blobs/src/store/traits.rs b/iroh-blobs/src/store/traits.rs index e0ec3e6b39..49d0a43abd 100644 --- a/iroh-blobs/src/store/traits.rs +++ b/iroh-blobs/src/store/traits.rs @@ -19,7 +19,7 @@ use crate::{ protocol::RangeSpec, util::{ progress::{BoxedProgressSender, IdGenerator, ProgressSender}, - Tag, + Tag, TagDrop, }, BlobFormat, Hash, HashAndFormat, TempTag, IROH_BLOCK_SIZE, }; @@ -356,6 +356,9 @@ pub trait Store: ReadableStore + MapMut { /// Create a temporary pin for this store fn temp_tag(&self, value: HashAndFormat) -> TempTag; + /// Handle to use to drop tags + fn tag_drop(&self) -> Option<&dyn TagDrop>; + /// Notify the store that a new gc phase is about to start. /// /// This should not fail unless the store is shut down or otherwise in a diff --git a/iroh-blobs/src/util.rs b/iroh-blobs/src/util.rs index 751886492c..d1c3dd3ebd 100644 --- a/iroh-blobs/src/util.rs +++ b/iroh-blobs/src/util.rs @@ -255,11 +255,17 @@ impl TempTag { self.inner.format } + /// The hash and format of the pinned item + pub fn hash_and_format(&self) -> HashAndFormat { + self.inner + } + /// Keep the item alive until the end of the process pub fn leak(mut self) { // set the liveness tracker to None, so that the refcount is not decreased // during drop. This means that the refcount will never reach 0 and the - // item will not be gced until the end of the process. + // item will not be gced until the end of the process, unless you manually + // invoke on_drop. self.on_drop = None; } } diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 64f4f10b82..2a88d9787c 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -34,12 +34,13 @@ use tokio_util::io::{ReaderStream, StreamReader}; use tracing::warn; use crate::rpc_protocol::{ - BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, BatchUpdate, - BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, - BlobDeleteBlobRequest, BlobDownloadRequest, BlobExportRequest, BlobGetCollectionRequest, - BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, - BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobValidateRequest, - CreateCollectionRequest, CreateCollectionResponse, NodeStatusRequest, RpcService, SetTagOption, + BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, BatchCreateRequest, + BatchCreateResponse, BatchUpdate, BlobAddPathRequest, BlobAddStreamRequest, + BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, + BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, + BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, + BlobReadAtResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, + NodeStatusRequest, RpcService, SetTagOption, }; use super::{flatten, Iroh}; @@ -60,6 +61,14 @@ impl Client where C: ServiceConnection, { + /// Create a new batch for adding data. 
+ pub async fn batch(&self) -> Result> { + let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?; + let updates = Mutex::new(updates); + let BatchCreateResponse::Id(id) = stream.next().await.context("expected scope id")??; + let rpc = self.rpc.clone(); + Ok(Batch(Arc::new(BatchInner { id, rpc, updates }))) + } /// Stream the contents of a a single blob. /// /// Returns a [`Reader`], which can report the size of the blob before reading it. @@ -956,7 +965,6 @@ pub enum DownloadMode { mod tests { use super::*; - use anyhow::Context as _; use rand::RngCore; use tokio::io::AsyncWriteExt; diff --git a/iroh/src/node.rs b/iroh/src/node.rs index 058363276f..112a7868ca 100644 --- a/iroh/src/node.rs +++ b/iroh/src/node.rs @@ -3,16 +3,19 @@ //! A node is a server that serves various protocols. //! //! To shut down the node, call [`Node::shutdown`]. +use std::collections::BTreeMap; use std::fmt::Debug; use std::net::SocketAddr; use std::path::Path; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use anyhow::{anyhow, Result}; use futures_lite::StreamExt; use iroh_base::key::PublicKey; use iroh_blobs::downloader::Downloader; use iroh_blobs::store::Store as BaoStore; +use iroh_blobs::util::TagDrop; +use iroh_blobs::{HashAndFormat, TempTag}; use iroh_net::util::AbortingJoinHandle; use iroh_net::{endpoint::LocalEndpointsStream, key::SecretKey, Endpoint}; use quic_rpc::transport::flume::FlumeConnection; @@ -62,6 +65,63 @@ struct NodeInner { rt: LocalPoolHandle, pub(crate) sync: Engine, downloader: Downloader, + blob_scopes: Mutex, +} + +#[derive(Debug, Default)] +struct BlobScopes { + scopes: BTreeMap, + max: u64, +} + +#[derive(Debug, Default)] +struct BlobScope { + tags: BTreeMap, +} + +impl BlobScopes { + /// Create a new blob scope. + fn create(&mut self) -> u64 { + let id = self.max; + self.max += 1; + id + } + + /// Store a tag in a scope. + fn store(&mut self, scope: u64, tt: TempTag) { + let entry = self.scopes.entry(scope).or_default(); + let count = entry.tags.entry(tt.hash_and_format()).or_default(); + tt.leak(); + *count += 1; + } + + /// Remove a tag from a scope. + fn remove_one(&mut self, scope: u64, content: &HashAndFormat, u: Option<&dyn TagDrop>) { + if let Some(scope) = self.scopes.get_mut(&scope) { + if let Some(counter) = scope.tags.get_mut(content) { + *counter -= 1; + if let Some(u) = u { + u.on_drop(content); + } + if *counter == 0 { + scope.tags.remove(content); + } + } + } + } + + /// Remove an entire scope. + fn remove(&mut self, scope: u64, u: Option<&dyn TagDrop>) { + if let Some(scope) = self.scopes.remove(&scope) { + for (content, count) in scope.tags { + if let Some(u) = u { + for _ in 0..count { + u.on_drop(&content); + } + } + } + } + } } /// In memory node. 
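One subtlety in BlobScopes above deserves a note: tags are refcounted per scope and content, so the same content stored twice within one batch must be dropped twice before it loses protection, while removing a whole scope releases everything left in it. A toy model of just that bookkeeping, with strings standing in for HashAndFormat and the TagDrop hookup omitted:

use std::collections::BTreeMap;

#[derive(Default)]
struct Scope(BTreeMap<&'static str, u64>);

#[derive(Default)]
struct Scopes(BTreeMap<u64, Scope>);

impl Scopes {
    fn store(&mut self, scope: u64, content: &'static str) {
        *self.0.entry(scope).or_default().0.entry(content).or_default() += 1;
    }

    fn remove_one(&mut self, scope: u64, content: &'static str) {
        if let Some(scope) = self.0.get_mut(&scope) {
            if let Some(count) = scope.0.get_mut(content) {
                *count -= 1;
                if *count == 0 {
                    scope.0.remove(content);
                }
            }
        }
    }

    fn is_protected(&self, content: &str) -> bool {
        self.0.values().any(|scope| scope.0.contains_key(content))
    }
}

fn main() {
    let mut scopes = Scopes::default();
    scopes.store(0, "blob");
    scopes.store(0, "blob");
    scopes.remove_one(0, "blob");
    assert!(scopes.is_protected("blob")); // one reference is still alive
    scopes.remove_one(0, "blob");
    assert!(!scopes.is_protected("blob")); // now eligible for gc
}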
diff --git a/iroh/src/node/builder.rs b/iroh/src/node/builder.rs index 7c9875f3c1..3015c4961c 100644 --- a/iroh/src/node/builder.rs +++ b/iroh/src/node/builder.rs @@ -480,7 +480,6 @@ where }; let (internal_rpc, controller) = quic_rpc::transport::flume::connection(1); let client = crate::client::Iroh::new(quic_rpc::RpcClient::new(controller.clone())); - let inner = Arc::new(NodeInner { db: self.blobs_store, endpoint: endpoint.clone(), @@ -491,6 +490,7 @@ where rt: lp.clone(), sync, downloader, + blob_scopes: Default::default(), }); let task = { let gossip = gossip.clone(); diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index 0f50a253e2..4aa4cb600d 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -854,23 +854,25 @@ impl Handler { _: BatchCreateRequest, mut updates: impl Stream + Send + Unpin + 'static, ) -> impl Stream { - let scope_id = 0; - // let scope_id = self.inner.temp_tags.lock().unwrap().create(); + let scope_id = self.inner.blob_scopes.lock().unwrap().create(); tokio::spawn(async move { while let Some(item) = updates.next().await { match item { BatchUpdate::Drop(content) => { - // println!("dropping tag {} {}", scope_id, tag_id); - // self.inner - // .temp_tags - // .lock() - // .unwrap() - // .remove_one(scope_id, tag_id); + self.inner.blob_scopes.lock().unwrap().remove_one( + scope_id, + &content, + self.inner.db.tag_drop(), + ); } } } println!("dropping scope {}", scope_id); - // self.inner.temp_tags.lock().unwrap().remove(scope_id); + self.inner + .blob_scopes + .lock() + .unwrap() + .remove(scope_id, self.inner.db.tag_drop()); }); futures_lite::stream::once(BatchCreateResponse::Id(scope_id)) } @@ -899,7 +901,6 @@ impl Handler { stream: impl Stream + Send + Unpin + 'static, progress: flume::Sender, ) -> anyhow::Result<()> { - println!("batch_add_stream0"); let progress = FlumeProgressSender::new(progress); let stream = stream.map(|item| match item { @@ -912,23 +913,17 @@ impl Handler { let import_progress = progress.clone().with_filter_map(move |x| match x { _ => None, }); - println!("collecting stream"); - let items: Vec<_> = stream.collect().await; - println!("stream collected"); - let stream = futures_lite::stream::iter(items.into_iter()); let (temp_tag, _len) = self .inner .db .import_stream(stream, BlobFormat::Raw, import_progress) .await?; - println!("stream imported {:?}", temp_tag.inner().hash); let hash = temp_tag.inner().hash; - // let tag = self - // .inner - // .temp_tags - // .lock() - // .unwrap() - // .create_one(msg.scope, temp_tag); + self.inner + .blob_scopes + .lock() + .unwrap() + .store(msg.scope, temp_tag); progress .send(BatchAddStreamResponse::Result { hash }) .await?; diff --git a/iroh/tests/batch.rs b/iroh/tests/batch.rs new file mode 100644 index 0000000000..e36e0c31de --- /dev/null +++ b/iroh/tests/batch.rs @@ -0,0 +1,54 @@ +use std::time::Duration; + +use bao_tree::blake3; +use iroh::node::GcPolicy; +use iroh_blobs::{store::mem::Store, BlobFormat}; + +async fn create_node() -> anyhow::Result> { + iroh::node::Node::memory() + .gc_policy(GcPolicy::Interval(Duration::from_millis(10))) + .spawn() + .await +} + +#[tokio::test] +async fn test_batch_create_1() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let expected_data: &[u8] = b"test"; + let expected_hash = blake3::hash(expected_data).into(); + let tag = batch.add_bytes(expected_data, BlobFormat::Raw).await?; + let hash = *tag.hash(); + assert_eq!(hash, expected_hash); + // Check that the 
store has the data and that it is protected from gc + tokio::time::sleep(Duration::from_millis(50)).await; + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), expected_data); + drop(tag); + // Check that the store drops the data when the temp tag gets dropped + tokio::time::sleep(Duration::from_millis(50)).await; + assert!(client.read_to_bytes(hash).await.is_err()); + Ok(()) +} + +#[tokio::test] +async fn test_batch_create_2() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let expected_data: &[u8] = b"test"; + let expected_hash = blake3::hash(expected_data).into(); + let tag = batch.add_bytes(expected_data, BlobFormat::Raw).await?; + let hash = *tag.hash(); + assert_eq!(hash, expected_hash); + // Check that the store has the data and that it is protected from gc + tokio::time::sleep(Duration::from_millis(50)).await; + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), expected_data); + drop(batch); + // Check that the store drops the data when the temp tag gets dropped + tokio::time::sleep(Duration::from_millis(50)).await; + assert!(client.read_to_bytes(hash).await.is_err()); + Ok(()) +} From bf0baead088b73d4587658e9ba7aa8906f8a4be2 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Mon, 3 Jun 2024 17:04:16 +0300 Subject: [PATCH 06/18] Add ability to add from path in batch api --- iroh-blobs/src/provider.rs | 24 ++++++++++++ iroh/src/client/blobs.rs | 78 +++++++++++++++++++++++++++++++------ iroh/src/node/rpc.rs | 79 +++++++++++++++++++++++++++++++++++--- iroh/src/rpc_protocol.rs | 28 ++++++++++++++ 4 files changed, 192 insertions(+), 17 deletions(-) diff --git a/iroh-blobs/src/provider.rs b/iroh-blobs/src/provider.rs index 7fe4e13004..7e481f4c2b 100644 --- a/iroh-blobs/src/provider.rs +++ b/iroh-blobs/src/provider.rs @@ -153,6 +153,30 @@ pub enum AddProgress { Abort(RpcError), } +/// Progress updates for the batch add operation. +#[derive(Debug, Serialize, Deserialize)] +pub enum BatchAddProgress { + /// An item was found with the given size + Found { + /// The size of the entry in bytes. + size: u64, + }, + /// We got progress ingesting item `id`. + Progress { + /// The offset of the progress, in bytes. + offset: u64, + }, + /// We are done with `id`, and the hash is `hash`. + Done { + /// The hash of the entry. + hash: Hash, + }, + /// We got an error and need to abort. + /// + /// This will be the last message in the stream. + Abort(RpcError), +} + /// Read the request from the getter. 
/// /// Will fail if there is an error while reading, if the reader diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 2a88d9787c..6631add2e9 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -18,6 +18,7 @@ use iroh_blobs::{ export::ExportProgress as BytesExportProgress, format::collection::Collection, get::db::DownloadProgress as BytesDownloadProgress, + provider::BatchAddProgress, store::{ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress}, util::TagDrop, BlobFormat, Hash, HashAndFormat, Tag, TempTag, @@ -34,8 +35,8 @@ use tokio_util::io::{ReaderStream, StreamReader}; use tracing::warn; use crate::rpc_protocol::{ - BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, BatchCreateRequest, - BatchCreateResponse, BatchUpdate, BlobAddPathRequest, BlobAddStreamRequest, + BatchAddPathRequest, BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, + BatchCreateRequest, BatchCreateResponse, BatchUpdate, BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, @@ -65,9 +66,13 @@ where pub async fn batch(&self) -> Result> { let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?; let updates = Mutex::new(updates); - let BatchCreateResponse::Id(id) = stream.next().await.context("expected scope id")??; + let BatchCreateResponse::Id(scope) = stream.next().await.context("expected scope id")??; let rpc = self.rpc.clone(); - Ok(Batch(Arc::new(BatchInner { id, rpc, updates }))) + Ok(Batch(Arc::new(BatchInner { + scope, + rpc, + updates, + }))) } /// Stream the contents of a a single blob. /// @@ -386,7 +391,7 @@ where #[derive(derive_more::Debug)] struct BatchInner> { /// The id of the scope. - id: u64, + scope: u64, /// The rpc client. rpc: RpcClient, /// The stream to send drop @@ -407,6 +412,17 @@ impl> TagDrop for BatchInner { } impl> Batch { + /// Write a blob by passing an async reader. + pub async fn add_reader( + &self, + reader: impl AsyncRead + Unpin + Send + 'static, + format: BlobFormat, + ) -> anyhow::Result { + const CAP: usize = 1024 * 64; // send 64KB per request by default + let input = ReaderStream::with_capacity(reader, CAP); + self.add_stream(input, format).await + } + /// Write a blob by passing bytes. pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { let input = futures_lite::stream::once(Ok(bytes.into())); @@ -423,7 +439,7 @@ impl> Batch { .0 .rpc .bidi(BatchAddStreamRequest { - scope: self.0.id, + scope: self.0.scope, format, }) .await?; @@ -460,11 +476,51 @@ impl> Batch { } } let hash = res.context("Missing answer")?; - let t: Arc = self.0.clone(); - Ok(TempTag::new( - HashAndFormat { hash, format }, - Some(Arc::downgrade(&t)), - )) + Ok(self.temp_tag(HashAndFormat { hash, format })) + } + + /// Import a blob from a filesystem path. + /// + /// `path` should be an absolute path valid for the file system on which + /// the node runs. + /// If `in_place` is true, Iroh will assume that the data will not change and will share it in + /// place without copying to the Iroh data directory. 
+ pub async fn add_from_path( + &self, + path: PathBuf, + in_place: bool, + format: BlobFormat, + ) -> Result { + let mut stream = self + .0 + .rpc + .server_streaming(BatchAddPathRequest { + path, + in_place, + format, + scope: self.0.scope, + }) + .await?; + let mut res = None; + while let Some(item) = stream.next().await { + match item?.0 { + BatchAddProgress::Abort(cause) => { + Err(cause)?; + } + BatchAddProgress::Done { hash } => { + res = Some(hash); + } + _ => {} + } + } + let hash = res.context("Missing answer")?; + Ok(self.temp_tag(HashAndFormat { hash, format })) + } + + fn temp_tag(&self, inner: HashAndFormat) -> TempTag { + let on_drop: Arc = self.0.clone(); + let on_drop = Some(Arc::downgrade(&on_drop)); + TempTag::new(inner, on_drop) } } diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index 4aa4cb600d..e07de2165a 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -13,6 +13,7 @@ use iroh_blobs::export::ExportProgress; use iroh_blobs::format::collection::Collection; use iroh_blobs::get::db::DownloadProgress; use iroh_blobs::get::Stats; +use iroh_blobs::provider::BatchAddProgress; use iroh_blobs::store::{ConsistencyCheckProgress, ExportFormat, ImportProgress, MapEntry}; use iroh_blobs::util::progress::ProgressSender; use iroh_blobs::BlobFormat; @@ -39,11 +40,11 @@ use crate::client::blobs::{ use crate::client::tags::TagInfo; use crate::client::NodeStatus; use crate::rpc_protocol::{ - BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, BatchCreateRequest, - BatchCreateResponse, BatchUpdate, BlobAddPathRequest, BlobAddPathResponse, - BlobAddStreamRequest, BlobAddStreamResponse, BlobAddStreamUpdate, BlobConsistencyCheckRequest, - BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, BlobExportRequest, - BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, + BatchAddPathRequest, BatchAddPathResponse, BatchAddStreamRequest, BatchAddStreamResponse, + BatchAddStreamUpdate, BatchCreateRequest, BatchCreateResponse, BatchUpdate, BlobAddPathRequest, + BlobAddPathResponse, BlobAddStreamRequest, BlobAddStreamResponse, BlobAddStreamUpdate, + BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, + BlobExportRequest, BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, DeleteTagRequest, DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, @@ -106,6 +107,10 @@ impl Handler { .await } BatchAddStreamUpdate(_msg) => Err(RpcServerError::UnexpectedUpdateMessage), + BatchAddPath(msg) => { + chan.server_streaming(msg, handler, Self::batch_add_from_path) + .await + } ListTags(msg) => { chan.server_streaming(msg, handler, Self::blob_list_tags) .await @@ -785,6 +790,54 @@ impl Handler { Ok(()) } + async fn batch_add_from_path0( + self, + msg: BatchAddPathRequest, + progress: flume::Sender, + ) -> anyhow::Result<()> { + use iroh_blobs::store::ImportMode; + + let progress = FlumeProgressSender::new(progress); + // convert import progress to provide progress + let import_progress = progress.clone().with_filter_map(move |x| match x { + ImportProgress::Size { size, .. } => Some(BatchAddProgress::Found { size }), + ImportProgress::OutboardProgress { offset, .. } => { + Some(BatchAddProgress::Progress { offset }) + } + ImportProgress::OutboardDone { hash, .. 
} => Some(BatchAddProgress::Done { hash }), + _ => None, + }); + let BatchAddPathRequest { + path: root, + in_place, + format, + scope, + } = msg; + // Check that the path is absolute and exists. + anyhow::ensure!(root.is_absolute(), "path must be absolute"); + anyhow::ensure!( + root.exists(), + "trying to add missing path: {}", + root.display() + ); + + let import_mode = match in_place { + true => ImportMode::TryReference, + false => ImportMode::Copy, + }; + + let (tag, _) = self + .inner + .db + .import_file(root, import_mode, format, import_progress) + .await?; + let hash = *tag.hash(); + self.inner.blob_scopes.lock().unwrap().store(scope, tag); + + progress.send(BatchAddProgress::Done { hash }).await?; + Ok(()) + } + #[allow(clippy::unused_async)] async fn node_stats(self, _req: NodeStatsRequest) -> RpcResult { #[cfg(feature = "metrics")] @@ -867,7 +920,6 @@ impl Handler { } } } - println!("dropping scope {}", scope_id); self.inner .blob_scopes .lock() @@ -895,6 +947,21 @@ impl Handler { rx.into_stream() } + fn batch_add_from_path( + self, + msg: BatchAddPathRequest, + ) -> impl Stream { + // provide a little buffer so that we don't slow down the sender + let (tx, rx) = flume::bounded(32); + let tx2 = tx.clone(); + self.rt().spawn_pinned(|| async move { + if let Err(e) = self.batch_add_from_path0(msg, tx).await { + tx2.send_async(BatchAddProgress::Abort(e.into())).await.ok(); + } + }); + rx.into_stream().map(BatchAddPathResponse) + } + async fn batch_add_stream0( self, msg: BatchAddStreamRequest, diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index 1117773e0f..76ae87551a 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -15,6 +15,7 @@ use iroh_base::node_addr::AddrInfoOptions; pub use iroh_blobs::{export::ExportProgress, get::db::DownloadProgress, BlobFormat, Hash}; use iroh_blobs::{ format::collection::Collection, + provider::BatchAddProgress, store::{BaoBlobSize, ConsistencyCheckProgress}, util::Tag, HashAndFormat, @@ -1077,6 +1078,31 @@ pub enum BatchAddStreamResponse { Result { hash: Hash }, } +/// Write a blob from a byte stream +#[derive(Serialize, Deserialize, Debug)] +pub struct BatchAddPathRequest { + /// The path to the data to provide. 
+ pub path: PathBuf, + /// Add the data in place + pub in_place: bool, + /// What format to use for the blob + pub format: BlobFormat, + /// Scope to create the temp tag in + pub scope: u64, +} + +/// Response to a batch add path request +#[derive(Serialize, Deserialize, Debug)] +pub struct BatchAddPathResponse(pub BatchAddProgress); + +impl Msg for BatchAddPathRequest { + type Pattern = ServerStreaming; +} + +impl ServerStreamingMsg for BatchAddPathRequest { + type Response = BatchAddPathResponse; +} + /// Get stats for the running Iroh node #[derive(Serialize, Deserialize, Debug)] pub struct NodeStatsRequest {} @@ -1138,6 +1164,7 @@ pub enum Request { BatchUpdate(BatchUpdate), BatchAddStreamRequest(BatchAddStreamRequest), BatchAddStreamUpdate(BatchAddStreamUpdate), + BatchAddPath(BatchAddPathRequest), DeleteTag(DeleteTagRequest), ListTags(ListTagsRequest), @@ -1203,6 +1230,7 @@ pub enum Response { BatchCreateResponse(BatchCreateResponse), BatchRequest(BatchCreateRequest), BatchAddStream(BatchAddStreamResponse), + BatchAddPath(BatchAddPathResponse), ListTags(TagInfo), DeleteTag(RpcResult<()>), From 08ccd3179b5838b2c69be3922bf13e855a90cb89 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Mon, 3 Jun 2024 19:08:12 +0300 Subject: [PATCH 07/18] WIP add more rich tags api --- iroh-blobs/src/provider.rs | 2 +- iroh/src/client/blobs.rs | 6 ++--- iroh/src/client/tags.rs | 31 ++++++++++++++++++++--- iroh/src/node/rpc.rs | 43 ++++++++++++++++++------------- iroh/src/rpc_protocol.rs | 25 ++++++++++++++---- iroh/tests/batch.rs | 52 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 31 deletions(-) diff --git a/iroh-blobs/src/provider.rs b/iroh-blobs/src/provider.rs index 7e481f4c2b..fd9ce37acf 100644 --- a/iroh-blobs/src/provider.rs +++ b/iroh-blobs/src/provider.rs @@ -155,7 +155,7 @@ pub enum AddProgress { /// Progress updates for the batch add operation. #[derive(Debug, Serialize, Deserialize)] -pub enum BatchAddProgress { +pub enum BatchAddPathProgress { /// An item was found with the given size Found { /// The size of the entry in bytes. 
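On the receiving side the renamed enum describes a tiny state machine: Found announces the total size, Progress reports offsets against it, and exactly one of Done or Abort ends the stream. A sketch of a consumer in that spirit, with stand-in types since the real definitions live in iroh-blobs:

#[derive(Debug)]
enum BatchAddPathProgress {
    Found { size: u64 },
    Progress { offset: u64 },
    Done { hash: [u8; 32] }, // stand-in for the real Hash type
    Abort(String),           // stand-in for the real RpcError
}

fn report(events: impl IntoIterator<Item = BatchAddPathProgress>) -> Result<[u8; 32], String> {
    let mut size = None;
    for event in events {
        match event {
            BatchAddPathProgress::Found { size: s } => size = Some(s),
            BatchAddPathProgress::Progress { offset } => {
                if let Some(size) = size.filter(|s| *s > 0) {
                    println!("{:.0}%", offset as f64 / size as f64 * 100.0);
                }
            }
            BatchAddPathProgress::Done { hash } => return Ok(hash),
            BatchAddPathProgress::Abort(err) => return Err(err),
        }
    }
    Err("stream ended without Done".into())
}

fn main() {
    let events = [
        BatchAddPathProgress::Found { size: 4 },
        BatchAddPathProgress::Progress { offset: 2 },
        BatchAddPathProgress::Done { hash: [0; 32] },
    ];
    assert!(report(events).is_ok());
}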
diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 6631add2e9..33be81a1cf 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -18,7 +18,7 @@ use iroh_blobs::{ export::ExportProgress as BytesExportProgress, format::collection::Collection, get::db::DownloadProgress as BytesDownloadProgress, - provider::BatchAddProgress, + provider::BatchAddPathProgress, store::{ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress}, util::TagDrop, BlobFormat, Hash, HashAndFormat, Tag, TempTag, @@ -504,10 +504,10 @@ impl> Batch { let mut res = None; while let Some(item) = stream.next().await { match item?.0 { - BatchAddProgress::Abort(cause) => { + BatchAddPathProgress::Abort(cause) => { Err(cause)?; } - BatchAddProgress::Done { hash } => { + BatchAddPathProgress::Done { hash } => { res = Some(hash); } _ => {} diff --git a/iroh/src/client/tags.rs b/iroh/src/client/tags.rs index c2d4309977..926834d783 100644 --- a/iroh/src/client/tags.rs +++ b/iroh/src/client/tags.rs @@ -2,11 +2,11 @@ use anyhow::Result; use futures_lite::{Stream, StreamExt}; -use iroh_blobs::{BlobFormat, Hash, Tag}; +use iroh_blobs::{BlobFormat, Hash, HashAndFormat, Tag}; use quic_rpc::{RpcClient, ServiceConnection}; use serde::{Deserialize, Serialize}; -use crate::rpc_protocol::{DeleteTagRequest, ListTagsRequest, RpcService}; +use crate::rpc_protocol::{CreateTagRequest, ListTagsRequest, RpcService, SetTagRequest}; /// Iroh tags client. #[derive(Debug, Clone)] @@ -24,10 +24,33 @@ where Ok(stream.map(|res| res.map_err(anyhow::Error::from))) } + /// Create a tag, where the name is automatically generated. + /// + /// Use this method if you want a new tag with a unique name. + pub async fn create(&self, value: HashAndFormat) -> Result { + Ok(self.rpc.rpc(CreateTagRequest { value }).await??) + } + + /// Set a tag to a value, overwriting any existing value. + /// + /// Setting the value to `None` deletes the tag. Setting the value to `Some` creates or updates the tag. + pub async fn set_opt(&self, name: Tag, value: Option) -> Result<()> { + self.rpc.rpc(SetTagRequest { name, value }).await??; + Ok(()) + } + + /// Set a tag to a value, overwriting any existing value. + /// + /// This is a convenience wrapper around `set_opt`. + pub async fn set(&self, name: Tag, value: HashAndFormat) -> Result<()> { + self.set_opt(name, Some(value)).await + } + /// Delete a tag. + /// + /// This is a convenience wrapper around `set_opt`. 
pub async fn delete(&self, name: Tag) -> Result<()> { - self.rpc.rpc(DeleteTagRequest { name }).await??; - Ok(()) + self.set_opt(name, None).await } } diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index e07de2165a..e6093abec0 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -13,10 +13,9 @@ use iroh_blobs::export::ExportProgress; use iroh_blobs::format::collection::Collection; use iroh_blobs::get::db::DownloadProgress; use iroh_blobs::get::Stats; -use iroh_blobs::provider::BatchAddProgress; +use iroh_blobs::provider::BatchAddPathProgress; use iroh_blobs::store::{ConsistencyCheckProgress, ExportFormat, ImportProgress, MapEntry}; use iroh_blobs::util::progress::ProgressSender; -use iroh_blobs::BlobFormat; use iroh_blobs::{ hashseq::parse_hash_seq, provider::AddProgress, @@ -24,6 +23,7 @@ use iroh_blobs::{ util::progress::FlumeProgressSender, HashAndFormat, }; +use iroh_blobs::{BlobFormat, Tag}; use iroh_io::AsyncSliceReader; use iroh_net::relay::RelayUrl; use iroh_net::{Endpoint, NodeAddr, NodeId}; @@ -47,12 +47,12 @@ use crate::rpc_protocol::{ BlobExportRequest, BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, - DeleteTagRequest, DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, + CreateTagRequest, DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, DocImportFileResponse, DocSetHashRequest, ListTagsRequest, NodeAddrRequest, NodeConnectionInfoRequest, NodeConnectionInfoResponse, NodeConnectionsRequest, NodeConnectionsResponse, NodeIdRequest, NodeRelayRequest, NodeShutdownRequest, NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, NodeWatchRequest, NodeWatchResponse, - Request, RpcService, SetTagOption, + Request, RpcService, SetTagOption, SetTagRequest, }; use super::NodeInner; @@ -112,10 +112,11 @@ impl Handler { .await } ListTags(msg) => { - chan.server_streaming(msg, handler, Self::blob_list_tags) + chan.server_streaming(msg, handler, Self::tags_list_tags) .await } - DeleteTag(msg) => chan.rpc(msg, handler, Self::blob_delete_tag).await, + SetTag(msg) => chan.rpc(msg, handler, Self::tags_set_tag).await, + CreateTag(msg) => chan.rpc(msg, handler, Self::tags_create_tag).await, BlobDeleteBlob(msg) => chan.rpc(msg, handler, Self::blob_delete_blob).await, BlobAddPath(msg) => { chan.server_streaming(msg, handler, Self::blob_add_from_path) @@ -426,18 +427,22 @@ impl Handler { }) } - async fn blob_delete_tag(self, msg: DeleteTagRequest) -> RpcResult<()> { - self.inner.db.set_tag(msg.name, None).await?; + async fn blob_delete_blob(self, msg: BlobDeleteBlobRequest) -> RpcResult<()> { + self.inner.db.delete(vec![msg.hash]).await?; Ok(()) } - async fn blob_delete_blob(self, msg: BlobDeleteBlobRequest) -> RpcResult<()> { - self.inner.db.delete(vec![msg.hash]).await?; + async fn tags_set_tag(self, msg: SetTagRequest) -> RpcResult<()> { + self.inner.db.set_tag(msg.name, None).await?; Ok(()) } - fn blob_list_tags(self, _msg: ListTagsRequest) -> impl Stream + Send + 'static { - tracing::info!("blob_list_tags"); + async fn tags_create_tag(self, msg: CreateTagRequest) -> RpcResult { + let tag = self.inner.db.create_tag(msg.value).await?; + Ok(tag) + } + + fn tags_list_tags(self, _msg: ListTagsRequest) -> impl Stream + Send + 'static { Gen::new(|co| async move { let tags = self.inner.db.tags().await.unwrap(); #[allow(clippy::manual_flatten)] @@ 
-793,18 +798,18 @@ impl Handler { async fn batch_add_from_path0( self, msg: BatchAddPathRequest, - progress: flume::Sender, + progress: flume::Sender, ) -> anyhow::Result<()> { use iroh_blobs::store::ImportMode; let progress = FlumeProgressSender::new(progress); // convert import progress to provide progress let import_progress = progress.clone().with_filter_map(move |x| match x { - ImportProgress::Size { size, .. } => Some(BatchAddProgress::Found { size }), + ImportProgress::Size { size, .. } => Some(BatchAddPathProgress::Found { size }), ImportProgress::OutboardProgress { offset, .. } => { - Some(BatchAddProgress::Progress { offset }) + Some(BatchAddPathProgress::Progress { offset }) } - ImportProgress::OutboardDone { hash, .. } => Some(BatchAddProgress::Done { hash }), + ImportProgress::OutboardDone { hash, .. } => Some(BatchAddPathProgress::Done { hash }), _ => None, }); let BatchAddPathRequest { @@ -834,7 +839,7 @@ impl Handler { let hash = *tag.hash(); self.inner.blob_scopes.lock().unwrap().store(scope, tag); - progress.send(BatchAddProgress::Done { hash }).await?; + progress.send(BatchAddPathProgress::Done { hash }).await?; Ok(()) } @@ -956,7 +961,9 @@ impl Handler { let tx2 = tx.clone(); self.rt().spawn_pinned(|| async move { if let Err(e) = self.batch_add_from_path0(msg, tx).await { - tx2.send_async(BatchAddProgress::Abort(e.into())).await.ok(); + tx2.send_async(BatchAddPathProgress::Abort(e.into())) + .await + .ok(); } }); rx.into_stream().map(BatchAddPathResponse) diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index 76ae87551a..939c37f4d6 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -15,7 +15,7 @@ use iroh_base::node_addr::AddrInfoOptions; pub use iroh_blobs::{export::ExportProgress, get::db::DownloadProgress, BlobFormat, Hash}; use iroh_blobs::{ format::collection::Collection, - provider::BatchAddProgress, + provider::BatchAddPathProgress, store::{BaoBlobSize, ConsistencyCheckProgress}, util::Tag, HashAndFormat, @@ -273,15 +273,28 @@ impl RpcMsg for BlobDeleteBlobRequest { /// Delete a tag #[derive(Debug, Serialize, Deserialize)] -pub struct DeleteTagRequest { +pub struct SetTagRequest { /// Name of the tag pub name: Tag, + /// Value of the tag, None to delete + pub value: Option, } -impl RpcMsg for DeleteTagRequest { +impl RpcMsg for SetTagRequest { type Response = RpcResult<()>; } +/// Create a tag +#[derive(Debug, Serialize, Deserialize)] +pub struct CreateTagRequest { + /// Value of the tag + pub value: HashAndFormat, +} + +impl RpcMsg for CreateTagRequest { + type Response = RpcResult; +} + /// Get a collection #[derive(Debug, Serialize, Deserialize)] pub struct BlobGetCollectionRequest { @@ -1093,7 +1106,7 @@ pub struct BatchAddPathRequest { /// Response to a batch add path request #[derive(Serialize, Deserialize, Debug)] -pub struct BatchAddPathResponse(pub BatchAddProgress); +pub struct BatchAddPathResponse(pub BatchAddPathProgress); impl Msg for BatchAddPathRequest { type Pattern = ServerStreaming; @@ -1166,7 +1179,8 @@ pub enum Request { BatchAddStreamUpdate(BatchAddStreamUpdate), BatchAddPath(BatchAddPathRequest), - DeleteTag(DeleteTagRequest), + SetTag(SetTagRequest), + CreateTag(CreateTagRequest), ListTags(ListTagsRequest), DocOpen(DocOpenRequest), @@ -1234,6 +1248,7 @@ pub enum Response { ListTags(TagInfo), DeleteTag(RpcResult<()>), + CreateTag(RpcResult), DocOpen(RpcResult), DocClose(RpcResult), diff --git a/iroh/tests/batch.rs b/iroh/tests/batch.rs index e36e0c31de..8d48565a57 100644 --- a/iroh/tests/batch.rs +++ 
b/iroh/tests/batch.rs @@ -52,3 +52,55 @@ async fn test_batch_create_2() -> anyhow::Result<()> { assert!(client.read_to_bytes(hash).await.is_err()); Ok(()) } + +#[tokio::test] +async fn test_batch_create_from_path_1() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let dir = tempfile::tempdir()?; + let expected_data: &[u8] = b"test"; + let expected_hash = blake3::hash(expected_data).into(); + let temp_path = dir.path().join("test"); + std::fs::write(&temp_path, expected_data)?; + let tag = batch + .add_from_path(temp_path, false, BlobFormat::Raw) + .await?; + let hash = *tag.hash(); + assert_eq!(hash, expected_hash); + // Check that the store has the data and that it is protected from gc + tokio::time::sleep(Duration::from_millis(50)).await; + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), expected_data); + drop(tag); + // Check that the store drops the data when the temp tag gets dropped + tokio::time::sleep(Duration::from_millis(50)).await; + assert!(client.read_to_bytes(hash).await.is_err()); + Ok(()) +} + +#[tokio::test] +async fn test_batch_create_from_path_2() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let dir = tempfile::tempdir()?; + let expected_data: &[u8] = b"test"; + let expected_hash = blake3::hash(expected_data).into(); + let temp_path = dir.path().join("test"); + std::fs::write(&temp_path, expected_data)?; + let tag = batch + .add_from_path(temp_path, false, BlobFormat::Raw) + .await?; + let hash = *tag.hash(); + assert_eq!(hash, expected_hash); + // Check that the store has the data and that it is protected from gc + tokio::time::sleep(Duration::from_millis(50)).await; + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), expected_data); + drop(batch); + // Check that the store drops the data when the temp tag gets dropped + tokio::time::sleep(Duration::from_millis(50)).await; + assert!(client.read_to_bytes(hash).await.is_err()); + Ok(()) +} From 978587aef29e9465f3eb0deb1dc9153c9f0253bd Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 12:24:10 +0300 Subject: [PATCH 08/18] clippy --- iroh-blobs/src/format/collection.rs | 2 +- iroh-blobs/src/store/traits.rs | 2 +- iroh-net/src/net/interfaces/bsd.rs | 2 +- iroh/src/client/blobs.rs | 217 ++++++++++++++++++++++------ iroh/src/node.rs | 7 + iroh/src/node/rpc.rs | 51 ++++--- iroh/src/rpc_protocol.rs | 24 ++- iroh/tests/batch.rs | 97 +++++++++++-- 8 files changed, 310 insertions(+), 92 deletions(-) diff --git a/iroh-blobs/src/format/collection.rs b/iroh-blobs/src/format/collection.rs index ab13572cc1..2e4966308f 100644 --- a/iroh-blobs/src/format/collection.rs +++ b/iroh-blobs/src/format/collection.rs @@ -84,7 +84,7 @@ impl Collection { /// /// To persist the collection, write all the blobs to storage, and use the /// hash of the last blob as the collection hash. - pub fn to_blobs(&self) -> impl Iterator { + pub fn to_blobs(&self) -> impl DoubleEndedIterator { let meta = CollectionMeta { header: *Self::HEADER, names: self.names(), diff --git a/iroh-blobs/src/store/traits.rs b/iroh-blobs/src/store/traits.rs index 49d0a43abd..9d1e42fc33 100644 --- a/iroh-blobs/src/store/traits.rs +++ b/iroh-blobs/src/store/traits.rs @@ -703,7 +703,7 @@ pub enum ImportProgress { /// does not make any sense. E.g. an in memory implementation will always have /// to copy the file into memory. 
Also, a disk based implementation might choose /// to copy small files even if the mode is `Reference`. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] pub enum ImportMode { /// This mode will copy the file into the database before hashing. /// diff --git a/iroh-net/src/net/interfaces/bsd.rs b/iroh-net/src/net/interfaces/bsd.rs index dd6ca7e3ca..7ef0cd1eb0 100644 --- a/iroh-net/src/net/interfaces/bsd.rs +++ b/iroh-net/src/net/interfaces/bsd.rs @@ -300,7 +300,7 @@ impl WireFormat { Ok(Some(WireMessage::Route(m))) } - #[cfg(any(target_os = "openbsd",))] + #[cfg(target_os = "openbsd")] MessageType::Route => { if data.len() < self.body_off { return Err(RouteError::MessageTooShort); diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 33be81a1cf..2eea0479c2 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -11,6 +11,7 @@ use std::{ use anyhow::{anyhow, Context as _, Result}; use bytes::Bytes; +use futures_buffered::BufferedStreamExt; use futures_lite::{Stream, StreamExt}; use futures_util::{FutureExt, SinkExt}; use iroh_base::{node_addr::AddrInfoOptions, ticket::BlobTicket}; @@ -36,16 +37,18 @@ use tracing::warn; use crate::rpc_protocol::{ BatchAddPathRequest, BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, - BatchCreateRequest, BatchCreateResponse, BatchUpdate, BlobAddPathRequest, BlobAddStreamRequest, - BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, - BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, - BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, - BlobReadAtResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, - NodeStatusRequest, RpcService, SetTagOption, + BatchCreateRequest, BatchCreateResponse, BatchCreateTempTagRequest, BatchUpdate, + BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, + BlobDeleteBlobRequest, BlobDownloadRequest, BlobExportRequest, BlobGetCollectionRequest, + BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, + BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobValidateRequest, + CreateCollectionRequest, CreateCollectionResponse, NodeStatusRequest, RpcService, SetTagOption, }; use super::{flatten, Iroh}; +pub use iroh_blobs::store::ImportMode; + /// Iroh blobs client. #[derive(Debug, Clone)] pub struct Client { @@ -399,8 +402,12 @@ struct BatchInner> { updates: Mutex>, } +/// A batch for write operations. /// - +/// This serves mostly as a scope for temporary tags. +/// +/// It is not a transaction, so things in a batch are not atomic. Also, there is +/// no isolation between batches. #[derive(derive_more::Debug)] pub struct Batch>(Arc>); @@ -412,6 +419,120 @@ impl> TagDrop for BatchInner { } impl> Batch { + /// Write a blob by passing bytes. + pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { + let input = futures_lite::stream::once(Ok(bytes.into())); + self.add_stream(input, format).await + } + + /// Import a blob from a filesystem path. + /// + /// `path` should be an absolute path valid for the file system on which + /// the node runs, which refers to a file. + /// + /// If `import_mode` is TryReference, Iroh will assume that the data will not + /// change and will share it in place without copying to the Iroh data directory + /// if appropriate. 
However, for tiny files, Iroh will copy the data. + /// + /// If `import_mode` is Copy, Iroh will always copy the data. + /// + /// Will return a temp tag for the added blob, as well as the size of the file. + pub async fn add_file( + &self, + path: PathBuf, + import_mode: ImportMode, + format: BlobFormat, + ) -> Result<(TempTag, u64)> { + anyhow::ensure!( + path.is_absolute(), + "Path must be absolute, but got: {:?}", + path + ); + anyhow::ensure!(path.is_file(), "Path does not refer to a file: {:?}", path); + let mut stream = self + .0 + .rpc + .server_streaming(BatchAddPathRequest { + path, + import_mode, + format, + scope: self.0.scope, + }) + .await?; + let mut res_hash = None; + let mut res_size = None; + while let Some(item) = stream.next().await { + match item?.0 { + BatchAddPathProgress::Abort(cause) => { + Err(cause)?; + } + BatchAddPathProgress::Done { hash } => { + res_hash = Some(hash); + } + BatchAddPathProgress::Found { size } => { + res_size = Some(size); + } + _ => {} + } + } + let hash = res_hash.context("Missing hash")?; + let size = res_size.context("Missing size")?; + Ok((self.local_temp_tag(HashAndFormat { hash, format }), size)) + } + + /// Add a directory as a hashseq in collection format + pub async fn add_dir( + &self, + root: PathBuf, + import_mode: ImportMode, + wrap: WrapOption, + ) -> Result { + anyhow::ensure!(root.is_absolute(), "Path must be absolute",); + anyhow::ensure!(root.is_dir(), "Path must be a directory",); + + // let (send, recv) = flume::bounded(32); + // let import_progress = FlumeProgressSender::new(send); + + // import all files below root recursively + let data_sources = crate::util::fs::scan_path(root, wrap)?; + const IO_PARALLELISM: usize = 4; + let result: Vec<_> = futures_lite::stream::iter(data_sources) + .map(|source| { + // let import_progress = import_progress.clone(); + async move { + let name = source.name().to_string(); + let (tag, size) = self + .add_file(source.path().to_owned(), import_mode, BlobFormat::Raw) + .await?; + let hash = *tag.hash(); + anyhow::Ok((name, hash, size, tag)) + } + }) + .buffered_ordered(IO_PARALLELISM) + .try_collect() + .await?; + println!("{:?}", result); + + // create a collection + let (collection, child_tags): (Collection, Vec<_>) = result + .into_iter() + .map(|(name, hash, _, tag)| ((name, hash), tag)) + .unzip(); + + let tag = self.add_collection(collection).await?; + drop(child_tags); + Ok(tag) + } + + /// Add a collection + /// + /// This is a convenience function that converts the collection into two blobs + /// (the metadata and the hash sequence) and adds them, returning a temp tag for + /// the hash sequence. + pub async fn add_collection(&self, collection: Collection) -> Result { + self.add_blob_seq(collection.to_blobs()).await + } + /// Write a blob by passing an async reader. pub async fn add_reader( &self, @@ -423,12 +544,6 @@ impl> Batch { self.add_stream(input, format).await } - /// Write a blob by passing bytes. - pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { - let input = futures_lite::stream::once(Ok(bytes.into())); - self.add_stream(input, format).await - } - /// Write a blob by passing a stream of bytes. 
pub async fn add_stream( &self, @@ -473,51 +588,59 @@ impl> Batch { BatchAddStreamResponse::Result { hash } => { res = Some(hash); } + _ => {} } } let hash = res.context("Missing answer")?; - Ok(self.temp_tag(HashAndFormat { hash, format })) + println!( + "creating temp tag with hash {:?} and format {}", + hash, format + ); + Ok(self.local_temp_tag(HashAndFormat { hash, format })) } - /// Import a blob from a filesystem path. + /// Add a sequence of blobs, where the last is a hash sequence. /// - /// `path` should be an absolute path valid for the file system on which - /// the node runs. - /// If `in_place` is true, Iroh will assume that the data will not change and will share it in - /// place without copying to the Iroh data directory. - pub async fn add_from_path( - &self, - path: PathBuf, - in_place: bool, - format: BlobFormat, - ) -> Result { - let mut stream = self - .0 + /// It is a common pattern in iroh to have a hash sequence with one or more + /// blobs of metadata, and the remaining blobs being the actual data. E.g. + /// a collection is a hash sequence where the first child is the metadata. + pub async fn add_blob_seq(&self, iter: impl Iterator) -> Result { + let mut blobs = iter.peekable(); + let mut res = vec![]; + let res = loop { + let blob = blobs.next().context("Failed to get next blob")?; + if blobs.peek().is_none() { + println!("last blob"); + break self.add_bytes(blob, BlobFormat::HashSeq).await?; + } else { + res.push(self.add_bytes(blob, BlobFormat::Raw).await?); + } + }; + Ok(res) + } + + /// Create a temp tag to protect some content (blob or hashseq) from being deleted. + /// + /// A typical use case is that you are downloading some data and want to protect it + /// from deletion while the download is ongoing, but don't want to protect it permanently + /// until the download is completed. + pub async fn temp_tag(&self, content: HashAndFormat) -> Result { + // Notify the server that we want one temp tag for the given content + self.0 .rpc - .server_streaming(BatchAddPathRequest { - path, - in_place, - format, + .rpc(BatchCreateTempTagRequest { scope: self.0.scope, + content, }) - .await?; - let mut res = None; - while let Some(item) = stream.next().await { - match item?.0 { - BatchAddPathProgress::Abort(cause) => { - Err(cause)?; - } - BatchAddPathProgress::Done { hash } => { - res = Some(hash); - } - _ => {} - } - } - let hash = res.context("Missing answer")?; - Ok(self.temp_tag(HashAndFormat { hash, format })) + .await??; + // Only after success of the above call, we can create the corresponding local temp tag + Ok(self.local_temp_tag(content)) } - fn temp_tag(&self, inner: HashAndFormat) -> TempTag { + /// Creates a temp tag for the given hash and format, without notifying the server. + /// + /// Caution: only do this for data for which you know the server side has created a temp tag. + fn local_temp_tag(&self, inner: HashAndFormat) -> TempTag { let on_drop: Arc = self.0.clone(); let on_drop = Some(Arc::downgrade(&on_drop)); TempTag::new(inner, on_drop) diff --git a/iroh/src/node.rs b/iroh/src/node.rs index 112a7868ca..9c96fccf24 100644 --- a/iroh/src/node.rs +++ b/iroh/src/node.rs @@ -91,12 +91,19 @@ impl BlobScopes { fn store(&mut self, scope: u64, tt: TempTag) { let entry = self.scopes.entry(scope).or_default(); let count = entry.tags.entry(tt.hash_and_format()).or_default(); + println!( + "storing tag {:?} {} in scope {}", + tt.hash(), + tt.format(), + scope + ); tt.leak(); *count += 1; } /// Remove a tag from a scope. 
fn remove_one(&mut self, scope: u64, content: &HashAndFormat, u: Option<&dyn TagDrop>) { + println!("removing tag {:?} from scope {}", content, scope); if let Some(scope) = self.scopes.get_mut(&scope) { if let Some(counter) = scope.tags.get_mut(content) { *counter -= 1; diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index e6093abec0..e958bf5ac8 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -41,18 +41,18 @@ use crate::client::tags::TagInfo; use crate::client::NodeStatus; use crate::rpc_protocol::{ BatchAddPathRequest, BatchAddPathResponse, BatchAddStreamRequest, BatchAddStreamResponse, - BatchAddStreamUpdate, BatchCreateRequest, BatchCreateResponse, BatchUpdate, BlobAddPathRequest, - BlobAddPathResponse, BlobAddStreamRequest, BlobAddStreamResponse, BlobAddStreamUpdate, - BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, - BlobExportRequest, BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, - BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, - BlobReadAtResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, - CreateTagRequest, DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, - DocImportFileResponse, DocSetHashRequest, ListTagsRequest, NodeAddrRequest, - NodeConnectionInfoRequest, NodeConnectionInfoResponse, NodeConnectionsRequest, - NodeConnectionsResponse, NodeIdRequest, NodeRelayRequest, NodeShutdownRequest, - NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, NodeWatchRequest, NodeWatchResponse, - Request, RpcService, SetTagOption, SetTagRequest, + BatchAddStreamUpdate, BatchCreateRequest, BatchCreateResponse, BatchCreateTempTagRequest, + BatchUpdate, BlobAddPathRequest, BlobAddPathResponse, BlobAddStreamRequest, + BlobAddStreamResponse, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, + BlobDownloadRequest, BlobDownloadResponse, BlobExportRequest, BlobExportResponse, + BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, + BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, + BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, CreateTagRequest, + DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, DocImportFileResponse, + DocSetHashRequest, ListTagsRequest, NodeAddrRequest, NodeConnectionInfoRequest, + NodeConnectionInfoResponse, NodeConnectionsRequest, NodeConnectionsResponse, NodeIdRequest, + NodeRelayRequest, NodeShutdownRequest, NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, + NodeWatchRequest, NodeWatchResponse, Request, RpcService, SetTagOption, SetTagRequest, }; use super::NodeInner; @@ -102,7 +102,10 @@ impl Handler { } CreateCollection(msg) => chan.rpc(msg, handler, Self::create_collection).await, BlobGetCollection(msg) => chan.rpc(msg, handler, Self::blob_get_collection).await, - BatchAddStreamRequest(msg) => { + BatchCreateTempTag(msg) => { + chan.rpc(msg, handler, Self::batch_create_temp_tag).await + } + BatchAddStream(msg) => { chan.bidi_streaming(msg, handler, Self::batch_add_stream) .await } @@ -800,8 +803,6 @@ impl Handler { msg: BatchAddPathRequest, progress: flume::Sender, ) -> anyhow::Result<()> { - use iroh_blobs::store::ImportMode; - let progress = FlumeProgressSender::new(progress); // convert import progress to provide progress let import_progress = progress.clone().with_filter_map(move |x| match x { @@ -814,7 +815,7 @@ impl Handler { }); let 
BatchAddPathRequest { path: root, - in_place, + import_mode, format, scope, } = msg; @@ -825,12 +826,6 @@ impl Handler { "trying to add missing path: {}", root.display() ); - - let import_mode = match in_place { - true => ImportMode::TryReference, - false => ImportMode::Copy, - }; - let (tag, _) = self .inner .db @@ -934,6 +929,13 @@ impl Handler { futures_lite::stream::once(BatchCreateResponse::Id(scope_id)) } + #[allow(clippy::unused_async)] + async fn batch_create_temp_tag(self, msg: BatchCreateTempTagRequest) -> RpcResult<()> { + let tag = self.inner.db.temp_tag(msg.content); + self.inner.blob_scopes.lock().unwrap().store(msg.scope, tag); + Ok(()) + } + fn batch_add_stream( self, msg: BatchAddStreamRequest, @@ -985,12 +987,15 @@ impl Handler { }); let import_progress = progress.clone().with_filter_map(move |x| match x { + ImportProgress::OutboardProgress { offset, .. } => { + Some(BatchAddStreamResponse::OutboardProgress { offset }) + } _ => None, }); let (temp_tag, _len) = self .inner .db - .import_stream(stream, BlobFormat::Raw, import_progress) + .import_stream(stream, msg.format, import_progress) .await?; let hash = temp_tag.inner().hash; self.inner diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index 939c37f4d6..da4dba5f7e 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -16,7 +16,7 @@ pub use iroh_blobs::{export::ExportProgress, get::db::DownloadProgress, BlobForm use iroh_blobs::{ format::collection::Collection, provider::BatchAddPathProgress, - store::{BaoBlobSize, ConsistencyCheckProgress}, + store::{BaoBlobSize, ConsistencyCheckProgress, ImportMode}, util::Tag, HashAndFormat, }; @@ -1057,6 +1057,19 @@ impl BidiStreamingMsg for BlobAddStreamRequest { #[derive(Debug, Serialize, Deserialize, derive_more::Into)] pub struct BlobAddStreamResponse(pub AddProgress); +/// Create a temp tag with a given hash and format +#[derive(Debug, Serialize, Deserialize)] +pub struct BatchCreateTempTagRequest { + /// Content to protect + pub content: HashAndFormat, + /// Scope to create the temp tag in + pub scope: u64, +} + +impl RpcMsg for BatchCreateTempTagRequest { + type Response = RpcResult<()>; +} + /// Write a blob from a byte stream #[derive(Serialize, Deserialize, Debug)] pub struct BatchAddStreamRequest { @@ -1088,6 +1101,7 @@ impl BidiStreamingMsg for BatchAddStreamRequest { #[derive(Debug, Serialize, Deserialize)] pub enum BatchAddStreamResponse { Abort(RpcError), + OutboardProgress { offset: u64 }, Result { hash: Hash }, } @@ -1097,7 +1111,7 @@ pub struct BatchAddPathRequest { /// The path to the data to provide. 
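/// Must be an absolute path on the file system of the node.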
pub path: PathBuf, /// Add the data in place - pub in_place: bool, + pub import_mode: ImportMode, /// What format to use for the blob pub format: BlobFormat, /// Scope to create the temp tag in @@ -1175,7 +1189,8 @@ pub enum Request { BatchCreate(BatchCreateRequest), BatchUpdate(BatchUpdate), - BatchAddStreamRequest(BatchAddStreamRequest), + BatchCreateTempTag(BatchCreateTempTagRequest), + BatchAddStream(BatchAddStreamRequest), BatchAddStreamUpdate(BatchAddStreamUpdate), BatchAddPath(BatchAddPathRequest), @@ -1241,8 +1256,7 @@ pub enum Response { CreateCollection(RpcResult), BlobGetCollection(RpcResult), - BatchCreateResponse(BatchCreateResponse), - BatchRequest(BatchCreateRequest), + BatchCreate(BatchCreateResponse), BatchAddStream(BatchAddStreamResponse), BatchAddPath(BatchAddPathResponse), diff --git a/iroh/tests/batch.rs b/iroh/tests/batch.rs index 8d48565a57..240338d123 100644 --- a/iroh/tests/batch.rs +++ b/iroh/tests/batch.rs @@ -1,6 +1,7 @@ use std::time::Duration; use bao_tree::blake3; +use iroh::client::blobs::{ImportMode, WrapOption}; use iroh::node::GcPolicy; use iroh_blobs::{store::mem::Store, BlobFormat}; @@ -11,6 +12,10 @@ async fn create_node() -> anyhow::Result> { .await } +async fn wait_for_gc() { + tokio::time::sleep(Duration::from_millis(50)).await; +} + #[tokio::test] async fn test_batch_create_1() -> anyhow::Result<()> { let node = create_node().await?; @@ -22,12 +27,12 @@ async fn test_batch_create_1() -> anyhow::Result<()> { let hash = *tag.hash(); assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; let data = client.read_to_bytes(hash).await?; assert_eq!(data.as_ref(), expected_data); drop(tag); // Check that the store drops the data when the temp tag gets dropped - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; assert!(client.read_to_bytes(hash).await.is_err()); Ok(()) } @@ -43,18 +48,44 @@ async fn test_batch_create_2() -> anyhow::Result<()> { let hash = *tag.hash(); assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; let data = client.read_to_bytes(hash).await?; assert_eq!(data.as_ref(), expected_data); drop(batch); // Check that the store drops the data when the temp tag gets dropped - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; assert!(client.read_to_bytes(hash).await.is_err()); Ok(()) } #[tokio::test] -async fn test_batch_create_from_path_1() -> anyhow::Result<()> { +async fn test_batch_create_3() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let expected_data: &[u8] = b"test"; + let expected_hash = blake3::hash(expected_data).into(); + let tag = batch.add_bytes(expected_data, BlobFormat::Raw).await?; + let hash = *tag.hash(); + assert_eq!(hash, expected_hash); + // Check that the store has the data and that it is protected from gc + wait_for_gc().await; + assert!(client.read_to_bytes(hash).await.is_ok()); + // Create an additional temp tag for the same data + let tag2 = batch.temp_tag(tag.hash_and_format()).await?; + drop(tag); + // Check that the data is still present + wait_for_gc().await; + assert!(client.read_to_bytes(hash).await.is_ok()); + drop(tag2); + // Check that the data is gone since both temp tags are dropped + wait_for_gc().await; + 
assert!(client.read_to_bytes(hash).await.is_err()); + Ok(()) +} + +#[tokio::test] +async fn test_batch_add_file_1() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; @@ -63,24 +94,24 @@ async fn test_batch_create_from_path_1() -> anyhow::Result<()> { let expected_hash = blake3::hash(expected_data).into(); let temp_path = dir.path().join("test"); std::fs::write(&temp_path, expected_data)?; - let tag = batch - .add_from_path(temp_path, false, BlobFormat::Raw) + let (tag, _) = batch + .add_file(temp_path, ImportMode::Copy, BlobFormat::Raw) .await?; let hash = *tag.hash(); assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; let data = client.read_to_bytes(hash).await?; assert_eq!(data.as_ref(), expected_data); drop(tag); // Check that the store drops the data when the temp tag gets dropped - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; assert!(client.read_to_bytes(hash).await.is_err()); Ok(()) } #[tokio::test] -async fn test_batch_create_from_path_2() -> anyhow::Result<()> { +async fn test_batch_add_file_2() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; @@ -89,18 +120,56 @@ async fn test_batch_create_from_path_2() -> anyhow::Result<()> { let expected_hash = blake3::hash(expected_data).into(); let temp_path = dir.path().join("test"); std::fs::write(&temp_path, expected_data)?; - let tag = batch - .add_from_path(temp_path, false, BlobFormat::Raw) + let (tag, _) = batch + .add_file(temp_path, ImportMode::Copy, BlobFormat::Raw) .await?; let hash = *tag.hash(); assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; let data = client.read_to_bytes(hash).await?; assert_eq!(data.as_ref(), expected_data); drop(batch); // Check that the store drops the data when the temp tag gets dropped - tokio::time::sleep(Duration::from_millis(50)).await; + wait_for_gc().await; assert!(client.read_to_bytes(hash).await.is_err()); Ok(()) } + +#[tokio::test] +async fn test_batch_add_dir_1() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let dir = tempfile::tempdir()?; + let data: [(&str, &[u8]); 2] = [("test1", b"test1"), ("test2", b"test2")]; + for (name, content) in &data { + let temp_path = dir.path().join(name); + std::fs::write(&temp_path, content)?; + } + let tag = batch + .add_dir(dir.path().to_owned(), ImportMode::Copy, WrapOption::NoWrap) + .await?; + let check_present = || async { + assert!(client.read_to_bytes(*tag.hash()).await.is_ok()); + for (_, content) in &data { + let hash = blake3::hash(content).into(); + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), *content); + } + anyhow::Ok(()) + }; + // Check that the store has the data immediately + check_present().await?; + // Check that the store has the data and that it is protected from gc + wait_for_gc().await; + check_present().await?; + drop(tag); + // Check that the store drops the data when the temp tag gets dropped + wait_for_gc().await; + for (_, content) in &data { + let hash = blake3::hash(content).into(); + assert!(client.read_to_bytes(hash).await.is_err()); + } + Ok(()) +} From 
cb7dec980c802bd27b7416e027dccfdb0119e34b Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 14:53:17 +0300 Subject: [PATCH 09/18] - properly implement node status fn - document batches - _with_opts fns to simplify the simple cases --- iroh-cli/src/commands/blob.rs | 9 +- iroh/src/client/blobs.rs | 165 ++++++++++++++++++++++++---------- iroh/src/node.rs | 45 +++++----- iroh/src/node/rpc.rs | 32 +++++-- iroh/src/rpc_protocol.rs | 34 +++++++ iroh/tests/batch.rs | 120 +++++++++++++------------ 6 files changed, 267 insertions(+), 138 deletions(-)
diff --git a/iroh-cli/src/commands/blob.rs b/iroh-cli/src/commands/blob.rs index 82ea5bd4e9..978918b03c 100644 --- a/iroh-cli/src/commands/blob.rs +++ b/iroh-cli/src/commands/blob.rs
@@ -374,10 +374,15 @@ impl BlobCommands { let (blob_status, size) = match (status, format) { (BlobStatus::Complete { size }, BlobFormat::Raw) => ("blob", size), - (BlobStatus::Partial { size }, BlobFormat::Raw) => ("incomplete blob", size), + (BlobStatus::Partial { size }, BlobFormat::Raw) => { + ("incomplete blob", size.value()) + } (BlobStatus::Complete { size }, BlobFormat::HashSeq) => ("collection", size), (BlobStatus::Partial { size }, BlobFormat::HashSeq) => { - ("incomplete collection", size) + ("incomplete collection", size.value()) + } + (BlobStatus::NotFound, _) => { + return Err(anyhow!("blob is missing")); + } }; println!(
diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 2eea0479c2..c4467ea8d5 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs
@@ -41,12 +41,13 @@ use crate::rpc_protocol::{ BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, - BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobValidateRequest, + BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobStatusRequest, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, NodeStatusRequest, RpcService, SetTagOption, }; use super::{flatten, Iroh}; +pub use crate::rpc_protocol::BlobStatus; pub use iroh_blobs::store::ImportMode; /// Iroh blobs client.
@@ -65,6 +66,26 @@ impl Client where C: ServiceConnection, { + /// Get the status of a blob on the node. + /// + /// Returns whether the blob is not found, only partially present, or + /// completely stored, together with its size. + pub async fn status(&self, hash: Hash) -> Result { + let status = self.rpc.rpc(BlobStatusRequest { hash }).await??; + Ok(status.0) + } + + /// Check if a blob is completely stored on the node. + /// + /// This is just a convenience wrapper around `status` that returns a boolean. + pub async fn has(&self, hash: Hash) -> Result { + match self.status(hash).await { + Ok(BlobStatus::Complete { .. }) => Ok(true), + Ok(_) => Ok(false), + Err(err) => Err(err), + } + } + /// Create a new batch for adding data. pub async fn batch(&self) -> Result> { let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?;
@@ -377,17 +398,6 @@ where Ok(ticket) } - - /// Get the status of a blob. - pub async fn status(&self, hash: Hash) -> Result { - // TODO: this could be implemented more efficiently - let reader = self.read(hash).await?; - if reader.is_complete { - Ok(BlobStatus::Complete { size: reader.size }) - } else { - Ok(BlobStatus::Partial { size: reader.size }) - } - } } /// A scope in which blobs can be added.
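// A minimal usage sketch for the status/has calls added above, assuming a
// connected `client: &Client<C>` and a `hash: Hash` already at hand:
async fn report<C: ServiceConnection<RpcService>>(client: &Client<C>, hash: Hash) -> Result<()> {
    match client.status(hash).await? {
        BlobStatus::NotFound => println!("not stored on this node"),
        // a partial blob reports a BaoBlobSize, which may still be unverified
        BlobStatus::Partial { size } => println!("partial, {} bytes so far", size.value()),
        BlobStatus::Complete { size } => println!("complete, {size} bytes"),
    }
    // `has` is the boolean shorthand for "status is Complete"
    println!("completely stored: {}", client.has(hash).await?);
    Ok(())
}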
@@ -418,6 +428,42 @@ impl> TagDrop for BatchInner { } } +/// Options for adding a file as a blob +#[derive(Debug, Clone, Copy, Default)] +pub struct AddFileOpts { + /// The import mode + import_mode: ImportMode, + /// The format of the blob + format: BlobFormat, +} + +/// Options for adding a directory as a collection +#[derive(Debug, Clone, Default)] +pub struct AddDirOpts { + /// The import mode + import_mode: ImportMode, + /// Whether to preserve the directory name + wrap: WrapOption, +} + +/// Options for adding a directory as a collection +#[derive(Debug, Clone)] +pub struct AddReaderOpts { + /// The format of the blob + format: BlobFormat, + /// Size of the chunks to send + chunk_size: usize, +} + +impl Default for AddReaderOpts { + fn default() -> Self { + Self { + format: BlobFormat::Raw, + chunk_size: 1024 * 64, + } + } +} + impl> Batch { /// Write a blob by passing bytes. pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { @@ -425,24 +471,34 @@ impl> Batch { self.add_stream(input, format).await } + /// Import a blob from a filesystem path, using the default options. + /// + /// For more control, use [`Self::add_file_with_opts`]. + pub async fn add_file(&self, path: PathBuf) -> Result<(TempTag, u64)> { + self.add_file_with_opts(path, AddFileOpts::default()).await + } + /// Import a blob from a filesystem path. /// /// `path` should be an absolute path valid for the file system on which /// the node runs, which refers to a file. /// - /// If `import_mode` is TryReference, Iroh will assume that the data will not + /// If you use [ImportMode::TryReference], Iroh will assume that the data will not /// change and will share it in place without copying to the Iroh data directory /// if appropriate. However, for tiny files, Iroh will copy the data. /// - /// If `import_mode` is Copy, Iroh will always copy the data. + /// If you use [ImportMode::Copy], Iroh will always copy the data. /// /// Will return a temp tag for the added blob, as well as the size of the file. - pub async fn add_file( + pub async fn add_file_with_opts( &self, path: PathBuf, - import_mode: ImportMode, - format: BlobFormat, + opts: AddFileOpts, ) -> Result<(TempTag, u64)> { + let AddFileOpts { + import_mode, + format, + } = opts; anyhow::ensure!( path.is_absolute(), "Path must be absolute, but got: {:?}", @@ -480,15 +536,20 @@ impl> Batch { Ok((self.local_temp_tag(HashAndFormat { hash, format }), size)) } - /// Add a directory as a hashseq in collection format - pub async fn add_dir( - &self, - root: PathBuf, - import_mode: ImportMode, - wrap: WrapOption, - ) -> Result { - anyhow::ensure!(root.is_absolute(), "Path must be absolute",); - anyhow::ensure!(root.is_dir(), "Path must be a directory",); + /// Add a directory as a hashseq in iroh collection format + pub async fn add_dir(&self, root: PathBuf) -> Result { + self.add_dir_with_opts(root, Default::default()).await + } + + /// Add a directory as a hashseq in iroh collection format + /// + /// This can also be used to add a single file as a collection, if + /// wrap is set to [WrapOption::Wrap]. + /// + /// However, if you want to add a single file as a raw blob, use add_file instead. 
+ pub async fn add_dir_with_opts(&self, root: PathBuf, opts: AddDirOpts) -> Result { + let AddDirOpts { import_mode, wrap } = opts; + anyhow::ensure!(root.is_absolute(), "Path must be absolute"); // let (send, recv) = flume::bounded(32); // let import_progress = FlumeProgressSender::new(send); @@ -496,13 +557,17 @@ impl> Batch { // import all files below root recursively let data_sources = crate::util::fs::scan_path(root, wrap)?; const IO_PARALLELISM: usize = 4; + let opts = AddFileOpts { + import_mode, + format: BlobFormat::Raw, + }; let result: Vec<_> = futures_lite::stream::iter(data_sources) .map(|source| { // let import_progress = import_progress.clone(); async move { let name = source.name().to_string(); let (tag, size) = self - .add_file(source.path().to_owned(), import_mode, BlobFormat::Raw) + .add_file_with_opts(source.path().to_owned(), opts) .await?; let hash = *tag.hash(); anyhow::Ok((name, hash, size, tag)) @@ -529,22 +594,41 @@ impl> Batch { /// This is a convenience function that converts the collection into two blobs /// (the metadata and the hash sequence) and adds them, returning a temp tag for /// the hash sequence. + /// + /// Note that this does not guarantee that the data that the collection refers to + /// actually exists. It will just create 2 blobs, the metadata and the hash sequence + /// itself. pub async fn add_collection(&self, collection: Collection) -> Result { self.add_blob_seq(collection.to_blobs()).await } /// Write a blob by passing an async reader. + /// + /// This will use a default chunk size of 64KB, and a format of [BlobFormat::Raw]. pub async fn add_reader( &self, reader: impl AsyncRead + Unpin + Send + 'static, - format: BlobFormat, ) -> anyhow::Result { - const CAP: usize = 1024 * 64; // send 64KB per request by default - let input = ReaderStream::with_capacity(reader, CAP); + self.add_reader_with_opts(reader, Default::default()).await + } + + /// Write a blob by passing an async reader. + /// + /// This produces a stream from the reader with a hardcoded buffer size of 64KB. + pub async fn add_reader_with_opts( + &self, + reader: impl AsyncRead + Unpin + Send + 'static, + opts: AddReaderOpts, + ) -> anyhow::Result { + let AddReaderOpts { format, chunk_size } = opts; + let input = ReaderStream::with_capacity(reader, chunk_size); self.add_stream(input, format).await } /// Write a blob by passing a stream of bytes. + /// + /// For convenient interop with common sources of data, this function takes a stream of io::Result. + /// If you have raw bytes, you need to wrap them in io::Result::Ok. pub async fn add_stream( &self, mut input: impl Stream> + Send + Unpin + 'static, @@ -574,9 +658,6 @@ impl> Batch { } } } - sink.close() - .await - .map_err(|err| anyhow!("Failed to close the stream: {err:?}"))?; // this is needed for the remote to notice that the stream is closed drop(sink); let mut res = None; @@ -648,9 +729,10 @@ impl> Batch { } /// Whether to wrap the added data in a collection. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Default, Clone)] pub enum WrapOption { /// Do not wrap the file or directory. + #[default] NoWrap, /// Wrap the file or directory in a collection. Wrap { @@ -659,21 +741,6 @@ pub enum WrapOption { }, } -/// Status information about a blob. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum BlobStatus { - /// The blob is only stored partially. - Partial { - /// The size of the currently stored partial blob. - size: u64, - }, - /// The blob is stored completely. 
- Complete { - /// The size of the blob. - size: u64, - }, -} - /// Outcome of a blob add operation. #[derive(Debug, Clone)] pub struct AddOutcome { diff --git a/iroh/src/node.rs b/iroh/src/node.rs index 9c96fccf24..c4fa1aaeb1 100644 --- a/iroh/src/node.rs +++ b/iroh/src/node.rs @@ -65,46 +65,47 @@ struct NodeInner { rt: LocalPoolHandle, pub(crate) sync: Engine, downloader: Downloader, - blob_scopes: Mutex, + blob_scopes: Mutex, } +/// Keeps track of all the currently active batch operations of the blobs api. +/// +/// #[derive(Debug, Default)] -struct BlobScopes { - scopes: BTreeMap, +struct BlobBatches { + /// Currently active batches + batches: BTreeMap, + /// Used to generate new batch ids. max: u64, } +/// A single batch of blob operations #[derive(Debug, Default)] -struct BlobScope { +struct BlobBatch { + /// Each counter corresponds to the number of temp tags we have sent to the client + /// for this hash and format. Counters should never be zero. tags: BTreeMap, } -impl BlobScopes { - /// Create a new blob scope. +impl BlobBatches { + /// Create a new unique batch id. fn create(&mut self) -> u64 { let id = self.max; self.max += 1; id } - /// Store a tag in a scope. - fn store(&mut self, scope: u64, tt: TempTag) { - let entry = self.scopes.entry(scope).or_default(); + /// Store a temp tag in a batch identified by a batch id. + fn store(&mut self, batch: u64, tt: TempTag) { + let entry = self.batches.entry(batch).or_default(); let count = entry.tags.entry(tt.hash_and_format()).or_default(); - println!( - "storing tag {:?} {} in scope {}", - tt.hash(), - tt.format(), - scope - ); tt.leak(); *count += 1; } - /// Remove a tag from a scope. - fn remove_one(&mut self, scope: u64, content: &HashAndFormat, u: Option<&dyn TagDrop>) { - println!("removing tag {:?} from scope {}", content, scope); - if let Some(scope) = self.scopes.get_mut(&scope) { + /// Remove a tag from a batch. + fn remove_one(&mut self, batch: u64, content: &HashAndFormat, u: Option<&dyn TagDrop>) { + if let Some(scope) = self.batches.get_mut(&batch) { if let Some(counter) = scope.tags.get_mut(content) { *counter -= 1; if let Some(u) = u { @@ -117,9 +118,9 @@ impl BlobScopes { } } - /// Remove an entire scope. - fn remove(&mut self, scope: u64, u: Option<&dyn TagDrop>) { - if let Some(scope) = self.scopes.remove(&scope) { + /// Remove an entire batch. 
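+ ///
+ /// If a [TagDrop] is provided, `on_drop` is called once per counted temp tag,
+ /// so the store's liveness counters stay balanced.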
+ fn remove(&mut self, batch: u64, u: Option<&dyn TagDrop>) { + if let Some(scope) = self.batches.remove(&batch) { for (content, count) in scope.tags { if let Some(u) = u { for _ in 0..count { diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index e958bf5ac8..d0054ad891 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -46,13 +46,14 @@ use crate::rpc_protocol::{ BlobAddStreamResponse, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, BlobExportRequest, BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, - BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, - BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, CreateTagRequest, - DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, DocImportFileResponse, - DocSetHashRequest, ListTagsRequest, NodeAddrRequest, NodeConnectionInfoRequest, - NodeConnectionInfoResponse, NodeConnectionsRequest, NodeConnectionsResponse, NodeIdRequest, - NodeRelayRequest, NodeShutdownRequest, NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, - NodeWatchRequest, NodeWatchResponse, Request, RpcService, SetTagOption, SetTagRequest, + BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobStatus, + BlobStatusRequest, BlobStatusResponse, BlobValidateRequest, CreateCollectionRequest, + CreateCollectionResponse, CreateTagRequest, DocExportFileRequest, DocExportFileResponse, + DocImportFileRequest, DocImportFileResponse, DocSetHashRequest, ListTagsRequest, + NodeAddrRequest, NodeConnectionInfoRequest, NodeConnectionInfoResponse, NodeConnectionsRequest, + NodeConnectionsResponse, NodeIdRequest, NodeRelayRequest, NodeShutdownRequest, + NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, NodeWatchRequest, NodeWatchResponse, + Request, RpcService, SetTagOption, SetTagRequest, }; use super::NodeInner; @@ -91,6 +92,7 @@ impl Handler { .await } NodeConnectionInfo(msg) => chan.rpc(msg, handler, Self::node_connection_info).await, + BlobStatus(msg) => chan.rpc(msg, handler, Self::blob_status).await, BlobList(msg) => chan.server_streaming(msg, handler, Self::blob_list).await, BlobListIncomplete(msg) => { chan.server_streaming(msg, handler, Self::blob_list_incomplete) @@ -397,6 +399,22 @@ impl Handler { Ok(()) } + async fn blob_status(self, msg: BlobStatusRequest) -> RpcResult { + let entry = self.inner.db.get(&msg.hash).await?; + Ok(BlobStatusResponse(match entry { + Some(entry) => { + if entry.is_complete() { + BlobStatus::Complete { + size: entry.size().value(), + } + } else { + BlobStatus::Partial { size: entry.size() } + } + } + None => BlobStatus::NotFound, + })) + } + fn blob_list( self, _msg: BlobListRequest, diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index da4dba5f7e..9d8d92af47 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -208,6 +208,38 @@ impl ServerStreamingMsg for BlobValidateRequest { type Response = ValidateProgress; } +/// Get the status of a blob +#[derive(Debug, Serialize, Deserialize)] +pub struct BlobStatusRequest { + /// The hash of the blob + pub hash: Hash, +} + +/// The response to a status request +#[derive(Debug, Serialize, Deserialize, derive_more::From, derive_more::Into)] +pub struct BlobStatusResponse(pub BlobStatus); + +/// Status information about a blob. 
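+///
+/// A partial blob reports its size as a [BaoBlobSize], since the size of an
+/// incomplete blob may not be verified yet; a complete blob has a known u64 size.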
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum BlobStatus { + /// The blob is not stored on the node. + NotFound, + /// The blob is only stored partially. + Partial { + /// The size of the currently stored partial blob. + size: BaoBlobSize, + }, + /// The blob is stored completely. + Complete { + /// The size of the blob. + size: u64, + }, +} + +impl RpcMsg for BlobStatusRequest { + type Response = RpcResult; +} + /// List all blobs, including collections #[derive(Debug, Serialize, Deserialize)] pub struct BlobListRequest; @@ -1179,6 +1211,7 @@ pub enum Request { BlobDownload(BlobDownloadRequest), BlobExport(BlobExportRequest), BlobList(BlobListRequest), + BlobStatus(BlobStatusRequest), BlobListIncomplete(BlobListIncompleteRequest), BlobListCollections(BlobListCollectionsRequest), BlobDeleteBlob(BlobDeleteBlobRequest), @@ -1247,6 +1280,7 @@ pub enum Response { BlobAddStream(BlobAddStreamResponse), BlobAddPath(BlobAddPathResponse), BlobList(RpcResult), + BlobStatus(RpcResult), BlobListIncomplete(RpcResult), BlobListCollections(RpcResult), BlobDownload(BlobDownloadResponse), diff --git a/iroh/tests/batch.rs b/iroh/tests/batch.rs index 240338d123..ff1c4fb81e 100644 --- a/iroh/tests/batch.rs +++ b/iroh/tests/batch.rs @@ -1,7 +1,6 @@ use std::time::Duration; use bao_tree::blake3; -use iroh::client::blobs::{ImportMode, WrapOption}; use iroh::node::GcPolicy; use iroh_blobs::{store::mem::Store, BlobFormat}; @@ -13,6 +12,7 @@ async fn create_node() -> anyhow::Result> { } async fn wait_for_gc() { + // wait for multiple gc cycles to ensure that the data is actually gone tokio::time::sleep(Duration::from_millis(50)).await; } @@ -21,19 +21,16 @@ async fn test_batch_create_1() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; - let expected_data: &[u8] = b"test"; - let expected_hash = blake3::hash(expected_data).into(); - let tag = batch.add_bytes(expected_data, BlobFormat::Raw).await?; + let data: &[u8] = b"test"; + let tag = batch.add_bytes(data, BlobFormat::Raw).await?; let hash = *tag.hash(); - assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc wait_for_gc().await; - let data = client.read_to_bytes(hash).await?; - assert_eq!(data.as_ref(), expected_data); + assert!(client.has(hash).await?); drop(tag); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_err()); + assert!(!client.has(hash).await?); Ok(()) } @@ -42,102 +39,90 @@ async fn test_batch_create_2() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; - let expected_data: &[u8] = b"test"; - let expected_hash = blake3::hash(expected_data).into(); - let tag = batch.add_bytes(expected_data, BlobFormat::Raw).await?; + let data: &[u8] = b"test"; + let tag = batch.add_bytes(data, BlobFormat::Raw).await?; let hash = *tag.hash(); - assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc wait_for_gc().await; - let data = client.read_to_bytes(hash).await?; - assert_eq!(data.as_ref(), expected_data); + assert!(client.has(hash).await?); drop(batch); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_err()); + assert!(!client.has(hash).await?); Ok(()) } +/// Tests that data is preserved if a second temp tag is 
created for it +/// before the first temp tag is dropped. #[tokio::test] async fn test_batch_create_3() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; - let expected_data: &[u8] = b"test"; - let expected_hash = blake3::hash(expected_data).into(); - let tag = batch.add_bytes(expected_data, BlobFormat::Raw).await?; + let data: &[u8] = b"test"; + let tag = batch.add_bytes(data, BlobFormat::Raw).await?; let hash = *tag.hash(); - assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_ok()); + assert!(client.has(hash).await?); // Create an additional temp tag for the same data let tag2 = batch.temp_tag(tag.hash_and_format()).await?; drop(tag); // Check that the data is still present wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_ok()); + assert!(client.has(hash).await?); drop(tag2); // Check that the data is gone since both temp tags are dropped wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_err()); + assert!(!client.has(hash).await?); Ok(()) } +/// Tests that data goes away when the temp tag is dropped #[tokio::test] async fn test_batch_add_file_1() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; let dir = tempfile::tempdir()?; - let expected_data: &[u8] = b"test"; - let expected_hash = blake3::hash(expected_data).into(); let temp_path = dir.path().join("test"); - std::fs::write(&temp_path, expected_data)?; - let (tag, _) = batch - .add_file(temp_path, ImportMode::Copy, BlobFormat::Raw) - .await?; + std::fs::write(&temp_path, b"test")?; + let (tag, _) = batch.add_file(temp_path).await?; let hash = *tag.hash(); - assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc wait_for_gc().await; - let data = client.read_to_bytes(hash).await?; - assert_eq!(data.as_ref(), expected_data); + assert!(client.has(hash).await?); drop(tag); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_err()); + assert!(!client.has(hash).await?); Ok(()) } +/// Tests that data goes away when the batch is dropped #[tokio::test] async fn test_batch_add_file_2() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; let dir = tempfile::tempdir()?; - let expected_data: &[u8] = b"test"; - let expected_hash = blake3::hash(expected_data).into(); let temp_path = dir.path().join("test"); - std::fs::write(&temp_path, expected_data)?; - let (tag, _) = batch - .add_file(temp_path, ImportMode::Copy, BlobFormat::Raw) - .await?; + std::fs::write(&temp_path, b"test")?; + let (tag, _) = batch.add_file(temp_path).await?; let hash = *tag.hash(); - assert_eq!(hash, expected_hash); // Check that the store has the data and that it is protected from gc wait_for_gc().await; - let data = client.read_to_bytes(hash).await?; - assert_eq!(data.as_ref(), expected_data); + assert!(client.has(hash).await?); drop(batch); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; - assert!(client.read_to_bytes(hash).await.is_err()); + assert!(!client.has(hash).await?); Ok(()) } +/// Tests that add_dir adds the right data #[tokio::test] -async fn test_batch_add_dir_1() -> anyhow::Result<()> { +async fn 
test_batch_add_dir_works() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; @@ -147,29 +132,48 @@ async fn test_batch_add_dir_1() -> anyhow::Result<()> { let temp_path = dir.path().join(name); std::fs::write(&temp_path, content)?; } - let tag = batch - .add_dir(dir.path().to_owned(), ImportMode::Copy, WrapOption::NoWrap) - .await?; - let check_present = || async { - assert!(client.read_to_bytes(*tag.hash()).await.is_ok()); + let tag = batch.add_dir(dir.path().to_owned()).await?; + assert!(client.read_to_bytes(*tag.hash()).await.is_ok()); + for (_, content) in &data { + let hash = blake3::hash(content).into(); + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), *content); + } + Ok(()) +} + +/// Tests that temp tags work properly for hash sequences, using add_dir +/// to add the data. +#[tokio::test] +async fn test_batch_add_dir_2() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let dir = tempfile::tempdir()?; + let data: [(&str, &[u8]); 2] = [("test1", b"test1"), ("test2", b"test2")]; + for (name, content) in &data { + let temp_path = dir.path().join(name); + std::fs::write(&temp_path, content)?; + } + let tag = batch.add_dir(dir.path().to_owned()).await?; + let hash = *tag.hash(); + // weird signature to avoid async move issues + let check_present = |present: &'static bool| async { + assert!(client.has(hash).await? == *present); for (_, content) in &data { let hash = blake3::hash(content).into(); - let data = client.read_to_bytes(hash).await?; - assert_eq!(data.as_ref(), *content); + assert!(client.has(hash).await? == *present); } anyhow::Ok(()) }; - // Check that the store has the data immediately - check_present().await?; - // Check that the store has the data and that it is protected from gc + // Check that the store has the data immediately after adding it + check_present(&true).await?; + // Check that it is protected from gc wait_for_gc().await; - check_present().await?; + check_present(&true).await?; drop(tag); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; - for (_, content) in &data { - let hash = blake3::hash(content).into(); - assert!(client.read_to_bytes(hash).await.is_err()); - } + check_present(&false).await?; Ok(()) } From 70916f4a4bc00e51572eb63fa48268488b5c001b Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 15:19:39 +0300 Subject: [PATCH 10/18] Move the batch API into its own file Still exported in the blobs module though. 
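A minimal sketch of the usage this move keeps unchanged, assuming a running
`node` set up as in iroh/tests/batch.rs and an absolute `path` to an existing
file:

    let client = &node.client().blobs;
    let batch = client.batch().await?;
    let data: &[u8] = b"hello";
    // the returned temp tags protect the blobs from gc
    let tag = batch.add_bytes(data, BlobFormat::Raw).await?;
    let (file_tag, _size) = batch.add_file(path).await?;
    // dropping the tags, or the whole batch, releases that protection
    drop((tag, file_tag, batch));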
--- iroh/src/client/blobs.rs | 365 ++------------------------------ iroh/src/client/blobs/batch.rs | 369 +++++++++++++++++++++++++++++++++ 2 files changed, 383 insertions(+), 351 deletions(-) create mode 100644 iroh/src/client/blobs/batch.rs diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index c4467ea8d5..ad9e9453b7 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -5,47 +5,42 @@ use std::{ io, path::PathBuf, pin::Pin, - sync::{Arc, Mutex}, + sync::Arc, task::{Context, Poll}, }; use anyhow::{anyhow, Context as _, Result}; use bytes::Bytes; -use futures_buffered::BufferedStreamExt; use futures_lite::{Stream, StreamExt}; -use futures_util::{FutureExt, SinkExt}; +use futures_util::SinkExt; use iroh_base::{node_addr::AddrInfoOptions, ticket::BlobTicket}; use iroh_blobs::{ export::ExportProgress as BytesExportProgress, format::collection::Collection, get::db::DownloadProgress as BytesDownloadProgress, - provider::BatchAddPathProgress, store::{ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress}, - util::TagDrop, - BlobFormat, Hash, HashAndFormat, Tag, TempTag, + BlobFormat, Hash, Tag, }; use iroh_net::NodeAddr; use portable_atomic::{AtomicU64, Ordering}; -use quic_rpc::{ - client::{BoxStreamSync, UpdateSink}, - RpcClient, ServiceConnection, -}; +use quic_rpc::{client::BoxStreamSync, RpcClient, ServiceConnection}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf}; use tokio_util::io::{ReaderStream, StreamReader}; use tracing::warn; use crate::rpc_protocol::{ - BatchAddPathRequest, BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, - BatchCreateRequest, BatchCreateResponse, BatchCreateTempTagRequest, BatchUpdate, - BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, - BlobDeleteBlobRequest, BlobDownloadRequest, BlobExportRequest, BlobGetCollectionRequest, - BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, - BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobStatusRequest, BlobValidateRequest, - CreateCollectionRequest, CreateCollectionResponse, NodeStatusRequest, RpcService, SetTagOption, + BatchCreateRequest, BatchCreateResponse, BlobAddPathRequest, BlobAddStreamRequest, + BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, + BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, + BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, + BlobReadAtResponse, BlobStatusRequest, BlobValidateRequest, CreateCollectionRequest, + CreateCollectionResponse, NodeStatusRequest, RpcService, SetTagOption, }; use super::{flatten, Iroh}; +mod batch; +pub use batch::{AddDirOpts, AddFileOpts, AddReaderOpts, Batch}; pub use crate::rpc_protocol::BlobStatus; pub use iroh_blobs::store::ImportMode; @@ -89,15 +84,11 @@ where /// Create a new batch for adding data. pub async fn batch(&self) -> Result> { let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?; - let updates = Mutex::new(updates); let BatchCreateResponse::Id(scope) = stream.next().await.context("expected scope id")??; let rpc = self.rpc.clone(); - Ok(Batch(Arc::new(BatchInner { - scope, - rpc, - updates, - }))) + Ok(Batch::new(scope, rpc, updates)) } + /// Stream the contents of a a single blob. /// /// Returns a [`Reader`], which can report the size of the blob before reading it. @@ -400,334 +391,6 @@ where } } -/// A scope in which blobs can be added. 
-#[derive(derive_more::Debug)] -struct BatchInner> { - /// The id of the scope. - scope: u64, - /// The rpc client. - rpc: RpcClient, - /// The stream to send drop - #[debug(skip)] - updates: Mutex>, -} - -/// A batch for write operations. -/// -/// This serves mostly as a scope for temporary tags. -/// -/// It is not a transaction, so things in a batch are not atomic. Also, there is -/// no isolation between batches. -#[derive(derive_more::Debug)] -pub struct Batch>(Arc>); - -impl> TagDrop for BatchInner { - fn on_drop(&self, content: &HashAndFormat) { - let mut updates = self.updates.lock().unwrap(); - updates.send(BatchUpdate::Drop(*content)).now_or_never(); - } -} - -/// Options for adding a file as a blob -#[derive(Debug, Clone, Copy, Default)] -pub struct AddFileOpts { - /// The import mode - import_mode: ImportMode, - /// The format of the blob - format: BlobFormat, -} - -/// Options for adding a directory as a collection -#[derive(Debug, Clone, Default)] -pub struct AddDirOpts { - /// The import mode - import_mode: ImportMode, - /// Whether to preserve the directory name - wrap: WrapOption, -} - -/// Options for adding a directory as a collection -#[derive(Debug, Clone)] -pub struct AddReaderOpts { - /// The format of the blob - format: BlobFormat, - /// Size of the chunks to send - chunk_size: usize, -} - -impl Default for AddReaderOpts { - fn default() -> Self { - Self { - format: BlobFormat::Raw, - chunk_size: 1024 * 64, - } - } -} - -impl> Batch { - /// Write a blob by passing bytes. - pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { - let input = futures_lite::stream::once(Ok(bytes.into())); - self.add_stream(input, format).await - } - - /// Import a blob from a filesystem path, using the default options. - /// - /// For more control, use [`Self::add_file_with_opts`]. - pub async fn add_file(&self, path: PathBuf) -> Result<(TempTag, u64)> { - self.add_file_with_opts(path, AddFileOpts::default()).await - } - - /// Import a blob from a filesystem path. - /// - /// `path` should be an absolute path valid for the file system on which - /// the node runs, which refers to a file. - /// - /// If you use [ImportMode::TryReference], Iroh will assume that the data will not - /// change and will share it in place without copying to the Iroh data directory - /// if appropriate. However, for tiny files, Iroh will copy the data. - /// - /// If you use [ImportMode::Copy], Iroh will always copy the data. - /// - /// Will return a temp tag for the added blob, as well as the size of the file. 
- pub async fn add_file_with_opts( - &self, - path: PathBuf, - opts: AddFileOpts, - ) -> Result<(TempTag, u64)> { - let AddFileOpts { - import_mode, - format, - } = opts; - anyhow::ensure!( - path.is_absolute(), - "Path must be absolute, but got: {:?}", - path - ); - anyhow::ensure!(path.is_file(), "Path does not refer to a file: {:?}", path); - let mut stream = self - .0 - .rpc - .server_streaming(BatchAddPathRequest { - path, - import_mode, - format, - scope: self.0.scope, - }) - .await?; - let mut res_hash = None; - let mut res_size = None; - while let Some(item) = stream.next().await { - match item?.0 { - BatchAddPathProgress::Abort(cause) => { - Err(cause)?; - } - BatchAddPathProgress::Done { hash } => { - res_hash = Some(hash); - } - BatchAddPathProgress::Found { size } => { - res_size = Some(size); - } - _ => {} - } - } - let hash = res_hash.context("Missing hash")?; - let size = res_size.context("Missing size")?; - Ok((self.local_temp_tag(HashAndFormat { hash, format }), size)) - } - - /// Add a directory as a hashseq in iroh collection format - pub async fn add_dir(&self, root: PathBuf) -> Result { - self.add_dir_with_opts(root, Default::default()).await - } - - /// Add a directory as a hashseq in iroh collection format - /// - /// This can also be used to add a single file as a collection, if - /// wrap is set to [WrapOption::Wrap]. - /// - /// However, if you want to add a single file as a raw blob, use add_file instead. - pub async fn add_dir_with_opts(&self, root: PathBuf, opts: AddDirOpts) -> Result { - let AddDirOpts { import_mode, wrap } = opts; - anyhow::ensure!(root.is_absolute(), "Path must be absolute"); - - // let (send, recv) = flume::bounded(32); - // let import_progress = FlumeProgressSender::new(send); - - // import all files below root recursively - let data_sources = crate::util::fs::scan_path(root, wrap)?; - const IO_PARALLELISM: usize = 4; - let opts = AddFileOpts { - import_mode, - format: BlobFormat::Raw, - }; - let result: Vec<_> = futures_lite::stream::iter(data_sources) - .map(|source| { - // let import_progress = import_progress.clone(); - async move { - let name = source.name().to_string(); - let (tag, size) = self - .add_file_with_opts(source.path().to_owned(), opts) - .await?; - let hash = *tag.hash(); - anyhow::Ok((name, hash, size, tag)) - } - }) - .buffered_ordered(IO_PARALLELISM) - .try_collect() - .await?; - println!("{:?}", result); - - // create a collection - let (collection, child_tags): (Collection, Vec<_>) = result - .into_iter() - .map(|(name, hash, _, tag)| ((name, hash), tag)) - .unzip(); - - let tag = self.add_collection(collection).await?; - drop(child_tags); - Ok(tag) - } - - /// Add a collection - /// - /// This is a convenience function that converts the collection into two blobs - /// (the metadata and the hash sequence) and adds them, returning a temp tag for - /// the hash sequence. - /// - /// Note that this does not guarantee that the data that the collection refers to - /// actually exists. It will just create 2 blobs, the metadata and the hash sequence - /// itself. - pub async fn add_collection(&self, collection: Collection) -> Result { - self.add_blob_seq(collection.to_blobs()).await - } - - /// Write a blob by passing an async reader. - /// - /// This will use a default chunk size of 64KB, and a format of [BlobFormat::Raw]. 
- pub async fn add_reader( - &self, - reader: impl AsyncRead + Unpin + Send + 'static, - ) -> anyhow::Result { - self.add_reader_with_opts(reader, Default::default()).await - } - - /// Write a blob by passing an async reader. - /// - /// This produces a stream from the reader with a hardcoded buffer size of 64KB. - pub async fn add_reader_with_opts( - &self, - reader: impl AsyncRead + Unpin + Send + 'static, - opts: AddReaderOpts, - ) -> anyhow::Result { - let AddReaderOpts { format, chunk_size } = opts; - let input = ReaderStream::with_capacity(reader, chunk_size); - self.add_stream(input, format).await - } - - /// Write a blob by passing a stream of bytes. - /// - /// For convenient interop with common sources of data, this function takes a stream of io::Result. - /// If you have raw bytes, you need to wrap them in io::Result::Ok. - pub async fn add_stream( - &self, - mut input: impl Stream> + Send + Unpin + 'static, - format: BlobFormat, - ) -> Result { - let (mut sink, mut stream) = self - .0 - .rpc - .bidi(BatchAddStreamRequest { - scope: self.0.scope, - format, - }) - .await?; - while let Some(item) = input.next().await { - match item { - Ok(chunk) => { - sink.send(BatchAddStreamUpdate::Chunk(chunk)) - .await - .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; - } - Err(err) => { - warn!("Abort send, reason: failed to read from source stream: {err:?}"); - sink.send(BatchAddStreamUpdate::Abort) - .await - .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; - break; - } - } - } - // this is needed for the remote to notice that the stream is closed - drop(sink); - let mut res = None; - while let Some(item) = stream.next().await { - match item? { - BatchAddStreamResponse::Abort(cause) => { - Err(cause)?; - } - BatchAddStreamResponse::Result { hash } => { - res = Some(hash); - } - _ => {} - } - } - let hash = res.context("Missing answer")?; - println!( - "creating temp tag with hash {:?} and format {}", - hash, format - ); - Ok(self.local_temp_tag(HashAndFormat { hash, format })) - } - - /// Add a sequence of blobs, where the last is a hash sequence. - /// - /// It is a common pattern in iroh to have a hash sequence with one or more - /// blobs of metadata, and the remaining blobs being the actual data. E.g. - /// a collection is a hash sequence where the first child is the metadata. - pub async fn add_blob_seq(&self, iter: impl Iterator) -> Result { - let mut blobs = iter.peekable(); - let mut res = vec![]; - let res = loop { - let blob = blobs.next().context("Failed to get next blob")?; - if blobs.peek().is_none() { - println!("last blob"); - break self.add_bytes(blob, BlobFormat::HashSeq).await?; - } else { - res.push(self.add_bytes(blob, BlobFormat::Raw).await?); - } - }; - Ok(res) - } - - /// Create a temp tag to protect some content (blob or hashseq) from being deleted. - /// - /// A typical use case is that you are downloading some data and want to protect it - /// from deletion while the download is ongoing, but don't want to protect it permanently - /// until the download is completed. 
- pub async fn temp_tag(&self, content: HashAndFormat) -> Result { - // Notify the server that we want one temp tag for the given content - self.0 - .rpc - .rpc(BatchCreateTempTagRequest { - scope: self.0.scope, - content, - }) - .await??; - // Only after success of the above call, we can create the corresponding local temp tag - Ok(self.local_temp_tag(content)) - } - - /// Creates a temp tag for the given hash and format, without notifying the server. - /// - /// Caution: only do this for data for which you know the server side has created a temp tag. - fn local_temp_tag(&self, inner: HashAndFormat) -> TempTag { - let on_drop: Arc = self.0.clone(); - let on_drop = Some(Arc::downgrade(&on_drop)); - TempTag::new(inner, on_drop) - } -} - /// Whether to wrap the added data in a collection. #[derive(Debug, Serialize, Deserialize, Default, Clone)] pub enum WrapOption { diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs new file mode 100644 index 0000000000..0987b37360 --- /dev/null +++ b/iroh/src/client/blobs/batch.rs @@ -0,0 +1,369 @@ +use std::{ + io, + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use anyhow::{anyhow, Context, Result}; +use bytes::Bytes; +use futures_buffered::BufferedStreamExt; +use futures_lite::StreamExt; +use futures_util::{FutureExt, SinkExt, Stream}; +use iroh_blobs::{ + format::collection::Collection, provider::BatchAddPathProgress, store::ImportMode, + util::TagDrop, BlobFormat, HashAndFormat, TempTag, +}; +use quic_rpc::{client::UpdateSink, RpcClient, ServiceConnection}; +use tokio::io::AsyncRead; +use tokio_util::io::ReaderStream; +use tracing::warn; + +use crate::{ + client::RpcService, + rpc_protocol::{ + BatchAddPathRequest, BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, + BatchCreateTempTagRequest, BatchUpdate, + }, +}; + +use super::WrapOption; + +/// A scope in which blobs can be added. +#[derive(derive_more::Debug)] +struct BatchInner> { + /// The id of the scope. + scope: u64, + /// The rpc client. + rpc: RpcClient, + /// The stream to send drop + #[debug(skip)] + updates: Mutex>, +} + +/// A batch for write operations. +/// +/// This serves mostly as a scope for temporary tags. +/// +/// It is not a transaction, so things in a batch are not atomic. Also, there is +/// no isolation between batches. 
+#[derive(derive_more::Debug)] +pub struct Batch>(Arc>); + +impl> TagDrop for BatchInner { + fn on_drop(&self, content: &HashAndFormat) { + let mut updates = self.updates.lock().unwrap(); + updates.send(BatchUpdate::Drop(*content)).now_or_never(); + } +} + +/// Options for adding a file as a blob +#[derive(Debug, Clone, Copy, Default)] +pub struct AddFileOpts { + /// The import mode + import_mode: ImportMode, + /// The format of the blob + format: BlobFormat, +} + +/// Options for adding a directory as a collection +#[derive(Debug, Clone, Default)] +pub struct AddDirOpts { + /// The import mode + import_mode: ImportMode, + /// Whether to preserve the directory name + wrap: WrapOption, +} + +/// Options for adding data from an async reader +#[derive(Debug, Clone)] +pub struct AddReaderOpts { + /// The format of the blob + format: BlobFormat, + /// Size of the chunks to send + chunk_size: usize, +} + +impl Default for AddReaderOpts { + fn default() -> Self { + Self { + format: BlobFormat::Raw, + chunk_size: 1024 * 64, + } + } +} + +impl> Batch { + pub(super) fn new( + scope: u64, + rpc: RpcClient, + updates: UpdateSink, + ) -> Self { + Self(Arc::new(BatchInner { + scope, + rpc, + updates: updates.into(), + })) + } + + /// Write a blob by passing bytes. + pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { + let input = futures_lite::stream::once(Ok(bytes.into())); + self.add_stream(input, format).await + } + + /// Import a blob from a filesystem path, using the default options. + /// + /// For more control, use [`Self::add_file_with_opts`]. + pub async fn add_file(&self, path: PathBuf) -> Result<(TempTag, u64)> { + self.add_file_with_opts(path, AddFileOpts::default()).await + } + + /// Import a blob from a filesystem path. + /// + /// `path` should be an absolute path valid for the file system on which + /// the node runs, which refers to a file. + /// + /// If you use [ImportMode::TryReference], Iroh will assume that the data will not + /// change and will share it in place without copying to the Iroh data directory + /// if appropriate. However, for tiny files, Iroh will copy the data. + /// + /// If you use [ImportMode::Copy], Iroh will always copy the data. + /// + /// Will return a temp tag for the added blob, as well as the size of the file.
+ pub async fn add_file_with_opts( + &self, + path: PathBuf, + opts: AddFileOpts, + ) -> Result<(TempTag, u64)> { + let AddFileOpts { + import_mode, + format, + } = opts; + anyhow::ensure!( + path.is_absolute(), + "Path must be absolute, but got: {:?}", + path + ); + anyhow::ensure!(path.is_file(), "Path does not refer to a file: {:?}", path); + let mut stream = self + .0 + .rpc + .server_streaming(BatchAddPathRequest { + path, + import_mode, + format, + scope: self.0.scope, + }) + .await?; + let mut res_hash = None; + let mut res_size = None; + while let Some(item) = stream.next().await { + match item?.0 { + BatchAddPathProgress::Abort(cause) => { + Err(cause)?; + } + BatchAddPathProgress::Done { hash } => { + res_hash = Some(hash); + } + BatchAddPathProgress::Found { size } => { + res_size = Some(size); + } + _ => {} + } + } + let hash = res_hash.context("Missing hash")?; + let size = res_size.context("Missing size")?; + Ok((self.local_temp_tag(HashAndFormat { hash, format }), size)) + } + + /// Add a directory as a hashseq in iroh collection format + pub async fn add_dir(&self, root: PathBuf) -> Result { + self.add_dir_with_opts(root, Default::default()).await + } + + /// Add a directory as a hashseq in iroh collection format + /// + /// This can also be used to add a single file as a collection, if + /// wrap is set to [WrapOption::Wrap]. + /// + /// However, if you want to add a single file as a raw blob, use add_file instead. + pub async fn add_dir_with_opts(&self, root: PathBuf, opts: AddDirOpts) -> Result { + let AddDirOpts { import_mode, wrap } = opts; + anyhow::ensure!(root.is_absolute(), "Path must be absolute"); + + // let (send, recv) = flume::bounded(32); + // let import_progress = FlumeProgressSender::new(send); + + // import all files below root recursively + let data_sources = crate::util::fs::scan_path(root, wrap)?; + const IO_PARALLELISM: usize = 4; + let opts = AddFileOpts { + import_mode, + format: BlobFormat::Raw, + }; + let result: Vec<_> = futures_lite::stream::iter(data_sources) + .map(|source| { + // let import_progress = import_progress.clone(); + async move { + let name = source.name().to_string(); + let (tag, size) = self + .add_file_with_opts(source.path().to_owned(), opts) + .await?; + let hash = *tag.hash(); + anyhow::Ok((name, hash, size, tag)) + } + }) + .buffered_ordered(IO_PARALLELISM) + .try_collect() + .await?; + println!("{:?}", result); + + // create a collection + let (collection, child_tags): (Collection, Vec<_>) = result + .into_iter() + .map(|(name, hash, _, tag)| ((name, hash), tag)) + .unzip(); + + let tag = self.add_collection(collection).await?; + drop(child_tags); + Ok(tag) + } + + /// Add a collection + /// + /// This is a convenience function that converts the collection into two blobs + /// (the metadata and the hash sequence) and adds them, returning a temp tag for + /// the hash sequence. + /// + /// Note that this does not guarantee that the data that the collection refers to + /// actually exists. It will just create 2 blobs, the metadata and the hash sequence + /// itself. + pub async fn add_collection(&self, collection: Collection) -> Result { + self.add_blob_seq(collection.to_blobs()).await + } + + /// Write a blob by passing an async reader. + /// + /// This will use a default chunk size of 64KB, and a format of [BlobFormat::Raw]. 
+ pub async fn add_reader( + &self, + reader: impl AsyncRead + Unpin + Send + 'static, + ) -> anyhow::Result { + self.add_reader_with_opts(reader, Default::default()).await + } + + /// Write a blob by passing an async reader. + /// + /// This produces a stream from the reader with a hardcoded buffer size of 64KB. + pub async fn add_reader_with_opts( + &self, + reader: impl AsyncRead + Unpin + Send + 'static, + opts: AddReaderOpts, + ) -> anyhow::Result { + let AddReaderOpts { format, chunk_size } = opts; + let input = ReaderStream::with_capacity(reader, chunk_size); + self.add_stream(input, format).await + } + + /// Write a blob by passing a stream of bytes. + /// + /// For convenient interop with common sources of data, this function takes a stream of io::Result. + /// If you have raw bytes, you need to wrap them in io::Result::Ok. + pub async fn add_stream( + &self, + mut input: impl Stream> + Send + Unpin + 'static, + format: BlobFormat, + ) -> Result { + let (mut sink, mut stream) = self + .0 + .rpc + .bidi(BatchAddStreamRequest { + scope: self.0.scope, + format, + }) + .await?; + while let Some(item) = input.next().await { + match item { + Ok(chunk) => { + sink.send(BatchAddStreamUpdate::Chunk(chunk)) + .await + .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; + } + Err(err) => { + warn!("Abort send, reason: failed to read from source stream: {err:?}"); + sink.send(BatchAddStreamUpdate::Abort) + .await + .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; + break; + } + } + } + // this is needed for the remote to notice that the stream is closed + drop(sink); + let mut res = None; + while let Some(item) = stream.next().await { + match item? { + BatchAddStreamResponse::Abort(cause) => { + Err(cause)?; + } + BatchAddStreamResponse::Result { hash } => { + res = Some(hash); + } + _ => {} + } + } + let hash = res.context("Missing answer")?; + println!( + "creating temp tag with hash {:?} and format {}", + hash, format + ); + Ok(self.local_temp_tag(HashAndFormat { hash, format })) + } + + /// Add a sequence of blobs, where the last is a hash sequence. + /// + /// It is a common pattern in iroh to have a hash sequence with one or more + /// blobs of metadata, and the remaining blobs being the actual data. E.g. + /// a collection is a hash sequence where the first child is the metadata. + pub async fn add_blob_seq(&self, iter: impl Iterator) -> Result { + let mut blobs = iter.peekable(); + let mut res = vec![]; + let res = loop { + let blob = blobs.next().context("Failed to get next blob")?; + if blobs.peek().is_none() { + println!("last blob"); + break self.add_bytes(blob, BlobFormat::HashSeq).await?; + } else { + res.push(self.add_bytes(blob, BlobFormat::Raw).await?); + } + }; + Ok(res) + } + + /// Create a temp tag to protect some content (blob or hashseq) from being deleted. + /// + /// A typical use case is that you are downloading some data and want to protect it + /// from deletion while the download is ongoing, but don't want to protect it permanently + /// until the download is completed. 
+ pub async fn temp_tag(&self, content: HashAndFormat) -> Result { + // Notify the server that we want one temp tag for the given content + self.0 + .rpc + .rpc(BatchCreateTempTagRequest { + scope: self.0.scope, + content, + }) + .await??; + // Only after success of the above call, we can create the corresponding local temp tag + Ok(self.local_temp_tag(content)) + } + + /// Creates a temp tag for the given hash and format, without notifying the server. + /// + /// Caution: only do this for data for which you know the server side has created a temp tag. + fn local_temp_tag(&self, inner: HashAndFormat) -> TempTag { + let on_drop: Arc = self.0.clone(); + let on_drop = Some(Arc::downgrade(&on_drop)); + TempTag::new(inner, on_drop) + } +} From 2258eb60a53a848197c036c08db3f728270d360d Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 17:00:39 +0300 Subject: [PATCH 11/18] more docs, and a test reorg --- iroh/src/client/blobs.rs | 4 + iroh/src/client/blobs/batch.rs | 45 +++++---- iroh/src/rpc_protocol.rs | 5 +- iroh/tests/batch.rs | 170 ++++++++++++++++++++------------- 4 files changed, 140 insertions(+), 84 deletions(-) diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index ad9e9453b7..e9af9f6336 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -82,6 +82,10 @@ where } /// Create a new batch for adding data. + /// + /// A batch is a context in which temp tags are created and data is added to the node. Temp tags + /// are automatically deleted when the batch is dropped, leading to the data being garbage collected + /// unless a permanent tag is created for it. pub async fn batch(&self) -> Result> { let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?; let BatchCreateResponse::Id(scope) = stream.next().await.context("expected scope id")??; diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index 0987b37360..57abdedb3a 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -60,27 +60,27 @@ impl> TagDrop for BatchInner { #[derive(Debug, Clone, Copy, Default)] pub struct AddFileOpts { /// The import mode - import_mode: ImportMode, + pub import_mode: ImportMode, /// The format of the blob - format: BlobFormat, + pub format: BlobFormat, } /// Options for adding a directory as a collection #[derive(Debug, Clone, Default)] pub struct AddDirOpts { /// The import mode - import_mode: ImportMode, + pub import_mode: ImportMode, /// Whether to preserve the directory name - wrap: WrapOption, + pub wrap: WrapOption, } /// Options for adding data from an async reader #[derive(Debug, Clone)] pub struct AddReaderOpts { /// The format of the blob - format: BlobFormat, + pub format: BlobFormat, /// Size of the chunks to send - chunk_size: usize, + pub chunk_size: usize, } impl Default for AddReaderOpts { @@ -106,9 +106,18 @@ impl> Batch { } /// Write a blob by passing bytes. - pub async fn add_bytes(&self, bytes: impl Into, format: BlobFormat) -> Result { + pub async fn add_bytes(&self, bytes: impl Into) -> Result { + self.add_bytes_with_opts(bytes, Default::default()).await + } + + /// Write a blob by passing bytes. + pub async fn add_bytes_with_opts( + &self, + bytes: impl Into, + format: BlobFormat, + ) -> Result { let input = futures_lite::stream::once(Ok(bytes.into())); - self.add_stream(input, format).await + self.add_stream_with_opts(input, format).await } /// Import a blob from a filesystem path, using the default options.
@@ -262,14 +271,22 @@ impl> Batch { ) -> anyhow::Result { let AddReaderOpts { format, chunk_size } = opts; let input = ReaderStream::with_capacity(reader, chunk_size); - self.add_stream(input, format).await + self.add_stream_with_opts(input, format).await + } + + /// Write a blob by passing a stream of bytes. + pub async fn add_stream( + &self, + input: impl Stream> + Send + Unpin + 'static, + ) -> Result { + self.add_stream_with_opts(input, Default::default()).await } /// Write a blob by passing a stream of bytes. /// /// For convenient interop with common sources of data, this function takes a stream of io::Result. /// If you have raw bytes, you need to wrap them in io::Result::Ok. - pub async fn add_stream( + pub async fn add_stream_with_opts( &self, mut input: impl Stream> + Send + Unpin + 'static, format: BlobFormat, @@ -313,10 +330,6 @@ impl> Batch { } } let hash = res.context("Missing answer")?; - println!( - "creating temp tag with hash {:?} and format {}", - hash, format - ); Ok(self.local_temp_tag(HashAndFormat { hash, format })) } @@ -332,9 +345,9 @@ impl> Batch { let blob = blobs.next().context("Failed to get next blob")?; if blobs.peek().is_none() { println!("last blob"); - break self.add_bytes(blob, BlobFormat::HashSeq).await?; + break self.add_bytes_with_opts(blob, BlobFormat::HashSeq).await?; } else { - res.push(self.add_bytes(blob, BlobFormat::Raw).await?); + res.push(self.add_bytes(blob).await?); } }; Ok(res) diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index b38c5d7cc2..51ccf42832 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -225,11 +225,14 @@ pub enum BlobStatus { /// The blob is only stored partially. Partial { /// The size of the currently stored partial blob. + /// + /// This can be either a verified size if the last chunk was received, + /// or an unverified size if the last chunk was not yet received. size: BaoBlobSize, }, /// The blob is stored completely. Complete { - /// The size of the blob. + /// The size of the blob. For a complete blob the size is always known. 
size: u64, }, } diff --git a/iroh/tests/batch.rs b/iroh/tests/batch.rs index ff1c4fb81e..31e64207bf 100644 --- a/iroh/tests/batch.rs +++ b/iroh/tests/batch.rs @@ -1,8 +1,12 @@ -use std::time::Duration; +use std::{io, time::Duration}; use bao_tree::blake3; -use iroh::node::GcPolicy; -use iroh_blobs::{store::mem::Store, BlobFormat}; +use bytes::Bytes; +use iroh::{ + client::blobs::{AddDirOpts, WrapOption}, + node::GcPolicy, +}; +use iroh_blobs::store::mem::Store; async fn create_node() -> anyhow::Result> { iroh::node::Node::memory() @@ -16,136 +20,168 @@ async fn wait_for_gc() { tokio::time::sleep(Duration::from_millis(50)).await; } +/// Test that add_bytes adds the right data #[tokio::test] -async fn test_batch_create_1() -> anyhow::Result<()> { +async fn add_bytes() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; let data: &[u8] = b"test"; - let tag = batch.add_bytes(data, BlobFormat::Raw).await?; + let tag = batch.add_bytes(data).await?; let hash = *tag.hash(); - // Check that the store has the data and that it is protected from gc - wait_for_gc().await; - assert!(client.has(hash).await?); - drop(tag); - // Check that the store drops the data when the temp tag gets dropped - wait_for_gc().await; - assert!(!client.has(hash).await?); + let actual = client.read_to_bytes(hash).await?; + assert_eq!(hash, blake3::hash(data).into()); + assert_eq!(actual.as_ref(), data); Ok(()) } +/// Test that add_stream adds the right data #[tokio::test] -async fn test_batch_create_2() -> anyhow::Result<()> { +async fn add_stream() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; let data: &[u8] = b"test"; - let tag = batch.add_bytes(data, BlobFormat::Raw).await?; + let data_stream = futures_lite::stream::iter([io::Result::Ok(Bytes::copy_from_slice(data))]); + let tag = batch.add_stream(data_stream).await?; let hash = *tag.hash(); - // Check that the store has the data and that it is protected from gc - wait_for_gc().await; - assert!(client.has(hash).await?); - drop(batch); - // Check that the store drops the data when the temp tag gets dropped - wait_for_gc().await; - assert!(!client.has(hash).await?); + let actual = client.read_to_bytes(hash).await?; + assert_eq!(hash, blake3::hash(data).into()); + assert_eq!(actual.as_ref(), data); Ok(()) } -/// Tests that data is preserved if a second temp tag is created for it -/// before the first temp tag is dropped.
+/// Test that add_file adds the right data #[tokio::test] -async fn test_batch_create_3() -> anyhow::Result<()> { +async fn add_file() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; - let data: &[u8] = b"test"; - let tag = batch.add_bytes(data, BlobFormat::Raw).await?; + let dir = tempfile::tempdir()?; + let temp_path = dir.path().join("test"); + std::fs::write(&temp_path, b"test")?; + let (tag, _) = batch.add_file(temp_path).await?; let hash = *tag.hash(); - // Check that the store has the data and that it is protected from gc - wait_for_gc().await; - assert!(client.has(hash).await?); - // Create an additional temp tag for the same data - let tag2 = batch.temp_tag(tag.hash_and_format()).await?; - drop(tag); - // Check that the data is still present - wait_for_gc().await; - assert!(client.has(hash).await?); - drop(tag2); - // Check that the data is gone since both temp tags are dropped - wait_for_gc().await; - assert!(!client.has(hash).await?); + let actual = client.read_to_bytes(hash).await?; + assert_eq!(hash, blake3::hash(b"test").into()); + assert_eq!(actual.as_ref(), b"test"); Ok(()) } -/// Tests that data goes away when the temp tag is dropped +/// Tests that add_dir adds the right data #[tokio::test] -async fn test_batch_add_file_1() -> anyhow::Result<()> { +async fn add_dir() -> anyhow::Result<()> { + let node = create_node().await?; + let client = &node.client().blobs; + let batch = client.batch().await?; + let dir = tempfile::tempdir()?; + let data: [(&str, &[u8]); 2] = [("test1", b"test1"), ("test2", b"test2")]; + for (name, content) in &data { + let temp_path = dir.path().join(name); + std::fs::write(&temp_path, content)?; + } + let tag = batch.add_dir(dir.path().to_owned()).await?; + assert!(client.has(*tag.hash()).await?); + for (_, content) in &data { + let hash = blake3::hash(content).into(); + let data = client.read_to_bytes(hash).await?; + assert_eq!(data.as_ref(), *content); + } + Ok(()) +} + +/// Tests that add_dir with WrapOption::Wrap adds a single file wrapped in a collection +#[tokio::test] +async fn add_dir_single_file() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; let dir = tempfile::tempdir()?; let temp_path = dir.path().join("test"); - std::fs::write(&temp_path, b"test")?; - let (tag, _) = batch.add_file(temp_path).await?; + let data: &[u8] = b"test"; + std::fs::write(&temp_path, data)?; + let tag = batch + .add_dir_with_opts( + temp_path, + AddDirOpts { + wrap: WrapOption::Wrap { name: None }, + ..Default::default() + }, + ) + .await?; + assert!(client.read_to_bytes(*tag.hash()).await.is_ok()); + let hash = blake3::hash(data).into(); + let actual_data = client.read_to_bytes(hash).await?; + assert_eq!(actual_data.as_ref(), data); + Ok(()) +} + +#[tokio::test] +async fn batch_drop() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; let data: &[u8] = b"test"; + let tag = batch.add_bytes(data).await?; let hash = *tag.hash(); // Check that the store has the data and that it is protected from gc wait_for_gc().await; assert!(client.has(hash).await?); - drop(tag); + drop(batch); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; assert!(!client.has(hash).await?); Ok(()) } -/// Tests that data goes away when the batch is dropped #[tokio::test] -async fn test_batch_add_file_2() -> anyhow::Result<()> { +async fn temp_tag_drop() ->
anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; - let dir = tempfile::tempdir()?; - let temp_path = dir.path().join("test"); - std::fs::write(&temp_path, b"test")?; - let (tag, _) = batch.add_file(temp_path).await?; + let data: &[u8] = b"test"; + let tag = batch.add_bytes(data).await?; let hash = *tag.hash(); // Check that the store has the data and that it is protected from gc wait_for_gc().await; assert!(client.has(hash).await?); - drop(batch); + drop(tag); // Check that the store drops the data when the temp tag gets dropped wait_for_gc().await; assert!(!client.has(hash).await?); Ok(()) } -/// Tests that add_dir adds the right data +/// Tests that data is preserved if a second temp tag is created for it +/// before the first temp tag is dropped. #[tokio::test] -async fn test_batch_add_dir_works() -> anyhow::Result<()> { +async fn temp_tag_copy() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; - let dir = tempfile::tempdir()?; - let data: [(&str, &[u8]); 2] = [("test1", b"test1"), ("test2", b"test2")]; - for (name, content) in &data { - let temp_path = dir.path().join(name); - std::fs::write(&temp_path, content)?; - } - let tag = batch.add_dir(dir.path().to_owned()).await?; - assert!(client.read_to_bytes(*tag.hash()).await.is_ok()); - for (_, content) in &data { - let hash = blake3::hash(content).into(); - let data = client.read_to_bytes(hash).await?; - assert_eq!(data.as_ref(), *content); - } + let data: &[u8] = b"test"; + let tag = batch.add_bytes(data).await?; + let hash = *tag.hash(); + // Check that the store has the data and that it is protected from gc + wait_for_gc().await; + assert!(client.has(hash).await?); + // Create an additional temp tag for the same data + let tag2 = batch.temp_tag(tag.hash_and_format()).await?; + drop(tag); + // Check that the data is still present + wait_for_gc().await; + assert!(client.has(hash).await?); + drop(tag2); + // Check that the data is gone since both temp tags are dropped + wait_for_gc().await; + assert!(!client.has(hash).await?); Ok(()) } /// Tests that temp tags work properly for hash sequences, using add_dir /// to add the data. #[tokio::test] -async fn test_batch_add_dir_2() -> anyhow::Result<()> { +async fn tag_drop_hashseq() -> anyhow::Result<()> { let node = create_node().await?; let client = &node.client().blobs; let batch = client.batch().await?; From 669c9ebc9224edf3806045a4931331fa577b7006 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 17:09:15 +0300 Subject: [PATCH 12/18] Make io parallelism a part of the config options for add_dir also remove some println! 
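To illustrate the new knob, a sketch only (not part of the diff; batch is assumed to be an existing Batch and root an absolute directory path):

    use iroh::client::blobs::AddDirOpts;

    let tag = batch
        .add_dir_with_opts(
            root,
            AddDirOpts {
                // import up to 8 files concurrently instead of the default of 4
                io_parallelism: 8,
                ..Default::default()
            },
        )
        .await?;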
--- iroh/src/client/blobs/batch.rs | 37 +++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index 57abdedb3a..6dba8f6cd7 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -66,12 +66,24 @@ pub struct AddFileOpts { } /// Options for adding a directory as a collection -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct AddDirOpts { /// The import mode pub import_mode: ImportMode, /// Whether to preserve the directory name pub wrap: WrapOption, + /// I/O parallelism + pub io_parallelism: usize, +} + +impl Default for AddDirOpts { + fn default() -> Self { + Self { + import_mode: ImportMode::TryReference, + wrap: WrapOption::NoWrap, + io_parallelism: 4, + } + } } /// Options for adding data from an async reader #[derive(Debug, Clone)] pub struct AddReaderOpts { /// The format of the blob @@ -197,7 +209,11 @@ impl> Batch { /// /// However, if you want to add a single file as a raw blob, use add_file instead. pub async fn add_dir_with_opts(&self, root: PathBuf, opts: AddDirOpts) -> Result { - let AddDirOpts { import_mode, wrap } = opts; + let AddDirOpts { + import_mode, + wrap, + io_parallelism, + } = opts; anyhow::ensure!(root.is_absolute(), "Path must be absolute"); // let (send, recv) = flume::bounded(32); // let import_progress = FlumeProgressSender::new(send); // import all files below root recursively let data_sources = crate::util::fs::scan_path(root, wrap)?; - const IO_PARALLELISM: usize = 4; let opts = AddFileOpts { import_mode, format: BlobFormat::Raw, }; let result: Vec<_> = futures_lite::stream::iter(data_sources) .map(|source| { // let import_progress = import_progress.clone(); async move { let name = source.name().to_string(); let (tag, size) = self .add_file_with_opts(source.path().to_owned(), opts) .await?; let hash = *tag.hash(); anyhow::Ok((name, hash, size, tag)) } }) - .buffered_ordered(IO_PARALLELISM) + .buffered_ordered(io_parallelism) .try_collect() .await?; - println!("{:?}", result); // create a collection let (collection, child_tags): (Collection, Vec<_>) = result .into_iter() .map(|(name, hash, _, tag)| ((name, hash), tag)) .unzip(); let tag = self.add_collection(collection).await?; drop(child_tags); Ok(tag) } @@ -340,17 +354,16 @@ impl> Batch { /// a collection is a hash sequence where the first child is the metadata. pub async fn add_blob_seq(&self, iter: impl Iterator) -> Result { let mut blobs = iter.peekable(); - let mut res = vec![]; - let res = loop { + // keep the temp tags alive until the hash sequence has been added + let mut tags = vec![]; + loop { let blob = blobs.next().context("Failed to get next blob")?; if blobs.peek().is_none() { - println!("last blob"); - break self.add_bytes_with_opts(blob, BlobFormat::HashSeq).await?; + return self.add_bytes_with_opts(blob, BlobFormat::HashSeq).await; } else { - res.push(self.add_bytes(blob).await?); + tags.push(self.add_bytes(blob).await?); } - }; - Ok(res) + } } /// Create a temp tag to protect some content (blob or hashseq) from being deleted. From 28439d54ca3ce56b73d4eb0bf3f05b15627ee748 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 17:29:47 +0300 Subject: [PATCH 13/18] Rename scope to batch --- iroh/src/client/blobs/batch.rs | 162 ++++++++++++++++----------------- iroh/src/node/rpc.rs | 8 +- iroh/src/rpc_protocol.rs | 12 +-- 3 files changed, 91 insertions(+), 91 deletions(-) diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index 6dba8f6cd7..08a51d7bb9 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -32,7 +32,7 @@ use super::WrapOption; #[derive(derive_more::Debug)] struct BatchInner> { /// The id of the scope. - scope: u64, + batch: u64, /// The rpc client.
rpc: RpcClient, /// The stream to send drop @@ -106,12 +106,12 @@ impl Default for AddReaderOpts { impl> Batch { pub(super) fn new( - scope: u64, + batch: u64, rpc: RpcClient, updates: UpdateSink, ) -> Self { Self(Arc::new(BatchInner { - scope, + batch, rpc, updates: updates.into(), })) @@ -122,6 +122,67 @@ impl> Batch { self.add_bytes_with_opts(bytes, Default::default()).await } + /// Import a blob from a filesystem path, using the default options. + /// + /// For more control, use [`Self::add_file_with_opts`]. + pub async fn add_file(&self, path: PathBuf) -> Result<(TempTag, u64)> { + self.add_file_with_opts(path, AddFileOpts::default()).await + } + + /// Add a directory as a hashseq in iroh collection format + pub async fn add_dir(&self, root: PathBuf) -> Result { + self.add_dir_with_opts(root, Default::default()).await + } + + /// Write a blob by passing an async reader. + /// + /// This will use a default chunk size of 64KB, and a format of [BlobFormat::Raw]. + pub async fn add_reader( + &self, + reader: impl AsyncRead + Unpin + Send + 'static, + ) -> anyhow::Result { + self.add_reader_with_opts(reader, Default::default()).await + } + + /// Write a blob by passing a stream of bytes. + pub async fn add_stream( + &self, + input: impl Stream> + Send + Unpin + 'static, + ) -> Result { + self.add_stream_with_opts(input, Default::default()).await + } + + /// Create a temp tag to protect some content (blob or hashseq) from being deleted. + /// + /// A typical use case is that you are downloading some data and want to protect it + /// from deletion while the download is ongoing, but don't want to protect it permanently + /// until the download is completed. + pub async fn temp_tag(&self, content: HashAndFormat) -> Result { + // Notify the server that we want one temp tag for the given content + self.0 + .rpc + .rpc(BatchCreateTempTagRequest { + batch: self.0.batch, + content, + }) + .await??; + // Only after success of the above call, we can create the corresponding local temp tag + Ok(self.local_temp_tag(content)) + } + + /// Write a blob by passing an async reader. + /// + /// This produces a stream from the reader with a hardcoded buffer size of 64KB. + pub async fn add_reader_with_opts( + &self, + reader: impl AsyncRead + Unpin + Send + 'static, + opts: AddReaderOpts, + ) -> anyhow::Result { + let AddReaderOpts { format, chunk_size } = opts; + let input = ReaderStream::with_capacity(reader, chunk_size); + self.add_stream_with_opts(input, format).await + } + /// Write a blob by passing bytes. pub async fn add_bytes_with_opts( &self, @@ -132,13 +193,6 @@ impl> Batch { self.add_stream_with_opts(input, format).await } - /// Import a blob from a filesystem path, using the default options. - /// - /// For more control, use [`Self::add_file_with_opts`]. - pub async fn add_file(&self, path: PathBuf) -> Result<(TempTag, u64)> { - self.add_file_with_opts(path, AddFileOpts::default()).await - } - /// Import a blob from a filesystem path. 
/// /// `path` should be an absolute path valid for the file system on which @@ -173,7 +227,7 @@ impl> Batch { path, import_mode, format, - scope: self.0.scope, + batch: self.0.batch, }) .await?; let mut res_hash = None; @@ -197,11 +251,6 @@ impl> Batch { Ok((self.local_temp_tag(HashAndFormat { hash, format }), size)) } - /// Add a directory as a hashseq in iroh collection format - pub async fn add_dir(&self, root: PathBuf) -> Result { - self.add_dir_with_opts(root, Default::default()).await - } - /// Add a directory as a hashseq in iroh collection format /// /// This can also be used to add a single file as a collection, if @@ -252,54 +301,10 @@ impl> Batch { Ok(tag) } - /// Add a collection - /// - /// This is a convenience function that converts the collection into two blobs - /// (the metadata and the hash sequence) and adds them, returning a temp tag for - /// the hash sequence. - /// - /// Note that this does not guarantee that the data that the collection refers to - /// actually exists. It will just create 2 blobs, the metadata and the hash sequence - /// itself. - pub async fn add_collection(&self, collection: Collection) -> Result { - self.add_blob_seq(collection.to_blobs()).await - } - - /// Write a blob by passing an async reader. - /// - /// This will use a default chunk size of 64KB, and a format of [BlobFormat::Raw]. - pub async fn add_reader( - &self, - reader: impl AsyncRead + Unpin + Send + 'static, - ) -> anyhow::Result { - self.add_reader_with_opts(reader, Default::default()).await - } - - /// Write a blob by passing an async reader. - /// - /// This produces a stream from the reader with a hardcoded buffer size of 64KB. - pub async fn add_reader_with_opts( - &self, - reader: impl AsyncRead + Unpin + Send + 'static, - opts: AddReaderOpts, - ) -> anyhow::Result { - let AddReaderOpts { format, chunk_size } = opts; - let input = ReaderStream::with_capacity(reader, chunk_size); - self.add_stream_with_opts(input, format).await - } - - /// Write a blob by passing a stream of bytes. - pub async fn add_stream( - &self, - input: impl Stream> + Send + Unpin + 'static, - ) -> Result { - self.add_stream_with_opts(input, Default::default()).await - } - /// Write a blob by passing a stream of bytes. /// - /// For convenient interop with common sources of data, this function takes a stream of io::Result. - /// If you have raw bytes, you need to wrap them in io::Result::Ok. + /// For convenient interop with common sources of data, this function takes a stream of `io::Result`. + /// If you have raw bytes, you need to wrap them in `io::Result::Ok`. pub async fn add_stream_with_opts( &self, mut input: impl Stream> + Send + Unpin + 'static, @@ -309,7 +314,7 @@ impl> Batch { .0 .rpc .bidi(BatchAddStreamRequest { - scope: self.0.scope, + batch: self.0.batch, format, }) .await?; @@ -347,6 +352,19 @@ impl> Batch { Ok(self.local_temp_tag(HashAndFormat { hash, format })) } + /// Add a collection + /// + /// This is a convenience function that converts the collection into two blobs + /// (the metadata and the hash sequence) and adds them, returning a temp tag for + /// the hash sequence. + /// + /// Note that this does not guarantee that the data that the collection refers to + /// actually exists. It will just create 2 blobs, the metadata and the hash sequence + /// itself. + pub async fn add_collection(&self, collection: Collection) -> Result { + self.add_blob_seq(collection.to_blobs()).await + } + /// Add a sequence of blobs, where the last is a hash sequence. 
/// /// It is a common pattern in iroh to have a hash sequence with one or more @@ -366,24 +384,6 @@ impl> Batch { } } - /// Create a temp tag to protect some content (blob or hashseq) from being deleted. - /// - /// A typical use case is that you are downloading some data and want to protect it - /// from deletion while the download is ongoing, but don't want to protect it permanently - /// until the download is completed. - pub async fn temp_tag(&self, content: HashAndFormat) -> Result { - // Notify the server that we want one temp tag for the given content - self.0 - .rpc - .rpc(BatchCreateTempTagRequest { - scope: self.0.scope, - content, - }) - .await??; - // Only after success of the above call, we can create the corresponding local temp tag - Ok(self.local_temp_tag(content)) - } - /// Creates a temp tag for the given hash and format, without notifying the server. /// /// Caution: only do this for data for which you know the server side has created a temp tag. diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index 96bebe68b3..f9a29b347d 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -837,7 +837,7 @@ impl Handler { path: root, import_mode, format, - scope, + batch, } = msg; // Check that the path is absolute and exists. anyhow::ensure!(root.is_absolute(), "path must be absolute"); @@ -852,7 +852,7 @@ impl Handler { .import_file(root, import_mode, format, import_progress) .await?; let hash = *tag.hash(); - self.inner.blob_scopes.lock().unwrap().store(scope, tag); + self.inner.blob_scopes.lock().unwrap().store(batch, tag); progress.send(BatchAddPathProgress::Done { hash }).await?; Ok(()) @@ -952,7 +952,7 @@ impl Handler { #[allow(clippy::unused_async)] async fn batch_create_temp_tag(self, msg: BatchCreateTempTagRequest) -> RpcResult<()> { let tag = self.inner.db.temp_tag(msg.content); - self.inner.blob_scopes.lock().unwrap().store(msg.scope, tag); + self.inner.blob_scopes.lock().unwrap().store(msg.batch, tag); Ok(()) } @@ -1022,7 +1022,7 @@ impl Handler { .blob_scopes .lock() .unwrap() - .store(msg.scope, temp_tag); + .store(msg.batch, temp_tag); progress .send(BatchAddStreamResponse::Result { hash }) .await?; diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index 51ccf42832..cd87e09b6b 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -1095,8 +1095,8 @@ pub struct BlobAddStreamResponse(pub AddProgress); pub struct BatchCreateTempTagRequest { /// Content to protect pub content: HashAndFormat, - /// Scope to create the temp tag in - pub scope: u64, + /// Batch to create the temp tag in + pub batch: u64, } impl RpcMsg for BatchCreateTempTagRequest { @@ -1108,8 +1108,8 @@ impl RpcMsg for BatchCreateTempTagRequest { pub struct BatchAddStreamRequest { /// What format to use for the blob pub format: BlobFormat, - /// Scope to create the temp tag in - pub scope: u64, + /// Batch to create the temp tag in + pub batch: u64, } /// Write a blob from a byte stream @@ -1147,8 +1147,8 @@ pub struct BatchAddPathRequest { pub import_mode: ImportMode, /// What format to use for the blob pub format: BlobFormat, - /// Scope to create the temp tag in - pub scope: u64, + /// Batch to create the temp tag in + pub batch: u64, } /// Response to a batch add path request From be8209c3171901398b655802f1119897dd0ebe1e Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Tue, 4 Jun 2024 17:51:18 +0300 Subject: [PATCH 14/18] rename scope to batch everywhere and add a newtype --- iroh/src/client/blobs.rs | 4 ++-- iroh/src/client/blobs/batch.rs | 6 +++--- 
iroh/src/node.rs | 17 ++++++++--------- iroh/src/node/builder.rs | 2 +- iroh/src/node/rpc.rs | 22 +++++++++++++--------- iroh/src/rpc_protocol.rs | 11 +++++++---- 6 files changed, 34 insertions(+), 28 deletions(-) diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index e9af9f6336..73732e4a0e 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -88,9 +88,9 @@ where /// unless a permanent tag is created for it. pub async fn batch(&self) -> Result> { let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?; - let BatchCreateResponse::Id(scope) = stream.next().await.context("expected scope id")??; + let BatchCreateResponse::Id(batch) = stream.next().await.context("expected batch id")??; let rpc = self.rpc.clone(); - Ok(Batch::new(scope, rpc, updates)) + Ok(Batch::new(batch, rpc, updates)) } /// Stream the contents of a single blob. diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index 08a51d7bb9..470ec75b03 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -22,7 +22,7 @@ use crate::{ client::RpcService, rpc_protocol::{ BatchAddPathRequest, BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, - BatchCreateTempTagRequest, BatchUpdate, + BatchCreateTempTagRequest, BatchId, BatchUpdate, }, }; @@ -32,7 +32,7 @@ use super::WrapOption; #[derive(derive_more::Debug)] struct BatchInner> { /// The id of the scope. - batch: u64, + batch: BatchId, /// The rpc client. rpc: RpcClient, @@ -106,7 +106,7 @@ impl Default for AddReaderOpts { impl> Batch { pub(super) fn new( - batch: u64, + batch: BatchId, rpc: RpcClient, updates: UpdateSink, ) -> Self { diff --git a/iroh/src/node.rs b/iroh/src/node.rs index aa19a6e460..7e5bd12406 100644 --- a/iroh/src/node.rs +++ b/iroh/src/node.rs @@ -27,6 +27,7 @@ use tokio_util::task::LocalPoolHandle; use tracing::debug; use crate::client::RpcService; +use crate::rpc_protocol::BatchId; mod builder; mod rpc; @@ -65,16 +66,14 @@ struct NodeInner { rt: LocalPoolHandle, pub(crate) sync: DocsEngine, downloader: Downloader, - blob_scopes: Mutex, + blob_batches: Mutex, } /// Keeps track of all the currently active batch operations of the blobs api. -/// -/// #[derive(Debug, Default)] struct BlobBatches { /// Currently active batches - batches: BTreeMap, + batches: BTreeMap, /// Used to generate new batch ids. max: u64, } @@ -89,14 +88,14 @@ struct BlobBatch { impl BlobBatches { /// Create a new unique batch id. - fn create(&mut self) -> u64 { + fn create(&mut self) -> BatchId { let id = self.max; self.max += 1; - id + BatchId(id) } /// Store a temp tag in a batch identified by a batch id. - fn store(&mut self, batch: u64, tt: TempTag) { + fn store(&mut self, batch: BatchId, tt: TempTag) { let entry = self.batches.entry(batch).or_default(); let count = entry.tags.entry(tt.hash_and_format()).or_default(); tt.leak(); @@ -104,7 +103,7 @@ impl BlobBatches { } /// Remove a tag from a batch. - fn remove_one(&mut self, batch: u64, content: &HashAndFormat, u: Option<&dyn TagDrop>) { + fn remove_one(&mut self, batch: BatchId, content: &HashAndFormat, u: Option<&dyn TagDrop>) { if let Some(scope) = self.batches.get_mut(&batch) { if let Some(counter) = scope.tags.get_mut(content) { *counter -= 1; if let Some(u) = u { u.on_drop(content); } if *counter == 0 { scope.tags.remove(content); } } } /// Remove an entire batch.
- fn remove(&mut self, batch: u64, u: Option<&dyn TagDrop>) { + fn remove(&mut self, batch: BatchId, u: Option<&dyn TagDrop>) { if let Some(scope) = self.batches.remove(&batch) { for (content, count) in scope.tags { if let Some(u) = u { diff --git a/iroh/src/node/builder.rs b/iroh/src/node/builder.rs index fce7496b6d..ad929cc934 100644 --- a/iroh/src/node/builder.rs +++ b/iroh/src/node/builder.rs @@ -490,7 +490,7 @@ where rt: lp.clone(), sync, downloader, - blob_scopes: Default::default(), + blob_batches: Default::default(), }); let task = { let gossip = gossip.clone(); diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index f9a29b347d..d4042dd86e 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -852,7 +852,7 @@ impl Handler { .import_file(root, import_mode, format, import_progress) .await?; let hash = *tag.hash(); - self.inner.blob_scopes.lock().unwrap().store(batch, tag); + self.inner.blob_batches.lock().unwrap().store(batch, tag); progress.send(BatchAddPathProgress::Done { hash }).await?; Ok(()) @@ -927,13 +927,13 @@ impl Handler { _: BatchCreateRequest, mut updates: impl Stream + Send + Unpin + 'static, ) -> impl Stream { - let scope_id = self.inner.blob_scopes.lock().unwrap().create(); + let batch = self.inner.blob_batches.lock().unwrap().create(); tokio::spawn(async move { while let Some(item) = updates.next().await { match item { BatchUpdate::Drop(content) => { - self.inner.blob_scopes.lock().unwrap().remove_one( - scope_id, + self.inner.blob_batches.lock().unwrap().remove_one( + batch, &content, self.inner.db.tag_drop(), ); @@ -941,18 +941,22 @@ impl Handler { } } self.inner - .blob_scopes + .blob_batches .lock() .unwrap() - .remove(scope_id, self.inner.db.tag_drop()); + .remove(batch, self.inner.db.tag_drop()); }); - futures_lite::stream::once(BatchCreateResponse::Id(scope_id)) + futures_lite::stream::once(BatchCreateResponse::Id(batch)) } #[allow(clippy::unused_async)] async fn batch_create_temp_tag(self, msg: BatchCreateTempTagRequest) -> RpcResult<()> { let tag = self.inner.db.temp_tag(msg.content); - self.inner.blob_scopes.lock().unwrap().store(msg.batch, tag); + self.inner + .blob_batches + .lock() + .unwrap() + .store(msg.batch, tag); Ok(()) } @@ -1019,7 +1023,7 @@ impl Handler { .await?; let hash = temp_tag.inner().hash; self.inner - .blob_scopes + .blob_batches .lock() .unwrap() .store(msg.batch, temp_tag); diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index cd87e09b6b..3894bb2d2c 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -68,7 +68,7 @@ pub enum BatchUpdate { #[derive(Debug, Serialize, Deserialize)] pub enum BatchCreateResponse { /// We got the id of the scope - Id(u64), + Id(BatchId), } impl Msg for BatchCreateRequest { @@ -1090,13 +1090,16 @@ impl BidiStreamingMsg for BlobAddStreamRequest { #[derive(Debug, Serialize, Deserialize, derive_more::Into)] pub struct BlobAddStreamResponse(pub AddProgress); +#[derive(Debug, PartialEq, Eq, PartialOrd, Serialize, Deserialize, Ord, Clone, Copy, Hash)] +pub struct BatchId(pub(crate) u64); + /// Create a temp tag with a given hash and format #[derive(Debug, Serialize, Deserialize)] pub struct BatchCreateTempTagRequest { /// Content to protect pub content: HashAndFormat, /// Batch to create the temp tag in - pub batch: u64, + pub batch: BatchId, } impl RpcMsg for BatchCreateTempTagRequest { @@ -1109,7 +1112,7 @@ pub struct BatchAddStreamRequest { /// What format to use for the blob pub format: BlobFormat, /// Batch to create the temp tag in - pub batch: u64, + pub 
batch: BatchId, } /// Write a blob from a byte stream @@ -1148,7 +1151,7 @@ pub struct BatchAddPathRequest { /// What format to use for the blob pub format: BlobFormat, /// Batch to create the temp tag in - pub batch: u64, + pub batch: BatchId, } /// Response to a batch add path request From aa7f73d3dc42fa7840e21371044dd58bf4f15367 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 5 Jun 2024 11:05:57 +0300 Subject: [PATCH 15/18] Add batch option to CreateTagRequest and SetTagRequest --- iroh/src/client/blobs/batch.rs | 50 +++++++++++++++++++++++++++++++--- iroh/src/client/tags.rs | 31 +++++++++++++-------- iroh/src/node.rs | 20 ++++++++++---- iroh/src/node/rpc.rs | 26 +++++++++++++++--- iroh/src/rpc_protocol.rs | 6 ++++ 5 files changed, 108 insertions(+), 25 deletions(-) diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index 470ec75b03..9a7d3586a0 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -11,18 +11,18 @@ use futures_lite::StreamExt; use futures_util::{FutureExt, SinkExt, Stream}; use iroh_blobs::{ format::collection::Collection, provider::BatchAddPathProgress, store::ImportMode, - util::TagDrop, BlobFormat, HashAndFormat, TempTag, + util::TagDrop, BlobFormat, HashAndFormat, Tag, TempTag, }; use quic_rpc::{client::UpdateSink, RpcClient, ServiceConnection}; use tokio::io::AsyncRead; use tokio_util::io::ReaderStream; -use tracing::warn; +use tracing::{debug, warn}; use crate::{ client::RpcService, rpc_protocol::{ BatchAddPathRequest, BatchAddStreamRequest, BatchAddStreamResponse, BatchAddStreamUpdate, - BatchCreateTempTagRequest, BatchId, BatchUpdate, + BatchCreateTempTagRequest, BatchId, BatchUpdate, CreateTagRequest, SetTagRequest, }, }; @@ -52,7 +52,23 @@ pub struct Batch>(Arc>); impl> TagDrop for BatchInner { fn on_drop(&self, content: &HashAndFormat) { let mut updates = self.updates.lock().unwrap(); - updates.send(BatchUpdate::Drop(*content)).now_or_never(); + // send a drop to the server + // this will occasionally fail, but that's acceptable. The temp tags for the batch + // will be cleaned up as soon as the entire batch is dropped. + // + // E.g. a typical scenario is that you create a large array of temp tags, and then + // store them in a hash sequence and then drop the array. You will get many drops + // at the same time, and might get a send failure here. + // + // But that just means that the server will clean up the temp tags when the batch is + // dropped. + if updates + .send(BatchUpdate::Drop(*content)) + .now_or_never() + .is_none() + { + debug!("Failed to send drop to server"); + } } } @@ -384,6 +400,32 @@ impl> Batch { } } + /// Upgrade a temp tag to a persistent tag. + pub async fn upgrade(&self, tt: TempTag) -> Result { + let tag = self + .0 + .rpc + .rpc(CreateTagRequest { + value: tt.hash_and_format(), + batch: Some(self.0.batch), + }) + .await??; + Ok(tag) + } + + /// Upgrade a temp tag to a persistent tag with a specific name. + pub async fn upgrade_to(&self, tt: TempTag, tag: Tag) -> Result<()> { + self.0 + .rpc + .rpc(SetTagRequest { + name: tag, + value: Some(tt.hash_and_format()), + batch: Some(self.0.batch), + }) + .await??; + Ok(()) + } + /// Creates a temp tag for the given hash and format, without notifying the server. /// /// Caution: only do this for data for which you know the server side has created a temp tag. 
diff --git a/iroh/src/client/tags.rs b/iroh/src/client/tags.rs
index 926834d783..1e12a55676 100644
--- a/iroh/src/client/tags.rs
+++ b/iroh/src/client/tags.rs
@@ -28,29 +28,38 @@ where
     ///
     /// Use this method if you want a new tag with a unique name.
     pub async fn create(&self, value: HashAndFormat) -> Result<Tag> {
-        Ok(self.rpc.rpc(CreateTagRequest { value }).await??)
-    }
-
-    /// Set a tag to a value, overwriting any existing value.
-    ///
-    /// Setting the value to `None` deletes the tag. Setting the value to `Some` creates or updates the tag.
-    pub async fn set_opt(&self, name: Tag, value: Option<HashAndFormat>) -> Result<()> {
-        self.rpc.rpc(SetTagRequest { name, value }).await??;
-        Ok(())
+        Ok(self
+            .rpc
+            .rpc(CreateTagRequest { value, batch: None })
+            .await??)
     }
 
     /// Set a tag to a value, overwriting any existing value.
     ///
     /// This is a convenience wrapper around `set_with_opts`.
     pub async fn set(&self, name: Tag, value: HashAndFormat) -> Result<()> {
-        self.set_opt(name, Some(value)).await
+        self.set_with_opts(name, Some(value)).await
     }
 
     /// Delete a tag.
     ///
     /// This is a convenience wrapper around `set_with_opts`.
     pub async fn delete(&self, name: Tag) -> Result<()> {
-        self.set_opt(name, None).await
+        self.set_with_opts(name, None).await
+    }
+
+    /// Set a tag to a value, overwriting any existing value.
+    ///
+    /// Setting the value to `None` deletes the tag. Setting the value to `Some` creates or updates the tag.
+    pub async fn set_with_opts(&self, name: Tag, value: Option<HashAndFormat>) -> Result<()> {
+        self.rpc
+            .rpc(SetTagRequest {
+                name,
+                value,
+                batch: None,
+            })
+            .await??;
+        Ok(())
     }
 }
diff --git a/iroh/src/node.rs b/iroh/src/node.rs
index 7e5bd12406..1154763220 100644
--- a/iroh/src/node.rs
+++ b/iroh/src/node.rs
@@ -103,27 +103,35 @@ impl BlobBatches {
     }
 
     /// Remove a tag from a batch.
-    fn remove_one(&mut self, batch: BatchId, content: &HashAndFormat, u: Option<&dyn TagDrop>) {
+    fn remove_one(
+        &mut self,
+        batch: BatchId,
+        content: &HashAndFormat,
+        tag_drop: Option<&dyn TagDrop>,
+    ) -> Result<()> {
         if let Some(scope) = self.batches.get_mut(&batch) {
             if let Some(counter) = scope.tags.get_mut(content) {
                 *counter -= 1;
-                if let Some(u) = u {
-                    u.on_drop(content);
+                if let Some(tag_drop) = tag_drop {
+                    tag_drop.on_drop(content);
                 }
                 if *counter == 0 {
                     scope.tags.remove(content);
                 }
             }
+        } else {
+            anyhow::bail!("batch not found");
         }
+        Ok(())
     }
 
     /// Remove an entire batch.
-    fn remove(&mut self, batch: BatchId, u: Option<&dyn TagDrop>) {
+    fn remove(&mut self, batch: BatchId, tag_drop: Option<&dyn TagDrop>) {
         if let Some(scope) = self.batches.remove(&batch) {
             for (content, count) in scope.tags {
-                if let Some(u) = u {
+                if let Some(tag_drop) = tag_drop {
                     for _ in 0..count {
-                        u.on_drop(&content);
+                        tag_drop.on_drop(&content);
                     }
                 }
             }
diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs
index d4042dd86e..2d5e08d1c1 100644
--- a/iroh/src/node/rpc.rs
+++ b/iroh/src/node/rpc.rs
@@ -456,13 +456,28 @@ impl<D: BaoStore> Handler<D> {
     }
 
     async fn tags_set_tag(self, msg: SetTagRequest) -> RpcResult<()> {
-        self.inner.db.set_tag(msg.name, None).await?;
+        if let Some(batch) = msg.batch {
+            if let Some(content) = msg.value.as_ref() {
+                self.inner.blob_batches.lock().unwrap().remove_one(
+                    batch,
+                    content,
+                    self.inner.db.tag_drop(),
+                )?;
+            }
+        }
+        self.inner.db.set_tag(msg.name, msg.value).await?;
         Ok(())
     }
 
     async fn tags_create_tag(self, msg: CreateTagRequest) -> RpcResult<Tag> {
-        let tag = self.inner.db.create_tag(msg.value).await?;
-        Ok(tag)
+        if let Some(batch) = msg.batch {
+            self.inner.blob_batches.lock().unwrap().remove_one(
+                batch,
+                &msg.value,
+                self.inner.db.tag_drop(),
+            )?;
+        }
+        Ok(self.inner.db.create_tag(msg.value).await?)
     }
 
     fn tags_list_tags(self, _msg: ListTagsRequest) -> impl Stream<Item = TagInfo> + Send + 'static {
@@ -932,12 +947,15 @@ impl<D: BaoStore> Handler<D> {
             while let Some(item) = updates.next().await {
                 match item {
                     BatchUpdate::Drop(content) => {
-                        self.inner.blob_batches.lock().unwrap().remove_one(
+                        // this cannot fail, since we keep the batch alive.
+                        // therefore it is safe to ignore the result.
+                        let _ = self.inner.blob_batches.lock().unwrap().remove_one(
                             batch,
                             &content,
                             self.inner.db.tag_drop(),
                         );
                     }
+                    BatchUpdate::Ping => {}
                 }
             }
             self.inner
diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs
index 3894bb2d2c..ec70024fd8 100644
--- a/iroh/src/rpc_protocol.rs
+++ b/iroh/src/rpc_protocol.rs
@@ -62,6 +62,8 @@ pub struct BatchCreateRequest;
 pub enum BatchUpdate {
     /// Drop of a remote temp tag
     Drop(HashAndFormat),
+    /// Message to check that the connection is still alive
+    Ping,
 }
 
 /// Response to a temp tag scope request
@@ -311,6 +313,8 @@ pub struct SetTagRequest {
     pub name: Tag,
     /// Value of the tag, None to delete
     pub value: Option<HashAndFormat>,
+    /// Batch to use, none for global
+    pub batch: Option<BatchId>,
 }
 
 impl RpcMsg<RpcService> for SetTagRequest {
@@ -322,6 +326,8 @@ pub struct CreateTagRequest {
     /// Value of the tag
     pub value: HashAndFormat,
+    /// Batch to use, none for global
+    pub batch: Option<BatchId>,
 }
 
 impl RpcMsg<RpcService> for CreateTagRequest {

From c3573926daeda6a43c19a06022c52b62b63d3389 Mon Sep 17 00:00:00 2001
From: Ruediger Klaehn
Date: Wed, 5 Jun 2024 12:34:00 +0300
Subject: [PATCH 16/18] Add buffer to update sink

---
 iroh/src/client/blobs.rs       |  2 +-
 iroh/src/client/blobs/batch.rs | 32 +++++++++++++++-----------------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs
index 73732e4a0e..798f540fe8 100644
--- a/iroh/src/client/blobs.rs
+++ b/iroh/src/client/blobs.rs
@@ -90,7 +90,7 @@ where
         let (updates, mut stream) = self.rpc.bidi(BatchCreateRequest).await?;
         let BatchCreateResponse::Id(batch) = stream.next().await.context("expected scope id")??;
         let rpc = self.rpc.clone();
-        Ok(Batch::new(batch, rpc, updates))
+        Ok(Batch::new(batch, rpc, updates, 1024))
     }
 
     /// Stream the contents of a single blob.
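The buffer size passed here (1024) feeds into the SinkExt::buffer wrapper introduced in batch.rs below: Drop notifications are queued into a local buffer with feed and flushed on a best-effort basis, so dropping many temp tags at once no longer has to await the server. A standalone sketch of those sink semantics, using a futures_channel sender purely as a stand-in for the RPC update sink:

use futures_util::{FutureExt, SinkExt};

async fn demo() {
    let (tx, _rx) = futures_channel::mpsc::channel::<u32>(1);
    // buffer up to 1024 items locally before applying backpressure
    let mut sink = tx.buffer(1024);
    // feed enqueues without driving the sink to completion; a single
    // poll via now_or_never either makes progress or leaves the item
    // in the local buffer
    let _ = sink.feed(1).now_or_never();
    // flush is also best-effort here; leftovers go out the next time
    // the sink is driven, or are dropped together with the sink
    let _ = sink.flush().now_or_never();
}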
diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index 9a7d3586a0..7fd716662e 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -1,14 +1,12 @@ use std::{ - io, - path::PathBuf, - sync::{Arc, Mutex}, + io, path::PathBuf, sync::{Arc, Mutex} }; use anyhow::{anyhow, Context, Result}; use bytes::Bytes; use futures_buffered::BufferedStreamExt; use futures_lite::StreamExt; -use futures_util::{FutureExt, SinkExt, Stream}; +use futures_util::{sink::Buffer, FutureExt, SinkExt, Stream}; use iroh_blobs::{ format::collection::Collection, provider::BatchAddPathProgress, store::ImportMode, util::TagDrop, BlobFormat, HashAndFormat, Tag, TempTag, @@ -37,7 +35,7 @@ struct BatchInner> { rpc: RpcClient, /// The stream to send drop #[debug(skip)] - updates: Mutex>, + updates: Mutex, BatchUpdate>>, } /// A batch for write operations. @@ -52,7 +50,8 @@ pub struct Batch>(Arc>); impl> TagDrop for BatchInner { fn on_drop(&self, content: &HashAndFormat) { let mut updates = self.updates.lock().unwrap(); - // send a drop to the server + // make a spirited attempt to notify the server that we are dropping the content + // // this will occasionally fail, but that's acceptable. The temp tags for the batch // will be cleaned up as soon as the entire batch is dropped. // @@ -62,13 +61,8 @@ impl> TagDrop for BatchInner { // // But that just means that the server will clean up the temp tags when the batch is // dropped. - if updates - .send(BatchUpdate::Drop(*content)) - .now_or_never() - .is_none() - { - debug!("Failed to send drop to server"); - } + updates.feed(BatchUpdate::Drop(*content)).now_or_never(); + updates.flush().now_or_never(); } } @@ -125,7 +119,9 @@ impl> Batch { batch: BatchId, rpc: RpcClient, updates: UpdateSink, + buffer_size: usize, ) -> Self { + let updates = updates.buffer(buffer_size); Self(Arc::new(BatchInner { batch, rpc, @@ -183,7 +179,7 @@ impl> Batch { }) .await??; // Only after success of the above call, we can create the corresponding local temp tag - Ok(self.local_temp_tag(content)) + Ok(self.local_temp_tag(content, None)) } /// Write a blob by passing an async reader. @@ -264,7 +260,7 @@ impl> Batch { } let hash = res_hash.context("Missing hash")?; let size = res_size.context("Missing size")?; - Ok((self.local_temp_tag(HashAndFormat { hash, format }), size)) + Ok((self.local_temp_tag(HashAndFormat { hash, format }, Some(size)), size)) } /// Add a directory as a hashseq in iroh collection format @@ -334,9 +330,11 @@ impl> Batch { format, }) .await?; + let mut size = 0u64; while let Some(item) = input.next().await { match item { Ok(chunk) => { + size += chunk.len() as u64; sink.send(BatchAddStreamUpdate::Chunk(chunk)) .await .map_err(|err| anyhow!("Failed to send input stream to remote: {err:?}"))?; @@ -365,7 +363,7 @@ impl> Batch { } } let hash = res.context("Missing answer")?; - Ok(self.local_temp_tag(HashAndFormat { hash, format })) + Ok(self.local_temp_tag(HashAndFormat { hash, format }, Some(size))) } /// Add a collection @@ -429,7 +427,7 @@ impl> Batch { /// Creates a temp tag for the given hash and format, without notifying the server. /// /// Caution: only do this for data for which you know the server side has created a temp tag. 
-    fn local_temp_tag(&self, inner: HashAndFormat) -> TempTag {
+    fn local_temp_tag(&self, inner: HashAndFormat, _size: Option<u64>) -> TempTag {
         let on_drop: Arc<dyn TagDrop> = self.0.clone();
         let on_drop = Some(Arc::downgrade(&on_drop));
         TempTag::new(inner, on_drop)

From 9cefcadbfabc0bbbd6fcbc504f0eab124c81ba52 Mon Sep 17 00:00:00 2001
From: Ruediger Klaehn
Date: Thu, 6 Jun 2024 12:29:55 +0300
Subject: [PATCH 17/18] PR review: move around the BlobStatus, use async lock

---
 iroh-blobs/src/store/traits.rs |  3 ++
 iroh/src/client/blobs.rs       | 23 ++++++++++--
 iroh/src/node.rs               |  4 +--
 iroh/src/node/rpc.rs           | 64 +++++++++++++++++-----------------
 iroh/src/rpc_protocol.rs       | 22 +----------
 5 files changed, 59 insertions(+), 57 deletions(-)

diff --git a/iroh-blobs/src/store/traits.rs b/iroh-blobs/src/store/traits.rs
index 9d1e42fc33..62791cbd48 100644
--- a/iroh-blobs/src/store/traits.rs
+++ b/iroh-blobs/src/store/traits.rs
@@ -357,6 +357,9 @@ pub trait Store: ReadableStore + MapMut {
     fn temp_tag(&self, value: HashAndFormat) -> TempTag;
 
     /// Handle to use to drop tags
+    ///
+    /// Returns None for stores that don't keep track of tags, such as read-only
+    /// stores.
     fn tag_drop(&self) -> Option<&dyn TagDrop>;
 
     /// Notify the store that a new gc phase is about to start.
diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs
index 7ff196163e..1cabcdb008 100644
--- a/iroh/src/client/blobs.rs
+++ b/iroh/src/client/blobs.rs
@@ -18,7 +18,7 @@ use iroh_blobs::{
     export::ExportProgress as BytesExportProgress,
     format::collection::Collection,
     get::db::DownloadProgress as BytesDownloadProgress,
-    store::{ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress},
+    store::{BaoBlobSize, ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress},
     BlobFormat, Hash, Tag,
 };
 use iroh_net::NodeAddr;
@@ -42,7 +42,6 @@ use super::{flatten, Iroh};
 mod batch;
 pub use batch::{AddDirOpts, AddFileOpts, AddReaderOpts, Batch};
 
-pub use crate::rpc_protocol::BlobStatus;
 pub use iroh_blobs::store::ImportMode;
 pub use iroh_blobs::TempTag;
 
@@ -875,6 +874,26 @@ pub enum DownloadMode {
     Queued,
 }
 
+/// Status information about a blob.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub enum BlobStatus {
+    /// The blob is not stored on the node.
+    NotFound,
+    /// The blob is only stored partially.
+    Partial {
+        /// The size of the currently stored partial blob.
+        ///
+        /// This can be either a verified size if the last chunk was received,
+        /// or an unverified size if the last chunk was not yet received.
+        size: BaoBlobSize,
+    },
+    /// The blob is stored completely.
+    Complete {
+        /// The size of the blob. For a complete blob the size is always known.
+        size: u64,
+    },
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/iroh/src/node.rs b/iroh/src/node.rs
index 5d34323c34..0785018d0b 100644
--- a/iroh/src/node.rs
+++ b/iroh/src/node.rs
@@ -7,7 +7,7 @@ use std::collections::BTreeMap;
 use std::fmt::Debug;
 use std::net::SocketAddr;
 use std::path::Path;
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 
 use anyhow::{anyhow, Result};
 use futures_lite::StreamExt;
@@ -66,7 +66,7 @@ struct NodeInner<D> {
     rt: LocalPoolHandle,
     pub(crate) sync: DocsEngine,
     downloader: Downloader,
-    blob_batches: Mutex<BlobBatches>,
+    blob_batches: tokio::sync::Mutex<BlobBatches>,
 }
 
 /// Keeps track of all the currently active batch operations of the blobs api.
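The change from std::sync::Mutex to tokio::sync::Mutex is what lets the RPC handlers in the next diff hold the batch map across .await points: an async guard may live across awaits, while holding a std guard there would make the future non-Send and risk blocking the executor. A minimal sketch of the pattern, with illustrative types:

use std::sync::Arc;
use tokio::sync::Mutex;

async fn update(batches: Arc<Mutex<Vec<u64>>>) {
    // lock() is async and does not block the executor thread
    let mut guard = batches.lock().await;
    guard.push(1);
    // unlike a std guard, this guard may be held across an await
    tokio::task::yield_now().await;
}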
diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index 2d5e08d1c1..94df3831b1 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -6,6 +6,7 @@ use std::time::Duration; use anyhow::{anyhow, ensure, Result}; use futures_buffered::BufferedStreamExt; use futures_lite::{Stream, StreamExt}; +use futures_util::FutureExt; use genawaiter::sync::{Co, Gen}; use iroh_base::rpc::RpcResult; use iroh_blobs::downloader::{DownloadRequest, Downloader}; @@ -35,7 +36,7 @@ use tokio_util::task::LocalPoolHandle; use tracing::{debug, info}; use crate::client::blobs::{ - BlobInfo, CollectionInfo, DownloadMode, IncompleteBlobInfo, WrapOption, + BlobInfo, BlobStatus, CollectionInfo, DownloadMode, IncompleteBlobInfo, WrapOption, }; use crate::client::tags::TagInfo; use crate::client::NodeStatus; @@ -46,7 +47,7 @@ use crate::rpc_protocol::{ BlobAddStreamResponse, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, BlobExportRequest, BlobExportResponse, BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, - BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobStatus, + BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, BlobStatusRequest, BlobStatusResponse, BlobValidateRequest, CreateCollectionRequest, CreateCollectionResponse, CreateTagRequest, DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, DocImportFileResponse, DocSetHashRequest, ListTagsRequest, @@ -458,7 +459,7 @@ impl Handler { async fn tags_set_tag(self, msg: SetTagRequest) -> RpcResult<()> { if let Some(batch) = msg.batch { if let Some(content) = msg.value.as_ref() { - self.inner.blob_batches.lock().unwrap().remove_one( + self.inner.blob_batches.lock().await.remove_one( batch, content, self.inner.db.tag_drop(), @@ -471,7 +472,7 @@ impl Handler { async fn tags_create_tag(self, msg: CreateTagRequest) -> RpcResult { if let Some(batch) = msg.batch { - self.inner.blob_batches.lock().unwrap().remove_one( + self.inner.blob_batches.lock().await.remove_one( batch, &msg.value, self.inner.db.tag_drop(), @@ -867,7 +868,7 @@ impl Handler { .import_file(root, import_mode, format, import_progress) .await?; let hash = *tag.hash(); - self.inner.blob_batches.lock().unwrap().store(batch, tag); + self.inner.blob_batches.lock().await.store(batch, tag); progress.send(BatchAddPathProgress::Done { hash }).await?; Ok(()) @@ -942,39 +943,38 @@ impl Handler { _: BatchCreateRequest, mut updates: impl Stream + Send + Unpin + 'static, ) -> impl Stream { - let batch = self.inner.blob_batches.lock().unwrap().create(); - tokio::spawn(async move { - while let Some(item) = updates.next().await { - match item { - BatchUpdate::Drop(content) => { - // this can not fail, since we keep the batch alive. - // therefore it is safe to ignore the result. - let _ = self.inner.blob_batches.lock().unwrap().remove_one( - batch, - &content, - self.inner.db.tag_drop(), - ); + async move { + let batch = self.inner.blob_batches.lock().await.create(); + tokio::spawn(async move { + while let Some(item) = updates.next().await { + match item { + BatchUpdate::Drop(content) => { + // this can not fail, since we keep the batch alive. + // therefore it is safe to ignore the result. 
+ let _ = self.inner.blob_batches.lock().await.remove_one( + batch, + &content, + self.inner.db.tag_drop(), + ); + } + BatchUpdate::Ping => {} } - BatchUpdate::Ping => {} } - } - self.inner - .blob_batches - .lock() - .unwrap() - .remove(batch, self.inner.db.tag_drop()); - }); - futures_lite::stream::once(BatchCreateResponse::Id(batch)) + self.inner + .blob_batches + .lock() + .await + .remove(batch, self.inner.db.tag_drop()); + }); + BatchCreateResponse::Id(batch) + } + .into_stream() } #[allow(clippy::unused_async)] async fn batch_create_temp_tag(self, msg: BatchCreateTempTagRequest) -> RpcResult<()> { let tag = self.inner.db.temp_tag(msg.content); - self.inner - .blob_batches - .lock() - .unwrap() - .store(msg.batch, tag); + self.inner.blob_batches.lock().await.store(msg.batch, tag); Ok(()) } @@ -1043,7 +1043,7 @@ impl Handler { self.inner .blob_batches .lock() - .unwrap() + .await .store(msg.batch, temp_tag); progress .send(BatchAddStreamResponse::Result { hash }) diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index ec70024fd8..2777e99cd3 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -46,7 +46,7 @@ pub use iroh_blobs::{provider::AddProgress, store::ValidateProgress}; use iroh_docs::engine::LiveEvent; use crate::client::{ - blobs::{BlobInfo, CollectionInfo, DownloadMode, IncompleteBlobInfo, WrapOption}, + blobs::{BlobInfo, BlobStatus, CollectionInfo, DownloadMode, IncompleteBlobInfo, WrapOption}, docs::{ImportProgress, ShareMode}, tags::TagInfo, NodeStatus, @@ -219,26 +219,6 @@ pub struct BlobStatusRequest { #[derive(Debug, Serialize, Deserialize, derive_more::From, derive_more::Into)] pub struct BlobStatusResponse(pub BlobStatus); -/// Status information about a blob. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub enum BlobStatus { - /// The blob is not stored on the node. - NotFound, - /// The blob is only stored partially. - Partial { - /// The size of the currently stored partial blob. - /// - /// This can be either a verified size if the last chunk was received, - /// or an unverified size if the last chunk was not yet received. - size: BaoBlobSize, - }, - /// The blob is stored completely. - Complete { - /// The size of the blob. For a complete blob the size is always known. 
- size: u64, - }, -} - impl RpcMsg for BlobStatusRequest { type Response = RpcResult; } From f6a8d150a824e1d0022b37d3239b6a73a268a7cd Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Thu, 6 Jun 2024 18:19:56 +0300 Subject: [PATCH 18/18] Merge branch 'main' into batch-blob-api-2 # Conflicts: # iroh/src/client/blobs.rs # iroh/src/node/rpc.rs # iroh/src/rpc_protocol.rs --- iroh-blobs/src/export.rs | 2 +- iroh-blobs/src/format/collection.rs | 24 ++++- iroh-cli/src/commands/blob.rs | 2 +- iroh-docs/Cargo.toml | 6 +- iroh-docs/src/actor.rs | 69 ++++++++++---- iroh-docs/src/engine/live.rs | 2 +- iroh-docs/src/store/fs.rs | 88 ++++++++--------- iroh-docs/src/store/fs/query.rs | 27 +++--- iroh-docs/src/store/fs/ranges.rs | 40 +++++--- iroh-gossip/src/net.rs | 30 ++++-- iroh-gossip/src/net/util.rs | 13 ++- iroh-gossip/src/proto/state.rs | 5 + iroh-gossip/src/proto/topic.rs | 25 ++++- iroh/src/client/blobs.rs | 83 +++++++++++----- iroh/src/client/blobs/batch.rs | 19 +++- iroh/src/client/tags.rs | 11 ++- iroh/src/node/rpc.rs | 142 ++++------------------------ iroh/src/rpc_protocol.rs | 96 +++++++------------ iroh/tests/sync.rs | 38 ++++++++ 19 files changed, 398 insertions(+), 324 deletions(-) diff --git a/iroh-blobs/src/export.rs b/iroh-blobs/src/export.rs index 75b282fd6c..cdbda28881 100644 --- a/iroh-blobs/src/export.rs +++ b/iroh-blobs/src/export.rs @@ -46,7 +46,7 @@ pub async fn export_collection( progress: impl ProgressSender + IdGenerator, ) -> anyhow::Result<()> { tokio::fs::create_dir_all(&outpath).await?; - let collection = Collection::load(db, &hash).await?; + let collection = Collection::load_db(db, &hash).await?; for (name, hash) in collection.into_iter() { #[allow(clippy::needless_borrow)] let path = outpath.join(pathbuf_from_name(&name)); diff --git a/iroh-blobs/src/format/collection.rs b/iroh-blobs/src/format/collection.rs index 2e4966308f..cdf4448e98 100644 --- a/iroh-blobs/src/format/collection.rs +++ b/iroh-blobs/src/format/collection.rs @@ -1,5 +1,5 @@ //! The collection type used by iroh -use std::collections::BTreeMap; +use std::{collections::BTreeMap, future::Future}; use anyhow::Context; use bao_tree::blake3; @@ -64,6 +64,12 @@ impl IntoIterator for Collection { } } +/// A simple store trait for loading blobs +pub trait SimpleStore { + /// Load a blob from the store + fn load(&self, hash: Hash) -> impl Future> + Send + '_; +} + /// Metadata for a collection /// /// This is the wire format for the metadata blob. @@ -160,11 +166,25 @@ impl Collection { Ok((collection, res, stats)) } + /// Create a new collection from a hash sequence and metadata. + pub async fn load(root: Hash, store: &impl SimpleStore) -> anyhow::Result { + let hs = store.load(root).await?; + let hs = HashSeq::try_from(hs)?; + let meta_hash = hs.iter().next().context("empty hash seq")?; + let meta = store.load(meta_hash).await?; + let meta: CollectionMeta = postcard::from_bytes(&meta)?; + anyhow::ensure!( + meta.names.len() + 1 == hs.len(), + "names and links length mismatch" + ); + Ok(Self::from_parts(hs.into_iter(), meta)) + } + /// Load a collection from a store given a root hash /// /// This assumes that both the links and the metadata of the collection is stored in the store. /// It does not require that all child blobs are stored in the store. 
- pub async fn load(db: &D, root: &Hash) -> anyhow::Result + pub async fn load_db(db: &D, root: &Hash) -> anyhow::Result where D: crate::store::Map, { diff --git a/iroh-cli/src/commands/blob.rs b/iroh-cli/src/commands/blob.rs index 978918b03c..2a802bd7be 100644 --- a/iroh-cli/src/commands/blob.rs +++ b/iroh-cli/src/commands/blob.rs @@ -472,7 +472,7 @@ impl ListCommands { } } Self::Collections => { - let mut response = iroh.blobs.list_collections().await?; + let mut response = iroh.blobs.list_collections()?; while let Some(item) = response.next().await { let CollectionInfo { tag, diff --git a/iroh-docs/Cargo.toml b/iroh-docs/Cargo.toml index 005d2e1ea3..f08c97fee0 100644 --- a/iroh-docs/Cargo.toml +++ b/iroh-docs/Cargo.toml @@ -23,7 +23,7 @@ ed25519-dalek = { version = "2.0.0", features = ["serde", "rand_core"] } flume = "0.11" futures-buffered = "0.2.4" futures-lite = "2.3.0" -futures-util = { version = "0.3.25", optional = true } +futures-util = { version = "0.3.25" } hex = "0.4" iroh-base = { version = "0.17.0", path = "../iroh-base" } iroh-blobs = { version = "0.17.0", path = "../iroh-blobs", optional = true, features = ["downloader"] } @@ -42,7 +42,7 @@ serde = { version = "1.0.164", features = ["derive"] } strum = { version = "0.25", features = ["derive"] } tempfile = { version = "3.4" } thiserror = "1" -tokio = { version = "1", features = ["sync"] } +tokio = { version = "1", features = ["sync", "rt", "time", "macros"] } tokio-stream = { version = "0.1", optional = true, features = ["sync"]} tokio-util = { version = "0.7", optional = true, features = ["codec", "io-util", "io"] } tracing = "0.1" @@ -57,7 +57,7 @@ test-strategy = "0.3.1" [features] default = ["net", "metrics", "engine"] -net = ["dep:iroh-net", "tokio/io-util", "dep:tokio-stream", "dep:tokio-util", "dep:futures-util"] +net = ["dep:iroh-net", "tokio/io-util", "dep:tokio-stream", "dep:tokio-util"] metrics = ["dep:iroh-metrics"] engine = ["net", "dep:iroh-gossip", "dep:iroh-blobs"] diff --git a/iroh-docs/src/actor.rs b/iroh-docs/src/actor.rs index bbe91181cb..a48e8f55b3 100644 --- a/iroh-docs/src/actor.rs +++ b/iroh-docs/src/actor.rs @@ -10,9 +10,10 @@ use std::{ use anyhow::{anyhow, Context, Result}; use bytes::Bytes; +use futures_util::FutureExt; use iroh_base::hash::Hash; use serde::{Deserialize, Serialize}; -use tokio::sync::oneshot; +use tokio::{sync::oneshot, task::JoinSet}; use tracing::{debug, error, error_span, trace, warn}; use crate::{ @@ -253,6 +254,7 @@ impl SyncHandle { states: Default::default(), action_rx, content_status_callback, + tasks: Default::default(), }; let join_handle = std::thread::Builder::new() .name("sync-actor".to_string()) @@ -570,22 +572,37 @@ struct Actor { states: OpenReplicas, action_rx: flume::Receiver, content_status_callback: Option, + tasks: JoinSet<()>, } impl Actor { - fn run(mut self) -> Result<()> { + fn run(self) -> Result<()> { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_time() + .build()?; + let local_set = tokio::task::LocalSet::new(); + local_set.block_on(&rt, async move { self.run_async().await }) + } + async fn run_async(mut self) -> Result<()> { loop { - let action = match self.action_rx.recv_timeout(MAX_COMMIT_DELAY) { - Ok(action) => action, - Err(flume::RecvTimeoutError::Timeout) => { + let timeout = tokio::time::sleep(MAX_COMMIT_DELAY); + tokio::pin!(timeout); + let action = tokio::select! 
{ + _ = &mut timeout => { if let Err(cause) = self.store.flush() { error!(?cause, "failed to flush store"); } continue; } - Err(flume::RecvTimeoutError::Disconnected) => { - debug!("action channel disconnected"); - break; + action = self.action_rx.recv_async() => { + match action { + Ok(action) => action, + Err(flume::RecvError::Disconnected) => { + debug!("action channel disconnected"); + break; + } + + } } }; trace!(%action, "tick"); @@ -607,6 +624,7 @@ impl Actor { } } } + self.tasks.abort_all(); debug!("shutdown"); Ok(()) } @@ -636,13 +654,21 @@ impl Actor { } Ok(id) }), - Action::ListAuthors { reply } => iter_to_channel( - reply, - self.store + Action::ListAuthors { reply } => { + let iter = self + .store .list_authors() - .map(|a| a.map(|a| a.map(|a| a.id()))), - ), - Action::ListReplicas { reply } => iter_to_channel(reply, self.store.list_namespaces()), + .map(|a| a.map(|a| a.map(|a| a.id()))); + self.tasks + .spawn_local(iter_to_channel_async(reply, iter).map(|_| ())); + Ok(()) + } + Action::ListReplicas { reply } => { + let iter = self.store.list_namespaces(); + self.tasks + .spawn_local(iter_to_channel_async(reply, iter).map(|_| ())); + Ok(()) + } Action::ContentHashes { reply } => { send_reply_with(reply, self, |this| this.store.content_hashes()) } @@ -657,7 +683,9 @@ impl Actor { ) -> Result<(), SendReplyError> { match action { ReplicaAction::Open { reply, opts } => { + tracing::trace!("open in"); let res = self.open(namespace, opts); + tracing::trace!("open out"); send_reply(reply, res) } ReplicaAction::Close { reply } => { @@ -759,7 +787,9 @@ impl Actor { .states .ensure_open(&namespace) .and_then(|_| self.store.get_many(namespace, query)); - iter_to_channel(reply, iter) + self.tasks + .spawn_local(iter_to_channel_async(reply, iter).map(|_| ())); + Ok(()) } ReplicaAction::DropReplica { reply } => send_reply_with(reply, self, |this| { this.close(namespace); @@ -921,15 +951,18 @@ impl OpenReplicas { } } -fn iter_to_channel( +async fn iter_to_channel_async( channel: flume::Sender>, iter: Result>>, ) -> Result<(), SendReplyError> { match iter { - Err(err) => channel.send(Err(err)).map_err(send_reply_error)?, + Err(err) => channel + .send_async(Err(err)) + .await + .map_err(send_reply_error)?, Ok(iter) => { for item in iter { - channel.send(item).map_err(send_reply_error)?; + channel.send_async(item).await.map_err(send_reply_error)?; } } } diff --git a/iroh-docs/src/engine/live.rs b/iroh-docs/src/engine/live.rs index 5c7608722b..88f4b39e22 100644 --- a/iroh-docs/src/engine/live.rs +++ b/iroh-docs/src/engine/live.rs @@ -543,7 +543,7 @@ impl LiveActor { match details .outcome .heads_received - .encode(Some(iroh_gossip::net::MAX_MESSAGE_SIZE)) + .encode(Some(self.gossip.max_message_size())) { Err(err) => warn!(?err, "Failed to encode author heads for sync report"), Ok(heads) => { diff --git a/iroh-docs/src/store/fs.rs b/iroh-docs/src/store/fs.rs index ab1171b756..981143ca86 100644 --- a/iroh-docs/src/store/fs.rs +++ b/iroh-docs/src/store/fs.rs @@ -154,6 +154,22 @@ impl Store { } } + /// Get an owned read-only snapshot of the database. + /// + /// This will open a new read transaction. The read transaction won't be reused for other + /// reads. + /// + /// This has the side effect of committing any open write transaction, + /// so it can be used as a way to ensure that the data is persisted. 
+ pub fn snapshot_owned(&mut self) -> Result { + // make sure the current transaction is committed + self.flush()?; + assert!(matches!(self.transaction, CurrentTransaction::None)); + let tx = self.db.begin_read()?; + let tables = ReadOnlyTables::new(tx)?; + Ok(tables) + } + /// Get access to the tables to read from them. /// /// The underlying transaction is a write transaction, but with a non-mut @@ -223,8 +239,6 @@ impl Store { } } -type AuthorsIter = std::vec::IntoIter>; -type NamespaceIter = std::vec::IntoIter>; type PeersIter = std::vec::IntoIter; impl Store { @@ -297,18 +311,16 @@ impl Store { } /// List all replica namespaces in this store. - pub fn list_namespaces(&mut self) -> Result { - // TODO: avoid collect - let tables = self.tables()?; - let namespaces: Vec<_> = tables - .namespaces - .iter()? - .map(|res| { - let capability = parse_capability(res?.1.value())?; - Ok((capability.id(), capability.kind())) - }) - .collect(); - Ok(namespaces.into_iter()) + pub fn list_namespaces( + &mut self, + ) -> Result>> { + let snapshot = self.snapshot()?; + let iter = snapshot.namespaces.range::<&'static [u8; 32]>(..)?; + let iter = iter.map(|res| { + let capability = parse_capability(res?.1.value())?; + Ok((capability.id(), capability.kind())) + }); + Ok(iter) } /// Get an author key from the store. @@ -340,19 +352,16 @@ impl Store { } /// List all author keys in this store. - pub fn list_authors(&mut self) -> Result { - // TODO: avoid collect - let tables = self.tables()?; - let authors: Vec<_> = tables + pub fn list_authors(&mut self) -> Result>> { + let tables = self.snapshot()?; + let iter = tables .authors - .iter()? + .range::<&'static [u8; 32]>(..)? .map(|res| match res { Ok((_key, value)) => Ok(Author::from_bytes(value.value())), Err(err) => Err(err.into()), - }) - .collect(); - - Ok(authors.into_iter()) + }); + Ok(iter) } /// Import a new replica namespace. @@ -413,7 +422,8 @@ impl Store { namespace: NamespaceId, query: impl Into, ) -> Result { - QueryIterator::new(self.tables()?, namespace, query.into()) + let tables = self.snapshot_owned()?; + QueryIterator::new(tables, namespace, query.into()) } /// Get an entry by key and author. @@ -435,13 +445,8 @@ impl Store { /// Get all content hashes of all replicas in the store. pub fn content_hashes(&mut self) -> Result { - // make sure the current transaction is committed - self.flush()?; - assert!(matches!(self.transaction, CurrentTransaction::None)); - let tx = self.db.begin_read()?; - let tables = ReadOnlyTables::new(tx)?; - let records = tables.records; - ContentHashesIterator::all(records) + let tables = self.snapshot_owned()?; + ContentHashesIterator::all(&tables.records) } /// Get the latest entry for each author in a namespace. @@ -870,14 +875,6 @@ impl Iterator for ParentIterator { } } -self_cell::self_cell!( - struct ContentHashesIteratorInner { - owner: RecordsTable, - #[covariant] - dependent: RecordsRange, - } -); - /// Iterator for all content hashes /// /// Note that you might get duplicate hashes. Also, the iterator will keep @@ -886,13 +883,16 @@ self_cell::self_cell!( /// Also, this represents a snapshot of the database at the time of creation. /// It nees a copy of a redb::ReadOnlyTable to be self-contained. #[derive(derive_more::Debug)] -pub struct ContentHashesIterator(#[debug(skip)] ContentHashesIteratorInner); +pub struct ContentHashesIterator { + #[debug(skip)] + range: RecordsRange<'static>, +} impl ContentHashesIterator { /// Create a new iterator over all content hashes. 
- pub fn all(owner: RecordsTable) -> anyhow::Result { - let inner = ContentHashesIteratorInner::try_new(owner, |owner| RecordsRange::all(owner))?; - Ok(Self(inner)) + pub fn all(table: &RecordsTable) -> anyhow::Result { + let range = RecordsRange::all_static(table)?; + Ok(Self { range }) } } @@ -900,7 +900,7 @@ impl Iterator for ContentHashesIterator { type Item = Result; fn next(&mut self) -> Option { - let v = self.0.with_dependent_mut(|_, d| d.next())?; + let v = self.range.next()?; Some(v.map(|e| e.content_hash())) } } diff --git a/iroh-docs/src/store/fs/query.rs b/iroh-docs/src/store/fs/query.rs index a73dbcd8e7..f05b4ecfb3 100644 --- a/iroh-docs/src/store/fs/query.rs +++ b/iroh-docs/src/store/fs/query.rs @@ -3,6 +3,7 @@ use iroh_base::hash::Hash; use crate::{ store::{ + fs::tables::ReadOnlyTables, util::{IndexKind, LatestPerKeySelector, SelectorRes}, AuthorFilter, KeyFilter, Query, }, @@ -12,34 +13,33 @@ use crate::{ use super::{ bounds::{ByKeyBounds, RecordsBounds}, ranges::{RecordsByKeyRange, RecordsRange}, - tables::Tables, RecordsValue, }; /// A query iterator for entry queries. #[derive(Debug)] -pub struct QueryIterator<'a> { - range: QueryRange<'a>, +pub struct QueryIterator { + range: QueryRange, query: Query, offset: u64, count: u64, } #[derive(Debug)] -enum QueryRange<'a> { +enum QueryRange { AuthorKey { - range: RecordsRange<'a>, + range: RecordsRange<'static>, key_filter: KeyFilter, }, KeyAuthor { - range: RecordsByKeyRange<'a>, + range: RecordsByKeyRange, author_filter: AuthorFilter, selector: Option, }, } -impl<'a> QueryIterator<'a> { - pub fn new(tables: &'a Tables<'a>, namespace: NamespaceId, query: Query) -> Result { +impl QueryIterator { + pub fn new(tables: ReadOnlyTables, namespace: NamespaceId, query: Query) -> Result { let index_kind = IndexKind::from(&query); let range = match index_kind { IndexKind::AuthorKey { range, key_filter } => { @@ -53,7 +53,7 @@ impl<'a> QueryIterator<'a> { // no author set => full table scan with the provided key filter AuthorFilter::Any => (RecordsBounds::namespace(namespace), key_filter), }; - let range = RecordsRange::with_bounds(&tables.records, bounds)?; + let range = RecordsRange::with_bounds_static(&tables.records, bounds)?; QueryRange::AuthorKey { range, key_filter: filter, @@ -65,11 +65,8 @@ impl<'a> QueryIterator<'a> { latest_per_key, } => { let bounds = ByKeyBounds::new(namespace, &range); - let range = RecordsByKeyRange::with_bounds( - &tables.records_by_key, - &tables.records, - bounds, - )?; + let range = + RecordsByKeyRange::with_bounds(tables.records_by_key, tables.records, bounds)?; let selector = latest_per_key.then(LatestPerKeySelector::default); QueryRange::KeyAuthor { author_filter, @@ -88,7 +85,7 @@ impl<'a> QueryIterator<'a> { } } -impl<'a> Iterator for QueryIterator<'a> { +impl Iterator for QueryIterator { type Item = Result; fn next(&mut self) -> Option> { diff --git a/iroh-docs/src/store/fs/ranges.rs b/iroh-docs/src/store/fs/ranges.rs index 9219c620ac..f28d95ae63 100644 --- a/iroh-docs/src/store/fs/ranges.rs +++ b/iroh-docs/src/store/fs/ranges.rs @@ -1,6 +1,6 @@ //! 
Ranges and helpers for working with [`redb`] tables -use redb::{Key, Range, ReadableTable, Table, Value}; +use redb::{Key, Range, ReadOnlyTable, ReadableTable, Value}; use crate::{store::SortDirection, SignedEntry}; @@ -74,14 +74,9 @@ impl<'a, K: Key + 'static, V: Value + 'static> RangeExt for Range<'a, K, V #[debug("RecordsRange")] pub struct RecordsRange<'a>(Range<'a, RecordsId<'static>, RecordsValue<'static>>); -impl<'a> RecordsRange<'a> { - pub(super) fn all( - records: &'a impl ReadableTable, RecordsValue<'static>>, - ) -> anyhow::Result { - let range = records.range::>(..)?; - Ok(Self(range)) - } +// pub type RecordsRange<'a> = Range<'a, RecordsId<'static>, RecordsValue<'static>>; +impl<'a> RecordsRange<'a> { pub(super) fn with_bounds( records: &'a impl ReadableTable, RecordsValue<'static>>, bounds: RecordsBounds, @@ -90,6 +85,7 @@ impl<'a> RecordsRange<'a> { Ok(Self(range)) } + // /// Get the next item in the range. /// /// Omit items for which the `matcher` function returns false. @@ -103,6 +99,22 @@ impl<'a> RecordsRange<'a> { } } +impl RecordsRange<'static> { + pub(super) fn all_static( + records: &ReadOnlyTable, RecordsValue<'static>>, + ) -> anyhow::Result { + let range = records.range::>(..)?; + Ok(Self(range)) + } + pub(super) fn with_bounds_static( + records: &ReadOnlyTable, RecordsValue<'static>>, + bounds: RecordsBounds, + ) -> anyhow::Result { + let range = records.range(bounds.as_ref())?; + Ok(Self(range)) + } +} + impl<'a> Iterator for RecordsRange<'a> { type Item = anyhow::Result; fn next(&mut self) -> Option { @@ -112,15 +124,15 @@ impl<'a> Iterator for RecordsRange<'a> { #[derive(derive_more::Debug)] #[debug("RecordsByKeyRange")] -pub struct RecordsByKeyRange<'a> { - records_table: &'a Table<'a, RecordsId<'static>, RecordsValue<'static>>, - by_key_range: Range<'a, RecordsByKeyId<'static>, ()>, +pub struct RecordsByKeyRange { + records_table: ReadOnlyTable, RecordsValue<'static>>, + by_key_range: Range<'static, RecordsByKeyId<'static>, ()>, } -impl<'a> RecordsByKeyRange<'a> { +impl RecordsByKeyRange { pub fn with_bounds( - records_by_key_table: &'a impl ReadableTable, ()>, - records_table: &'a Table<'a, RecordsId<'static>, RecordsValue<'static>>, + records_by_key_table: ReadOnlyTable, ()>, + records_table: ReadOnlyTable, RecordsValue<'static>>, bounds: ByKeyBounds, ) -> anyhow::Result { let by_key_range = records_by_key_table.range(bounds.as_ref())?; diff --git a/iroh-gossip/src/net.rs b/iroh-gossip/src/net.rs index 4083e3a113..756ccfee68 100644 --- a/iroh-gossip/src/net.rs +++ b/iroh-gossip/src/net.rs @@ -26,10 +26,6 @@ pub mod util; /// ALPN protocol name pub const GOSSIP_ALPN: &[u8] = b"/iroh-gossip/0"; -/// Maximum message size is limited currently. The limit is more-or-less arbitrary. -// TODO: Make the limit configurable. 
-pub const MAX_MESSAGE_SIZE: usize = 4096; - /// Channel capacity for all subscription broadcast channels (single) const SUBSCRIBE_ALL_CAP: usize = 2048; /// Channel capacity for topic subscription broadcast channels (one per topic) @@ -76,6 +72,7 @@ pub struct Gossip { to_actor_tx: mpsc::Sender, on_endpoints_tx: mpsc::Sender>, _actor_handle: Arc>>, + max_message_size: usize, } impl Gossip { @@ -94,6 +91,7 @@ impl Gossip { let (on_endpoints_tx, on_endpoints_rx) = mpsc::channel(ON_ENDPOINTS_CAP); let me = endpoint.node_id().fmt_short(); + let max_message_size = state.max_message_size(); let actor = Actor { endpoint, state, @@ -125,9 +123,15 @@ impl Gossip { to_actor_tx, on_endpoints_tx, _actor_handle: Arc::new(actor_handle), + max_message_size, } } + /// Get the maximum message size configured for this gossip actor. + pub fn max_message_size(&self) -> usize { + self.max_message_size + } + /// Join a topic and connect to peers. /// /// @@ -427,12 +431,23 @@ impl Actor { let (send_tx, send_rx) = mpsc::channel(SEND_QUEUE_CAP); self.conn_send_tx.insert(peer_id, send_tx.clone()); + let max_message_size = self.state.max_message_size(); + // Spawn a task for this connection let in_event_tx = self.in_event_tx.clone(); tokio::spawn( async move { debug!("connection established"); - match connection_loop(peer_id, conn, origin, send_rx, &in_event_tx).await { + match connection_loop( + peer_id, + conn, + origin, + send_rx, + &in_event_tx, + max_message_size, + ) + .await + { Ok(()) => { debug!("connection closed without error") } @@ -605,6 +620,7 @@ async fn connection_loop( origin: ConnOrigin, mut send_rx: mpsc::Receiver, in_event_tx: &mpsc::Sender, + max_message_size: usize, ) -> anyhow::Result<()> { let (mut send, mut recv) = match origin { ConnOrigin::Accept => conn.accept_bi().await?, @@ -621,10 +637,10 @@ async fn connection_loop( // but the other side may still want to use it to // send data to us. Some(msg) = send_rx.recv(), if !send_rx.is_closed() => { - write_message(&mut send, &mut send_buf, &msg).await? + write_message(&mut send, &mut send_buf, &msg, max_message_size).await? } - msg = read_message(&mut recv, &mut recv_buf) => { + msg = read_message(&mut recv, &mut recv_buf, max_message_size) => { let msg = msg?; match msg { None => break, diff --git a/iroh-gossip/src/net/util.rs b/iroh-gossip/src/net/util.rs index 1101300292..2a45fa4961 100644 --- a/iroh-gossip/src/net/util.rs +++ b/iroh-gossip/src/net/util.rs @@ -11,16 +11,17 @@ use tokio::{ use crate::proto::util::TimerMap; -use super::{ProtoMessage, MAX_MESSAGE_SIZE}; +use super::ProtoMessage; /// Write a `ProtoMessage` as a length-prefixed, postcard-encoded message. pub async fn write_message( writer: &mut W, buffer: &mut BytesMut, frame: &ProtoMessage, + max_message_size: usize, ) -> Result<()> { let len = postcard::experimental::serialized_size(&frame)?; - ensure!(len < MAX_MESSAGE_SIZE); + ensure!(len < max_message_size); buffer.clear(); buffer.resize(len, 0u8); let slice = postcard::to_slice(&frame, buffer)?; @@ -33,8 +34,9 @@ pub async fn write_message( pub async fn read_message( reader: impl AsyncRead + Unpin, buffer: &mut BytesMut, + max_message_size: usize, ) -> Result> { - match read_lp(reader, buffer).await? { + match read_lp(reader, buffer, max_message_size).await? 
{ None => Ok(None), Some(data) => { let message = postcard::from_bytes(&data)?; @@ -52,6 +54,7 @@ pub async fn read_message( pub async fn read_lp( mut reader: impl AsyncRead + Unpin, buffer: &mut BytesMut, + max_message_size: usize, ) -> Result> { let size = match reader.read_u32().await { Ok(size) => size, @@ -60,8 +63,8 @@ pub async fn read_lp( }; let mut reader = reader.take(size as u64); let size = usize::try_from(size).context("frame larger than usize")?; - if size > MAX_MESSAGE_SIZE { - bail!("Incoming message exceeds MAX_MESSAGE_SIZE"); + if size > max_message_size { + bail!("Incoming message exceeds the maximum message size of {max_message_size} bytes"); } buffer.reserve(size); loop { diff --git a/iroh-gossip/src/proto/state.rs b/iroh-gossip/src/proto/state.rs index f8b1ebd1e3..a841342014 100644 --- a/iroh-gossip/src/proto/state.rs +++ b/iroh-gossip/src/proto/state.rs @@ -196,6 +196,11 @@ impl State { .unwrap_or(false) } + /// Returns the maximum message size configured in the gossip protocol. + pub fn max_message_size(&self) -> usize { + self.config.max_message_size + } + /// Handle an [`InEvent`] /// /// This returns an iterator of [`OutEvent`]s that must be processed. diff --git a/iroh-gossip/src/proto/topic.rs b/iroh-gossip/src/proto/topic.rs index df36578dbb..0ac50d4f1f 100644 --- a/iroh-gossip/src/proto/topic.rs +++ b/iroh-gossip/src/proto/topic.rs @@ -18,6 +18,10 @@ use super::{ }; use super::{PeerData, PeerIdentity}; +/// The default maximum size in bytes for a gossip message. +/// This is a sane but arbitrary default and can be changed in the [`Config`]. +pub const DEFAULT_MAX_MESSAGE_SIZE: usize = 4096; + /// Input event to the topic state handler. #[derive(Clone, Debug)] pub enum InEvent { @@ -170,13 +174,32 @@ impl IO for VecDeque> { self.push_back(event.into()) } } + /// Protocol configuration -#[derive(Clone, Default, Debug)] +#[derive(Clone, Debug)] pub struct Config { /// Configuration for the swarm membership layer pub membership: hyparview::Config, /// Configuration for the gossip broadcast layer pub broadcast: plumtree::Config, + /// Max message size in bytes. + /// + /// This size should be the same across a network to ensure all nodes can transmit and read large messages. + /// + /// At minimum, this size should be large enough to send gossip control messages. This can vary, depending on the size of the [`PeerIdentity`] you use and the size of the [`PeerData`] you transmit in your messages. + /// + /// The default is [`DEFAULT_MAX_MESSAGE_SIZE`]. + pub max_message_size: usize, +} + +impl Default for Config { + fn default() -> Self { + Self { + membership: Default::default(), + broadcast: Default::default(), + max_message_size: DEFAULT_MAX_MESSAGE_SIZE, + } + } } /// The topic state maintains the swarm membership and broadcast tree for a particular topic. 
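With the limit moved into Config, a deployment that needs larger gossip messages raises it at construction time; every node in a topic has to agree on the value, since it bounds both writes and reads. A sketch, assuming Config is reachable under the module path shown above:

use iroh_gossip::proto::topic::{Config, DEFAULT_MAX_MESSAGE_SIZE};

fn large_message_config() -> Config {
    Config {
        // four times the 4 KiB default; all peers must use the same limit
        max_message_size: 4 * DEFAULT_MAX_MESSAGE_SIZE,
        ..Config::default()
    }
}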
diff --git a/iroh/src/client/blobs.rs b/iroh/src/client/blobs.rs index 1cabcdb008..92b1ae88a1 100644 --- a/iroh/src/client/blobs.rs +++ b/iroh/src/client/blobs.rs @@ -13,10 +13,11 @@ use anyhow::{anyhow, Context as _, Result}; use bytes::Bytes; use futures_lite::{Stream, StreamExt}; use futures_util::SinkExt; +use genawaiter::sync::{Co, Gen}; use iroh_base::{node_addr::AddrInfoOptions, ticket::BlobTicket}; use iroh_blobs::{ export::ExportProgress as BytesExportProgress, - format::collection::Collection, + format::collection::{Collection, SimpleStore}, get::db::DownloadProgress as BytesDownloadProgress, store::{BaoBlobSize, ConsistencyCheckProgress, ExportFormat, ExportMode, ValidateProgress}, BlobFormat, Hash, Tag, @@ -32,10 +33,9 @@ use tracing::warn; use crate::rpc_protocol::{ BatchCreateRequest, BatchCreateResponse, BlobAddPathRequest, BlobAddStreamRequest, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, - BlobExportRequest, BlobGetCollectionRequest, BlobGetCollectionResponse, - BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, - BlobReadAtResponse, BlobStatusRequest, BlobValidateRequest, CreateCollectionRequest, - CreateCollectionResponse, NodeStatusRequest, RpcService, SetTagOption, + BlobExportRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, + BlobReadAtResponse, BlobStatusRequest, BlobValidateRequest, NodeStatusRequest, RpcService, + SetTagOption, }; use super::{flatten, Iroh}; @@ -166,17 +166,19 @@ where pub async fn create_collection( &self, collection: Collection, - tag: SetTagOption, + opts: SetTagOption, tags_to_delete: Vec, ) -> anyhow::Result<(Hash, Tag)> { - let CreateCollectionResponse { hash, tag } = self - .rpc - .rpc(CreateCollectionRequest { - collection, - tag, - tags_to_delete, - }) - .await??; + let batch = self.batch().await?; + let temp_tag = batch.add_collection(collection).await?; + let hash = *temp_tag.hash(); + let tag = batch.upgrade_with_opts(temp_tag, opts).await?; + if !tags_to_delete.is_empty() { + let tags = self.tags_client(); + for tag in tags_to_delete { + tags.delete(tag).await?; + } + } Ok((hash, tag)) } @@ -360,18 +362,35 @@ where /// Read the content of a collection. pub async fn get_collection(&self, hash: Hash) -> Result { - let BlobGetCollectionResponse { collection } = - self.rpc.rpc(BlobGetCollectionRequest { hash }).await??; - Ok(collection) + Collection::load(hash, self).await } /// List all collections. - pub async fn list_collections(&self) -> Result>> { - let stream = self - .rpc - .server_streaming(BlobListCollectionsRequest) - .await?; - Ok(flatten(stream)) + pub fn list_collections(&self) -> Result>> { + let this = self.clone(); + Ok(Gen::new(|co| async move { + if let Err(cause) = this.list_collections_impl(&co).await { + co.yield_(Err(cause)).await; + } + })) + } + + async fn list_collections_impl(&self, co: &Co>) -> Result<()> { + let tags = self.tags_client(); + let mut tags = tags.list_hash_seq().await?; + while let Some(tag) = tags.next().await { + let tag = tag?; + if let Ok(collection) = self.get_collection(tag.hash).await { + let info = CollectionInfo { + tag: tag.name, + hash: tag.hash, + total_blobs_count: Some(collection.len() as u64 + 1), + total_blobs_size: Some(0), + }; + co.yield_(Ok(info)).await; + } + } + Ok(()) } /// Delete a blob. 
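The reworked list_collections above returns a generator-backed stream instead of calling a dedicated server-streaming RPC. The genawaiter pattern it relies on turns an async closure into a Stream without a channel or a spawned task; a reduced sketch of the same shape, with an illustrative item type (this assumes genawaiter's futures03 Stream impl, as the surrounding code does):

use futures_lite::Stream;
use genawaiter::sync::Gen;

fn numbers() -> impl Stream<Item = u32> {
    Gen::new(|co| async move {
        for i in 0..3 {
            // each yield suspends until the consumer polls the stream
            co.yield_(i).await;
        }
    })
}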
@@ -393,6 +412,22 @@ where Ok(ticket) } + + fn tags_client(&self) -> crate::client::tags::Client { + crate::client::tags::Client { + rpc: self.rpc.clone(), + } + } +} + +impl SimpleStore for Client +where + C: ServiceConnection, +{ + async fn load(&self, hash: Hash) -> anyhow::Result { + let mut reader = self.read(hash).await?; + Ok(reader.read_to_bytes().await?) + } } /// Whether to wrap the added data in a collection. @@ -961,7 +996,7 @@ mod tests { .create_collection(collection, SetTagOption::Auto, tags) .await?; - let collections: Vec<_> = client.blobs.list_collections().await?.try_collect().await?; + let collections: Vec<_> = client.blobs.list_collections()?.try_collect().await?; assert_eq!(collections.len(), 1); { diff --git a/iroh/src/client/blobs/batch.rs b/iroh/src/client/blobs/batch.rs index b82ab1788b..d6cd9fb738 100644 --- a/iroh/src/client/blobs/batch.rs +++ b/iroh/src/client/blobs/batch.rs @@ -10,8 +10,11 @@ use futures_buffered::BufferedStreamExt; use futures_lite::StreamExt; use futures_util::{sink::Buffer, FutureExt, SinkExt, Stream}; use iroh_blobs::{ - format::collection::Collection, provider::BatchAddPathProgress, store::ImportMode, - util::TagDrop, BlobFormat, HashAndFormat, Tag, TempTag, + format::collection::Collection, + provider::BatchAddPathProgress, + store::ImportMode, + util::{SetTagOption, TagDrop}, + BlobFormat, HashAndFormat, Tag, TempTag, }; use quic_rpc::{client::UpdateSink, RpcClient, ServiceConnection}; use tokio::io::AsyncRead; @@ -429,6 +432,18 @@ impl> Batch { Ok(()) } + /// Upgrade a temp tag to a persistent tag with either a specific name or + /// an automatically generated name. + pub async fn upgrade_with_opts(&self, tt: TempTag, opts: SetTagOption) -> Result { + match opts { + SetTagOption::Auto => self.upgrade(tt).await, + SetTagOption::Named(tag) => { + self.upgrade_to(tt, tag.clone()).await?; + Ok(tag) + } + } + } + /// Creates a temp tag for the given hash and format, without notifying the server. /// /// Caution: only do this for data for which you know the server side has created a temp tag. diff --git a/iroh/src/client/tags.rs b/iroh/src/client/tags.rs index 1e12a55676..8a67c4b58b 100644 --- a/iroh/src/client/tags.rs +++ b/iroh/src/client/tags.rs @@ -20,7 +20,16 @@ where { /// List all tags. pub async fn list(&self) -> Result>> { - let stream = self.rpc.server_streaming(ListTagsRequest).await?; + let stream = self.rpc.server_streaming(ListTagsRequest::all()).await?; + Ok(stream.map(|res| res.map_err(anyhow::Error::from))) + } + + /// List all tags with a hash_seq format. 
+ pub async fn list_hash_seq(&self) -> Result>> { + let stream = self + .rpc + .server_streaming(ListTagsRequest::hash_seq()) + .await?; Ok(stream.map(|res| res.map_err(anyhow::Error::from))) } diff --git a/iroh/src/node/rpc.rs b/iroh/src/node/rpc.rs index 94df3831b1..36bec09477 100644 --- a/iroh/src/node/rpc.rs +++ b/iroh/src/node/rpc.rs @@ -18,7 +18,6 @@ use iroh_blobs::provider::BatchAddPathProgress; use iroh_blobs::store::{ConsistencyCheckProgress, ExportFormat, ImportProgress, MapEntry}; use iroh_blobs::util::progress::ProgressSender; use iroh_blobs::{ - hashseq::parse_hash_seq, provider::AddProgress, store::{Store as BaoStore, ValidateProgress}, util::progress::FlumeProgressSender, @@ -35,9 +34,7 @@ use quic_rpc::{ use tokio_util::task::LocalPoolHandle; use tracing::{debug, info}; -use crate::client::blobs::{ - BlobInfo, BlobStatus, CollectionInfo, DownloadMode, IncompleteBlobInfo, WrapOption, -}; +use crate::client::blobs::{BlobInfo, BlobStatus, DownloadMode, IncompleteBlobInfo, WrapOption}; use crate::client::tags::TagInfo; use crate::client::NodeStatus; use crate::rpc_protocol::{ @@ -46,15 +43,13 @@ use crate::rpc_protocol::{ BatchUpdate, BlobAddPathRequest, BlobAddPathResponse, BlobAddStreamRequest, BlobAddStreamResponse, BlobAddStreamUpdate, BlobConsistencyCheckRequest, BlobDeleteBlobRequest, BlobDownloadRequest, BlobDownloadResponse, BlobExportRequest, BlobExportResponse, - BlobGetCollectionRequest, BlobGetCollectionResponse, BlobListCollectionsRequest, BlobListIncompleteRequest, BlobListRequest, BlobReadAtRequest, BlobReadAtResponse, - BlobStatusRequest, BlobStatusResponse, BlobValidateRequest, CreateCollectionRequest, - CreateCollectionResponse, CreateTagRequest, DocExportFileRequest, DocExportFileResponse, - DocImportFileRequest, DocImportFileResponse, DocSetHashRequest, ListTagsRequest, - NodeAddrRequest, NodeConnectionInfoRequest, NodeConnectionInfoResponse, NodeConnectionsRequest, - NodeConnectionsResponse, NodeIdRequest, NodeRelayRequest, NodeShutdownRequest, - NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, NodeWatchRequest, NodeWatchResponse, - Request, RpcService, SetTagOption, SetTagRequest, + BlobStatusRequest, BlobStatusResponse, BlobValidateRequest, CreateTagRequest, + DocExportFileRequest, DocExportFileResponse, DocImportFileRequest, DocImportFileResponse, + DocSetHashRequest, ListTagsRequest, NodeAddrRequest, NodeConnectionInfoRequest, + NodeConnectionInfoResponse, NodeConnectionsRequest, NodeConnectionsResponse, NodeIdRequest, + NodeRelayRequest, NodeShutdownRequest, NodeStatsRequest, NodeStatsResponse, NodeStatusRequest, + NodeWatchRequest, NodeWatchResponse, Request, RpcService, SetTagOption, SetTagRequest, }; use super::NodeInner; @@ -101,12 +96,6 @@ impl Handler { chan.server_streaming(msg, handler, Self::blob_list_incomplete) .await } - BlobListCollections(msg) => { - chan.server_streaming(msg, handler, Self::blob_list_collections) - .await - } - CreateCollection(msg) => chan.rpc(msg, handler, Self::create_collection).await, - BlobGetCollection(msg) => chan.rpc(msg, handler, Self::blob_get_collection).await, BatchCreateTempTag(msg) => { chan.rpc(msg, handler, Self::batch_create_temp_tag).await } @@ -369,39 +358,6 @@ impl Handler { Ok(()) } - async fn blob_list_collections_impl( - self, - co: &Co>, - ) -> anyhow::Result<()> { - let db = self.inner.db.clone(); - let local = self.inner.rt.clone(); - let tags = db.tags().await.unwrap(); - for item in tags { - let (name, HashAndFormat { hash, format }) = item?; - if !format.is_hash_seq() { - continue; - 
} - let Some(entry) = db.get(&hash).await? else { - continue; - }; - let count = local - .spawn_pinned(|| async move { - let reader = entry.data_reader().await?; - let (_collection, count) = parse_hash_seq(reader).await?; - anyhow::Ok(count) - }) - .await??; - co.yield_(Ok(CollectionInfo { - tag: name, - hash, - total_blobs_count: Some(count), - total_blobs_size: None, - })) - .await; - } - Ok(()) - } - async fn blob_status(self, msg: BlobStatusRequest) -> RpcResult { let entry = self.inner.db.get(&msg.hash).await?; Ok(BlobStatusResponse(match entry { @@ -440,22 +396,25 @@ impl Handler { }) } - fn blob_list_collections( - self, - _msg: BlobListCollectionsRequest, - ) -> impl Stream> + Send + 'static { - Gen::new(move |co| async move { - if let Err(e) = self.blob_list_collections_impl(&co).await { - co.yield_(Err(e.into())).await; - } - }) - } - async fn blob_delete_blob(self, msg: BlobDeleteBlobRequest) -> RpcResult<()> { self.inner.db.delete(vec![msg.hash]).await?; Ok(()) } + fn tags_list_tags(self, msg: ListTagsRequest) -> impl Stream + Send + 'static { + Gen::new(|co| async move { + let tags = self.inner.db.tags().await.unwrap(); + #[allow(clippy::manual_flatten)] + for item in tags { + if let Ok((name, HashAndFormat { hash, format })) = item { + if (format.is_raw() && msg.raw) || (format.is_hash_seq() && msg.hash_seq) { + co.yield_(TagInfo { name, hash, format }).await; + } + } + } + }) + } + async fn tags_set_tag(self, msg: SetTagRequest) -> RpcResult<()> { if let Some(batch) = msg.batch { if let Some(content) = msg.value.as_ref() { @@ -481,19 +440,6 @@ impl Handler { Ok(self.inner.db.create_tag(msg.value).await?) } - fn tags_list_tags(self, _msg: ListTagsRequest) -> impl Stream + Send + 'static { - Gen::new(|co| async move { - let tags = self.inner.db.tags().await.unwrap(); - #[allow(clippy::manual_flatten)] - for item in tags { - if let Ok((name, HashAndFormat { hash, format })) = item { - tracing::info!("{:?} {} {:?}", name, hash, format); - co.yield_(TagInfo { name, hash, format }).await; - } - } - }) - } - /// Invoke validate on the database and stream out the result fn blob_validate( self, @@ -1222,52 +1168,6 @@ impl Handler { let conn_info = self.inner.endpoint.connection_info(node_id); Ok(NodeConnectionInfoResponse { conn_info }) } - - async fn create_collection( - self, - req: CreateCollectionRequest, - ) -> RpcResult { - let CreateCollectionRequest { - collection, - tag, - tags_to_delete, - } = req; - - let temp_tag = collection.store(&self.inner.db).await?; - let hash_and_format = temp_tag.inner(); - let HashAndFormat { hash, .. 
} = *hash_and_format; - let tag = match tag { - SetTagOption::Named(tag) => { - self.inner - .db - .set_tag(tag.clone(), Some(*hash_and_format)) - .await?; - tag - } - SetTagOption::Auto => self.inner.db.create_tag(*hash_and_format).await?, - }; - - for tag in tags_to_delete { - self.inner.db.set_tag(tag, None).await?; - } - - Ok(CreateCollectionResponse { hash, tag }) - } - - async fn blob_get_collection( - self, - req: BlobGetCollectionRequest, - ) -> RpcResult { - let hash = req.hash; - let db = self.inner.db.clone(); - let collection = self - .rt() - .spawn_pinned(move || async move { Collection::load(&db, &hash).await }) - .await - .map_err(|_| anyhow!("join failed"))??; - - Ok(BlobGetCollectionResponse { collection }) - } } async fn download( diff --git a/iroh/src/rpc_protocol.rs b/iroh/src/rpc_protocol.rs index 2777e99cd3..1a808ec1af 100644 --- a/iroh/src/rpc_protocol.rs +++ b/iroh/src/rpc_protocol.rs @@ -14,7 +14,6 @@ use derive_more::{From, TryInto}; use iroh_base::node_addr::AddrInfoOptions; pub use iroh_blobs::{export::ExportProgress, get::db::DownloadProgress, BlobFormat, Hash}; use iroh_blobs::{ - format::collection::Collection, provider::BatchAddPathProgress, store::{BaoBlobSize, ConsistencyCheckProgress, ImportMode}, util::Tag, @@ -46,7 +45,7 @@ pub use iroh_blobs::{provider::AddProgress, store::ValidateProgress}; use iroh_docs::engine::LiveEvent; use crate::client::{ - blobs::{BlobInfo, BlobStatus, CollectionInfo, DownloadMode, IncompleteBlobInfo, WrapOption}, + blobs::{BlobInfo, BlobStatus, DownloadMode, IncompleteBlobInfo, WrapOption}, docs::{ImportProgress, ShareMode}, tags::TagInfo, NodeStatus, @@ -251,22 +250,39 @@ impl ServerStreamingMsg for BlobListIncompleteRequest { /// /// Lists all collections that have been explicitly added to the database. #[derive(Debug, Serialize, Deserialize)] -pub struct BlobListCollectionsRequest; - -impl Msg for BlobListCollectionsRequest { - type Pattern = ServerStreaming; -} - -impl ServerStreamingMsg for BlobListCollectionsRequest { - type Response = RpcResult; +pub struct ListTagsRequest { + /// List raw tags + pub raw: bool, + /// List hash seq tags + pub hash_seq: bool, +} + +impl ListTagsRequest { + /// List all tags + pub fn all() -> Self { + Self { + raw: true, + hash_seq: true, + } + } + + /// List raw tags + pub fn raw() -> Self { + Self { + raw: true, + hash_seq: false, + } + } + + /// List hash seq tags + pub fn hash_seq() -> Self { + Self { + raw: false, + hash_seq: true, + } + } } -/// List all collections -/// -/// Lists all collections that have been explicitly added to the database. -#[derive(Debug, Serialize, Deserialize)] -pub struct ListTagsRequest; - impl Msg for ListTagsRequest { type Pattern = ServerStreaming; } @@ -314,48 +330,6 @@ impl RpcMsg for CreateTagRequest { type Response = RpcResult; } -/// Get a collection -#[derive(Debug, Serialize, Deserialize)] -pub struct BlobGetCollectionRequest { - /// Hash of the collection - pub hash: Hash, -} - -impl RpcMsg for BlobGetCollectionRequest { - type Response = RpcResult; -} - -/// The response for a `BlobGetCollectionRequest`. -#[derive(Debug, Serialize, Deserialize)] -pub struct BlobGetCollectionResponse { - /// The collection. - pub collection: Collection, -} - -/// Create a collection. -#[derive(Debug, Serialize, Deserialize)] -pub struct CreateCollectionRequest { - /// The collection - pub collection: Collection, - /// Tag option. - pub tag: SetTagOption, - /// Tags that should be deleted after creation. 
- pub tags_to_delete: Vec, -} - -/// A response to a create collection request -#[derive(Debug, Serialize, Deserialize)] -pub struct CreateCollectionResponse { - /// The resulting hash. - pub hash: Hash, - /// The resulting tag. - pub tag: Tag, -} - -impl RpcMsg for CreateCollectionRequest { - type Response = RpcResult; -} - /// List connection information about all the nodes we know about /// /// These can be nodes that we have explicitly connected to or nodes @@ -1203,12 +1177,9 @@ pub enum Request { BlobList(BlobListRequest), BlobStatus(BlobStatusRequest), BlobListIncomplete(BlobListIncompleteRequest), - BlobListCollections(BlobListCollectionsRequest), BlobDeleteBlob(BlobDeleteBlobRequest), BlobValidate(BlobValidateRequest), BlobFsck(BlobConsistencyCheckRequest), - CreateCollection(CreateCollectionRequest), - BlobGetCollection(BlobGetCollectionRequest), BatchCreate(BatchCreateRequest), BatchUpdate(BatchUpdate), @@ -1272,13 +1243,10 @@ pub enum Response { BlobList(RpcResult), BlobStatus(RpcResult), BlobListIncomplete(RpcResult), - BlobListCollections(RpcResult), BlobDownload(BlobDownloadResponse), BlobFsck(ConsistencyCheckProgress), BlobExport(BlobExportResponse), BlobValidate(ValidateProgress), - CreateCollection(RpcResult), - BlobGetCollection(RpcResult), BatchCreate(BatchCreateResponse), BatchAddStream(BatchAddStreamResponse), diff --git a/iroh/tests/sync.rs b/iroh/tests/sync.rs index 556f5829a7..afa2591588 100644 --- a/iroh/tests/sync.rs +++ b/iroh/tests/sync.rs @@ -973,6 +973,44 @@ async fn sync_big() -> Result<()> { Ok(()) } +#[tokio::test] +#[cfg(feature = "test-utils")] +async fn test_list_docs_stream() -> Result<()> { + let node = Node::memory() + .node_discovery(iroh::node::DiscoveryConfig::None) + .relay_mode(iroh::net::relay::RelayMode::Disabled) + .spawn() + .await?; + let count = 200; + + // create docs + for _i in 0..count { + let doc = node.docs.create().await?; + doc.close().await?; + } + + // create doc stream + let mut stream = node.docs.list().await?; + + // process each doc and call into the docs actor. + // this makes sure that we don't deadlock the docs actor. + let mut i = 0; + let fut = async { + while let Some((id, _)) = stream.try_next().await.unwrap() { + let _doc = node.docs.open(id).await.unwrap().unwrap(); + i += 1; + } + }; + + tokio::time::timeout(Duration::from_secs(2), fut) + .await + .expect("not to timeout"); + + assert_eq!(i, count); + + Ok(()) +} + /// Get all entries of a document. async fn get_all(doc: &MemDoc) -> anyhow::Result> { let entries = doc.get_many(Query::all()).await?;